summaryrefslogtreecommitdiffstats
path: root/arch/arm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm')
-rw-r--r--arch/arm/Kconfig36
-rw-r--r--arch/arm/Kconfig.debug1
-rw-r--r--arch/arm/Makefile4
-rw-r--r--arch/arm/boot/compressed/Makefile2
-rw-r--r--arch/arm/boot/compressed/head.S4
-rw-r--r--arch/arm/boot/compressed/libfdt_env.h4
-rw-r--r--arch/arm/boot/dts/am57xx-beagle-x15.dts81
-rw-r--r--arch/arm/boot/dts/armada-370-xp.dtsi2
-rw-r--r--arch/arm/boot/dts/armada-370.dtsi8
-rw-r--r--arch/arm/boot/dts/armada-xp-mv78260.dtsi2
-rw-r--r--arch/arm/boot/dts/armada-xp-mv78460.dtsi2
-rw-r--r--arch/arm/boot/dts/armada-xp.dtsi10
-rw-r--r--arch/arm/boot/dts/atlas7.dtsi15
-rw-r--r--arch/arm/boot/dts/dra7.dtsi43
-rw-r--r--arch/arm/boot/dts/dra72-evm.dts110
-rw-r--r--arch/arm/boot/dts/dra72x.dtsi11
-rw-r--r--arch/arm/boot/dts/dra74x.dtsi15
-rw-r--r--arch/arm/boot/dts/dra7xx-clocks.dtsi11
-rw-r--r--arch/arm/boot/dts/exynos4.dtsi2
-rw-r--r--arch/arm/boot/dts/socfpga_arria10.dtsi11
-rw-r--r--arch/arm/common/Makefile1
-rw-r--r--arch/arm/common/mcpm_entry.c281
-rw-r--r--arch/arm/common/mcpm_head.S2
-rw-r--r--arch/arm/common/sa1111.c7
-rw-r--r--arch/arm/common/timer-sp.c304
-rw-r--r--arch/arm/crypto/Kconfig15
-rw-r--r--arch/arm/crypto/Makefile10
-rw-r--r--arch/arm/crypto/aes-ce-core.S7
-rw-r--r--arch/arm/crypto/sha512-armv4.pl649
-rw-r--r--arch/arm/crypto/sha512-armv7-neon.S455
-rw-r--r--arch/arm/crypto/sha512-core.S_shipped1861
-rw-r--r--arch/arm/crypto/sha512-glue.c121
-rw-r--r--arch/arm/crypto/sha512-neon-glue.c98
-rw-r--r--arch/arm/crypto/sha512.h8
-rw-r--r--arch/arm/crypto/sha512_neon_glue.c305
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/assembler.h17
-rw-r--r--arch/arm/include/asm/barrier.h2
-rw-r--r--arch/arm/include/asm/cacheflush.h7
-rw-r--r--arch/arm/include/asm/cmpxchg.h67
-rw-r--r--arch/arm/include/asm/dma.h2
-rw-r--r--arch/arm/include/asm/edac.h5
-rw-r--r--arch/arm/include/asm/entry-macro-multi.S4
-rw-r--r--arch/arm/include/asm/futex.h13
-rw-r--r--arch/arm/include/asm/hardware/arm_timer.h35
-rw-r--r--arch/arm/include/asm/hardware/timer-sp.h23
-rw-r--r--arch/arm/include/asm/hugetlb.h13
-rw-r--r--arch/arm/include/asm/io.h53
-rw-r--r--arch/arm/include/asm/irqflags.h11
-rw-r--r--arch/arm/include/asm/kvm_asm.h2
-rw-r--r--arch/arm/include/asm/kvm_host.h5
-rw-r--r--arch/arm/include/asm/mach/arch.h2
-rw-r--r--arch/arm/include/asm/mcpm.h73
-rw-r--r--arch/arm/include/asm/memory.h16
-rw-r--r--arch/arm/include/asm/mm-arch-hooks.h15
-rw-r--r--arch/arm/include/asm/module.h12
-rw-r--r--arch/arm/include/asm/pci.h10
-rw-r--r--arch/arm/include/asm/perf_event.h7
-rw-r--r--arch/arm/include/asm/pmu.h19
-rw-r--r--arch/arm/include/asm/proc-fns.h7
-rw-r--r--arch/arm/include/asm/smp.h3
-rw-r--r--arch/arm/include/asm/system_info.h1
-rw-r--r--arch/arm/include/asm/topology.h2
-rw-r--r--arch/arm/include/asm/unified.h2
-rw-r--r--arch/arm/include/asm/xen/hypervisor.h8
-rw-r--r--arch/arm/include/asm/xen/page.h1
-rw-r--r--arch/arm/kernel/Makefile5
-rw-r--r--arch/arm/kernel/entry-armv.S14
-rw-r--r--arch/arm/kernel/entry-common.S6
-rw-r--r--arch/arm/kernel/entry-ftrace.S2
-rw-r--r--arch/arm/kernel/entry-v7m.S13
-rw-r--r--arch/arm/kernel/head-nommu.S27
-rw-r--r--arch/arm/kernel/head.S52
-rw-r--r--arch/arm/kernel/module-plts.c183
-rw-r--r--arch/arm/kernel/module.c32
-rw-r--r--arch/arm/kernel/module.lds4
-rw-r--r--arch/arm/kernel/perf_event.c408
-rw-r--r--arch/arm/kernel/perf_event_cpu.c421
-rw-r--r--arch/arm/kernel/perf_event_v6.c49
-rw-r--r--arch/arm/kernel/perf_event_v7.c129
-rw-r--r--arch/arm/kernel/perf_event_xscale.c32
-rw-r--r--arch/arm/kernel/setup.c32
-rw-r--r--arch/arm/kernel/sleep.S4
-rw-r--r--arch/arm/kernel/smp.c10
-rw-r--r--arch/arm/kernel/tcm.c104
-rw-r--r--arch/arm/kernel/traps.c8
-rw-r--r--arch/arm/kvm/Kconfig1
-rw-r--r--arch/arm/kvm/Makefile2
-rw-r--r--arch/arm/kvm/arm.c24
-rw-r--r--arch/arm/kvm/interrupts.S12
-rw-r--r--arch/arm/kvm/interrupts_head.S23
-rw-r--r--arch/arm/kvm/mmu.c14
-rw-r--r--arch/arm/kvm/psci.c18
-rw-r--r--arch/arm/lib/call_with_stack.S2
-rw-r--r--arch/arm/lib/lib1funcs.S4
-rw-r--r--arch/arm/mach-at91/pm.c2
-rw-r--r--arch/arm/mach-davinci/da850.c1
-rw-r--r--arch/arm/mach-davinci/pm_domain.c32
-rw-r--r--arch/arm/mach-exynos/exynos.c21
-rw-r--r--arch/arm/mach-exynos/suspend.c8
-rw-r--r--arch/arm/mach-footbridge/dma.c2
-rw-r--r--arch/arm/mach-gemini/gpio.c4
-rw-r--r--arch/arm/mach-hisi/platmcpm.c133
-rw-r--r--arch/arm/mach-imx/pm-imx5.c2
-rw-r--r--arch/arm/mach-imx/pm-imx6.c2
-rw-r--r--arch/arm/mach-integrator/integrator_ap.c1
-rw-r--r--arch/arm/mach-keystone/keystone.c41
-rw-r--r--arch/arm/mach-keystone/platsmp.c13
-rw-r--r--arch/arm/mach-keystone/pm_domain.c33
-rw-r--r--arch/arm/mach-lpc32xx/clock.c5
-rw-r--r--arch/arm/mach-lpc32xx/irq.c8
-rw-r--r--arch/arm/mach-mvebu/headsmp-a9.S3
-rw-r--r--arch/arm/mach-mvebu/platsmp-a9.c2
-rw-r--r--arch/arm/mach-nspire/nspire.c2
-rw-r--r--arch/arm/mach-omap1/board-nokia770.c2
-rw-r--r--arch/arm/mach-omap1/pm_bus.c37
-rw-r--r--arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c12
-rw-r--r--arch/arm/mach-omap2/display.c32
-rw-r--r--arch/arm/mach-omap2/omap_device.c31
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_7xx_data.c33
-rw-r--r--arch/arm/mach-pxa/eseries.c1
-rw-r--r--arch/arm/mach-pxa/lubbock.c1
-rw-r--r--arch/arm/mach-pxa/tosa.c1
-rw-r--r--arch/arm/mach-realview/core.c13
-rw-r--r--arch/arm/mach-rockchip/platsmp.c3
-rw-r--r--arch/arm/mach-sa1100/Makefile2
-rw-r--r--arch/arm/mach-sa1100/generic.c37
-rw-r--r--arch/arm/mach-sa1100/irq.c178
-rw-r--r--arch/arm/mach-sa1100/neponset.c3
-rw-r--r--arch/arm/mach-shmobile/setup-r8a7740.c55
-rw-r--r--arch/arm/mach-socfpga/pm.c2
-rw-r--r--arch/arm/mach-versatile/core.c12
-rw-r--r--arch/arm/mach-vexpress/spc.c2
-rw-r--r--arch/arm/mm/Kconfig24
-rw-r--r--arch/arm/mm/Makefile3
-rw-r--r--arch/arm/mm/cache-l2x0.c107
-rw-r--r--arch/arm/mm/dma-mapping.c32
-rw-r--r--arch/arm/mm/fault.c2
-rw-r--r--arch/arm/mm/highmem.c3
-rw-r--r--arch/arm/mm/hugetlbpage.c5
-rw-r--r--arch/arm/mm/init.c1
-rw-r--r--arch/arm/mm/mmu.c153
-rw-r--r--arch/arm/mm/nommu.c9
-rw-r--r--arch/arm/mm/proc-v7-2level.S12
-rw-r--r--arch/arm/mm/proc-v7-3level.S14
-rw-r--r--arch/arm/mm/proc-v7.S182
-rw-r--r--arch/arm/mm/proc-v7m.S2
-rw-r--r--arch/arm/mm/pv-fixup-asm.S88
-rw-r--r--arch/arm/net/bpf_jit_32.c10
-rw-r--r--arch/arm/plat-orion/common.c6
-rw-r--r--arch/arm/vdso/Makefile18
-rw-r--r--arch/arm/xen/enlighten.c62
-rw-r--r--arch/arm/xen/mm.c2
-rw-r--r--arch/arm/xen/p2m.c2
154 files changed, 5115 insertions, 2965 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 72c4273de003..a750c1425c3a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -15,6 +15,8 @@ config ARM
select CLONE_BACKWARDS
select CPU_PM if (SUSPEND || CPU_IDLE)
select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select EDAC_SUPPORT
+ select EDAC_ATOMIC_SCRUB
select GENERIC_ALLOCATOR
select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
@@ -31,8 +33,8 @@ config ARM
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
- select HAVE_ARCH_KGDB
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
+ select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
select HAVE_BPF_JIT
@@ -43,7 +45,7 @@ config ARM
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS if MMU
- select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
+ select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
@@ -57,10 +59,10 @@ config ARM
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_KERNEL_XZ
- select HAVE_KPROBES if !XIP_KERNEL
+ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_MEMBLOCK
- select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
+ select HAVE_MOD_ARCH_SPECIFIC
select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
select HAVE_OPTPROBES if !THUMB2_KERNEL
select HAVE_PERF_EVENTS
@@ -171,7 +173,7 @@ config LOCKDEP_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
bool
- default y
+ default !CPU_V7M
config RWSEM_XCHGADD_ALGORITHM
bool
@@ -1008,11 +1010,6 @@ config PLAT_PXA
config PLAT_VERSATILE
bool
-config ARM_TIMER_SP804
- bool
- select CLKSRC_MMIO
- select CLKSRC_OF if OF
-
source "arch/arm/firmware/Kconfig"
source arch/arm/mm/Kconfig
@@ -1340,6 +1337,7 @@ config SMP
depends on GENERIC_CLOCKEVENTS
depends on HAVE_SMP
depends on MMU || ARM_MPU
+ select IRQ_WORK
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, say N. If you have a system with more
@@ -1715,6 +1713,21 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
config ARCH_WANT_GENERAL_HUGETLB
def_bool y
+config ARM_MODULE_PLTS
+ bool "Use PLTs to allow module memory to spill over into vmalloc area"
+ depends on MODULES
+ help
+ Allocate PLTs when loading modules so that jumps and calls whose
+ targets are too far away for their relative offsets to be encoded
+ in the instructions themselves can be bounced via veneers in the
+ module's PLT. This allows modules to be allocated in the generic
+ vmalloc area after the dedicated module memory area has been
+ exhausted. The modules will use slightly more memory, but after
+ rounding up to page size, the actual memory footprint is usually
+ the same.
+
+ Say y if you are getting out of memory errors while loading modules
+
source "mm/Kconfig"
config FORCE_MAX_ZONEORDER
@@ -1985,6 +1998,7 @@ config XIP_PHYS_ADDR
config KEXEC
bool "Kexec system call (EXPERIMENTAL)"
depends on (!SMP || PM_SLEEP_SMP)
+ depends on !CPU_V7M
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index a6b5d0e35968..f1b157971366 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -5,6 +5,7 @@ source "lib/Kconfig.debug"
config ARM_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
+ depends on MMU
select DEBUG_FS
---help---
Say Y here if you want to show the kernel pagetable layout in a
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 2a4fae7e9c44..07ab3d203916 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -19,6 +19,10 @@ LDFLAGS_vmlinux += --be8
LDFLAGS_MODULE += --be8
endif
+ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
+LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds
+endif
+
OBJCOPYFLAGS :=-O binary -R .comment -S
GZFLAGS :=-9
#KBUILD_CFLAGS +=-pipe
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 6e1fb2b2ecc7..7a13aebacf81 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -103,6 +103,8 @@ extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \
hyp-stub.S
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+
ifeq ($(CONFIG_FUNCTION_TRACER),y)
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 2c45b5709fa4..06e983f59980 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -130,7 +130,7 @@ start:
.endr
ARM( mov r0, r0 )
ARM( b 1f )
- THUMB( adr r12, BSYM(1f) )
+ THUMB( badr r12, 1f )
THUMB( bx r12 )
.word _magic_sig @ Magic numbers to help the loader
@@ -447,7 +447,7 @@ dtb_check_done:
bl cache_clean_flush
- adr r0, BSYM(restart)
+ badr r0, restart
add r0, r0, r6
mov pc, r0
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
index 1f4e71876b00..17ae0f3efac8 100644
--- a/arch/arm/boot/compressed/libfdt_env.h
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -5,6 +5,10 @@
#include <linux/string.h>
#include <asm/byteorder.h>
+typedef __be16 fdt16_t;
+typedef __be32 fdt32_t;
+typedef __be64 fdt64_t;
+
#define fdt16_to_cpu(x) be16_to_cpu(x)
#define cpu_to_fdt16(x) cpu_to_be16(x)
#define fdt32_to_cpu(x) be32_to_cpu(x)
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 7128fad991ac..a42cc377a862 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -19,6 +19,7 @@
rtc0 = &mcp_rtc;
rtc1 = &tps659038_rtc;
rtc2 = &rtc;
+ display0 = &hdmi0;
};
memory {
@@ -103,6 +104,51 @@
pinctrl-names = "default";
pinctrl-0 = <&extcon_usb2_pins>;
};
+
+ hdmi0: connector {
+ compatible = "hdmi-connector";
+ label = "hdmi";
+
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&tpd12s015_out>;
+ };
+ };
+ };
+
+ tpd12s015: encoder {
+ compatible = "ti,tpd12s015";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&tpd12s015_pins>;
+
+ gpios = <&gpio7 10 GPIO_ACTIVE_HIGH>, /* gpio7_10, CT CP HPD */
+ <&gpio6 28 GPIO_ACTIVE_HIGH>, /* gpio6_28, LS OE */
+ <&gpio7 12 GPIO_ACTIVE_HIGH>; /* gpio7_12/sp1_cs2, HPD */
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ tpd12s015_in: endpoint {
+ remote-endpoint = <&hdmi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ tpd12s015_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+ };
};
&dra7_pmx_core {
@@ -122,6 +168,13 @@
>;
};
+ hdmi_pins: pinmux_hdmi_pins {
+ pinctrl-single,pins = <
+ 0x408 (PIN_INPUT | MUX_MODE1) /* i2c2_sda.hdmi1_ddc_scl */
+ 0x40c (PIN_INPUT | MUX_MODE1) /* i2c2_scl.hdmi1_ddc_sda */
+ >;
+ };
+
i2c3_pins_default: i2c3_pins_default {
pinctrl-single,pins = <
0x2a4 (PIN_INPUT| MUX_MODE10) /* mcasp1_aclkx.i2c3_sda */
@@ -278,6 +331,14 @@
0x3e8 (PIN_INPUT_PULLUP | MUX_MODE14) /* uart1_ctsn.gpio7_24 */
>;
};
+
+ tpd12s015_pins: pinmux_tpd12s015_pins {
+ pinctrl-single,pins = <
+ 0x3b0 (PIN_OUTPUT | MUX_MODE14) /* gpio7_10 CT_CP_HPD */
+ 0x3b8 (PIN_INPUT_PULLDOWN | MUX_MODE14) /* gpio7_12 HPD */
+ 0x370 (PIN_OUTPUT | MUX_MODE14) /* gpio6_28 LS_OE */
+ >;
+ };
};
&i2c1 {
@@ -608,3 +669,23 @@
};
};
};
+
+&dss {
+ status = "ok";
+
+ vdda_video-supply = <&ldoln_reg>;
+};
+
+&hdmi {
+ status = "ok";
+ vdda-supply = <&ldo3_reg>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_pins>;
+
+ port {
+ hdmi_out: endpoint {
+ remote-endpoint = <&tpd12s015_in>;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi
index 7f0252c580e4..a718866ba52d 100644
--- a/arch/arm/boot/dts/armada-370-xp.dtsi
+++ b/arch/arm/boot/dts/armada-370-xp.dtsi
@@ -268,7 +268,6 @@
};
eth0: ethernet@70000 {
- compatible = "marvell,armada-370-neta";
reg = <0x70000 0x4000>;
interrupts = <8>;
clocks = <&gateclk 4>;
@@ -284,7 +283,6 @@
};
eth1: ethernet@74000 {
- compatible = "marvell,armada-370-neta";
reg = <0x74000 0x4000>;
interrupts = <10>;
clocks = <&gateclk 3>;
diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi
index 3f036bd635f4..53a1a5abe147 100644
--- a/arch/arm/boot/dts/armada-370.dtsi
+++ b/arch/arm/boot/dts/armada-370.dtsi
@@ -311,6 +311,14 @@
dmacap,memset;
};
};
+
+ ethernet@70000 {
+ compatible = "marvell,armada-370-neta";
+ };
+
+ ethernet@74000 {
+ compatible = "marvell,armada-370-neta";
+ };
};
};
};
diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
index 8479fdc9e9c2..c5fdc99f0dbe 100644
--- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
@@ -318,7 +318,7 @@
};
eth3: ethernet@34000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x34000 0x4000>;
interrupts = <14>;
clocks = <&gateclk 1>;
diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi
index 661d54c81580..0e24f1a38540 100644
--- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi
@@ -356,7 +356,7 @@
};
eth3: ethernet@34000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x34000 0x4000>;
interrupts = <14>;
clocks = <&gateclk 1>;
diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi
index 58eaef46efd5..3de9b761cc1a 100644
--- a/arch/arm/boot/dts/armada-xp.dtsi
+++ b/arch/arm/boot/dts/armada-xp.dtsi
@@ -185,7 +185,7 @@
};
eth2: ethernet@30000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x30000 0x4000>;
interrupts = <12>;
clocks = <&gateclk 2>;
@@ -228,6 +228,14 @@
};
};
+ ethernet@70000 {
+ compatible = "marvell,armada-xp-neta";
+ };
+
+ ethernet@74000 {
+ compatible = "marvell,armada-xp-neta";
+ };
+
xor@f0900 {
compatible = "marvell,orion-xor";
reg = <0xF0900 0x100
diff --git a/arch/arm/boot/dts/atlas7.dtsi b/arch/arm/boot/dts/atlas7.dtsi
index a753178abc85..5dfd3a44bf82 100644
--- a/arch/arm/boot/dts/atlas7.dtsi
+++ b/arch/arm/boot/dts/atlas7.dtsi
@@ -38,6 +38,21 @@
};
};
+ clocks {
+ xinw {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "xinw";
+ };
+ xin {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ clock-output-names = "xin";
+ };
+ };
+
noc {
compatible = "simple-bus";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index f03a091cd076..8f1e25bcecbd 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -131,6 +131,11 @@
regulator-max-microvolt = <3000000>;
};
};
+
+ scm_conf_clocks: clocks {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
};
dra7_pmx_core: pinmux@1400 {
@@ -1469,6 +1474,44 @@
clocks = <&sys_clkin1>;
status = "disabled";
};
+
+ dss: dss@58000000 {
+ compatible = "ti,dra7-dss";
+ /* 'reg' defined in dra72x.dtsi and dra74x.dtsi */
+ /* 'clocks' defined in dra72x.dtsi and dra74x.dtsi */
+ status = "disabled";
+ ti,hwmods = "dss_core";
+ /* CTRL_CORE_DSS_PLL_CONTROL */
+ syscon-pll-ctrl = <&scm_conf 0x538>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ dispc@58001000 {
+ compatible = "ti,dra7-dispc";
+ reg = <0x58001000 0x1000>;
+ interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+ ti,hwmods = "dss_dispc";
+ clocks = <&dss_dss_clk>;
+ clock-names = "fck";
+ /* CTRL_CORE_SMA_SW_1 */
+ syscon-pol = <&scm_conf 0x534>;
+ };
+
+ hdmi: encoder@58060000 {
+ compatible = "ti,dra7-hdmi";
+ reg = <0x58040000 0x200>,
+ <0x58040200 0x80>,
+ <0x58040300 0x80>,
+ <0x58060000 0x19000>;
+ reg-names = "wp", "pll", "phy", "core";
+ interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ ti,hwmods = "dss_hdmi";
+ clocks = <&dss_48mhz_clk>, <&dss_hdmi_clk>;
+ clock-names = "fck", "sys_clk";
+ };
+ };
};
thermal_zones: thermal-zones {
diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts
index ce0390f081d9..4e1b60581782 100644
--- a/arch/arm/boot/dts/dra72-evm.dts
+++ b/arch/arm/boot/dts/dra72-evm.dts
@@ -19,6 +19,10 @@
reg = <0x80000000 0x40000000>; /* 1024 MB */
};
+ aliases {
+ display0 = &hdmi0;
+ };
+
evm_3v3: fixedregulator-evm_3v3 {
compatible = "regulator-fixed";
regulator-name = "evm_3v3";
@@ -35,6 +39,51 @@
compatible = "linux,extcon-usb-gpio";
id-gpio = <&pcf_gpio_21 2 GPIO_ACTIVE_HIGH>;
};
+
+ hdmi0: connector {
+ compatible = "hdmi-connector";
+ label = "hdmi";
+
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&tpd12s015_out>;
+ };
+ };
+ };
+
+ tpd12s015: encoder {
+ compatible = "ti,tpd12s015";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&tpd12s015_pins>;
+
+ gpios = <&pcf_hdmi 4 GPIO_ACTIVE_HIGH>, /* P4, CT CP HPD */
+ <&pcf_hdmi 5 GPIO_ACTIVE_HIGH>, /* P5, LS OE */
+ <&gpio7 12 GPIO_ACTIVE_HIGH>; /* gpio7_12/sp1_cs2, HPD */
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ tpd12s015_in: endpoint {
+ remote-endpoint = <&hdmi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ tpd12s015_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+ };
};
&dra7_pmx_core {
@@ -45,6 +94,13 @@
>;
};
+ i2c5_pins: pinmux_i2c5_pins {
+ pinctrl-single,pins = <
+ 0x2b4 (PIN_INPUT | MUX_MODE10) /* mcasp1_axr0.i2c5_sda */
+ 0x2b8 (PIN_INPUT | MUX_MODE10) /* mcasp1_axr1.i2c5_scl */
+ >;
+ };
+
nand_default: nand_default {
pinctrl-single,pins = <
0x0 (PIN_INPUT | MUX_MODE0) /* gpmc_ad0 */
@@ -142,6 +198,19 @@
0xb8 (PIN_OUTPUT | MUX_MODE1) /* gpmc_cs2.qspi1_cs0 */
>;
};
+
+ hdmi_pins: pinmux_hdmi_pins {
+ pinctrl-single,pins = <
+ 0x408 (PIN_INPUT | MUX_MODE1) /* i2c2_sda.hdmi1_ddc_scl */
+ 0x40c (PIN_INPUT | MUX_MODE1) /* i2c2_scl.hdmi1_ddc_sda */
+ >;
+ };
+
+ tpd12s015_pins: pinmux_tpd12s015_pins {
+ pinctrl-single,pins = <
+ 0x3b8 (PIN_INPUT_PULLDOWN | MUX_MODE14) /* gpio7_12 HPD */
+ >;
+ };
};
&i2c1 {
@@ -277,6 +346,27 @@
};
};
+&i2c5 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c5_pins>;
+ clock-frequency = <400000>;
+
+ pcf_hdmi: pcf8575@26 {
+ compatible = "nxp,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ /*
+ * initial state is used here to keep the mdio interface
+ * selected on RU89 through SEL_VIN4_MUX_S0, VIN2_S1 and
+ * VIN2_S0 driven high otherwise Ethernet stops working
+ * VIN6_SEL_S0 is low, thus selecting McASP3 over VIN6
+ */
+ lines-initial-states = <0x0f2b>;
+ };
+};
+
&uart1 {
status = "okay";
};
@@ -566,3 +656,23 @@
};
};
};
+
+&dss {
+ status = "ok";
+
+ vdda_video-supply = <&ldo5_reg>;
+};
+
+&hdmi {
+ status = "ok";
+ vdda-supply = <&ldo3_reg>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_pins>;
+
+ port {
+ hdmi_out: endpoint {
+ remote-endpoint = <&tpd12s015_in>;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/dra72x.dtsi b/arch/arm/boot/dts/dra72x.dtsi
index 03d742f8d572..eaca143faa77 100644
--- a/arch/arm/boot/dts/dra72x.dtsi
+++ b/arch/arm/boot/dts/dra72x.dtsi
@@ -34,3 +34,14 @@
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
};
};
+
+&dss {
+ reg = <0x58000000 0x80>,
+ <0x58004054 0x4>,
+ <0x58004300 0x20>;
+ reg-names = "dss", "pll1_clkctrl", "pll1";
+
+ clocks = <&dss_dss_clk>,
+ <&dss_video1_clk>;
+ clock-names = "fck", "video1_clk";
+};
diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi
index cc560a70926f..fa995d0ca1f2 100644
--- a/arch/arm/boot/dts/dra74x.dtsi
+++ b/arch/arm/boot/dts/dra74x.dtsi
@@ -73,3 +73,18 @@
};
};
};
+
+&dss {
+ reg = <0x58000000 0x80>,
+ <0x58004054 0x4>,
+ <0x58004300 0x20>,
+ <0x58005054 0x4>,
+ <0x58005300 0x20>;
+ reg-names = "dss", "pll1_clkctrl", "pll1",
+ "pll2_clkctrl", "pll2";
+
+ clocks = <&dss_dss_clk>,
+ <&dss_video1_clk>,
+ <&dss_video2_clk>;
+ clock-names = "fck", "video1_clk", "video2_clk";
+};
diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi
index 3b933f74d000..357bedeebfac 100644
--- a/arch/arm/boot/dts/dra7xx-clocks.dtsi
+++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi
@@ -1531,6 +1531,7 @@
clocks = <&dpll_per_h12x2_ck>;
ti,bit-shift = <8>;
reg = <0x1120>;
+ ti,set-rate-parent;
};
dss_hdmi_clk: dss_hdmi_clk {
@@ -2136,3 +2137,13 @@
clocks = <&dpll_usb_ck>;
};
};
+
+&scm_conf_clocks {
+ dss_deshdcp_clk: dss_deshdcp_clk {
+ #clock-cells = <0>;
+ compatible = "ti,gate-clock";
+ clocks = <&l3_iclk_div>;
+ ti,bit-shift = <0>;
+ reg = <0x558>;
+ };
+};
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index f716e2b7d0b9..b0d52b1a646a 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -166,7 +166,7 @@
phys = <&mipi_phy 1>;
phy-names = "dsim";
clocks = <&clock CLK_DSIM0>, <&clock CLK_SCLK_MIPI0>;
- clock-names = "bus_clk", "pll_clk";
+ clock-names = "bus_clk", "sclk_mipi";
status = "disabled";
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi
index f5bebdd6d1be..4779b07310df 100644
--- a/arch/arm/boot/dts/socfpga_arria10.dtsi
+++ b/arch/arm/boot/dts/socfpga_arria10.dtsi
@@ -548,6 +548,17 @@
status = "disabled";
};
+ sdr: sdr@ffc25000 {
+ compatible = "syscon";
+ reg = <0xffcfb100 0x80>;
+ };
+
+ sdramedac {
+ compatible = "altr,sdram-edac-a10";
+ altr,sdr-syscon = <&sdr>;
+ interrupts = <0 2 4>, <0 0 4>;
+ };
+
L2: l2-cache@fffff000 {
compatible = "arm,pl310-cache";
reg = <0xfffff000 0x1000>;
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 70b1eff477b3..6ee5959a813b 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_SHARP_LOCOMO) += locomo.o
obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o
obj-$(CONFIG_SHARP_SCOOP) += scoop.o
obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o
-obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o
obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
CFLAGS_REMOVE_mcpm_entry.o = -pg
AFLAGS_mcpm_head.o := -march=armv7-a
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 5f8a52ac7edf..a923524d1040 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -20,6 +20,126 @@
#include <asm/cputype.h>
#include <asm/suspend.h>
+/*
+ * The public API for this code is documented in arch/arm/include/asm/mcpm.h.
+ * For a comprehensive description of the main algorithm used here, please
+ * see Documentation/arm/cluster-pm-race-avoidance.txt.
+ */
+
+struct sync_struct mcpm_sync;
+
+/*
+ * __mcpm_cpu_going_down: Indicates that the cpu is being torn down.
+ * This must be called at the point of committing to teardown of a CPU.
+ * The CPU cache (SCTRL.C bit) is expected to still be active.
+ */
+static void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster)
+{
+ mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
+ sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
+}
+
+/*
+ * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the
+ * cluster can be torn down without disrupting this CPU.
+ * To avoid deadlocks, this must be called before a CPU is powered down.
+ * The CPU cache (SCTRL.C bit) is expected to be off.
+ * However L2 cache might or might not be active.
+ */
+static void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster)
+{
+ dmb();
+ mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
+ sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
+ sev();
+}
+
+/*
+ * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section.
+ * @state: the final state of the cluster:
+ * CLUSTER_UP: no destructive teardown was done and the cluster has been
+ * restored to the previous state (CPU cache still active); or
+ * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
+ * (CPU cache disabled, L2 cache either enabled or disabled).
+ */
+static void __mcpm_outbound_leave_critical(unsigned int cluster, int state)
+{
+ dmb();
+ mcpm_sync.clusters[cluster].cluster = state;
+ sync_cache_w(&mcpm_sync.clusters[cluster].cluster);
+ sev();
+}
+
+/*
+ * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section.
+ * This function should be called by the last man, after local CPU teardown
+ * is complete. CPU cache expected to be active.
+ *
+ * Returns:
+ * false: the critical section was not entered because an inbound CPU was
+ * observed, or the cluster is already being set up;
+ * true: the critical section was entered: it is now safe to tear down the
+ * cluster.
+ */
+static bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
+{
+ unsigned int i;
+ struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];
+
+ /* Warn inbound CPUs that the cluster is being torn down: */
+ c->cluster = CLUSTER_GOING_DOWN;
+ sync_cache_w(&c->cluster);
+
+ /* Back out if the inbound cluster is already in the critical region: */
+ sync_cache_r(&c->inbound);
+ if (c->inbound == INBOUND_COMING_UP)
+ goto abort;
+
+ /*
+ * Wait for all CPUs to get out of the GOING_DOWN state, so that local
+ * teardown is complete on each CPU before tearing down the cluster.
+ *
+ * If any CPU has been woken up again from the DOWN state, then we
+ * shouldn't be taking the cluster down at all: abort in that case.
+ */
+ sync_cache_r(&c->cpus);
+ for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) {
+ int cpustate;
+
+ if (i == cpu)
+ continue;
+
+ while (1) {
+ cpustate = c->cpus[i].cpu;
+ if (cpustate != CPU_GOING_DOWN)
+ break;
+
+ wfe();
+ sync_cache_r(&c->cpus[i].cpu);
+ }
+
+ switch (cpustate) {
+ case CPU_DOWN:
+ continue;
+
+ default:
+ goto abort;
+ }
+ }
+
+ return true;
+
+abort:
+ __mcpm_outbound_leave_critical(cluster, CLUSTER_UP);
+ return false;
+}
+
+static int __mcpm_cluster_state(unsigned int cluster)
+{
+ sync_cache_r(&mcpm_sync.clusters[cluster].cluster);
+ return mcpm_sync.clusters[cluster].cluster;
+}
+
extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
@@ -78,16 +198,11 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
bool cpu_is_down, cluster_is_down;
int ret = 0;
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
if (!platform_ops)
return -EUNATCH; /* try not to shadow power_up errors */
might_sleep();
- /* backward compatibility callback */
- if (platform_ops->power_up)
- return platform_ops->power_up(cpu, cluster);
-
- pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
-
/*
* Since this is called with IRQs enabled, and no arch_spin_lock_irq
* variant exists, we need to disable IRQs manually here.
@@ -128,29 +243,17 @@ void mcpm_cpu_power_down(void)
bool cpu_going_down, last_man;
phys_reset_t phys_reset;
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
if (WARN_ON_ONCE(!platform_ops))
return;
BUG_ON(!irqs_disabled());
- /*
- * Do this before calling into the power_down method,
- * as it might not always be safe to do afterwards.
- */
setup_mm_for_reboot();
- /* backward compatibility callback */
- if (platform_ops->power_down) {
- platform_ops->power_down();
- goto not_dead;
- }
-
- mpidr = read_cpuid_mpidr();
- cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
-
__mcpm_cpu_going_down(cpu, cluster);
-
arch_spin_lock(&mcpm_lock);
BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
@@ -187,7 +290,6 @@ void mcpm_cpu_power_down(void)
if (cpu_going_down)
wfi();
-not_dead:
/*
* It is possible for a power_up request to happen concurrently
* with a power_down request for the same CPU. In this case the
@@ -219,22 +321,11 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster)
return ret;
}
-void mcpm_cpu_suspend(u64 expected_residency)
+void mcpm_cpu_suspend(void)
{
if (WARN_ON_ONCE(!platform_ops))
return;
- /* backward compatibility callback */
- if (platform_ops->suspend) {
- phys_reset_t phys_reset;
- BUG_ON(!irqs_disabled());
- setup_mm_for_reboot();
- platform_ops->suspend(expected_residency);
- phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
- phys_reset(virt_to_phys(mcpm_entry_point));
- BUG();
- }
-
/* Some platforms might have to enable special resume modes, etc. */
if (platform_ops->cpu_suspend_prepare) {
unsigned int mpidr = read_cpuid_mpidr();
@@ -256,12 +347,6 @@ int mcpm_cpu_powered_up(void)
if (!platform_ops)
return -EUNATCH;
- /* backward compatibility callback */
- if (platform_ops->powered_up) {
- platform_ops->powered_up();
- return 0;
- }
-
mpidr = read_cpuid_mpidr();
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
@@ -334,120 +419,6 @@ int __init mcpm_loopback(void (*cache_disable)(void))
#endif
-struct sync_struct mcpm_sync;
-
-/*
- * __mcpm_cpu_going_down: Indicates that the cpu is being torn down.
- * This must be called at the point of committing to teardown of a CPU.
- * The CPU cache (SCTRL.C bit) is expected to still be active.
- */
-void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster)
-{
- mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
- sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
-}
-
-/*
- * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the
- * cluster can be torn down without disrupting this CPU.
- * To avoid deadlocks, this must be called before a CPU is powered down.
- * The CPU cache (SCTRL.C bit) is expected to be off.
- * However L2 cache might or might not be active.
- */
-void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster)
-{
- dmb();
- mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
- sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
- sev();
-}
-
-/*
- * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section.
- * @state: the final state of the cluster:
- * CLUSTER_UP: no destructive teardown was done and the cluster has been
- * restored to the previous state (CPU cache still active); or
- * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
- * (CPU cache disabled, L2 cache either enabled or disabled).
- */
-void __mcpm_outbound_leave_critical(unsigned int cluster, int state)
-{
- dmb();
- mcpm_sync.clusters[cluster].cluster = state;
- sync_cache_w(&mcpm_sync.clusters[cluster].cluster);
- sev();
-}
-
-/*
- * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section.
- * This function should be called by the last man, after local CPU teardown
- * is complete. CPU cache expected to be active.
- *
- * Returns:
- * false: the critical section was not entered because an inbound CPU was
- * observed, or the cluster is already being set up;
- * true: the critical section was entered: it is now safe to tear down the
- * cluster.
- */
-bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
-{
- unsigned int i;
- struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];
-
- /* Warn inbound CPUs that the cluster is being torn down: */
- c->cluster = CLUSTER_GOING_DOWN;
- sync_cache_w(&c->cluster);
-
- /* Back out if the inbound cluster is already in the critical region: */
- sync_cache_r(&c->inbound);
- if (c->inbound == INBOUND_COMING_UP)
- goto abort;
-
- /*
- * Wait for all CPUs to get out of the GOING_DOWN state, so that local
- * teardown is complete on each CPU before tearing down the cluster.
- *
- * If any CPU has been woken up again from the DOWN state, then we
- * shouldn't be taking the cluster down at all: abort in that case.
- */
- sync_cache_r(&c->cpus);
- for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) {
- int cpustate;
-
- if (i == cpu)
- continue;
-
- while (1) {
- cpustate = c->cpus[i].cpu;
- if (cpustate != CPU_GOING_DOWN)
- break;
-
- wfe();
- sync_cache_r(&c->cpus[i].cpu);
- }
-
- switch (cpustate) {
- case CPU_DOWN:
- continue;
-
- default:
- goto abort;
- }
- }
-
- return true;
-
-abort:
- __mcpm_outbound_leave_critical(cluster, CLUSTER_UP);
- return false;
-}
-
-int __mcpm_cluster_state(unsigned int cluster)
-{
- sync_cache_r(&mcpm_sync.clusters[cluster].cluster);
- return mcpm_sync.clusters[cluster].cluster;
-}
-
extern unsigned long mcpm_power_up_setup_phys;
int __init mcpm_sync_init(
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index e02db4b81a66..08b3bb9bc6a2 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -49,7 +49,7 @@
ENTRY(mcpm_entry_point)
ARM_BE8(setend be)
- THUMB( adr r12, BSYM(1f) )
+ THUMB( badr r12, 1f )
THUMB( bx r12 )
THUMB( .thumb )
1:
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 5cc779c8e9c6..93ee70dbbdd3 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -501,8 +501,8 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
* Register SA1111 interrupt
*/
irq_set_irq_type(sachip->irq, IRQ_TYPE_EDGE_RISING);
- irq_set_handler_data(sachip->irq, sachip);
- irq_set_chained_handler(sachip->irq, sa1111_irq_handler);
+ irq_set_chained_handler_and_data(sachip->irq, sa1111_irq_handler,
+ sachip);
dev_info(sachip->dev, "Providing IRQ%u-%u\n",
sachip->irq_base, sachip->irq_base + SA1111_IRQ_NR - 1);
@@ -836,8 +836,7 @@ static void __sa1111_remove(struct sa1111 *sachip)
clk_unprepare(sachip->clk);
if (sachip->irq != NO_IRQ) {
- irq_set_chained_handler(sachip->irq, NULL);
- irq_set_handler_data(sachip->irq, NULL);
+ irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
release_mem_region(sachip->phys + SA1111_INTC, 512);
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
deleted file mode 100644
index 19211324772f..000000000000
--- a/arch/arm/common/timer-sp.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * linux/arch/arm/common/timer-sp.c
- *
- * Copyright (C) 1999 - 2003 ARM Limited
- * Copyright (C) 2000 Deep Blue Solutions Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#include <linux/clk.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/sched_clock.h>
-
-#include <asm/hardware/arm_timer.h>
-#include <asm/hardware/timer-sp.h>
-
-static long __init sp804_get_clock_rate(struct clk *clk)
-{
- long rate;
- int err;
-
- err = clk_prepare(clk);
- if (err) {
- pr_err("sp804: clock failed to prepare: %d\n", err);
- clk_put(clk);
- return err;
- }
-
- err = clk_enable(clk);
- if (err) {
- pr_err("sp804: clock failed to enable: %d\n", err);
- clk_unprepare(clk);
- clk_put(clk);
- return err;
- }
-
- rate = clk_get_rate(clk);
- if (rate < 0) {
- pr_err("sp804: clock failed to get rate: %ld\n", rate);
- clk_disable(clk);
- clk_unprepare(clk);
- clk_put(clk);
- }
-
- return rate;
-}
-
-static void __iomem *sched_clock_base;
-
-static u64 notrace sp804_read(void)
-{
- return ~readl_relaxed(sched_clock_base + TIMER_VALUE);
-}
-
-void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base,
- const char *name,
- struct clk *clk,
- int use_sched_clock)
-{
- long rate;
-
- if (!clk) {
- clk = clk_get_sys("sp804", name);
- if (IS_ERR(clk)) {
- pr_err("sp804: clock not found: %d\n",
- (int)PTR_ERR(clk));
- return;
- }
- }
-
- rate = sp804_get_clock_rate(clk);
-
- if (rate < 0)
- return;
-
- /* setup timer 0 as free-running clocksource */
- writel(0, base + TIMER_CTRL);
- writel(0xffffffff, base + TIMER_LOAD);
- writel(0xffffffff, base + TIMER_VALUE);
- writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
- base + TIMER_CTRL);
-
- clocksource_mmio_init(base + TIMER_VALUE, name,
- rate, 200, 32, clocksource_mmio_readl_down);
-
- if (use_sched_clock) {
- sched_clock_base = base;
- sched_clock_register(sp804_read, 32, rate);
- }
-}
-
-
-static void __iomem *clkevt_base;
-static unsigned long clkevt_reload;
-
-/*
- * IRQ handler for the timer
- */
-static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id)
-{
- struct clock_event_device *evt = dev_id;
-
- /* clear the interrupt */
- writel(1, clkevt_base + TIMER_INTCLR);
-
- evt->event_handler(evt);
-
- return IRQ_HANDLED;
-}
-
-static void sp804_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- unsigned long ctrl = TIMER_CTRL_32BIT | TIMER_CTRL_IE;
-
- writel(ctrl, clkevt_base + TIMER_CTRL);
-
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- writel(clkevt_reload, clkevt_base + TIMER_LOAD);
- ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE;
- break;
-
- case CLOCK_EVT_MODE_ONESHOT:
- /* period set, and timer enabled in 'next_event' hook */
- ctrl |= TIMER_CTRL_ONESHOT;
- break;
-
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- default:
- break;
- }
-
- writel(ctrl, clkevt_base + TIMER_CTRL);
-}
-
-static int sp804_set_next_event(unsigned long next,
- struct clock_event_device *evt)
-{
- unsigned long ctrl = readl(clkevt_base + TIMER_CTRL);
-
- writel(next, clkevt_base + TIMER_LOAD);
- writel(ctrl | TIMER_CTRL_ENABLE, clkevt_base + TIMER_CTRL);
-
- return 0;
-}
-
-static struct clock_event_device sp804_clockevent = {
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
- CLOCK_EVT_FEAT_DYNIRQ,
- .set_mode = sp804_set_mode,
- .set_next_event = sp804_set_next_event,
- .rating = 300,
-};
-
-static struct irqaction sp804_timer_irq = {
- .name = "timer",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = sp804_timer_interrupt,
- .dev_id = &sp804_clockevent,
-};
-
-void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name)
-{
- struct clock_event_device *evt = &sp804_clockevent;
- long rate;
-
- if (!clk)
- clk = clk_get_sys("sp804", name);
- if (IS_ERR(clk)) {
- pr_err("sp804: %s clock not found: %d\n", name,
- (int)PTR_ERR(clk));
- return;
- }
-
- rate = sp804_get_clock_rate(clk);
- if (rate < 0)
- return;
-
- clkevt_base = base;
- clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ);
- evt->name = name;
- evt->irq = irq;
- evt->cpumask = cpu_possible_mask;
-
- writel(0, base + TIMER_CTRL);
-
- setup_irq(irq, &sp804_timer_irq);
- clockevents_config_and_register(evt, rate, 0xf, 0xffffffff);
-}
-
-static void __init sp804_of_init(struct device_node *np)
-{
- static bool initialized = false;
- void __iomem *base;
- int irq;
- u32 irq_num = 0;
- struct clk *clk1, *clk2;
- const char *name = of_get_property(np, "compatible", NULL);
-
- base = of_iomap(np, 0);
- if (WARN_ON(!base))
- return;
-
- /* Ensure timers are disabled */
- writel(0, base + TIMER_CTRL);
- writel(0, base + TIMER_2_BASE + TIMER_CTRL);
-
- if (initialized || !of_device_is_available(np))
- goto err;
-
- clk1 = of_clk_get(np, 0);
- if (IS_ERR(clk1))
- clk1 = NULL;
-
- /* Get the 2nd clock if the timer has 3 timer clocks */
- if (of_count_phandle_with_args(np, "clocks", "#clock-cells") == 3) {
- clk2 = of_clk_get(np, 1);
- if (IS_ERR(clk2)) {
- pr_err("sp804: %s clock not found: %d\n", np->name,
- (int)PTR_ERR(clk2));
- clk2 = NULL;
- }
- } else
- clk2 = clk1;
-
- irq = irq_of_parse_and_map(np, 0);
- if (irq <= 0)
- goto err;
-
- of_property_read_u32(np, "arm,sp804-has-irq", &irq_num);
- if (irq_num == 2) {
- __sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name);
- __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1);
- } else {
- __sp804_clockevents_init(base, irq, clk1 , name);
- __sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE,
- name, clk2, 1);
- }
- initialized = true;
-
- return;
-err:
- iounmap(base);
-}
-CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init);
-
-static void __init integrator_cp_of_init(struct device_node *np)
-{
- static int init_count = 0;
- void __iomem *base;
- int irq;
- const char *name = of_get_property(np, "compatible", NULL);
- struct clk *clk;
-
- base = of_iomap(np, 0);
- if (WARN_ON(!base))
- return;
- clk = of_clk_get(np, 0);
- if (WARN_ON(IS_ERR(clk)))
- return;
-
- /* Ensure timer is disabled */
- writel(0, base + TIMER_CTRL);
-
- if (init_count == 2 || !of_device_is_available(np))
- goto err;
-
- if (!init_count)
- __sp804_clocksource_and_sched_clock_init(base, name, clk, 0);
- else {
- irq = irq_of_parse_and_map(np, 0);
- if (irq <= 0)
- goto err;
-
- __sp804_clockevents_init(base, irq, clk, name);
- }
-
- init_count++;
- return;
-err:
- iounmap(base);
-}
-CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init);
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 8da2207b0072..27ed1b1cd1d7 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -53,20 +53,13 @@ config CRYPTO_SHA256_ARM
SHA-256 secure hash standard (DFIPS 180-2) implemented
using optimized ARM assembler and NEON, when available.
-config CRYPTO_SHA512_ARM_NEON
- tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_SHA512
+config CRYPTO_SHA512_ARM
+ tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)"
select CRYPTO_HASH
+ depends on !CPU_V7M
help
SHA-512 secure hash standard (DFIPS 180-2) implemented
- using ARM NEON instructions, when available.
-
- This version of SHA implements a 512 bit hash with 256 bits of
- security against collision attacks.
-
- This code also includes SHA-384, a 384 bit hash with 192 bits
- of security against collision attacks.
+ using optimized ARM assembler and NEON, when available.
config CRYPTO_AES_ARM
tristate "AES cipher algorithms (ARM-asm)"
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 6ea828241fcb..fc5150702b64 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
-obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -30,7 +30,8 @@ sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
-sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
+sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
@@ -45,4 +46,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
-.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
+$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
+ $(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index 8cfa468ee570..987aa632c9f0 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -101,15 +101,14 @@
\dround q10, q11
blo 0f @ AES-128: 10 rounds
vld1.8 {q10-q11}, [ip]!
- beq 1f @ AES-192: 12 rounds
\dround q12, q13
+ beq 1f @ AES-192: 12 rounds
vld1.8 {q12-q13}, [ip]
\dround q10, q11
0: \fround q12, q13, q14
bx lr
-1: \dround q12, q13
- \fround q10, q11, q14
+1: \fround q10, q11, q14
bx lr
.endm
@@ -122,8 +121,8 @@
* q2 : third in/output block (_3x version only)
* q8 : first round key
* q9 : secound round key
- * ip : address of 3rd round key
* q14 : final round key
+ * r2 : address of round key array
* r3 : number of rounds
*/
.align 6
diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl
new file mode 100644
index 000000000000..a2b11a844357
--- /dev/null
+++ b/arch/arm/crypto/sha512-armv4.pl
@@ -0,0 +1,649 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA512 block procedure for ARMv4. September 2007.
+
+# This code is ~4.5 (four and a half) times faster than code generated
+# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+# Xscale PXA250 core].
+#
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 6% improvement on
+# Cortex A8 core and ~40 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 7%
+# improvement on Cortex A8 core and ~38 cycles per byte.
+
+# March 2011.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process
+# one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+# August 2012.
+#
+# Improve NEON performance by 12% on Snapdragon S4. In absolute
+# terms it's 22.6 cycles per byte, which is a disappointing result.
+# Technical writers asserted that 3-way S4 pipeline can sustain
+# multiple NEON instructions per cycle, but dual NEON issue could
+# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+# for further details. As a side note, Cortex-A15 processes one byte in
+# 16 cycles.
+
+# Byte order [in]dependence. =========================================
+#
+# Originally caller was expected to maintain specific *dword* order in
+# h[0-7], namely with most significant dword at *lower* address, which
+# was reflected in below two parameters as 0 and 4. Now caller is
+# expected to maintain native byte order for whole 64-bit values.
+$hi="HI";
+$lo="LO";
+# ====================================================================
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="r0"; # parameter block
+$inp="r1";
+$len="r2";
+
+$Tlo="r3";
+$Thi="r4";
+$Alo="r5";
+$Ahi="r6";
+$Elo="r7";
+$Ehi="r8";
+$t0="r9";
+$t1="r10";
+$t2="r11";
+$t3="r12";
+############ r13 is stack pointer
+$Ktbl="r14";
+############ r15 is program counter
+
+$Aoff=8*0;
+$Boff=8*1;
+$Coff=8*2;
+$Doff=8*3;
+$Eoff=8*4;
+$Foff=8*5;
+$Goff=8*6;
+$Hoff=8*7;
+$Xoff=8*8;
+
+sub BODY_00_15() {
+my $magic = shift;
+$code.=<<___;
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+ mov $t0,$Elo,lsr#14
+ str $Tlo,[sp,#$Xoff+0]
+ mov $t1,$Ehi,lsr#14
+ str $Thi,[sp,#$Xoff+4]
+ eor $t0,$t0,$Ehi,lsl#18
+ ldr $t2,[sp,#$Hoff+0] @ h.lo
+ eor $t1,$t1,$Elo,lsl#18
+ ldr $t3,[sp,#$Hoff+4] @ h.hi
+ eor $t0,$t0,$Elo,lsr#18
+ eor $t1,$t1,$Ehi,lsr#18
+ eor $t0,$t0,$Ehi,lsl#14
+ eor $t1,$t1,$Elo,lsl#14
+ eor $t0,$t0,$Ehi,lsr#9
+ eor $t1,$t1,$Elo,lsr#9
+ eor $t0,$t0,$Elo,lsl#23
+ eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
+ adds $Tlo,$Tlo,$t0
+ ldr $t0,[sp,#$Foff+0] @ f.lo
+ adc $Thi,$Thi,$t1 @ T += Sigma1(e)
+ ldr $t1,[sp,#$Foff+4] @ f.hi
+ adds $Tlo,$Tlo,$t2
+ ldr $t2,[sp,#$Goff+0] @ g.lo
+ adc $Thi,$Thi,$t3 @ T += h
+ ldr $t3,[sp,#$Goff+4] @ g.hi
+
+ eor $t0,$t0,$t2
+ str $Elo,[sp,#$Eoff+0]
+ eor $t1,$t1,$t3
+ str $Ehi,[sp,#$Eoff+4]
+ and $t0,$t0,$Elo
+ str $Alo,[sp,#$Aoff+0]
+ and $t1,$t1,$Ehi
+ str $Ahi,[sp,#$Aoff+4]
+ eor $t0,$t0,$t2
+ ldr $t2,[$Ktbl,#$lo] @ K[i].lo
+ eor $t1,$t1,$t3 @ Ch(e,f,g)
+ ldr $t3,[$Ktbl,#$hi] @ K[i].hi
+
+ adds $Tlo,$Tlo,$t0
+ ldr $Elo,[sp,#$Doff+0] @ d.lo
+ adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
+ ldr $Ehi,[sp,#$Doff+4] @ d.hi
+ adds $Tlo,$Tlo,$t2
+ and $t0,$t2,#0xff
+ adc $Thi,$Thi,$t3 @ T += K[i]
+ adds $Elo,$Elo,$Tlo
+ ldr $t2,[sp,#$Boff+0] @ b.lo
+ adc $Ehi,$Ehi,$Thi @ d += T
+ teq $t0,#$magic
+
+ ldr $t3,[sp,#$Coff+0] @ c.lo
+#if __ARM_ARCH__>=7
+ it eq @ Thumb2 thing, sanity check in ARM
+#endif
+ orreq $Ktbl,$Ktbl,#1
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov $t0,$Alo,lsr#28
+ mov $t1,$Ahi,lsr#28
+ eor $t0,$t0,$Ahi,lsl#4
+ eor $t1,$t1,$Alo,lsl#4
+ eor $t0,$t0,$Ahi,lsr#2
+ eor $t1,$t1,$Alo,lsr#2
+ eor $t0,$t0,$Alo,lsl#30
+ eor $t1,$t1,$Ahi,lsl#30
+ eor $t0,$t0,$Ahi,lsr#7
+ eor $t1,$t1,$Alo,lsr#7
+ eor $t0,$t0,$Alo,lsl#25
+ eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
+ adds $Tlo,$Tlo,$t0
+ and $t0,$Alo,$t2
+ adc $Thi,$Thi,$t1 @ T += Sigma0(a)
+
+ ldr $t1,[sp,#$Boff+4] @ b.hi
+ orr $Alo,$Alo,$t2
+ ldr $t2,[sp,#$Coff+4] @ c.hi
+ and $Alo,$Alo,$t3
+ and $t3,$Ahi,$t1
+ orr $Ahi,$Ahi,$t1
+ orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
+ and $Ahi,$Ahi,$t2
+ adds $Alo,$Alo,$Tlo
+ orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
+ sub sp,sp,#8
+ adc $Ahi,$Ahi,$Thi @ h += T
+ tst $Ktbl,#1
+ add $Ktbl,$Ktbl,#8
+___
+}
+$code=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K512,%object
+.align 5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size K512,.-K512
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha512_block_data_order
+.skip 32-4
+#else
+.skip 32
+#endif
+
+.global sha512_block_data_order
+.type sha512_block_data_order,%function
+sha512_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha512_block_data_order
+#else
+ adr r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#1
+ bne .LNEON
+#endif
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
+ stmdb sp!,{r4-r12,lr}
+ sub $Ktbl,r3,#672 @ K512
+ sub sp,sp,#9*8
+
+ ldr $Elo,[$ctx,#$Eoff+$lo]
+ ldr $Ehi,[$ctx,#$Eoff+$hi]
+ ldr $t0, [$ctx,#$Goff+$lo]
+ ldr $t1, [$ctx,#$Goff+$hi]
+ ldr $t2, [$ctx,#$Hoff+$lo]
+ ldr $t3, [$ctx,#$Hoff+$hi]
+.Loop:
+ str $t0, [sp,#$Goff+0]
+ str $t1, [sp,#$Goff+4]
+ str $t2, [sp,#$Hoff+0]
+ str $t3, [sp,#$Hoff+4]
+ ldr $Alo,[$ctx,#$Aoff+$lo]
+ ldr $Ahi,[$ctx,#$Aoff+$hi]
+ ldr $Tlo,[$ctx,#$Boff+$lo]
+ ldr $Thi,[$ctx,#$Boff+$hi]
+ ldr $t0, [$ctx,#$Coff+$lo]
+ ldr $t1, [$ctx,#$Coff+$hi]
+ ldr $t2, [$ctx,#$Doff+$lo]
+ ldr $t3, [$ctx,#$Doff+$hi]
+ str $Tlo,[sp,#$Boff+0]
+ str $Thi,[sp,#$Boff+4]
+ str $t0, [sp,#$Coff+0]
+ str $t1, [sp,#$Coff+4]
+ str $t2, [sp,#$Doff+0]
+ str $t3, [sp,#$Doff+4]
+ ldr $Tlo,[$ctx,#$Foff+$lo]
+ ldr $Thi,[$ctx,#$Foff+$hi]
+ str $Tlo,[sp,#$Foff+0]
+ str $Thi,[sp,#$Foff+4]
+
+.L00_15:
+#if __ARM_ARCH__<7
+ ldrb $Tlo,[$inp,#7]
+ ldrb $t0, [$inp,#6]
+ ldrb $t1, [$inp,#5]
+ ldrb $t2, [$inp,#4]
+ ldrb $Thi,[$inp,#3]
+ ldrb $t3, [$inp,#2]
+ orr $Tlo,$Tlo,$t0,lsl#8
+ ldrb $t0, [$inp,#1]
+ orr $Tlo,$Tlo,$t1,lsl#16
+ ldrb $t1, [$inp],#8
+ orr $Tlo,$Tlo,$t2,lsl#24
+ orr $Thi,$Thi,$t3,lsl#8
+ orr $Thi,$Thi,$t0,lsl#16
+ orr $Thi,$Thi,$t1,lsl#24
+#else
+ ldr $Tlo,[$inp,#4]
+ ldr $Thi,[$inp],#8
+#ifdef __ARMEL__
+ rev $Tlo,$Tlo
+ rev $Thi,$Thi
+#endif
+#endif
+___
+ &BODY_00_15(0x94);
+$code.=<<___;
+ tst $Ktbl,#1
+ beq .L00_15
+ ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
+ ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
+ bic $Ktbl,$Ktbl,#1
+.L16_79:
+ @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+ @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+ @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
+ mov $Tlo,$t0,lsr#1
+ ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
+ mov $Thi,$t1,lsr#1
+ ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
+ eor $Tlo,$Tlo,$t1,lsl#31
+ eor $Thi,$Thi,$t0,lsl#31
+ eor $Tlo,$Tlo,$t0,lsr#8
+ eor $Thi,$Thi,$t1,lsr#8
+ eor $Tlo,$Tlo,$t1,lsl#24
+ eor $Thi,$Thi,$t0,lsl#24
+ eor $Tlo,$Tlo,$t0,lsr#7
+ eor $Thi,$Thi,$t1,lsr#7
+ eor $Tlo,$Tlo,$t1,lsl#25
+
+ @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+ @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+ @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+ mov $t0,$t2,lsr#19
+ mov $t1,$t3,lsr#19
+ eor $t0,$t0,$t3,lsl#13
+ eor $t1,$t1,$t2,lsl#13
+ eor $t0,$t0,$t3,lsr#29
+ eor $t1,$t1,$t2,lsr#29
+ eor $t0,$t0,$t2,lsl#3
+ eor $t1,$t1,$t3,lsl#3
+ eor $t0,$t0,$t2,lsr#6
+ eor $t1,$t1,$t3,lsr#6
+ ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
+ eor $t0,$t0,$t3,lsl#26
+
+ ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
+ adds $Tlo,$Tlo,$t0
+ ldr $t0,[sp,#`$Xoff+8*16`+0]
+ adc $Thi,$Thi,$t1
+
+ ldr $t1,[sp,#`$Xoff+8*16`+4]
+ adds $Tlo,$Tlo,$t2
+ adc $Thi,$Thi,$t3
+ adds $Tlo,$Tlo,$t0
+ adc $Thi,$Thi,$t1
+___
+ &BODY_00_15(0x17);
+$code.=<<___;
+#if __ARM_ARCH__>=7
+ ittt eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0]
+ ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4]
+ beq .L16_79
+ bic $Ktbl,$Ktbl,#1
+
+ ldr $Tlo,[sp,#$Boff+0]
+ ldr $Thi,[sp,#$Boff+4]
+ ldr $t0, [$ctx,#$Aoff+$lo]
+ ldr $t1, [$ctx,#$Aoff+$hi]
+ ldr $t2, [$ctx,#$Boff+$lo]
+ ldr $t3, [$ctx,#$Boff+$hi]
+ adds $t0,$Alo,$t0
+ str $t0, [$ctx,#$Aoff+$lo]
+ adc $t1,$Ahi,$t1
+ str $t1, [$ctx,#$Aoff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Boff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Boff+$hi]
+
+ ldr $Alo,[sp,#$Coff+0]
+ ldr $Ahi,[sp,#$Coff+4]
+ ldr $Tlo,[sp,#$Doff+0]
+ ldr $Thi,[sp,#$Doff+4]
+ ldr $t0, [$ctx,#$Coff+$lo]
+ ldr $t1, [$ctx,#$Coff+$hi]
+ ldr $t2, [$ctx,#$Doff+$lo]
+ ldr $t3, [$ctx,#$Doff+$hi]
+ adds $t0,$Alo,$t0
+ str $t0, [$ctx,#$Coff+$lo]
+ adc $t1,$Ahi,$t1
+ str $t1, [$ctx,#$Coff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Doff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Doff+$hi]
+
+ ldr $Tlo,[sp,#$Foff+0]
+ ldr $Thi,[sp,#$Foff+4]
+ ldr $t0, [$ctx,#$Eoff+$lo]
+ ldr $t1, [$ctx,#$Eoff+$hi]
+ ldr $t2, [$ctx,#$Foff+$lo]
+ ldr $t3, [$ctx,#$Foff+$hi]
+ adds $Elo,$Elo,$t0
+ str $Elo,[$ctx,#$Eoff+$lo]
+ adc $Ehi,$Ehi,$t1
+ str $Ehi,[$ctx,#$Eoff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Foff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Foff+$hi]
+
+ ldr $Alo,[sp,#$Goff+0]
+ ldr $Ahi,[sp,#$Goff+4]
+ ldr $Tlo,[sp,#$Hoff+0]
+ ldr $Thi,[sp,#$Hoff+4]
+ ldr $t0, [$ctx,#$Goff+$lo]
+ ldr $t1, [$ctx,#$Goff+$hi]
+ ldr $t2, [$ctx,#$Hoff+$lo]
+ ldr $t3, [$ctx,#$Hoff+$hi]
+ adds $t0,$Alo,$t0
+ str $t0, [$ctx,#$Goff+$lo]
+ adc $t1,$Ahi,$t1
+ str $t1, [$ctx,#$Goff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Hoff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Hoff+$hi]
+
+ add sp,sp,#640
+ sub $Ktbl,$Ktbl,#640
+
+ teq $inp,$len
+ bne .Loop
+
+ add sp,sp,#8*9 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size sha512_block_data_order,.-sha512_block_data_order
+___
+
+{
+my @Sigma0=(28,34,39);
+my @Sigma1=(14,18,41);
+my @sigma0=(1, 8, 7);
+my @sigma1=(19,61,6);
+
+my $Ktbl="r3";
+my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch
+
+my @X=map("d$_",(0..15));
+my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
+
+sub NEON_00_15() {
+my $i=shift;
+my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps
+
+$code.=<<___ if ($i<16 || $i&1);
+ vshr.u64 $t0,$e,#@Sigma1[0] @ $i
+#if $i<16
+ vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned
+#endif
+ vshr.u64 $t1,$e,#@Sigma1[1]
+#if $i>0
+ vadd.i64 $a,$Maj @ h+=Maj from the past
+#endif
+ vshr.u64 $t2,$e,#@Sigma1[2]
+___
+$code.=<<___;
+ vld1.64 {$K},[$Ktbl,:64]! @ K[i++]
+ vsli.64 $t0,$e,#`64-@Sigma1[0]`
+ vsli.64 $t1,$e,#`64-@Sigma1[1]`
+ vmov $Ch,$e
+ vsli.64 $t2,$e,#`64-@Sigma1[2]`
+#if $i<16 && defined(__ARMEL__)
+ vrev64.8 @X[$i],@X[$i]
+#endif
+ veor $t1,$t0
+ vbsl $Ch,$f,$g @ Ch(e,f,g)
+ vshr.u64 $t0,$a,#@Sigma0[0]
+ veor $t2,$t1 @ Sigma1(e)
+ vadd.i64 $T1,$Ch,$h
+ vshr.u64 $t1,$a,#@Sigma0[1]
+ vsli.64 $t0,$a,#`64-@Sigma0[0]`
+ vadd.i64 $T1,$t2
+ vshr.u64 $t2,$a,#@Sigma0[2]
+ vadd.i64 $K,@X[$i%16]
+ vsli.64 $t1,$a,#`64-@Sigma0[1]`
+ veor $Maj,$a,$b
+ vsli.64 $t2,$a,#`64-@Sigma0[2]`
+ veor $h,$t0,$t1
+ vadd.i64 $T1,$K
+ vbsl $Maj,$c,$b @ Maj(a,b,c)
+ veor $h,$t2 @ Sigma0(a)
+ vadd.i64 $d,$T1
+ vadd.i64 $Maj,$T1
+ @ vadd.i64 $h,$Maj
+___
+}
+
+sub NEON_16_79() {
+my $i=shift;
+
+if ($i&1) { &NEON_00_15($i,@_); return; }
+
+# 2x-vectorized, therefore runs every 2nd round
+my @X=map("q$_",(0..7)); # view @X as 128-bit vector
+my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps
+my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15
+my $e=@_[4]; # $e from NEON_00_15
+$i /= 2;
+$code.=<<___;
+ vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0]
+ vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1]
+ vadd.i64 @_[0],d30 @ h+=Maj from the past
+ vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2]
+ vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]`
+ vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1]
+ vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]`
+ veor $s1,$t0
+ vshr.u64 $t0,$s0,#@sigma0[0]
+ veor $s1,$t1 @ sigma1(X[i+14])
+ vshr.u64 $t1,$s0,#@sigma0[1]
+ vadd.i64 @X[$i%8],$s1
+ vshr.u64 $s1,$s0,#@sigma0[2]
+ vsli.64 $t0,$s0,#`64-@sigma0[0]`
+ vsli.64 $t1,$s0,#`64-@sigma0[1]`
+ vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9]
+ veor $s1,$t0
+ vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15
+ vadd.i64 @X[$i%8],$s0
+ vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15
+ veor $s1,$t1 @ sigma0(X[i+1])
+ vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15
+ vadd.i64 @X[$i%8],$s1
+___
+ &NEON_00_15(2*$i,@_);
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha512_block_data_order_neon
+.type sha512_block_data_order_neon,%function
+.align 4
+sha512_block_data_order_neon:
+.LNEON:
+ dmb @ errata #451034 on early Cortex A8
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
+ VFP_ABI_PUSH
+ adrl $Ktbl,K512
+ vldmia $ctx,{$A-$H} @ load context
+.Loop_neon:
+___
+for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ mov $cnt,#4
+.L16_79_neon:
+ subs $cnt,#1
+___
+for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ bne .L16_79_neon
+
+ vadd.i64 $A,d30 @ h+=Maj from the past
+ vldmia $ctx,{d24-d31} @ load context to temp
+ vadd.i64 q8,q12 @ vectorized accumulate
+ vadd.i64 q9,q13
+ vadd.i64 q10,q14
+ vadd.i64 q11,q15
+ vstmia $ctx,{$A-$H} @ save context
+ teq $inp,$len
+ sub $Ktbl,#640 @ rewind K512
+ bne .Loop_neon
+
+ VFP_ABI_POP
+ ret @ bx lr
+.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
+#endif
+___
+}
+$code.=<<___;
+.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx lr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
+print $code;
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S
deleted file mode 100644
index fe99472e507c..000000000000
--- a/arch/arm/crypto/sha512-armv7-neon.S
+++ /dev/null
@@ -1,455 +0,0 @@
-/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
- *
- * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/linkage.h>
-
-
-.syntax unified
-.code 32
-.fpu neon
-
-.text
-
-/* structure of SHA512_CONTEXT */
-#define hd_a 0
-#define hd_b ((hd_a) + 8)
-#define hd_c ((hd_b) + 8)
-#define hd_d ((hd_c) + 8)
-#define hd_e ((hd_d) + 8)
-#define hd_f ((hd_e) + 8)
-#define hd_g ((hd_f) + 8)
-
-/* register macros */
-#define RK %r2
-
-#define RA d0
-#define RB d1
-#define RC d2
-#define RD d3
-#define RE d4
-#define RF d5
-#define RG d6
-#define RH d7
-
-#define RT0 d8
-#define RT1 d9
-#define RT2 d10
-#define RT3 d11
-#define RT4 d12
-#define RT5 d13
-#define RT6 d14
-#define RT7 d15
-
-#define RT01q q4
-#define RT23q q5
-#define RT45q q6
-#define RT67q q7
-
-#define RW0 d16
-#define RW1 d17
-#define RW2 d18
-#define RW3 d19
-#define RW4 d20
-#define RW5 d21
-#define RW6 d22
-#define RW7 d23
-#define RW8 d24
-#define RW9 d25
-#define RW10 d26
-#define RW11 d27
-#define RW12 d28
-#define RW13 d29
-#define RW14 d30
-#define RW15 d31
-
-#define RW01q q8
-#define RW23q q9
-#define RW45q q10
-#define RW67q q11
-#define RW89q q12
-#define RW1011q q13
-#define RW1213q q14
-#define RW1415q q15
-
-/***********************************************************************
- * ARM assembly implementation of sha512 transform
- ***********************************************************************/
-#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
- rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
- /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
- vshr.u64 RT2, re, #14; \
- vshl.u64 RT3, re, #64 - 14; \
- interleave_op(arg1); \
- vshr.u64 RT4, re, #18; \
- vshl.u64 RT5, re, #64 - 18; \
- vld1.64 {RT0}, [RK]!; \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, re, #41; \
- vshl.u64 RT5, re, #64 - 41; \
- vadd.u64 RT0, RT0, rw0; \
- veor.64 RT23q, RT23q, RT45q; \
- vmov.64 RT7, re; \
- veor.64 RT1, RT2, RT3; \
- vbsl.64 RT7, rf, rg; \
- \
- vadd.u64 RT1, RT1, rh; \
- vshr.u64 RT2, ra, #28; \
- vshl.u64 RT3, ra, #64 - 28; \
- vadd.u64 RT1, RT1, RT0; \
- vshr.u64 RT4, ra, #34; \
- vshl.u64 RT5, ra, #64 - 34; \
- vadd.u64 RT1, RT1, RT7; \
- \
- /* h = Sum0 (a) + Maj (a, b, c); */ \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, ra, #39; \
- vshl.u64 RT5, ra, #64 - 39; \
- veor.64 RT0, ra, rb; \
- veor.64 RT23q, RT23q, RT45q; \
- vbsl.64 RT0, rc, rb; \
- vadd.u64 rd, rd, RT1; /* d+=t1; */ \
- veor.64 rh, RT2, RT3; \
- \
- /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
- vshr.u64 RT2, rd, #14; \
- vshl.u64 RT3, rd, #64 - 14; \
- vadd.u64 rh, rh, RT0; \
- vshr.u64 RT4, rd, #18; \
- vshl.u64 RT5, rd, #64 - 18; \
- vadd.u64 rh, rh, RT1; /* h+=t1; */ \
- vld1.64 {RT0}, [RK]!; \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, rd, #41; \
- vshl.u64 RT5, rd, #64 - 41; \
- vadd.u64 RT0, RT0, rw1; \
- veor.64 RT23q, RT23q, RT45q; \
- vmov.64 RT7, rd; \
- veor.64 RT1, RT2, RT3; \
- vbsl.64 RT7, re, rf; \
- \
- vadd.u64 RT1, RT1, rg; \
- vshr.u64 RT2, rh, #28; \
- vshl.u64 RT3, rh, #64 - 28; \
- vadd.u64 RT1, RT1, RT0; \
- vshr.u64 RT4, rh, #34; \
- vshl.u64 RT5, rh, #64 - 34; \
- vadd.u64 RT1, RT1, RT7; \
- \
- /* g = Sum0 (h) + Maj (h, a, b); */ \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, rh, #39; \
- vshl.u64 RT5, rh, #64 - 39; \
- veor.64 RT0, rh, ra; \
- veor.64 RT23q, RT23q, RT45q; \
- vbsl.64 RT0, rb, ra; \
- vadd.u64 rc, rc, RT1; /* c+=t1; */ \
- veor.64 rg, RT2, RT3; \
- \
- /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
- /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
- \
- /**** S0(w[1:2]) */ \
- \
- /* w[0:1] += w[9:10] */ \
- /* RT23q = rw1:rw2 */ \
- vext.u64 RT23q, rw01q, rw23q, #1; \
- vadd.u64 rw0, rw9; \
- vadd.u64 rg, rg, RT0; \
- vadd.u64 rw1, rw10;\
- vadd.u64 rg, rg, RT1; /* g+=t1; */ \
- \
- vshr.u64 RT45q, RT23q, #1; \
- vshl.u64 RT67q, RT23q, #64 - 1; \
- vshr.u64 RT01q, RT23q, #8; \
- veor.u64 RT45q, RT45q, RT67q; \
- vshl.u64 RT67q, RT23q, #64 - 8; \
- veor.u64 RT45q, RT45q, RT01q; \
- vshr.u64 RT01q, RT23q, #7; \
- veor.u64 RT45q, RT45q, RT67q; \
- \
- /**** S1(w[14:15]) */ \
- vshr.u64 RT23q, rw1415q, #6; \
- veor.u64 RT01q, RT01q, RT45q; \
- vshr.u64 RT45q, rw1415q, #19; \
- vshl.u64 RT67q, rw1415q, #64 - 19; \
- veor.u64 RT23q, RT23q, RT45q; \
- vshr.u64 RT45q, rw1415q, #61; \
- veor.u64 RT23q, RT23q, RT67q; \
- vshl.u64 RT67q, rw1415q, #64 - 61; \
- veor.u64 RT23q, RT23q, RT45q; \
- vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
- veor.u64 RT01q, RT23q, RT67q;
-#define vadd_RT01q(rw01q) \
- /* w[0:1] += S(w[14:15]) */ \
- vadd.u64 rw01q, RT01q;
-
-#define dummy(_) /*_*/
-
-#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
- interleave_op1, arg1, interleave_op2, arg2) \
- /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
- vshr.u64 RT2, re, #14; \
- vshl.u64 RT3, re, #64 - 14; \
- interleave_op1(arg1); \
- vshr.u64 RT4, re, #18; \
- vshl.u64 RT5, re, #64 - 18; \
- interleave_op2(arg2); \
- vld1.64 {RT0}, [RK]!; \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, re, #41; \
- vshl.u64 RT5, re, #64 - 41; \
- vadd.u64 RT0, RT0, rw0; \
- veor.64 RT23q, RT23q, RT45q; \
- vmov.64 RT7, re; \
- veor.64 RT1, RT2, RT3; \
- vbsl.64 RT7, rf, rg; \
- \
- vadd.u64 RT1, RT1, rh; \
- vshr.u64 RT2, ra, #28; \
- vshl.u64 RT3, ra, #64 - 28; \
- vadd.u64 RT1, RT1, RT0; \
- vshr.u64 RT4, ra, #34; \
- vshl.u64 RT5, ra, #64 - 34; \
- vadd.u64 RT1, RT1, RT7; \
- \
- /* h = Sum0 (a) + Maj (a, b, c); */ \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, ra, #39; \
- vshl.u64 RT5, ra, #64 - 39; \
- veor.64 RT0, ra, rb; \
- veor.64 RT23q, RT23q, RT45q; \
- vbsl.64 RT0, rc, rb; \
- vadd.u64 rd, rd, RT1; /* d+=t1; */ \
- veor.64 rh, RT2, RT3; \
- \
- /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
- vshr.u64 RT2, rd, #14; \
- vshl.u64 RT3, rd, #64 - 14; \
- vadd.u64 rh, rh, RT0; \
- vshr.u64 RT4, rd, #18; \
- vshl.u64 RT5, rd, #64 - 18; \
- vadd.u64 rh, rh, RT1; /* h+=t1; */ \
- vld1.64 {RT0}, [RK]!; \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, rd, #41; \
- vshl.u64 RT5, rd, #64 - 41; \
- vadd.u64 RT0, RT0, rw1; \
- veor.64 RT23q, RT23q, RT45q; \
- vmov.64 RT7, rd; \
- veor.64 RT1, RT2, RT3; \
- vbsl.64 RT7, re, rf; \
- \
- vadd.u64 RT1, RT1, rg; \
- vshr.u64 RT2, rh, #28; \
- vshl.u64 RT3, rh, #64 - 28; \
- vadd.u64 RT1, RT1, RT0; \
- vshr.u64 RT4, rh, #34; \
- vshl.u64 RT5, rh, #64 - 34; \
- vadd.u64 RT1, RT1, RT7; \
- \
- /* g = Sum0 (h) + Maj (h, a, b); */ \
- veor.64 RT23q, RT23q, RT45q; \
- vshr.u64 RT4, rh, #39; \
- vshl.u64 RT5, rh, #64 - 39; \
- veor.64 RT0, rh, ra; \
- veor.64 RT23q, RT23q, RT45q; \
- vbsl.64 RT0, rb, ra; \
- vadd.u64 rc, rc, RT1; /* c+=t1; */ \
- veor.64 rg, RT2, RT3;
-#define vadd_rg_RT0(rg) \
- vadd.u64 rg, rg, RT0;
-#define vadd_rg_RT1(rg) \
- vadd.u64 rg, rg, RT1; /* g+=t1; */
-
-.align 3
-ENTRY(sha512_transform_neon)
- /* Input:
- * %r0: SHA512_CONTEXT
- * %r1: data
- * %r2: u64 k[] constants
- * %r3: nblks
- */
- push {%lr};
-
- mov %lr, #0;
-
- /* Load context to d0-d7 */
- vld1.64 {RA-RD}, [%r0]!;
- vld1.64 {RE-RH}, [%r0];
- sub %r0, #(4*8);
-
- /* Load input to w[16], d16-d31 */
- /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
- vld1.64 {RW0-RW3}, [%r1]!;
- vld1.64 {RW4-RW7}, [%r1]!;
- vld1.64 {RW8-RW11}, [%r1]!;
- vld1.64 {RW12-RW15}, [%r1]!;
-#ifdef __ARMEL__
- /* byteswap */
- vrev64.8 RW01q, RW01q;
- vrev64.8 RW23q, RW23q;
- vrev64.8 RW45q, RW45q;
- vrev64.8 RW67q, RW67q;
- vrev64.8 RW89q, RW89q;
- vrev64.8 RW1011q, RW1011q;
- vrev64.8 RW1213q, RW1213q;
- vrev64.8 RW1415q, RW1415q;
-#endif
-
- /* EABI says that d8-d15 must be preserved by callee. */
- /*vpush {RT0-RT7};*/
-
-.Loop:
- rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
- RW23q, RW1415q, RW9, RW10, dummy, _);
- b .Lenter_rounds;
-
-.Loop_rounds:
- rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
- RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
-.Lenter_rounds:
- rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
- RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
- rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
- RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
- rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
- RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
- rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
- RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
- rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
- RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
- add %lr, #16;
- rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
- RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
- cmp %lr, #64;
- rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
- RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
- bne .Loop_rounds;
-
- subs %r3, #1;
-
- rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
- vadd_RT01q, RW1415q, dummy, _);
- rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
- vadd_rg_RT0, RG, vadd_rg_RT1, RG);
- beq .Lhandle_tail;
- vld1.64 {RW0-RW3}, [%r1]!;
- rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
- vadd_rg_RT0, RE, vadd_rg_RT1, RE);
- rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
- vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-#ifdef __ARMEL__
- vrev64.8 RW01q, RW01q;
- vrev64.8 RW23q, RW23q;
-#endif
- vld1.64 {RW4-RW7}, [%r1]!;
- rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
- vadd_rg_RT0, RA, vadd_rg_RT1, RA);
- rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
- vadd_rg_RT0, RG, vadd_rg_RT1, RG);
-#ifdef __ARMEL__
- vrev64.8 RW45q, RW45q;
- vrev64.8 RW67q, RW67q;
-#endif
- vld1.64 {RW8-RW11}, [%r1]!;
- rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
- vadd_rg_RT0, RE, vadd_rg_RT1, RE);
- rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
- vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-#ifdef __ARMEL__
- vrev64.8 RW89q, RW89q;
- vrev64.8 RW1011q, RW1011q;
-#endif
- vld1.64 {RW12-RW15}, [%r1]!;
- vadd_rg_RT0(RA);
- vadd_rg_RT1(RA);
-
- /* Load context */
- vld1.64 {RT0-RT3}, [%r0]!;
- vld1.64 {RT4-RT7}, [%r0];
- sub %r0, #(4*8);
-
-#ifdef __ARMEL__
- vrev64.8 RW1213q, RW1213q;
- vrev64.8 RW1415q, RW1415q;
-#endif
-
- vadd.u64 RA, RT0;
- vadd.u64 RB, RT1;
- vadd.u64 RC, RT2;
- vadd.u64 RD, RT3;
- vadd.u64 RE, RT4;
- vadd.u64 RF, RT5;
- vadd.u64 RG, RT6;
- vadd.u64 RH, RT7;
-
- /* Store the first half of context */
- vst1.64 {RA-RD}, [%r0]!;
- sub RK, $(8*80);
- vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
- mov %lr, #0;
- sub %r0, #(4*8);
-
- b .Loop;
-
-.Lhandle_tail:
- rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
- vadd_rg_RT0, RE, vadd_rg_RT1, RE);
- rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
- vadd_rg_RT0, RC, vadd_rg_RT1, RC);
- rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
- vadd_rg_RT0, RA, vadd_rg_RT1, RA);
- rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
- vadd_rg_RT0, RG, vadd_rg_RT1, RG);
- rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
- vadd_rg_RT0, RE, vadd_rg_RT1, RE);
- rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
- vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-
- /* Load context to d16-d23 */
- vld1.64 {RW0-RW3}, [%r0]!;
- vadd_rg_RT0(RA);
- vld1.64 {RW4-RW7}, [%r0];
- vadd_rg_RT1(RA);
- sub %r0, #(4*8);
-
- vadd.u64 RA, RW0;
- vadd.u64 RB, RW1;
- vadd.u64 RC, RW2;
- vadd.u64 RD, RW3;
- vadd.u64 RE, RW4;
- vadd.u64 RF, RW5;
- vadd.u64 RG, RW6;
- vadd.u64 RH, RW7;
-
- /* Store the first half of context */
- vst1.64 {RA-RD}, [%r0]!;
-
- /* Clear used registers */
- /* d16-d31 */
- veor.u64 RW01q, RW01q;
- veor.u64 RW23q, RW23q;
- veor.u64 RW45q, RW45q;
- veor.u64 RW67q, RW67q;
- vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
- veor.u64 RW89q, RW89q;
- veor.u64 RW1011q, RW1011q;
- veor.u64 RW1213q, RW1213q;
- veor.u64 RW1415q, RW1415q;
- /* d8-d15 */
- /*vpop {RT0-RT7};*/
- /* d0-d7 (q0-q3) */
- veor.u64 %q0, %q0;
- veor.u64 %q1, %q1;
- veor.u64 %q2, %q2;
- veor.u64 %q3, %q3;
-
- pop {%pc};
-ENDPROC(sha512_transform_neon)
diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped
new file mode 100644
index 000000000000..3694c4d4ca2b
--- /dev/null
+++ b/arch/arm/crypto/sha512-core.S_shipped
@@ -0,0 +1,1861 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Permission to use under GPL terms is granted.
+@ ====================================================================
+
+@ SHA512 block procedure for ARMv4. September 2007.
+
+@ This code is ~4.5 (four and a half) times faster than code generated
+@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+@ Xscale PXA250 core].
+@
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
+@ Cortex A8 core and ~40 cycles per processed byte.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 7%
+@ improvement on Cortex A8 core and ~38 cycles per byte.
+
+@ March 2011.
+@
+@ Add NEON implementation. On Cortex A8 it was measured to process
+@ one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+@ August 2012.
+@
+@ Improve NEON performance by 12% on Snapdragon S4. In absolute
+@ terms it's 22.6 cycles per byte, which is a disappointing result.
+@ Technical writers asserted that 3-way S4 pipeline can sustain
+@ multiple NEON instructions per cycle, but dual NEON issue could
+@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+@ for further details. As a side note, Cortex-A15 processes one byte in
+@ 16 cycles.
+
+@ Byte order [in]dependence. =========================================
+@
+@ Originally caller was expected to maintain specific *dword* order in
+@ h[0-7], namely with most significant dword at *lower* address, which
+@ was reflected in below two parameters as 0 and 4. Now caller is
+@ expected to maintain native byte order for whole 64-bit values.
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K512,%object
+.align 5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size K512,.-K512
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha512_block_data_order
+.skip 32-4
+#else
+.skip 32
+#endif
+
+.global sha512_block_data_order @ integer-register SHA-512 block function: r0=state (8 x 64-bit), r1=input, r2=count of 128-byte blocks
+.type sha512_block_data_order,%function
+sha512_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha512_block_data_order
+#else
+ adr r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap @ offset of the capability word relative to this function
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#1
+ bne .LNEON @ bit 0 set: use the NEON entry point instead
+#endif
+ add r2,r1,r2,lsl#7 @ len to point at the end of inp
+ stmdb sp!,{r4-r12,lr} @ saved here, restored by the ldmia at exit
+ sub r14,r3,#672 @ K512 (640-byte table plus 32 bytes of padding precede the entry point)
+ sub sp,sp,#9*8 @ frame: a..h at sp+0..sp+56, X[i] at sp+64
+
+ ldr r7,[r0,#32+LO] @ e.lo
+ ldr r8,[r0,#32+HI] @ e.hi
+ ldr r9, [r0,#48+LO] @ g.lo
+ ldr r10, [r0,#48+HI] @ g.hi
+ ldr r11, [r0,#56+LO] @ h.lo
+ ldr r12, [r0,#56+HI] @ h.hi
+.Loop: @ per-block loop; expects e in r7/r8, g in r9/r10, h in r11/r12
+ str r9, [sp,#48+0] @ g -> frame
+ str r10, [sp,#48+4]
+ str r11, [sp,#56+0] @ h -> frame
+ str r12, [sp,#56+4]
+ ldr r5,[r0,#0+LO] @ a.lo (a stays in r5/r6)
+ ldr r6,[r0,#0+HI] @ a.hi
+ ldr r3,[r0,#8+LO] @ b
+ ldr r4,[r0,#8+HI]
+ ldr r9, [r0,#16+LO] @ c
+ ldr r10, [r0,#16+HI]
+ ldr r11, [r0,#24+LO] @ d
+ ldr r12, [r0,#24+HI]
+ str r3,[sp,#8+0] @ b -> frame
+ str r4,[sp,#8+4]
+ str r9, [sp,#16+0] @ c -> frame
+ str r10, [sp,#16+4]
+ str r11, [sp,#24+0] @ d -> frame
+ str r12, [sp,#24+4]
+ ldr r3,[r0,#40+LO] @ f
+ ldr r4,[r0,#40+HI]
+ str r3,[sp,#40+0] @ f -> frame
+ str r4,[sp,#40+4]
+
+.L00_15: @ rounds 0..15: X[i] is read directly from the input
+#if __ARM_ARCH__<7
+ ldrb r3,[r1,#7] @ assemble the big-endian 64-bit word one byte at a time
+ ldrb r9, [r1,#6]
+ ldrb r10, [r1,#5]
+ ldrb r11, [r1,#4]
+ ldrb r4,[r1,#3]
+ ldrb r12, [r1,#2]
+ orr r3,r3,r9,lsl#8
+ ldrb r9, [r1,#1]
+ orr r3,r3,r10,lsl#16
+ ldrb r10, [r1],#8 @ byte 0, then advance inp by 8
+ orr r3,r3,r11,lsl#24 @ r3 = X[i].lo (bytes 4..7)
+ orr r4,r4,r12,lsl#8
+ orr r4,r4,r9,lsl#16
+ orr r4,r4,r10,lsl#24 @ r4 = X[i].hi (bytes 0..3)
+#else
+ ldr r3,[r1,#4] @ X[i].lo
+ ldr r4,[r1],#8 @ X[i].hi, then advance inp by 8
+#ifdef __ARMEL__
+ rev r3,r3 @ input words are big-endian
+ rev r4,r4
+#endif
+#endif
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+ mov r9,r7,lsr#14
+ str r3,[sp,#64+0] @ X[i].lo -> frame
+ mov r10,r8,lsr#14
+ str r4,[sp,#64+4] @ X[i].hi -> frame
+ eor r9,r9,r8,lsl#18
+ ldr r11,[sp,#56+0] @ h.lo
+ eor r10,r10,r7,lsl#18
+ ldr r12,[sp,#56+4] @ h.hi
+ eor r9,r9,r7,lsr#18
+ eor r10,r10,r8,lsr#18
+ eor r9,r9,r8,lsl#14
+ eor r10,r10,r7,lsl#14
+ eor r9,r9,r8,lsr#9
+ eor r10,r10,r7,lsr#9
+ eor r9,r9,r7,lsl#23
+ eor r10,r10,r8,lsl#23 @ Sigma1(e)
+ adds r3,r3,r9
+ ldr r9,[sp,#40+0] @ f.lo
+ adc r4,r4,r10 @ T += Sigma1(e)
+ ldr r10,[sp,#40+4] @ f.hi
+ adds r3,r3,r11
+ ldr r11,[sp,#48+0] @ g.lo
+ adc r4,r4,r12 @ T += h
+ ldr r12,[sp,#48+4] @ g.hi
+
+ eor r9,r9,r11
+ str r7,[sp,#32+0] @ e.lo -> frame
+ eor r10,r10,r12
+ str r8,[sp,#32+4] @ e.hi -> frame
+ and r9,r9,r7
+ str r5,[sp,#0+0] @ a.lo -> frame
+ and r10,r10,r8
+ str r6,[sp,#0+4] @ a.hi -> frame
+ eor r9,r9,r11
+ ldr r11,[r14,#LO] @ K[i].lo
+ eor r10,r10,r12 @ Ch(e,f,g)
+ ldr r12,[r14,#HI] @ K[i].hi
+
+ adds r3,r3,r9
+ ldr r7,[sp,#24+0] @ d.lo
+ adc r4,r4,r10 @ T += Ch(e,f,g)
+ ldr r8,[sp,#24+4] @ d.hi
+ adds r3,r3,r11
+ and r9,r11,#0xff @ low byte of K[i]
+ adc r4,r4,r12 @ T += K[i]
+ adds r7,r7,r3
+ ldr r11,[sp,#8+0] @ b.lo
+ adc r8,r8,r4 @ d += T
+ teq r9,#148 @ 0x94 is the low byte of K[15], the last constant of this phase
+
+ ldr r12,[sp,#16+0] @ c.lo
+#if __ARM_ARCH__>=7
+ it eq @ Thumb2 thing, sanity check in ARM
+#endif
+ orreq r14,r14,#1 @ mark the final round in bit 0 of the K pointer
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov r9,r5,lsr#28
+ mov r10,r6,lsr#28
+ eor r9,r9,r6,lsl#4
+ eor r10,r10,r5,lsl#4
+ eor r9,r9,r6,lsr#2
+ eor r10,r10,r5,lsr#2
+ eor r9,r9,r5,lsl#30
+ eor r10,r10,r6,lsl#30
+ eor r9,r9,r6,lsr#7
+ eor r10,r10,r5,lsr#7
+ eor r9,r9,r5,lsl#25
+ eor r10,r10,r6,lsl#25 @ Sigma0(a)
+ adds r3,r3,r9
+ and r9,r5,r11
+ adc r4,r4,r10 @ T += Sigma0(a)
+
+ ldr r10,[sp,#8+4] @ b.hi
+ orr r5,r5,r11
+ ldr r11,[sp,#16+4] @ c.hi
+ and r5,r5,r12
+ and r12,r6,r10
+ orr r6,r6,r10
+ orr r5,r5,r9 @ Maj(a,b,c).lo
+ and r6,r6,r11
+ adds r5,r5,r3
+ orr r6,r6,r12 @ Maj(a,b,c).hi
+ sub sp,sp,#8 @ slide the frame down one slot: old a becomes b, old b becomes c, ...
+ adc r6,r6,r4 @ h += T
+ tst r14,#1 @ flags from this test are overwritten by the tst two lines below
+ add r14,r14,#8 @ advance to the next K[i]
+ tst r14,#1
+ beq .L00_15
+ ldr r9,[sp,#184+0] @ X[i-15].lo for the first schedule round
+ ldr r10,[sp,#184+4] @ X[i-15].hi
+ bic r14,r14,#1 @ clear the flag bit again
+.L16_79: @ rounds 16..79: X[i] is derived from earlier schedule words
+ @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+ @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+ @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
+ mov r3,r9,lsr#1
+ ldr r11,[sp,#80+0] @ X[i-2].lo
+ mov r4,r10,lsr#1
+ ldr r12,[sp,#80+4] @ X[i-2].hi
+ eor r3,r3,r10,lsl#31
+ eor r4,r4,r9,lsl#31
+ eor r3,r3,r9,lsr#8
+ eor r4,r4,r10,lsr#8
+ eor r3,r3,r10,lsl#24
+ eor r4,r4,r9,lsl#24
+ eor r3,r3,r9,lsr#7
+ eor r4,r4,r10,lsr#7
+ eor r3,r3,r10,lsl#25
+
+ @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+ @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+ @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+ mov r9,r11,lsr#19
+ mov r10,r12,lsr#19
+ eor r9,r9,r12,lsl#13
+ eor r10,r10,r11,lsl#13
+ eor r9,r9,r12,lsr#29
+ eor r10,r10,r11,lsr#29
+ eor r9,r9,r11,lsl#3
+ eor r10,r10,r12,lsl#3
+ eor r9,r9,r11,lsr#6
+ eor r10,r10,r12,lsr#6
+ ldr r11,[sp,#120+0] @ X[i-7].lo
+ eor r9,r9,r12,lsl#26
+
+ ldr r12,[sp,#120+4] @ X[i-7].hi
+ adds r3,r3,r9 @ sigma0 + sigma1
+ ldr r9,[sp,#192+0] @ X[i-16].lo
+ adc r4,r4,r10
+
+ ldr r10,[sp,#192+4] @ X[i-16].hi
+ adds r3,r3,r11 @ += X[i-7]
+ adc r4,r4,r12
+ adds r3,r3,r9 @ += X[i-16]
+ adc r4,r4,r10
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+ mov r9,r7,lsr#14
+ str r3,[sp,#64+0] @ X[i].lo -> frame
+ mov r10,r8,lsr#14
+ str r4,[sp,#64+4] @ X[i].hi -> frame
+ eor r9,r9,r8,lsl#18
+ ldr r11,[sp,#56+0] @ h.lo
+ eor r10,r10,r7,lsl#18
+ ldr r12,[sp,#56+4] @ h.hi
+ eor r9,r9,r7,lsr#18
+ eor r10,r10,r8,lsr#18
+ eor r9,r9,r8,lsl#14
+ eor r10,r10,r7,lsl#14
+ eor r9,r9,r8,lsr#9
+ eor r10,r10,r7,lsr#9
+ eor r9,r9,r7,lsl#23
+ eor r10,r10,r8,lsl#23 @ Sigma1(e)
+ adds r3,r3,r9
+ ldr r9,[sp,#40+0] @ f.lo
+ adc r4,r4,r10 @ T += Sigma1(e)
+ ldr r10,[sp,#40+4] @ f.hi
+ adds r3,r3,r11
+ ldr r11,[sp,#48+0] @ g.lo
+ adc r4,r4,r12 @ T += h
+ ldr r12,[sp,#48+4] @ g.hi
+
+ eor r9,r9,r11
+ str r7,[sp,#32+0] @ e.lo -> frame
+ eor r10,r10,r12
+ str r8,[sp,#32+4] @ e.hi -> frame
+ and r9,r9,r7
+ str r5,[sp,#0+0] @ a.lo -> frame
+ and r10,r10,r8
+ str r6,[sp,#0+4] @ a.hi -> frame
+ eor r9,r9,r11
+ ldr r11,[r14,#LO] @ K[i].lo
+ eor r10,r10,r12 @ Ch(e,f,g)
+ ldr r12,[r14,#HI] @ K[i].hi
+
+ adds r3,r3,r9
+ ldr r7,[sp,#24+0] @ d.lo
+ adc r4,r4,r10 @ T += Ch(e,f,g)
+ ldr r8,[sp,#24+4] @ d.hi
+ adds r3,r3,r11
+ and r9,r11,#0xff @ low byte of K[i]
+ adc r4,r4,r12 @ T += K[i]
+ adds r7,r7,r3
+ ldr r11,[sp,#8+0] @ b.lo
+ adc r8,r8,r4 @ d += T
+ teq r9,#23 @ 0x17 is the low byte of K[79], the last constant in the table
+
+ ldr r12,[sp,#16+0] @ c.lo
+#if __ARM_ARCH__>=7
+ it eq @ Thumb2 thing, sanity check in ARM
+#endif
+ orreq r14,r14,#1 @ mark the final round in bit 0 of the K pointer
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov r9,r5,lsr#28
+ mov r10,r6,lsr#28
+ eor r9,r9,r6,lsl#4
+ eor r10,r10,r5,lsl#4
+ eor r9,r9,r6,lsr#2
+ eor r10,r10,r5,lsr#2
+ eor r9,r9,r5,lsl#30
+ eor r10,r10,r6,lsl#30
+ eor r9,r9,r6,lsr#7
+ eor r10,r10,r5,lsr#7
+ eor r9,r9,r5,lsl#25
+ eor r10,r10,r6,lsl#25 @ Sigma0(a)
+ adds r3,r3,r9
+ and r9,r5,r11
+ adc r4,r4,r10 @ T += Sigma0(a)
+
+ ldr r10,[sp,#8+4] @ b.hi
+ orr r5,r5,r11
+ ldr r11,[sp,#16+4] @ c.hi
+ and r5,r5,r12
+ and r12,r6,r10
+ orr r6,r6,r10
+ orr r5,r5,r9 @ Maj(a,b,c).lo
+ and r6,r6,r11
+ adds r5,r5,r3
+ orr r6,r6,r12 @ Maj(a,b,c).hi
+ sub sp,sp,#8 @ slide the frame down one slot: old a becomes b, old b becomes c, ...
+ adc r6,r6,r4 @ h += T
+ tst r14,#1 @ flag still clear: more schedule rounds to run
+ add r14,r14,#8 @ advance to the next K[i]
+#if __ARM_ARCH__>=7
+ ittt eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq r9,[sp,#184+0] @ X[i-15].lo for the next round
+ ldreq r10,[sp,#184+4] @ X[i-15].hi
+ beq .L16_79
+ bic r14,r14,#1 @ clear the flag bit again
+
+ ldr r3,[sp,#8+0] @ b (a is still in r5/r6)
+ ldr r4,[sp,#8+4]
+ ldr r9, [r0,#0+LO]
+ ldr r10, [r0,#0+HI]
+ ldr r11, [r0,#8+LO]
+ ldr r12, [r0,#8+HI]
+ adds r9,r5,r9 @ state[0] += a
+ str r9, [r0,#0+LO]
+ adc r10,r6,r10
+ str r10, [r0,#0+HI]
+ adds r11,r3,r11 @ state[1] += b
+ str r11, [r0,#8+LO]
+ adc r12,r4,r12
+ str r12, [r0,#8+HI]
+
+ ldr r5,[sp,#16+0] @ c
+ ldr r6,[sp,#16+4]
+ ldr r3,[sp,#24+0] @ d
+ ldr r4,[sp,#24+4]
+ ldr r9, [r0,#16+LO]
+ ldr r10, [r0,#16+HI]
+ ldr r11, [r0,#24+LO]
+ ldr r12, [r0,#24+HI]
+ adds r9,r5,r9 @ state[2] += c
+ str r9, [r0,#16+LO]
+ adc r10,r6,r10
+ str r10, [r0,#16+HI]
+ adds r11,r3,r11 @ state[3] += d
+ str r11, [r0,#24+LO]
+ adc r12,r4,r12
+ str r12, [r0,#24+HI]
+
+ ldr r3,[sp,#40+0] @ f (e is still in r7/r8)
+ ldr r4,[sp,#40+4]
+ ldr r9, [r0,#32+LO]
+ ldr r10, [r0,#32+HI]
+ ldr r11, [r0,#40+LO]
+ ldr r12, [r0,#40+HI]
+ adds r7,r7,r9 @ state[4] += e; result stays in r7/r8 for the next block
+ str r7,[r0,#32+LO]
+ adc r8,r8,r10
+ str r8,[r0,#32+HI]
+ adds r11,r3,r11 @ state[5] += f
+ str r11, [r0,#40+LO]
+ adc r12,r4,r12
+ str r12, [r0,#40+HI]
+
+ ldr r5,[sp,#48+0] @ g
+ ldr r6,[sp,#48+4]
+ ldr r3,[sp,#56+0] @ h
+ ldr r4,[sp,#56+4]
+ ldr r9, [r0,#48+LO]
+ ldr r10, [r0,#48+HI]
+ ldr r11, [r0,#56+LO]
+ ldr r12, [r0,#56+HI]
+ adds r9,r5,r9 @ state[6] += g; r9/r10 carry g into the next block
+ str r9, [r0,#48+LO]
+ adc r10,r6,r10
+ str r10, [r0,#48+HI]
+ adds r11,r3,r11 @ state[7] += h; r11/r12 carry h into the next block
+ str r11, [r0,#56+LO]
+ adc r12,r4,r12
+ str r12, [r0,#56+HI]
+
+ add sp,sp,#640 @ undo the 80 per-round frame slides of 8 bytes each
+ sub r14,r14,#640 @ rewind the K pointer to the start of K512
+
+ teq r1,r2 @ reached the end of the input?
+ bne .Loop
+
+ add sp,sp,#8*9 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc} @ restore registers and return
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size sha512_block_data_order,.-sha512_block_data_order
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha512_block_data_order_neon
+.type sha512_block_data_order_neon,%function
+.align 4
+sha512_block_data_order_neon:
+.LNEON:
+ dmb @ errata #451034 on early Cortex A8
+ add r2,r1,r2,lsl#7 @ len to point at the end of inp
+ VFP_ABI_PUSH
+ adrl r3,K512
+ vldmia r0,{d16-d23} @ load context
+.Loop_neon:
+ vshr.u64 d24,d20,#14 @ 0
+#if 0<16
+ vld1.64 {d0},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d20,#18
+#if 0>0
+ vadd.i64 d16,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d20,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 0<16 && defined(__ARMEL__)
+ vrev64.8 d0,d0
+#endif
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d0
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 1
+#if 1<16
+ vld1.64 {d1},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 1>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 1<16 && defined(__ARMEL__)
+ vrev64.8 d1,d1
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d1
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 d24,d18,#14 @ 2
+#if 2<16
+ vld1.64 {d2},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d18,#18
+#if 2>0
+ vadd.i64 d22,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d18,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 2<16 && defined(__ARMEL__)
+ vrev64.8 d2,d2
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d2
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 3
+#if 3<16
+ vld1.64 {d3},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 3>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 3<16 && defined(__ARMEL__)
+ vrev64.8 d3,d3
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d3
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 d24,d16,#14 @ 4
+#if 4<16
+ vld1.64 {d4},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d16,#18
+#if 4>0
+ vadd.i64 d20,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d16,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 4<16 && defined(__ARMEL__)
+ vrev64.8 d4,d4
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d4
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 5
+#if 5<16
+ vld1.64 {d5},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 5>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 5<16 && defined(__ARMEL__)
+ vrev64.8 d5,d5
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d5
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 d24,d22,#14 @ 6
+#if 6<16
+ vld1.64 {d6},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d22,#18
+#if 6>0
+ vadd.i64 d18,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d22,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 6<16 && defined(__ARMEL__)
+ vrev64.8 d6,d6
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d6
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 7
+#if 7<16
+ vld1.64 {d7},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 7>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 7<16 && defined(__ARMEL__)
+ vrev64.8 d7,d7
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d7
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ vshr.u64 d24,d20,#14 @ 8
+#if 8<16
+ vld1.64 {d8},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d20,#18
+#if 8>0
+ vadd.i64 d16,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d20,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 8<16 && defined(__ARMEL__)
+ vrev64.8 d8,d8
+#endif
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d8
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 9
+#if 9<16
+ vld1.64 {d9},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 9>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 9<16 && defined(__ARMEL__)
+ vrev64.8 d9,d9
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d9
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 d24,d18,#14 @ 10
+#if 10<16
+ vld1.64 {d10},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d18,#18
+#if 10>0
+ vadd.i64 d22,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d18,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 10<16 && defined(__ARMEL__)
+ vrev64.8 d10,d10
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d10
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 11
+#if 11<16
+ vld1.64 {d11},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 11>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 11<16 && defined(__ARMEL__)
+ vrev64.8 d11,d11
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d11
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 d24,d16,#14 @ 12
+#if 12<16
+ vld1.64 {d12},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d16,#18
+#if 12>0
+ vadd.i64 d20,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d16,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 12<16 && defined(__ARMEL__)
+ vrev64.8 d12,d12
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d12
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 13
+#if 13<16
+ vld1.64 {d13},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 13>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 13<16 && defined(__ARMEL__)
+ vrev64.8 d13,d13
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d13
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 d24,d22,#14 @ 14
+#if 14<16
+ vld1.64 {d14},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d22,#18
+#if 14>0
+ vadd.i64 d18,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d22,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 14<16 && defined(__ARMEL__)
+ vrev64.8 d14,d14
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d14
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 15
+#if 15<16
+ vld1.64 {d15},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 15>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 15<16 && defined(__ARMEL__)
+ vrev64.8 d15,d15
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d15
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ mov r12,#4
+.L16_79_neon:
+ subs r12,#1
+ vshr.u64 q12,q7,#19
+ vshr.u64 q13,q7,#61
+ vadd.i64 d16,d30 @ h+=Maj from the past
+ vshr.u64 q15,q7,#6
+ vsli.64 q12,q7,#45
+ vext.8 q14,q0,q1,#8 @ X[i+1]
+ vsli.64 q13,q7,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q0,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q4,q5,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d20,#14 @ from NEON_00_15
+ vadd.i64 q0,q14
+ vshr.u64 d25,d20,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d20,#41 @ from NEON_00_15
+ vadd.i64 q0,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 16<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d0
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 17
+#if 17<16
+ vld1.64 {d1},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 17>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 17<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d1
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 q12,q0,#19
+ vshr.u64 q13,q0,#61
+ vadd.i64 d22,d30 @ h+=Maj from the past
+ vshr.u64 q15,q0,#6
+ vsli.64 q12,q0,#45
+ vext.8 q14,q1,q2,#8 @ X[i+1]
+ vsli.64 q13,q0,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q1,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q5,q6,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d18,#14 @ from NEON_00_15
+ vadd.i64 q1,q14
+ vshr.u64 d25,d18,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d18,#41 @ from NEON_00_15
+ vadd.i64 q1,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 18<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d2
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 19
+#if 19<16
+ vld1.64 {d3},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 19>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 19<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d3
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 q12,q1,#19
+ vshr.u64 q13,q1,#61
+ vadd.i64 d20,d30 @ h+=Maj from the past
+ vshr.u64 q15,q1,#6
+ vsli.64 q12,q1,#45
+ vext.8 q14,q2,q3,#8 @ X[i+1]
+ vsli.64 q13,q1,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q2,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q6,q7,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d16,#14 @ from NEON_00_15
+ vadd.i64 q2,q14
+ vshr.u64 d25,d16,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d16,#41 @ from NEON_00_15
+ vadd.i64 q2,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 20<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d4
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 21
+#if 21<16
+ vld1.64 {d5},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 21>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 21<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d5
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 q12,q2,#19
+ vshr.u64 q13,q2,#61
+ vadd.i64 d18,d30 @ h+=Maj from the past
+ vshr.u64 q15,q2,#6
+ vsli.64 q12,q2,#45
+ vext.8 q14,q3,q4,#8 @ X[i+1]
+ vsli.64 q13,q2,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q3,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q7,q0,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d22,#14 @ from NEON_00_15
+ vadd.i64 q3,q14
+ vshr.u64 d25,d22,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d22,#41 @ from NEON_00_15
+ vadd.i64 q3,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 22<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d6
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 23
+#if 23<16
+ vld1.64 {d7},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 23>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 23<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d7
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ vshr.u64 q12,q3,#19
+ vshr.u64 q13,q3,#61
+ vadd.i64 d16,d30 @ h+=Maj from the past
+ vshr.u64 q15,q3,#6
+ vsli.64 q12,q3,#45
+ vext.8 q14,q4,q5,#8 @ X[i+1]
+ vsli.64 q13,q3,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q4,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q0,q1,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d20,#14 @ from NEON_00_15
+ vadd.i64 q4,q14
+ vshr.u64 d25,d20,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d20,#41 @ from NEON_00_15
+ vadd.i64 q4,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 24<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d8
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 25
+#if 25<16
+ vld1.64 {d9},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 25>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 25<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d9
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 q12,q4,#19
+ vshr.u64 q13,q4,#61
+ vadd.i64 d22,d30 @ h+=Maj from the past
+ vshr.u64 q15,q4,#6
+ vsli.64 q12,q4,#45
+ vext.8 q14,q5,q6,#8 @ X[i+1]
+ vsli.64 q13,q4,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q5,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q1,q2,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d18,#14 @ from NEON_00_15
+ vadd.i64 q5,q14
+ vshr.u64 d25,d18,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d18,#41 @ from NEON_00_15
+ vadd.i64 q5,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 26<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d10
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 27
+#if 27<16
+ vld1.64 {d11},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 27>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 27<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d11
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 q12,q5,#19
+ vshr.u64 q13,q5,#61
+ vadd.i64 d20,d30 @ h+=Maj from the past
+ vshr.u64 q15,q5,#6
+ vsli.64 q12,q5,#45
+ vext.8 q14,q6,q7,#8 @ X[i+1]
+ vsli.64 q13,q5,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q6,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q2,q3,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d16,#14 @ from NEON_00_15
+ vadd.i64 q6,q14
+ vshr.u64 d25,d16,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d16,#41 @ from NEON_00_15
+ vadd.i64 q6,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 28<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d12
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 29
+#if 29<16
+ vld1.64 {d13},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 29>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 29<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d13
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 q12,q6,#19
+ vshr.u64 q13,q6,#61
+ vadd.i64 d18,d30 @ h+=Maj from the past
+ vshr.u64 q15,q6,#6
+ vsli.64 q12,q6,#45
+ vext.8 q14,q7,q0,#8 @ X[i+1]
+ vsli.64 q13,q6,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q7,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q3,q4,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d22,#14 @ from NEON_00_15
+ vadd.i64 q7,q14
+ vshr.u64 d25,d22,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d22,#41 @ from NEON_00_15
+ vadd.i64 q7,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 30<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d14
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 31
+#if 31<16
+ vld1.64 {d15},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 31>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 31<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d15
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ bne .L16_79_neon
+
+ vadd.i64 d16,d30 @ h+=Maj from the past
+ vldmia r0,{d24-d31} @ load context to temp
+ vadd.i64 q8,q12 @ vectorized accumulate
+ vadd.i64 q9,q13
+ vadd.i64 q10,q14
+ vadd.i64 q11,q15
+ vstmia r0,{d16-d23} @ save context
+ teq r1,r2
+ sub r3,#640 @ rewind K512
+ bne .Loop_neon
+
+ VFP_ABI_POP
+ bx lr @ .word 0xe12fff1e
+.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
+#endif
+.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
new file mode 100644
index 000000000000..269a394e4a53
--- /dev/null
+++ b/arch/arm/crypto/sha512-glue.c
@@ -0,0 +1,121 @@
+/*
+ * sha512-glue.c - accelerated SHA-384/512 for ARM
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+#include "sha512.h"
+
+MODULE_DESCRIPTION("Accelerated SHA-384/SHA-512 secure hash for ARM");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
+MODULE_ALIAS_CRYPTO("sha384-arm");
+MODULE_ALIAS_CRYPTO("sha512-arm");
+
+asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks);
+
+int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{ /* .update hook of sha512_arm_algs; also called by the NEON glue as its non-SIMD path */
+ return sha512_base_do_update(desc, data, len, /* helper's result is passed straight back */
+ (sha512_block_fn *)sha512_block_data_order); /* cast from the asmlinkage prototype above */
+}
+
+static int sha512_arm_final(struct shash_desc *desc, u8 *out) /* file-local: sha512.h exports only update/finup */
+{ /* .final hook of sha512_arm_algs; also the last step of sha512_arm_finup() */
+ sha512_base_do_finalize(desc, /* finalize using the scalar block routine */
+ (sha512_block_fn *)sha512_block_data_order);
+ return sha512_base_finish(desc, out); /* result of sha512_base_finish() is returned as-is */
+}
+
+int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{ /* .finup hook of sha512_arm_algs: one update pass over data, then the final step */
+ sha512_base_do_update(desc, data, len, /* return value is not checked here */
+ (sha512_block_fn *)sha512_block_data_order);
+ return sha512_arm_final(desc, out); /* the result comes from sha512_arm_final() */
+}
+
+static struct shash_alg sha512_arm_algs[] = { { /* [0]: "sha384" backed by sha512_block_data_order */
+ .init = sha384_base_init, /* only callback that differs from the sha512 entry */
+ .update = sha512_arm_update,
+ .final = sha512_arm_final,
+ .finup = sha512_arm_finup,
+ .descsize = sizeof(struct sha512_state), /* same state struct for both entries */
+ .digestsize = SHA384_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-arm",
+ .cra_priority = 250, /* the table in sha512-neon-glue.c uses 300 */
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, { /* [1]: "sha512" backed by sha512_block_data_order */
+ .init = sha512_base_init,
+ .update = sha512_arm_update,
+ .final = sha512_arm_final,
+ .finup = sha512_arm_finup,
+ .descsize = sizeof(struct sha512_state),
+ .digestsize = SHA512_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512-arm",
+ .cra_priority = 250, /* the table in sha512-neon-glue.c uses 300 */
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha512_arm_mod_init(void) /* module_init() entry point; returns 0 or the registration error */
+{
+ int err;
+
+ err = crypto_register_shashes(sha512_arm_algs, /* scalar drivers are registered unconditionally */
+ ARRAY_SIZE(sha512_arm_algs));
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { /* NEON table only when configured and cpu_has_neon() is true */
+ err = crypto_register_shashes(sha512_neon_algs,
+ ARRAY_SIZE(sha512_neon_algs));
+ if (err)
+ goto err_unregister;
+ }
+ return 0;
+
+err_unregister: /* NEON registration failed: undo the scalar registration done above */
+ crypto_unregister_shashes(sha512_arm_algs,
+ ARRAY_SIZE(sha512_arm_algs));
+
+ return err;
+}
+
+static void __exit sha512_arm_mod_fini(void) /* module_exit() entry point */
+{
+ crypto_unregister_shashes(sha512_arm_algs, /* scalar drivers were always registered by mod_init */
+ ARRAY_SIZE(sha512_arm_algs));
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) /* same condition as in sha512_arm_mod_init() */
+ crypto_unregister_shashes(sha512_neon_algs,
+ ARRAY_SIZE(sha512_neon_algs));
+}
+
+module_init(sha512_arm_mod_init);
+module_exit(sha512_arm_mod_fini);
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
new file mode 100644
index 000000000000..32693684a3ab
--- /dev/null
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -0,0 +1,98 @@
+/*
+ * sha512-neon-glue.c - accelerated SHA-384/512 for ARM NEON
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+#include "sha512.h"
+
+MODULE_ALIAS_CRYPTO("sha384-neon");
+MODULE_ALIAS_CRYPTO("sha512-neon");
+
+asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src,
+ int blocks);
+
+static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{ /* .update hook of sha512_neon_algs */
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ if (!may_use_simd() || /* use the scalar update when SIMD may not be used, or ... */
+ (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) /* ... when count[0] mod block size plus len is below one block */
+ return sha512_arm_update(desc, data, len); /* this path never enters kernel_neon_begin() */
+
+ kernel_neon_begin(); /* paired with kernel_neon_end() below */
+ sha512_base_do_update(desc, data, len, /* return value is ignored; 0 is returned below */
+ (sha512_block_fn *)sha512_block_data_order_neon);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{ /* .finup hook of sha512_neon_algs; sha512_neon_final() calls it with data == NULL, len == 0 */
+ if (!may_use_simd())
+ return sha512_arm_finup(desc, data, len, out); /* scalar update + final from sha512-glue.c */
+
+ kernel_neon_begin(); /* update and finalize share one begin/end section */
+ if (len) /* skip the update step when there is no trailing data */
+ sha512_base_do_update(desc, data, len,
+ (sha512_block_fn *)sha512_block_data_order_neon);
+ sha512_base_do_finalize(desc,
+ (sha512_block_fn *)sha512_block_data_order_neon);
+ kernel_neon_end();
+
+ return sha512_base_finish(desc, out); /* called after kernel_neon_end() */
+}
+
+static int sha512_neon_final(struct shash_desc *desc, u8 *out) /* .final hook of sha512_neon_algs */
+{
+ return sha512_neon_finup(desc, NULL, 0, out); /* len == 0 makes finup skip its update step */
+}
+
+struct shash_alg sha512_neon_algs[] = { { /* [0]: "sha384"; table is non-static because sha512.h declares it for sha512-glue.c */
+ .init = sha384_base_init, /* only callback that differs from the sha512 entry */
+ .update = sha512_neon_update,
+ .final = sha512_neon_final,
+ .finup = sha512_neon_finup,
+ .descsize = sizeof(struct sha512_state),
+ .digestsize = SHA384_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-neon",
+ .cra_priority = 300, /* sha512_arm_algs in sha512-glue.c uses 250 */
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE, /* NOTE(review): sha512_arm_algs uses SHA512_BLOCK_SIZE for its sha384 entry; presumably the same value - confirm */
+ .cra_module = THIS_MODULE,
+
+ }
+}, { /* [1]: "sha512" */
+ .init = sha512_base_init,
+ .update = sha512_neon_update,
+ .final = sha512_neon_final,
+ .finup = sha512_neon_finup,
+ .descsize = sizeof(struct sha512_state),
+ .digestsize = SHA512_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512-neon",
+ .cra_priority = 300, /* sha512_arm_algs in sha512-glue.c uses 250 */
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
diff --git a/arch/arm/crypto/sha512.h b/arch/arm/crypto/sha512.h
new file mode 100644
index 000000000000..a75d9a82988a
--- /dev/null
+++ b/arch/arm/crypto/sha512.h
@@ -0,0 +1,8 @@
+
+int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len); /* defined in sha512-glue.c; used by sha512-neon-glue.c when it does not take the NEON path */
+
+int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out); /* defined in sha512-glue.c; used by sha512-neon-glue.c when may_use_simd() is false */
+
+extern struct shash_alg sha512_neon_algs[2]; /* defined in sha512-neon-glue.c; registered/unregistered by sha512-glue.c */
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c
deleted file mode 100644
index b124dce838d6..000000000000
--- a/arch/arm/crypto/sha512_neon_glue.c
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
- * using NEON instructions.
- *
- * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This file is based on sha512_ssse3_glue.c:
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <crypto/sha.h>
-#include <asm/byteorder.h>
-#include <asm/simd.h>
-#include <asm/neon.h>
-
-
-static const u64 sha512_k[] = {
- 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
- 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
- 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
- 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
- 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
- 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
- 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
- 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
- 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
- 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
- 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
- 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
- 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
- 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
- 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
- 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
- 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
- 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
- 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
- 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
- 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
- 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
- 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
- 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
- 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
- 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
- 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
- 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
- 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
- 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
- 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
- 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
- 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
- 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
- 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
- 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
- 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
- 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
- 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
- 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
-};
-
-
-asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
- const u64 k[], unsigned int num_blks);
-
-
-static int sha512_neon_init(struct shash_desc *desc)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA512_H0;
- sctx->state[1] = SHA512_H1;
- sctx->state[2] = SHA512_H2;
- sctx->state[3] = SHA512_H3;
- sctx->state[4] = SHA512_H4;
- sctx->state[5] = SHA512_H5;
- sctx->state[6] = SHA512_H6;
- sctx->state[7] = SHA512_H7;
- sctx->count[0] = sctx->count[1] = 0;
-
- return 0;
-}
-
-static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int done = 0;
-
- sctx->count[0] += len;
- if (sctx->count[0] < len)
- sctx->count[1]++;
-
- if (partial) {
- done = SHA512_BLOCK_SIZE - partial;
- memcpy(sctx->buf + partial, data, done);
- sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
- }
-
- if (len - done >= SHA512_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
-
- sha512_transform_neon(sctx->state, data + done, sha512_k,
- rounds);
-
- done += rounds * SHA512_BLOCK_SIZE;
- }
-
- memcpy(sctx->buf, data + done, len - done);
-
- return 0;
-}
-
-static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
- int res;
-
- /* Handle the fast case right here */
- if (partial + len < SHA512_BLOCK_SIZE) {
- sctx->count[0] += len;
- if (sctx->count[0] < len)
- sctx->count[1]++;
- memcpy(sctx->buf + partial, data, len);
-
- return 0;
- }
-
- if (!may_use_simd()) {
- res = crypto_sha512_update(desc, data, len);
- } else {
- kernel_neon_begin();
- res = __sha512_neon_update(desc, data, len, partial);
- kernel_neon_end();
- }
-
- return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha512_neon_final(struct shash_desc *desc, u8 *out)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be64 *dst = (__be64 *)out;
- __be64 bits[2];
- static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
- /* save number of bits */
- bits[1] = cpu_to_be64(sctx->count[0] << 3);
- bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
- /* Pad out to 112 mod 128 and append length */
- index = sctx->count[0] & 0x7f;
- padlen = (index < 112) ? (112 - index) : ((128+112) - index);
-
- if (!may_use_simd()) {
- crypto_sha512_update(desc, padding, padlen);
- crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
- } else {
- kernel_neon_begin();
- /* We need to fill a whole block for __sha512_neon_update() */
- if (padlen <= 112) {
- sctx->count[0] += padlen;
- if (sctx->count[0] < padlen)
- sctx->count[1]++;
- memcpy(sctx->buf + index, padding, padlen);
- } else {
- __sha512_neon_update(desc, padding, padlen, index);
- }
- __sha512_neon_update(desc, (const u8 *)&bits,
- sizeof(bits), 112);
- kernel_neon_end();
- }
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be64(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_neon_export(struct shash_desc *desc, void *out)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_neon_import(struct shash_desc *desc, const void *in)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha384_neon_init(struct shash_desc *desc)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA384_H0;
- sctx->state[1] = SHA384_H1;
- sctx->state[2] = SHA384_H2;
- sctx->state[3] = SHA384_H3;
- sctx->state[4] = SHA384_H4;
- sctx->state[5] = SHA384_H5;
- sctx->state[6] = SHA384_H6;
- sctx->state[7] = SHA384_H7;
-
- sctx->count[0] = sctx->count[1] = 0;
-
- return 0;
-}
-
-static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
-{
- u8 D[SHA512_DIGEST_SIZE];
-
- sha512_neon_final(desc, D);
-
- memcpy(hash, D, SHA384_DIGEST_SIZE);
- memzero_explicit(D, SHA512_DIGEST_SIZE);
-
- return 0;
-}
-
-static struct shash_alg algs[] = { {
- .digestsize = SHA512_DIGEST_SIZE,
- .init = sha512_neon_init,
- .update = sha512_neon_update,
- .final = sha512_neon_final,
- .export = sha512_neon_export,
- .import = sha512_neon_import,
- .descsize = sizeof(struct sha512_state),
- .statesize = sizeof(struct sha512_state),
- .base = {
- .cra_name = "sha512",
- .cra_driver_name = "sha512-neon",
- .cra_priority = 250,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-}, {
- .digestsize = SHA384_DIGEST_SIZE,
- .init = sha384_neon_init,
- .update = sha512_neon_update,
- .final = sha384_neon_final,
- .export = sha512_neon_export,
- .import = sha512_neon_import,
- .descsize = sizeof(struct sha512_state),
- .statesize = sizeof(struct sha512_state),
- .base = {
- .cra_name = "sha384",
- .cra_driver_name = "sha384-neon",
- .cra_priority = 250,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
- .cra_blocksize = SHA384_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-} };
-
-static int __init sha512_neon_mod_init(void)
-{
- if (!cpu_has_neon())
- return -ENODEV;
-
- return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha512_neon_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_init(sha512_neon_mod_init);
-module_exit(sha512_neon_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
-
-MODULE_ALIAS_CRYPTO("sha512");
-MODULE_ALIAS_CRYPTO("sha384");
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 3c4596d0ce6c..83c50193626c 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
generic-y += rwsem.h
-generic-y += scatterlist.h
generic-y += seccomp.h
generic-y += sections.h
generic-y += segment.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 186270b3e194..4abe57279c66 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -178,6 +178,21 @@
.endm
/*
+ * Assembly version of "adr rd, BSYM(sym)". This should only be used to
+ * reference local symbols in the same assembly file which are to be
+ * resolved by the assembler. Other usage is undefined.
+ */
+ .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo /* one expansion per condition suffix; the empty entry yields plain badr */
+ .macro badr\c, rd, sym /* defines badr, badreq, badrne, ... with operands rd and sym */
+#ifdef CONFIG_THUMB2_KERNEL
+ adr\c \rd, \sym + 1 /* Thumb-2 kernel build: rd receives the address of sym plus 1 */
+#else
+ adr\c \rd, \sym /* otherwise: rd receives the address of sym unchanged */
+#endif
+ .endm
+ .endr
+
+/*
* Get current thread_info.
*/
.macro get_thread_info, rd
@@ -326,7 +341,7 @@
THUMB( orr \reg , \reg , #PSR_T_BIT )
bne 1f
orr \reg, \reg, #PSR_A_BIT
- adr lr, BSYM(2f)
+ badr lr, 2f
msr spsr_cxsf, \reg
__MSR_ELR_HYP(14)
__ERET
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index d2f81e6b8c1c..6c2327e1c732 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 2d46862e7bef..4812cda8fd17 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -482,10 +482,17 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
: : : "r0","r1","r2","r3","r4","r5","r6","r7", \
"r9","r10","lr","memory" )
+#ifdef CONFIG_MMU
int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+#else
+static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+#endif
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index abb2c3769b01..1692a05d3207 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -94,6 +94,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
break;
#endif
default:
+ /* Cause a link-time error, the xchg() size is not supported */
__bad_xchg(ptr, size), ret = 0;
break;
}
@@ -102,8 +103,10 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
return ret;
}
-#define xchg(ptr,x) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+#define xchg(ptr, x) ({ /* x is passed as unsigned long; the __xchg() result is cast back to the type ptr points to */ \
+ (__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \
+ sizeof(*(ptr))); /* size argument is the size of the pointed-to object */ \
+})
#include <asm-generic/cmpxchg-local.h>
@@ -118,14 +121,16 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
* cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
* them available.
*/
-#define cmpxchg_local(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
- (unsigned long)(n), sizeof(*(ptr))))
+/*
+ * cmpxchg_local(ptr, o, n): pass o and n, cast to unsigned long, together
+ * with sizeof(*(ptr)) to __cmpxchg_local_generic() and cast the result back
+ * to the type ptr points to.  ptr is parenthesized inside __typeof__() so
+ * that an expression argument such as "p + 1" yields the pointee type rather
+ * than the type of "*p + 1".
+ */
+#define cmpxchg_local(ptr, o, n) ({					\
+	(__typeof__(*(ptr)))__cmpxchg_local_generic((ptr),		\
+						(unsigned long)(o),	\
+						(unsigned long)(n),	\
+						sizeof(*(ptr)));	\
+})
+
#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-#ifndef CONFIG_SMP
#include <asm-generic/cmpxchg.h>
-#endif
#else /* min ARCH >= ARMv6 */
@@ -201,11 +206,12 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
return ret;
}
-#define cmpxchg(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
+#define cmpxchg(ptr,o,n) ({ /* o and n go in as unsigned long; the __cmpxchg_mb() result is cast back to the type ptr points to */ \
+ (__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
+ (unsigned long)(o), \
+ (unsigned long)(n), \
+ sizeof(*(ptr))); /* size argument is the size of the pointed-to object */ \
+})
static inline unsigned long __cmpxchg_local(volatile void *ptr,
unsigned long old,
@@ -227,6 +233,13 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
return ret;
}
+/*
+ * cmpxchg_local(ptr, o, n): pass o and n, cast to unsigned long, together
+ * with sizeof(*(ptr)) to __cmpxchg_local() and cast the result back to the
+ * type ptr points to.  ptr is parenthesized inside __typeof__() so that an
+ * expression argument such as "p + 1" yields the pointee type rather than
+ * the type of "*p + 1".
+ */
+#define cmpxchg_local(ptr, o, n) ({					\
+	(__typeof__(*(ptr)))__cmpxchg_local((ptr),			\
+					(unsigned long)(o),		\
+					(unsigned long)(n),		\
+					sizeof(*(ptr)));		\
+})
+
static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
unsigned long long old,
unsigned long long new)
@@ -252,6 +265,14 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
return oldval;
}
+#define cmpxchg64_relaxed(ptr, o, n) ({ /* o and n go in as unsigned long long; the __cmpxchg64() result is cast back to the type ptr points to */ \
+ (__typeof__(*(ptr)))__cmpxchg64((ptr), \
+ (unsigned long long)(o), \
+ (unsigned long long)(n)); \
+})
+
+#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
+
static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
unsigned long long old,
unsigned long long new)
@@ -265,23 +286,11 @@ static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
return ret;
}
-#define cmpxchg_local(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
-
-#define cmpxchg64(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n)))
-
-#define cmpxchg64_relaxed(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64((ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n)))
-
-#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
+#define cmpxchg64(ptr, o, n) ({ /* o and n go in as unsigned long long; the __cmpxchg64_mb() result is cast back to the type ptr points to */ \
+ (__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \
+ (unsigned long long)(o), \
+ (unsigned long long)(n)); \
+})
#endif /* __LINUX_ARM_ARCH__ >= 6 */
diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
index 99084431d6ae..bb4fa67da541 100644
--- a/arch/arm/include/asm/dma.h
+++ b/arch/arm/include/asm/dma.h
@@ -19,7 +19,7 @@
* It should not be re-used except for that purpose.
*/
#include <linux/spinlock.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <mach/isa-dma.h>
diff --git a/arch/arm/include/asm/edac.h b/arch/arm/include/asm/edac.h
index 0df7a2c1fc3d..5189fa819b60 100644
--- a/arch/arm/include/asm/edac.h
+++ b/arch/arm/include/asm/edac.h
@@ -18,11 +18,12 @@
#define ASM_EDAC_H
/*
* ECC atomic, DMA, SMP and interrupt safe scrub function.
- * Implements the per arch atomic_scrub() that EDAC use for software
+ * Implements the per arch edac_atomic_scrub() that EDAC uses for software
* ECC scrubbing. It reads memory and then writes back the original
* value, allowing the hardware to detect and correct memory errors.
*/
-static inline void atomic_scrub(void *va, u32 size)
+
+static inline void edac_atomic_scrub(void *va, u32 size)
{
#if __LINUX_ARM_ARCH__ >= 6
unsigned int *virt_addr = va;
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 469a2b30fa27..609184f522ee 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -10,7 +10,7 @@
@
@ routine called with r0 = irq number, r1 = struct pt_regs *
@
- adrne lr, BSYM(1b)
+ badrne lr, 1b
bne asm_do_IRQ
#ifdef CONFIG_SMP
@@ -23,7 +23,7 @@
ALT_SMP(test_for_ipi r0, r2, r6, lr)
ALT_UP_B(9997f)
movne r1, sp
- adrne lr, BSYM(1b)
+ badrne lr, 1b
bne do_IPI
#endif
9997:
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 4e78065a16aa..5eed82809d82 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
+ preempt_disable();
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
"1: " TUSER(ldr) " %1, [%4]\n"
" teq %1, %2\n"
@@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");
*uval = val;
+ preempt_enable();
+
return ret;
}
@@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+#ifndef CONFIG_SMP
+ preempt_disable();
+#endif
+ pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
ret = -ENOSYS;
}
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
+#ifndef CONFIG_SMP
+ preempt_enable();
+#endif
if (!ret) {
switch (cmp) {
diff --git a/arch/arm/include/asm/hardware/arm_timer.h b/arch/arm/include/asm/hardware/arm_timer.h
deleted file mode 100644
index d6030ff599db..000000000000
--- a/arch/arm/include/asm/hardware/arm_timer.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __ASM_ARM_HARDWARE_ARM_TIMER_H
-#define __ASM_ARM_HARDWARE_ARM_TIMER_H
-
-/*
- * ARM timer implementation, found in Integrator, Versatile and Realview
- * platforms. Not all platforms support all registers and bits in these
- * registers, so we mark them with A for Integrator AP, C for Integrator
- * CP, V for Versatile and R for Realview.
- *
- * Integrator AP has 16-bit timers, Integrator CP, Versatile and Realview
- * can have 16-bit or 32-bit selectable via a bit in the control register.
- *
- * Every SP804 contains two identical timers.
- */
-#define TIMER_1_BASE 0x00
-#define TIMER_2_BASE 0x20
-
-#define TIMER_LOAD 0x00 /* ACVR rw */
-#define TIMER_VALUE 0x04 /* ACVR ro */
-#define TIMER_CTRL 0x08 /* ACVR rw */
-#define TIMER_CTRL_ONESHOT (1 << 0) /* CVR */
-#define TIMER_CTRL_32BIT (1 << 1) /* CVR */
-#define TIMER_CTRL_DIV1 (0 << 2) /* ACVR */
-#define TIMER_CTRL_DIV16 (1 << 2) /* ACVR */
-#define TIMER_CTRL_DIV256 (2 << 2) /* ACVR */
-#define TIMER_CTRL_IE (1 << 5) /* VR */
-#define TIMER_CTRL_PERIODIC (1 << 6) /* ACVR */
-#define TIMER_CTRL_ENABLE (1 << 7) /* ACVR */
-
-#define TIMER_INTCLR 0x0c /* ACVR wo */
-#define TIMER_RIS 0x10 /* CVR ro */
-#define TIMER_MIS 0x14 /* CVR ro */
-#define TIMER_BGLOAD 0x18 /* CVR rw */
-
-#endif
diff --git a/arch/arm/include/asm/hardware/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h
deleted file mode 100644
index bb28af7c32de..000000000000
--- a/arch/arm/include/asm/hardware/timer-sp.h
+++ /dev/null
@@ -1,23 +0,0 @@
-struct clk;
-
-void __sp804_clocksource_and_sched_clock_init(void __iomem *,
- const char *, struct clk *, int);
-void __sp804_clockevents_init(void __iomem *, unsigned int,
- struct clk *, const char *);
-
-static inline void sp804_clocksource_init(void __iomem *base, const char *name)
-{
- __sp804_clocksource_and_sched_clock_init(base, name, NULL, 0);
-}
-
-static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base,
- const char *name)
-{
- __sp804_clocksource_and_sched_clock_init(base, name, NULL, 1);
-}
-
-static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name)
-{
- __sp804_clockevents_init(base, irq, NULL, name);
-
-}
diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
index 1f1b1cd112f3..7d26f6c4f0f5 100644
--- a/arch/arm/include/asm/hugetlb.h
+++ b/arch/arm/include/asm/hugetlb.h
@@ -53,10 +53,6 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
}
-static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
-{
-}
-
static inline int huge_pte_none(pte_t pte)
{
return pte_none(pte);
@@ -67,15 +63,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
return pte_wrprotect(pte);
}
-static inline int arch_prepare_hugepage(struct page *page)
-{
- return 0;
-}
-
-static inline void arch_release_hugepage(struct page *page)
-{
-}
-
static inline void arch_clear_hugepage_flags(struct page *page)
{
clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index db58deb00aa7..1c3938f26beb 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -23,6 +23,7 @@
#ifdef __KERNEL__
+#include <linux/string.h>
#include <linux/types.h>
#include <linux/blk_types.h>
#include <asm/byteorder.h>
@@ -73,17 +74,16 @@ void __raw_readsl(const volatile void __iomem *addr, void *data, int longlen);
static inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
asm volatile("strh %1, %0"
- : "+Q" (*(volatile u16 __force *)addr)
- : "r" (val));
+ : : "Q" (*(volatile u16 __force *)addr), "r" (val));
}
#define __raw_readw __raw_readw
static inline u16 __raw_readw(const volatile void __iomem *addr)
{
u16 val;
- asm volatile("ldrh %1, %0"
- : "+Q" (*(volatile u16 __force *)addr),
- "=r" (val));
+ asm volatile("ldrh %0, %1"
+ : "=r" (val)
+ : "Q" (*(volatile u16 __force *)addr));
return val;
}
#endif
@@ -92,25 +92,23 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
asm volatile("strb %1, %0"
- : "+Qo" (*(volatile u8 __force *)addr)
- : "r" (val));
+ : : "Qo" (*(volatile u8 __force *)addr), "r" (val));
}
#define __raw_writel __raw_writel
static inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
asm volatile("str %1, %0"
- : "+Qo" (*(volatile u32 __force *)addr)
- : "r" (val));
+ : : "Qo" (*(volatile u32 __force *)addr), "r" (val));
}
#define __raw_readb __raw_readb
static inline u8 __raw_readb(const volatile void __iomem *addr)
{
u8 val;
- asm volatile("ldrb %1, %0"
- : "+Qo" (*(volatile u8 __force *)addr),
- "=r" (val));
+ asm volatile("ldrb %0, %1"
+ : "=r" (val)
+ : "Qo" (*(volatile u8 __force *)addr));
return val;
}
@@ -118,9 +116,9 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
static inline u32 __raw_readl(const volatile void __iomem *addr)
{
u32 val;
- asm volatile("ldr %1, %0"
- : "+Qo" (*(volatile u32 __force *)addr),
- "=r" (val));
+ asm volatile("ldr %0, %1"
+ : "=r" (val)
+ : "Qo" (*(volatile u32 __force *)addr));
return val;
}
@@ -319,9 +317,33 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
#define writesw(p,d,l) __raw_writesw(p,d,l)
#define writesl(p,d,l) __raw_writesl(p,d,l)
+#ifndef __ARMBE__
+/*
+ * Fill @count bytes at the I/O address @dst with @c by stripping the
+ * __iomem annotation and calling memset() on the resulting pointer.
+ */
+static inline void memset_io(volatile void __iomem *dst, unsigned c,
+			     size_t count)
+{
+	void *plain_dst = (void __force *)dst;
+
+	memset(plain_dst, c, count);
+}
+#define memset_io(dst,c,count) memset_io(dst,c,count)
+
+/*
+ * Copy @count bytes from the I/O address @from into @to by stripping the
+ * __iomem annotation and calling memcpy() on the resulting pointer.
+ */
+static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
+				 size_t count)
+{
+	const void *plain_src = (const void __force *)from;
+
+	memcpy(to, plain_src, count);
+}
+#define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count)
+
+/*
+ * Copy @count bytes from @from to the I/O address @to by stripping the
+ * __iomem annotation and calling memcpy() on the resulting pointer.
+ */
+static inline void memcpy_toio(volatile void __iomem *to, const void *from,
+			       size_t count)
+{
+	void *plain_dst = (void __force *)to;
+
+	memcpy(plain_dst, from, count);
+}
+#define memcpy_toio(to,from,count) memcpy_toio(to,from,count)
+
+#else
#define memset_io(c,v,l) _memset_io(c,(v),(l))
#define memcpy_fromio(a,c,l) _memcpy_fromio((a),c,(l))
#define memcpy_toio(c,a,l) _memcpy_toio(c,(a),(l))
+#endif
#endif /* readl */
@@ -336,6 +358,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
#define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE)
#define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
#define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC)
+#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE)
#define iounmap __arm_iounmap
/*
diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h
index 3b763d6652a0..43908146a5cf 100644
--- a/arch/arm/include/asm/irqflags.h
+++ b/arch/arm/include/asm/irqflags.h
@@ -20,6 +20,7 @@
#if __LINUX_ARM_ARCH__ >= 6
+#define arch_local_irq_save arch_local_irq_save
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags;
@@ -31,6 +32,7 @@ static inline unsigned long arch_local_irq_save(void)
return flags;
}
+#define arch_local_irq_enable arch_local_irq_enable
static inline void arch_local_irq_enable(void)
{
asm volatile(
@@ -40,6 +42,7 @@ static inline void arch_local_irq_enable(void)
: "memory", "cc");
}
+#define arch_local_irq_disable arch_local_irq_disable
static inline void arch_local_irq_disable(void)
{
asm volatile(
@@ -56,6 +59,7 @@ static inline void arch_local_irq_disable(void)
/*
* Save the current interrupt enable state & disable IRQs
*/
+#define arch_local_irq_save arch_local_irq_save
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags, temp;
@@ -73,6 +77,7 @@ static inline unsigned long arch_local_irq_save(void)
/*
* Enable IRQs
*/
+#define arch_local_irq_enable arch_local_irq_enable
static inline void arch_local_irq_enable(void)
{
unsigned long temp;
@@ -88,6 +93,7 @@ static inline void arch_local_irq_enable(void)
/*
* Disable IRQs
*/
+#define arch_local_irq_disable arch_local_irq_disable
static inline void arch_local_irq_disable(void)
{
unsigned long temp;
@@ -135,6 +141,7 @@ static inline void arch_local_irq_disable(void)
/*
* Save the current interrupt enable state.
*/
+#define arch_local_save_flags arch_local_save_flags
static inline unsigned long arch_local_save_flags(void)
{
unsigned long flags;
@@ -147,6 +154,7 @@ static inline unsigned long arch_local_save_flags(void)
/*
* restore saved IRQ & FIQ state
*/
+#define arch_local_irq_restore arch_local_irq_restore
static inline void arch_local_irq_restore(unsigned long flags)
{
asm volatile(
@@ -156,10 +164,13 @@ static inline void arch_local_irq_restore(unsigned long flags)
: "memory", "cc");
}
+#define arch_irqs_disabled_flags arch_irqs_disabled_flags
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
return flags & IRQMASK_I_BIT;
}
+#include <asm-generic/irqflags.h>
+
#endif /* ifdef __KERNEL__ */
#endif /* ifndef __ASM_ARM_IRQFLAGS_H */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 25410b2d8bc1..194c91b610ff 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -23,7 +23,7 @@
#define c0_MPIDR 1 /* MultiProcessor ID Register */
#define c0_CSSELR 2 /* Cache Size Selection Register */
#define c1_SCTLR 3 /* System Control Register */
-#define c1_ACTLR 4 /* Auxilliary Control Register */
+#define c1_ACTLR 4 /* Auxiliary Control Register */
#define c1_CPACR 5 /* Coprocessor Access Control */
#define c2_TTBR0 6 /* Translation Table Base Register 0 */
#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index d71607c16601..e896d2c196e6 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -218,11 +218,6 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
return 0;
}
-static inline void vgic_arch_setup(const struct vgic_params *vgic)
-{
- BUG_ON(vgic->type != VGIC_V2);
-}
-
int kvm_perf_init(void);
int kvm_perf_teardown(void);
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 0406cb3f1af7..cb3a40717edd 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -51,7 +51,7 @@ struct machine_desc {
bool (*smp_init)(void);
void (*fixup)(struct tag *, char **);
void (*dt_fixup)(void);
- void (*init_meminfo)(void);
+ long long (*pv_fixup)(void);
void (*reserve)(void);/* reserve mem blocks */
void (*map_io)(void);/* IO mapping function */
void (*init_early)(void);
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 50b378f59e08..acd4983d9b1f 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -137,17 +137,12 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster);
/**
* mcpm_cpu_suspend - bring the calling CPU in a suspended state
*
- * @expected_residency: duration in microseconds the CPU is expected
- * to remain suspended, or 0 if unknown/infinity.
- *
- * The calling CPU is suspended. The expected residency argument is used
- * as a hint by the platform specific backend to implement the appropriate
- * sleep state level according to the knowledge it has on wake-up latency
- * for the given hardware.
+ * The calling CPU is suspended. This is similar to mcpm_cpu_power_down()
+ * except for possible extra platform specific configuration steps to allow
+ * an asynchronous wake-up e.g. with a pending interrupt.
*
* If this CPU is found to be the "last man standing" in the cluster
- * then the cluster may be prepared for power-down too, if the expected
- * residency makes it worthwhile.
+ * then the cluster may be prepared for power-down too.
*
* This must be called with interrupts disabled.
*
@@ -157,7 +152,7 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster);
* This will return if mcpm_platform_register() has not been called
* previously in which case the caller should take appropriate action.
*/
-void mcpm_cpu_suspend(u64 expected_residency);
+void mcpm_cpu_suspend(void);
/**
 * mcpm_cpu_powered_up - housekeeping work after a CPU has been powered up
@@ -234,12 +229,6 @@ struct mcpm_platform_ops {
void (*cpu_is_up)(unsigned int cpu, unsigned int cluster);
void (*cluster_is_up)(unsigned int cluster);
int (*wait_for_powerdown)(unsigned int cpu, unsigned int cluster);
-
- /* deprecated callbacks */
- int (*power_up)(unsigned int cpu, unsigned int cluster);
- void (*power_down)(void);
- void (*suspend)(u64);
- void (*powered_up)(void);
};
/**
@@ -251,35 +240,6 @@ struct mcpm_platform_ops {
*/
int __init mcpm_platform_register(const struct mcpm_platform_ops *ops);
-/* Synchronisation structures for coordinating safe cluster setup/teardown: */
-
-/*
- * When modifying this structure, make sure you update the MCPM_SYNC_ defines
- * to match.
- */
-struct mcpm_sync_struct {
- /* individual CPU states */
- struct {
- s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE);
- } cpus[MAX_CPUS_PER_CLUSTER];
-
- /* cluster state */
- s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE);
-
- /* inbound-side state */
- s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE);
-};
-
-struct sync_struct {
- struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS];
-};
-
-void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster);
-void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster);
-void __mcpm_outbound_leave_critical(unsigned int cluster, int state);
-bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster);
-int __mcpm_cluster_state(unsigned int cluster);
-
/**
* mcpm_sync_init - Initialize the cluster synchronization support
*
@@ -318,6 +278,29 @@ int __init mcpm_loopback(void (*cache_disable)(void));
void __init mcpm_smp_set_ops(void);
+/*
+ * Synchronisation structures for coordinating safe cluster setup/teardown.
+ * This is private to the MCPM core code and shared between C and assembly.
+ * When modifying this structure, make sure you update the MCPM_SYNC_ defines
+ * to match.
+ */
+struct mcpm_sync_struct {
+ /* individual CPU states */
+ struct {
+ s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE);
+ } cpus[MAX_CPUS_PER_CLUSTER];
+
+ /* cluster state */
+ s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE);
+
+ /* inbound-side state */
+ s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE);
+};
+
+struct sync_struct {
+ struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS];
+};
+
#else
/*
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 184def0e1652..3a72d69b3255 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,8 +18,6 @@
#include <linux/types.h>
#include <linux/sizes.h>
-#include <asm/cache.h>
-
#ifdef CONFIG_NEED_MACH_MEMORY_H
#include <mach/memory.h>
#endif
@@ -133,20 +131,6 @@
#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys)))
/*
- * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed
- * around in head.S and proc-*.S are shifted by this amount, in order to
- * leave spare high bits for systems with physical address extension. This
- * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but
- * gives us about 38-bits or so.
- */
-#ifdef CONFIG_ARM_LPAE
-#define ARCH_PGD_SHIFT L1_CACHE_SHIFT
-#else
-#define ARCH_PGD_SHIFT 0
-#endif
-#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1)
-
-/*
* PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
* memory. This is used for XIP and NoMMU kernels, and on platforms that don't
* have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
diff --git a/arch/arm/include/asm/mm-arch-hooks.h b/arch/arm/include/asm/mm-arch-hooks.h
new file mode 100644
index 000000000000..7056660c7cc4
--- /dev/null
+++ b/arch/arm/include/asm/mm-arch-hooks.h
@@ -0,0 +1,15 @@
+/*
+ * Architecture specific mm hooks
+ *
+ * Copyright (C) 2015, IBM Corporation
+ * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ARM_MM_ARCH_HOOKS_H
+#define _ASM_ARM_MM_ARCH_HOOKS_H
+
+#endif /* _ASM_ARM_MM_ARCH_HOOKS_H */
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index ed690c49ef93..e358b7966c06 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -16,11 +16,21 @@ enum {
ARM_SEC_UNLIKELY,
ARM_SEC_MAX,
};
+#endif
struct mod_arch_specific {
+#ifdef CONFIG_ARM_UNWIND
struct unwind_table *unwind[ARM_SEC_MAX];
-};
#endif
+#ifdef CONFIG_ARM_MODULE_PLTS
+ struct elf32_shdr *core_plt;
+ struct elf32_shdr *init_plt;
+ int core_plt_count;
+ int init_plt_count;
+#endif
+};
+
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val);
/*
* Add the ARM architecture version to the version magic string
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 585dc33a7a24..a5635444ca41 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -31,16 +31,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
*/
#define PCI_DMA_BUS_IS_PHYS (1)
-#ifdef CONFIG_PCI
-static inline void pci_dma_burst_advice(struct pci_dev *pdev,
- enum pci_dma_burst_strategy *strat,
- unsigned long *strategy_parameter)
-{
- *strat = PCI_DMA_BURST_INFINITY;
- *strategy_parameter = ~0UL;
-}
-#endif
-
#define HAVE_PCI_MMAP
extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine);
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index d9cf138fd7d4..4f9dec489931 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -19,4 +19,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
#endif
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->ARM_pc = (__ip); \
+ (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \
+ (regs)->ARM_sp = current_stack_pointer; \
+ (regs)->ARM_cpsr = SVC_MODE; \
+}
+
#endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 675e4ab79f68..3fc87dfd77e6 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -24,22 +24,10 @@
* interrupt and passed the address of the low level handler,
* and can be used to implement any platform specific handling
* before or after calling it.
- * @runtime_resume: an optional handler which will be called by the
- * runtime PM framework following a call to pm_runtime_get().
- * Note that if pm_runtime_get() is called more than once in
- * succession this handler will only be called once.
- * @runtime_suspend: an optional handler which will be called by the
- * runtime PM framework following a call to pm_runtime_put().
- * Note that if pm_runtime_get() is called more than once in
- * succession this handler will only be called following the
- * final call to pm_runtime_put() that actually disables the
- * hardware.
*/
struct arm_pmu_platdata {
irqreturn_t (*handle_irq)(int irq, void *dev,
irq_handler_t pmu_handler);
- int (*runtime_resume)(struct device *dev);
- int (*runtime_suspend)(struct device *dev);
};
#ifdef CONFIG_HW_PERF_EVENTS
@@ -92,6 +80,7 @@ struct pmu_hw_events {
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
+ cpumask_t supported_cpus;
int *irq_affinity;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
@@ -122,8 +111,6 @@ struct arm_pmu {
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-extern const struct dev_pm_ops armpmu_dev_pm_ops;
-
int armpmu_register(struct arm_pmu *armpmu, int type);
u64 armpmu_event_update(struct perf_event *event);
@@ -158,6 +145,10 @@ struct pmu_probe_info {
#define XSCALE_PMU_PROBE(_version, _fn) \
PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn)
+int arm_pmu_device_probe(struct platform_device *pdev,
+ const struct of_device_id *of_table,
+ const struct pmu_probe_info *probe_table);
+
#endif /* CONFIG_HW_PERF_EVENTS */
#endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 5324c1112f3a..8877ad5ffe10 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -125,13 +125,6 @@ extern void cpu_resume(void);
ttbr; \
})
-#define cpu_set_ttbr(nr, val) \
- do { \
- u64 ttbr = val; \
- __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \
- : : "r" (ttbr)); \
- } while (0)
-
#define cpu_get_pgd() \
({ \
u64 pg = cpu_get_ttbr(0); \
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 18f5a554134f..2f3ac1ba6fb4 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void);
struct secondary_data {
union {
unsigned long mpu_rgn_szr;
- unsigned long pgdir;
+ u64 pgdir;
};
unsigned long swapper_pg_dir;
void *stack;
@@ -69,6 +69,7 @@ struct secondary_data {
extern struct secondary_data secondary_data;
extern volatile int pen_release;
extern void secondary_startup(void);
+extern void secondary_startup_arm(void);
extern int __cpu_disable(void);
diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h
index 720ea0320a6d..3860cbd401ec 100644
--- a/arch/arm/include/asm/system_info.h
+++ b/arch/arm/include/asm/system_info.h
@@ -17,6 +17,7 @@
/* information about the system we're running on */
extern unsigned int system_rev;
+extern const char *system_serial;
extern unsigned int system_serial_low;
extern unsigned int system_serial_high;
extern unsigned int mem_fclk_21285;
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 2fe85fff5cca..370f7a732900 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -18,7 +18,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
-#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index 200f9a7cd623..a91ae499614c 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -45,7 +45,6 @@
#define THUMB(x...) x
#ifdef __ASSEMBLY__
#define W(instr) instr.w
-#define BSYM(sym) sym + 1
#else
#define WASM(instr) #instr ".w"
#endif
@@ -59,7 +58,6 @@
#define THUMB(x...)
#ifdef __ASSEMBLY__
#define W(instr) instr
-#define BSYM(sym) sym
#else
#define WASM(instr) #instr
#endif
diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h
index 1317ee40f4df..04ff8e7b37df 100644
--- a/arch/arm/include/asm/xen/hypervisor.h
+++ b/arch/arm/include/asm/xen/hypervisor.h
@@ -1,6 +1,8 @@
#ifndef _ASM_ARM_XEN_HYPERVISOR_H
#define _ASM_ARM_XEN_HYPERVISOR_H
+#include <linux/init.h>
+
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
@@ -18,4 +20,10 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
extern struct dma_map_ops *xen_dma_ops;
+#ifdef CONFIG_XEN
+void __init xen_early_init(void);
+#else
+static inline void xen_early_init(void) { return; }
+#endif
+
#endif /* _ASM_ARM_XEN_HYPERVISOR_H */
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 0b579b2f4e0e..1bee8ca12494 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -12,7 +12,6 @@
#include <xen/interface/grant_table.h>
#define phys_to_machine_mapping_valid(pfn) (1)
-#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
#define pte_mfn pte_pfn
#define mfn_pte pfn_pte
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 752725dcbf42..e69f7a19735d 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o
obj-$(CONFIG_ISA_DMA_API) += dma.o
obj-$(CONFIG_FIQ) += fiq.o fiqasm.o
obj-$(CONFIG_MODULES) += armksyms.o module.o
+obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o
@@ -70,7 +71,9 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o
obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o
obj-$(CONFIG_IWMMXT) += iwmmxt.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o
+obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \
+ perf_event_xscale.o perf_event_v6.o \
+ perf_event_v7.o
CFLAGS_pj4-cp0.o := -marm
AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt
obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 570306c49406..7dac3086e361 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -15,6 +15,8 @@
* that causes it to save wrong values... Be aware!
*/
+#include <linux/init.h>
+
#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/glue-df.h>
@@ -40,7 +42,7 @@
#ifdef CONFIG_MULTI_IRQ_HANDLER
ldr r1, =handle_arch_irq
mov r0, sp
- adr lr, BSYM(9997f)
+ badr lr, 9997f
ldr pc, [r1]
#else
arch_irq_handler_default
@@ -273,7 +275,7 @@ __und_svc:
str r4, [sp, #S_PC]
orr r0, r9, r0, lsl #16
#endif
- adr r9, BSYM(__und_svc_finish)
+ badr r9, __und_svc_finish
mov r2, r4
bl call_fpe
@@ -469,7 +471,7 @@ __und_usr:
@ instruction, or the more conventional lr if we are to treat
@ this as a real undefined instruction
@
- adr r9, BSYM(ret_from_exception)
+ badr r9, ret_from_exception
@ IRQs must be enabled before attempting to read the instruction from
@ user space since that could cause a page/translation fault if the
@@ -486,7 +488,7 @@ __und_usr:
@ r2 = PC value for the following instruction (:= regs->ARM_pc)
@ r4 = PC value for the faulting instruction
@ lr = 32-bit undefined instruction function
- adr lr, BSYM(__und_usr_fault_32)
+ badr lr, __und_usr_fault_32
b call_fpe
__und_usr_thumb:
@@ -522,7 +524,7 @@ ARM_BE8(rev16 r0, r0) @ little endian instruction
add r2, r2, #2 @ r2 is PC + 2, make it PC + 4
str r2, [sp, #S_PC] @ it's a 2x16bit instr, update
orr r0, r0, r5, lsl #16
- adr lr, BSYM(__und_usr_fault_32)
+ badr lr, __und_usr_fault_32
@ r0 = the two 16-bit Thumb instructions which caused the exception
@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
@ r4 = PC value for the first 16-bit Thumb instruction
@@ -716,7 +718,7 @@ __und_usr_fault_32:
__und_usr_fault_16:
mov r1, #2
1: mov r0, sp
- adr lr, BSYM(ret_from_exception)
+ badr lr, ret_from_exception
b __und_fault
ENDPROC(__und_usr_fault_32)
ENDPROC(__und_usr_fault_16)
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 4e7f40c577e6..92828a1dec80 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -90,7 +90,7 @@ ENTRY(ret_from_fork)
bl schedule_tail
cmp r5, #0
movne r0, r4
- adrne lr, BSYM(1f)
+ badrne lr, 1f
retne r5
1: get_thread_info tsk
b ret_slow_syscall
@@ -198,7 +198,7 @@ local_restart:
bne __sys_trace
cmp scno, #NR_syscalls @ check upper syscall limit
- adr lr, BSYM(ret_fast_syscall) @ return address
+ badr lr, ret_fast_syscall @ return address
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
add r1, sp, #S_OFF
@@ -233,7 +233,7 @@ __sys_trace:
add r0, sp, #S_OFF
bl syscall_trace_enter
- adr lr, BSYM(__sys_trace_return) @ return address
+ badr lr, __sys_trace_return @ return address
mov scno, r0 @ syscall number (possibly new)
add r1, sp, #S_R0 + S_OFF @ pointer to regs
cmp scno, #NR_syscalls @ check upper syscall limit
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
index fe57c73e70a4..c73c4030ca5d 100644
--- a/arch/arm/kernel/entry-ftrace.S
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -87,7 +87,7 @@
1: mcount_get_lr r1 @ lr of instrumented func
mcount_adjust_addr r0, lr @ instrumented function
- adr lr, BSYM(2f)
+ badr lr, 2f
mov pc, r2
2: mcount_exit
.endm
diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
index 8944f4991c3c..b6c8bb9315e7 100644
--- a/arch/arm/kernel/entry-v7m.S
+++ b/arch/arm/kernel/entry-v7m.S
@@ -117,9 +117,14 @@ ENTRY(__switch_to)
ENDPROC(__switch_to)
.data
- .align 8
+#if CONFIG_CPU_V7M_NUM_IRQ <= 112
+ .align 9
+#else
+ .align 10
+#endif
+
/*
- * Vector table (64 words => 256 bytes natural alignment)
+ * Vector table (natural alignment needs to be ensured)
*/
ENTRY(vector_table)
.long 0 @ 0 - Reset stack pointer
@@ -138,6 +143,6 @@ ENTRY(vector_table)
.long __invalid_entry @ 13 - Reserved
.long __pendsv_entry @ 14 - PendSV
.long __invalid_entry @ 15 - SysTick
- .rept 64 - 16
- .long __irq_entry @ 16..64 - External Interrupts
+ .rept CONFIG_CPU_V7M_NUM_IRQ
+ .long __irq_entry @ External Interrupts
.endr
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index aebfbf79a1a3..9b8c5a113434 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -46,7 +46,7 @@ ENTRY(stext)
.arm
ENTRY(stext)
- THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM.
+ THUMB( badr r9, 1f ) @ Kernel is always entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
THUMB( .thumb ) @ switch to Thumb now.
THUMB(1: )
@@ -77,13 +77,13 @@ ENTRY(stext)
orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit
bl __setup_mpu
#endif
- ldr r13, =__mmap_switched @ address to jump to after
- @ initialising sctlr
- adr lr, BSYM(1f) @ return (PIC) address
+
+ badr lr, 1f @ return (PIC) address
ldr r12, [r10, #PROCINFO_INITFUNC]
add r12, r12, r10
ret r12
- 1: b __after_proc_init
+1: bl __after_proc_init
+ b __mmap_switched
ENDPROC(stext)
#ifdef CONFIG_SMP
@@ -106,8 +106,7 @@ ENTRY(secondary_startup)
movs r10, r5 @ invalid processor?
beq __error_p @ yes, error 'p'
- adr r4, __secondary_data
- ldmia r4, {r7, r12}
+ ldr r7, __secondary_data
#ifdef CONFIG_ARM_MPU
/* Use MPU region info supplied by __cpu_up */
@@ -115,23 +114,19 @@ ENTRY(secondary_startup)
bl __setup_mpu @ Initialize the MPU
#endif
- adr lr, BSYM(__after_proc_init) @ return address
- mov r13, r12 @ __secondary_switched address
+ badr lr, 1f @ return (PIC) address
ldr r12, [r10, #PROCINFO_INITFUNC]
add r12, r12, r10
ret r12
-ENDPROC(secondary_startup)
-
-ENTRY(__secondary_switched)
- ldr sp, [r7, #8] @ set up the stack pointer
+1: bl __after_proc_init
+ ldr sp, [r7, #12] @ set up the stack pointer
mov fp, #0
b secondary_start_kernel
-ENDPROC(__secondary_switched)
+ENDPROC(secondary_startup)
.type __secondary_data, %object
__secondary_data:
.long secondary_data
- .long __secondary_switched
#endif /* CONFIG_SMP */
/*
@@ -164,7 +159,7 @@ __after_proc_init:
#endif
mcr p15, 0, r0, c1, c0, 0 @ write control reg
#endif /* CONFIG_CPU_CP15 */
- ret r13
+ ret lr
ENDPROC(__after_proc_init)
.ltorg
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 3637973a9708..bd755d97e459 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -80,7 +80,7 @@
ENTRY(stext)
ARM_BE8(setend be ) @ ensure we are in BE8 mode
- THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM.
+ THUMB( badr r9, 1f ) @ Kernel is always entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
THUMB( .thumb ) @ switch to Thumb now.
THUMB(1: )
@@ -131,13 +131,30 @@ ENTRY(stext)
* The following calls CPU specific code in a position independent
* manner. See arch/arm/mm/proc-*.S for details. r10 = base of
* xxx_proc_info structure selected by __lookup_processor_type
- * above. On return, the CPU will be ready for the MMU to be
- * turned on, and r0 will hold the CPU control register value.
+ * above.
+ *
+ * The processor init function will be called with:
+ * r1 - machine type
+ * r2 - boot data (atags/dt) pointer
+ * r4 - translation table base (low word)
+ * r5 - translation table base (high word, if LPAE)
+ * r8 - translation table base 1 (pfn if LPAE)
+ * r9 - cpuid
+ * r13 - virtual address for __enable_mmu -> __turn_mmu_on
+ *
+ * On return, the CPU will be ready for the MMU to be turned on,
+ * r0 will hold the CPU control register value, r1, r2, r4, and
+ * r9 will be preserved. r5 will also be preserved if LPAE.
*/
ldr r13, =__mmap_switched @ address to jump to after
@ mmu has been enabled
- adr lr, BSYM(1f) @ return (PIC) address
+ badr lr, 1f @ return (PIC) address
+#ifdef CONFIG_ARM_LPAE
+ mov r5, #0 @ high TTBR0
+ mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn
+#else
mov r8, r4 @ set TTBR1 to swapper_pg_dir
+#endif
ldr r12, [r10, #PROCINFO_INITFUNC]
add r12, r12, r10
ret r12
@@ -158,7 +175,7 @@ ENDPROC(stext)
*
* Returns:
* r0, r3, r5-r7 corrupted
- * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ * r4 = physical page table address
*/
__create_page_tables:
pgtbl r4, r8 @ page table address
@@ -333,7 +350,6 @@ __create_page_tables:
#endif
#ifdef CONFIG_ARM_LPAE
sub r4, r4, #0x1000 @ point to the PGD table
- mov r4, r4, lsr #ARCH_PGD_SHIFT
#endif
ret lr
ENDPROC(__create_page_tables)
@@ -346,9 +362,9 @@ __turn_mmu_on_loc:
#if defined(CONFIG_SMP)
.text
-ENTRY(secondary_startup_arm)
.arm
- THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM.
+ENTRY(secondary_startup_arm)
+ THUMB( badr r9, 1f ) @ Kernel is entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
THUMB( .thumb ) @ switch to Thumb now.
THUMB(1: )
@@ -381,10 +397,10 @@ ENTRY(secondary_startup)
adr r4, __secondary_data
ldmia r4, {r5, r7, r12} @ address to jump to after
sub lr, r4, r5 @ mmu has been enabled
- ldr r4, [r7, lr] @ get secondary_data.pgdir
- add r7, r7, #4
- ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir
- adr lr, BSYM(__enable_mmu) @ return address
+ add r3, r7, lr
+ ldrd r4, [r3, #0] @ get secondary_data.pgdir
+ ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir
+ badr lr, __enable_mmu @ return address
mov r13, r12 @ __secondary_switched address
ldr r12, [r10, #PROCINFO_INITFUNC]
add r12, r12, r10 @ initialise processor
@@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm)
* r6 = &secondary_data
*/
ENTRY(__secondary_switched)
- ldr sp, [r7, #4] @ get secondary_data.stack
+ ldr sp, [r7, #12] @ get secondary_data.stack
mov fp, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
@@ -416,12 +432,14 @@ __secondary_data:
/*
* Setup common bits before finally enabling the MMU. Essentially
* this is just loading the page table pointer and domain access
- * registers.
+ * registers. All these registers need to be preserved by the
+ * processor setup function (or set in the case of r0)
*
* r0 = cp#15 control register
* r1 = machine ID
* r2 = atags or dtb pointer
- * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ * r4 = TTBR pointer (low word)
+ * r5 = TTBR pointer (high word if LPAE)
* r9 = processor ID
* r13 = *virtual* address to jump to upon completion
*/
@@ -440,7 +458,9 @@ __enable_mmu:
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #CR_I
#endif
-#ifndef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_LPAE
+ mcrr p15, 0, r4, r5, c2 @ load TTBR0
+#else
mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
new file mode 100644
index 000000000000..097e2e201b9f
--- /dev/null
+++ b/arch/arm/kernel/module-plts.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cache.h>
+#include <asm/opcodes.h>
+
+#define PLT_ENT_STRIDE L1_CACHE_BYTES
+#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32))
+#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT)
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \
+ (PLT_ENT_STRIDE - 4))
+#else
+#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \
+ (PLT_ENT_STRIDE - 8))
+#endif
+
+struct plt_entries {
+ u32 ldr[PLT_ENT_COUNT];
+ u32 lit[PLT_ENT_COUNT];
+};
+
+static bool in_init(const struct module *mod, u32 addr)
+{
+ return addr - (u32)mod->module_init < mod->init_size;
+}
+
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
+{
+ struct plt_entries *plt, *plt_end;
+ int c, *count;
+
+ if (in_init(mod, loc)) {
+ plt = (void *)mod->arch.init_plt->sh_addr;
+ plt_end = (void *)plt + mod->arch.init_plt->sh_size;
+ count = &mod->arch.init_plt_count;
+ } else {
+ plt = (void *)mod->arch.core_plt->sh_addr;
+ plt_end = (void *)plt + mod->arch.core_plt->sh_size;
+ count = &mod->arch.core_plt_count;
+ }
+
+ /* Look for an existing entry pointing to 'val' */
+ for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
+ int i;
+
+ if (!c) {
+ /* Populate a new set of entries */
+ *plt = (struct plt_entries){
+ { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
+ { val, }
+ };
+ ++*count;
+ return (u32)plt->ldr;
+ }
+ for (i = 0; i < PLT_ENT_COUNT; i++) {
+ if (!plt->lit[i]) {
+ plt->lit[i] = val;
+ ++*count;
+ }
+ if (plt->lit[i] == val)
+ return (u32)&plt->ldr[i];
+ }
+ }
+ BUG();
+}
+
+static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
+ u32 mask)
+{
+ u32 *loc1, *loc2;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ if (rel[i].r_info != rel[num].r_info)
+ continue;
+
+ /*
+ * Identical relocation types against identical symbols can
+ * still result in different PLT entries if the addend in the
+ * place is different. So resolve the target of the relocation
+ * to compare the values.
+ */
+ loc1 = (u32 *)(base + rel[i].r_offset);
+ loc2 = (u32 *)(base + rel[num].r_offset);
+ if (((*loc1 ^ *loc2) & mask) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
+{
+ unsigned int ret = 0;
+ int i;
+
+ /*
+ * Sure, this is order(n^2), but it's usually short, and not
+ * time critical
+ */
+ for (i = 0; i < num; i++)
+ switch (ELF32_R_TYPE(rel[i].r_info)) {
+ case R_ARM_CALL:
+ case R_ARM_PC24:
+ case R_ARM_JUMP24:
+ if (!duplicate_rel(base, rel, i,
+ __opcode_to_mem_arm(0x00ffffff)))
+ ret++;
+ break;
+#ifdef CONFIG_THUMB2_KERNEL
+ case R_ARM_THM_CALL:
+ case R_ARM_THM_JUMP24:
+ if (!duplicate_rel(base, rel, i,
+ __opcode_to_mem_thumb32(0x07ff2fff)))
+ ret++;
+#endif
+ }
+ return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ unsigned long core_plts = 0, init_plts = 0;
+ Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+
+ /*
+ * To store the PLTs, we use the dedicated .core.plt section for core
+ * module code and the .init.plt section for initialization code.
+ */
+ for (s = sechdrs; s < sechdrs_end; ++s)
+ if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
+ mod->arch.core_plt = s;
+ else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+ mod->arch.init_plt = s;
+
+ if (!mod->arch.core_plt || !mod->arch.init_plt) {
+ pr_err("%s: sections missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ for (s = sechdrs + 1; s < sechdrs_end; ++s) {
+ const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
+ int numrels = s->sh_size / sizeof(Elf32_Rel);
+ Elf32_Shdr *dstsec = sechdrs + s->sh_info;
+
+ if (s->sh_type != SHT_REL)
+ continue;
+
+ if (strstr(secstrings + s->sh_name, ".init"))
+ init_plts += count_plts(dstsec->sh_addr, rels, numrels);
+ else
+ core_plts += count_plts(dstsec->sh_addr, rels, numrels);
+ }
+
+ mod->arch.core_plt->sh_type = SHT_NOBITS;
+ mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
+ sizeof(struct plt_entries));
+ mod->arch.core_plt_count = 0;
+
+ mod->arch.init_plt->sh_type = SHT_NOBITS;
+ mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
+ sizeof(struct plt_entries));
+ mod->arch.init_plt_count = 0;
+ pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
+ mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
+ return 0;
+}
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index af791f4a6205..efdddcb97dd1 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -40,7 +40,12 @@
#ifdef CONFIG_MMU
void *module_alloc(unsigned long size)
{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
+ return p;
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
@@ -110,6 +115,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset -= 0x04000000;
offset += sym->st_value - loc;
+
+ /*
+ * Route through a PLT entry if 'offset' exceeds the
+ * supported range. Note that 'offset + loc + 8'
+ * contains the absolute jump target, i.e.,
+ * @sym + addend, corrected for the +8 PC bias.
+ */
+ if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
+ (offset <= (s32)0xfe000000 ||
+ offset >= (s32)0x02000000))
+ offset = get_module_plt(module, loc,
+ offset + loc + 8)
+ - loc - 8;
+
if (offset <= (s32)0xfe000000 ||
offset >= (s32)0x02000000) {
pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
@@ -203,6 +222,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset -= 0x02000000;
offset += sym->st_value - loc;
+ /*
+ * Route through a PLT entry if 'offset' exceeds the
+ * supported range.
+ */
+ if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
+ (offset <= (s32)0xff000000 ||
+ offset >= (s32)0x01000000))
+ offset = get_module_plt(module, loc,
+ offset + loc + 4)
+ - loc - 4;
+
if (offset <= (s32)0xff000000 ||
offset >= (s32)0x01000000) {
pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
new file mode 100644
index 000000000000..3682fa107918
--- /dev/null
+++ b/arch/arm/kernel/module.lds
@@ -0,0 +1,4 @@
+SECTIONS {
+ .core.plt : { BYTE(0) }
+ .init.plt : { BYTE(0) }
+}
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 4a86a0133ac3..357f57ea83f4 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -11,12 +11,18 @@
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
+#include <asm/cputype.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
@@ -229,6 +235,10 @@ armpmu_add(struct perf_event *event, int flags)
int idx;
int err = 0;
+ /* An event following a process won't be stopped earlier */
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+ return -ENOENT;
+
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
@@ -344,20 +354,12 @@ static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
armpmu->free_irq(armpmu);
- pm_runtime_put_sync(&armpmu->plat_device->dev);
}
static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
- int err;
- struct platform_device *pmu_device = armpmu->plat_device;
-
- if (!pmu_device)
- return -ENODEV;
-
- pm_runtime_get_sync(&pmu_device->dev);
- err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
+ int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
if (err) {
armpmu_release_hardware(armpmu);
return err;
@@ -454,6 +456,17 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
+ /*
+ * Reject CPU-affine events for CPUs that are of a different class to
+ * that which this PMU handles. Process-following events (where
+ * event->cpu == -1) can be migrated between CPUs, and thus we have to
+ * reject them later (in armpmu_add) if they're scheduled on a
+ * different class of CPU.
+ */
+ if (event->cpu != -1 &&
+ !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
+ return -ENOENT;
+
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -489,6 +502,10 @@ static void armpmu_enable(struct pmu *pmu)
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+ /* For task-bound events we may be called on other CPUs */
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+ return;
+
if (enabled)
armpmu->start(armpmu);
}
@@ -496,34 +513,25 @@ static void armpmu_enable(struct pmu *pmu)
static void armpmu_disable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
- armpmu->stop(armpmu);
-}
-
-#ifdef CONFIG_PM
-static int armpmu_runtime_resume(struct device *dev)
-{
- struct arm_pmu_platdata *plat = dev_get_platdata(dev);
- if (plat && plat->runtime_resume)
- return plat->runtime_resume(dev);
+ /* For task-bound events we may be called on other CPUs */
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+ return;
- return 0;
+ armpmu->stop(armpmu);
}
-static int armpmu_runtime_suspend(struct device *dev)
+/*
+ * In heterogeneous systems, events are specific to a particular
+ * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
+ * the same microarchitecture.
+ */
+static int armpmu_filter_match(struct perf_event *event)
{
- struct arm_pmu_platdata *plat = dev_get_platdata(dev);
-
- if (plat && plat->runtime_suspend)
- return plat->runtime_suspend(dev);
-
- return 0;
+ struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+ unsigned int cpu = smp_processor_id();
+ return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
}
-#endif
-
-const struct dev_pm_ops armpmu_dev_pm_ops = {
- SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
-};
static void armpmu_init(struct arm_pmu *armpmu)
{
@@ -539,15 +547,349 @@ static void armpmu_init(struct arm_pmu *armpmu)
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
+ .filter_match = armpmu_filter_match,
};
}
int armpmu_register(struct arm_pmu *armpmu, int type)
{
armpmu_init(armpmu);
- pm_runtime_enable(&armpmu->plat_device->dev);
pr_info("enabled with %s PMU driver, %d counters available\n",
armpmu->name, armpmu->num_events);
return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
}
+/* Set at runtime when we know what CPU type we are. */
+static struct arm_pmu *__oprofile_cpu_pmu;
+
+/*
+ * Despite the names, these two functions are CPU-specific and are used
+ * by the OProfile/perf code.
+ */
+const char *perf_pmu_name(void)
+{
+	const struct arm_pmu *pmu = __oprofile_cpu_pmu;
+
+	/* NULL until a PMU has been recorded in __oprofile_cpu_pmu. */
+	return pmu ? pmu->name : NULL;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	/* No PMU recorded yet: report zero counters. */
+	if (!__oprofile_cpu_pmu)
+		return 0;
+
+	return __oprofile_cpu_pmu->num_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+/* on_each_cpu() callback: @data points to the IRQ number to enable. */
+static void cpu_pmu_enable_percpu_irq(void *data)
+{
+	const int *irqp = data;
+
+	enable_percpu_irq(*irqp, IRQ_TYPE_NONE);
+}
+
+/* on_each_cpu() callback: @data points to the IRQ number to disable. */
+static void cpu_pmu_disable_percpu_irq(void *data)
+{
+	const int *irqp = data;
+
+	disable_percpu_irq(*irqp);
+}
+
+/*
+ * Release the interrupt(s) taken by cpu_pmu_request_irq(): a per-CPU IRQ is
+ * disabled on every CPU and freed once; otherwise each IRQ whose CPU is still
+ * marked in active_irqs is freed and its bit cleared.
+ */
+static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+{
+	struct platform_device *pdev = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *events = cpu_pmu->hw_events;
+	int irq_count = min(pdev->num_resources, num_possible_cpus());
+	int first_irq = platform_get_irq(pdev, 0);
+	int idx;
+
+	if (first_irq >= 0 && irq_is_percpu(first_irq)) {
+		on_each_cpu(cpu_pmu_disable_percpu_irq, &first_irq, 1);
+		free_percpu_irq(first_irq, &events->percpu_pmu);
+		return;
+	}
+
+	for (idx = 0; idx < irq_count; ++idx) {
+		/* Without an affinity table, IRQ index and CPU number coincide. */
+		int cpu = cpu_pmu->irq_affinity ? cpu_pmu->irq_affinity[idx] : idx;
+		int irq;
+
+		/* The request path marks a CPU active only after request_irq() succeeds. */
+		if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+			continue;
+
+		irq = platform_get_irq(pdev, idx);
+		if (irq >= 0)
+			free_irq(irq, per_cpu_ptr(&events->percpu_pmu, cpu));
+	}
+}
+
+/*
+ * Request the PMU interrupt(s) described by the platform device and route
+ * them to @handler.
+ *
+ * Returns -ENODEV without a platform device, 0 when no IRQs are described
+ * (after a one-time warning) or when every request succeeded, and otherwise
+ * the error from request_percpu_irq()/request_irq().
+ */
+static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+{
+	struct platform_device *pdev = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *events = cpu_pmu->hw_events;
+	int idx, ret, irq, irq_count;
+
+	if (!pdev)
+		return -ENODEV;
+
+	irq_count = min(pdev->num_resources, num_possible_cpus());
+	if (irq_count < 1) {
+		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
+		return 0;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		/* A per-CPU interrupt is requested once, then enabled on each CPU. */
+		ret = request_percpu_irq(irq, handler, "arm-pmu",
+					 &events->percpu_pmu);
+		if (ret) {
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+			       irq);
+			return ret;
+		}
+		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
+		return 0;
+	}
+
+	for (idx = 0; idx < irq_count; ++idx) {
+		int cpu;
+
+		irq = platform_get_irq(pdev, idx);
+		if (irq < 0)
+			continue;
+
+		/* Without an affinity table, IRQ index and CPU number coincide. */
+		cpu = cpu_pmu->irq_affinity ? cpu_pmu->irq_affinity[idx] : idx;
+
+		/*
+		 * If we have a single PMU interrupt that we can't shift,
+		 * assume that we're running on a uniprocessor machine and
+		 * continue. Otherwise, continue without this interrupt.
+		 */
+		if (irq_set_affinity(irq, cpumask_of(cpu)) && irq_count > 1) {
+			pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				irq, cpu);
+			continue;
+		}
+
+		ret = request_irq(irq, handler,
+				  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+				  per_cpu_ptr(&events->percpu_pmu, cpu));
+		if (ret) {
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+			       irq);
+			return ret;
+		}
+
+		/* Remember the CPU so cpu_pmu_free_irq() knows what to release. */
+		cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+	}
+
+	return 0;
+}
+
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
+			  void *hcpu)
+{
+	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
+	int cpu = (unsigned long)hcpu;
+	bool starting = (action & ~CPU_TASKS_FROZEN) == CPU_STARTING;
+
+	/*
+	 * Only act when a CPU handled by this PMU is starting and the PMU
+	 * provides a reset hook; everything else is not our business.
+	 */
+	if (!starting || !cpumask_test_cpu(cpu, &pmu->supported_cpus) ||
+	    !pmu->reset)
+		return NOTIFY_DONE;
+
+	pmu->reset(pmu);
+	return NOTIFY_OK;
+}
+
+/*
+ * Allocate and initialise the per-CPU event state for @cpu_pmu, register its
+ * CPU hotplug notifier, install the IRQ request/free callbacks and reset the
+ * PMU on the CPUs in supported_cpus.
+ *
+ * Returns 0 on success, -ENOMEM if the per-CPU allocation fails, or the error
+ * from register_cpu_notifier() (the per-CPU state is freed in that case).
+ */
+static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	int err;
+	int cpu;
+	struct pmu_hw_events __percpu *cpu_hw_events;
+
+	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
+	if (!cpu_hw_events)
+		return -ENOMEM;
+
+	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
+	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
+	if (err)
+		goto out_hw_events;
+
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
+
+		raw_spin_lock_init(&events->pmu_lock);
+		events->percpu_pmu = cpu_pmu;
+	}
+
+	cpu_pmu->hw_events	= cpu_hw_events;
+	cpu_pmu->request_irq	= cpu_pmu_request_irq;
+	cpu_pmu->free_irq	= cpu_pmu_free_irq;
+
+	/* Ensure the PMU has sane values out of reset. */
+	if (cpu_pmu->reset)
+		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
+				 cpu_pmu, 1);
+
+	/*
+	 * If no interrupts available, set the corresponding capability flag.
+	 * A missing IRQ is reported by platform_get_irq() as a negative error
+	 * code (the rest of this file tests "irq < 0"), so testing only for
+	 * zero would never flag an interrupt-less PMU.
+	 */
+	if (platform_get_irq(cpu_pmu->plat_device, 0) <= 0)
+		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	return 0;
+
+out_hw_events:
+	free_percpu(cpu_hw_events);
+	return err;
+}
+
+/* Undo cpu_pmu_init(): drop the hotplug notifier, then free the per-CPU state. */
+static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
+{
+	struct notifier_block *nb = &cpu_pmu->hotplug_nb;
+
+	unregister_cpu_notifier(nb);
+	free_percpu(cpu_pmu->hw_events);
+}
+
+/*
+ * CPU PMU identification and probing.
+ */
+static int probe_current_pmu(struct arm_pmu *pmu,
+			     const struct pmu_probe_info *info)
+{
+	const struct pmu_probe_info *entry;
+	unsigned int cpuid;
+	int ret = -ENODEV;
+	int cpu;
+
+	/* get_cpu()/put_cpu() bracket the ID read and the init call. */
+	cpu = get_cpu();
+	cpuid = read_cpuid_id();
+
+	pr_info("probing PMU on CPU %d\n", cpu);
+
+	/* The table ends at the first entry without an init hook. */
+	for (entry = info; entry->init; entry++) {
+		if ((cpuid & entry->mask) == entry->cpuid) {
+			/* First match wins; its result is what we report. */
+			ret = entry->init(pmu);
+			break;
+		}
+	}
+
+	put_cpu();
+	return ret;
+}
+
+/*
+ * Work out which CPUs @pmu serves from the device tree.
+ *
+ * For a per-CPU interrupt (PPI) every CPU is marked as supported. Otherwise
+ * each "interrupt-affinity" phandle is translated to a logical CPU; on
+ * success the resulting table is stored in pmu->irq_affinity and the CPUs are
+ * recorded in pmu->supported_cpus. If any entry cannot be resolved, the table
+ * is discarded and all CPUs are marked as supported.
+ *
+ * Returns 0, or -ENOMEM if the affinity table cannot be allocated.
+ */
+static int of_pmu_irq_cfg(struct arm_pmu *pmu)
+{
+	int i, irq, *irqs;
+	struct platform_device *pdev = pmu->plat_device;
+
+	/*
+	 * Don't bother with PPIs; they're already affine. The PMU must still
+	 * advertise its CPUs, otherwise supported_cpus stays empty and every
+	 * cpumask_test_cpu() check against it rejects the event.
+	 */
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		cpumask_setall(&pmu->supported_cpus);
+		return 0;
+	}
+
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(pdev->dev.of_node), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		if (cpu >= nr_cpu_ids) {
+			/* Report the node name before giving up our reference. */
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			of_node_put(dn);
+			break;
+		}
+		of_node_put(dn);
+
+		irqs[i] = cpu;
+		cpumask_set_cpu(cpu, &pmu->supported_cpus);
+	}
+
+	if (i == pdev->num_resources) {
+		pmu->irq_affinity = irqs;
+	} else {
+		/* Incomplete affinity information: fall back to all CPUs. */
+		kfree(irqs);
+		cpumask_setall(&pmu->supported_cpus);
+	}
+
+	return 0;
+}
+
+/*
+ * Common probe routine for the ARM CPU PMU platform drivers.
+ *
+ * @pdev:        platform device describing the PMU
+ * @of_table:    device-tree match table whose .data is the PMU init function
+ * @probe_table: CPUID-based table used when there is no device-tree match
+ *
+ * Allocates an arm_pmu, initialises it through the matching init function,
+ * sets up its per-CPU state and registers it with perf. Returns 0 on success
+ * or a negative error code, in which case the arm_pmu has been freed.
+ */
+int arm_pmu_device_probe(struct platform_device *pdev,
+			 const struct of_device_id *of_table,
+			 const struct pmu_probe_info *probe_table)
+{
+	const struct of_device_id *of_id;
+	int (*init_fn)(struct arm_pmu *);
+	struct device_node *node = pdev->dev.of_node;
+	struct arm_pmu *pmu;
+	int ret = -ENODEV;
+
+	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
+	if (!pmu) {
+		pr_info("failed to allocate PMU device!\n");
+		return -ENOMEM;
+	}
+
+	pmu->plat_device = pdev;
+
+	if (node && (of_id = of_match_node(of_table, node))) {
+		init_fn = of_id->data;
+
+		ret = of_pmu_irq_cfg(pmu);
+		if (!ret)
+			ret = init_fn(pmu);
+	} else {
+		ret = probe_current_pmu(pmu, probe_table);
+		cpumask_setall(&pmu->supported_cpus);
+	}
+
+	if (ret) {
+		pr_info("failed to probe PMU!\n");
+		goto out_free;
+	}
+
+	ret = cpu_pmu_init(pmu);
+	if (ret)
+		goto out_free;
+
+	ret = armpmu_register(pmu, -1);
+	if (ret)
+		goto out_destroy;
+
+	/*
+	 * Publish the PMU for perf_pmu_name()/perf_num_counters() only once
+	 * it is fully registered, so a failed probe can never leave a pointer
+	 * to freed memory behind.
+	 */
+	if (!__oprofile_cpu_pmu)
+		__oprofile_cpu_pmu = pmu;
+
+	return 0;
+
+out_destroy:
+	cpu_pmu_destroy(pmu);
+out_free:
+	pr_info("failed to register PMU devices!\n");
+	kfree(pmu);
+	return ret;
+}
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
deleted file mode 100644
index 3b8c2833c537..000000000000
--- a/arch/arm/kernel/perf_event_cpu.c
+++ /dev/null
@@ -1,421 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-#define pr_fmt(fmt) "CPU PMU: " fmt
-
-#include <linux/bitmap.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/irq.h>
-#include <linux/irqdesc.h>
-
-#include <asm/cputype.h>
-#include <asm/irq_regs.h>
-#include <asm/pmu.h>
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
-
-/*
- * Despite the names, these two functions are CPU-specific and are used
- * by the OProfile/perf code.
- */
-const char *perf_pmu_name(void)
-{
- if (!cpu_pmu)
- return NULL;
-
- return cpu_pmu->name;
-}
-EXPORT_SYMBOL_GPL(perf_pmu_name);
-
-int perf_num_counters(void)
-{
- int max_events = 0;
-
- if (cpu_pmu != NULL)
- max_events = cpu_pmu->num_events;
-
- return max_events;
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-/* Include the PMU-specific implementations. */
-#include "perf_event_xscale.c"
-#include "perf_event_v6.c"
-#include "perf_event_v7.c"
-
-static void cpu_pmu_enable_percpu_irq(void *data)
-{
- int irq = *(int *)data;
-
- enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static void cpu_pmu_disable_percpu_irq(void *data)
-{
- int irq = *(int *)data;
-
- disable_percpu_irq(irq);
-}
-
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
-{
- int i, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
- irqs = min(pmu_device->num_resources, num_possible_cpus());
-
- irq = platform_get_irq(pmu_device, 0);
- if (irq >= 0 && irq_is_percpu(irq)) {
- on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
- free_percpu_irq(irq, &hw_events->percpu_pmu);
- } else {
- for (i = 0; i < irqs; ++i) {
- int cpu = i;
-
- if (cpu_pmu->irq_affinity)
- cpu = cpu_pmu->irq_affinity[i];
-
- if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
- continue;
- irq = platform_get_irq(pmu_device, i);
- if (irq >= 0)
- free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
- }
- }
-}
-
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
-{
- int i, err, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
- if (!pmu_device)
- return -ENODEV;
-
- irqs = min(pmu_device->num_resources, num_possible_cpus());
- if (irqs < 1) {
- pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
- return 0;
- }
-
- irq = platform_get_irq(pmu_device, 0);
- if (irq >= 0 && irq_is_percpu(irq)) {
- err = request_percpu_irq(irq, handler, "arm-pmu",
- &hw_events->percpu_pmu);
- if (err) {
- pr_err("unable to request IRQ%d for ARM PMU counters\n",
- irq);
- return err;
- }
- on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
- } else {
- for (i = 0; i < irqs; ++i) {
- int cpu = i;
-
- err = 0;
- irq = platform_get_irq(pmu_device, i);
- if (irq < 0)
- continue;
-
- if (cpu_pmu->irq_affinity)
- cpu = cpu_pmu->irq_affinity[i];
-
- /*
- * If we have a single PMU interrupt that we can't shift,
- * assume that we're running on a uniprocessor machine and
- * continue. Otherwise, continue without this interrupt.
- */
- if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
- pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, cpu);
- continue;
- }
-
- err = request_irq(irq, handler,
- IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
- per_cpu_ptr(&hw_events->percpu_pmu, cpu));
- if (err) {
- pr_err("unable to request IRQ%d for ARM PMU counters\n",
- irq);
- return err;
- }
-
- cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
- }
- }
-
- return 0;
-}
-
-/*
- * PMU hardware loses all context when a CPU goes offline.
- * When a CPU is hotplugged back in, since some hardware registers are
- * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
- * junk values out of them.
- */
-static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
- void *hcpu)
-{
- struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-
- if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
- return NOTIFY_DONE;
-
- if (pmu->reset)
- pmu->reset(pmu);
- else
- return NOTIFY_DONE;
-
- return NOTIFY_OK;
-}
-
-static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int err;
- int cpu;
- struct pmu_hw_events __percpu *cpu_hw_events;
-
- cpu_hw_events = alloc_percpu(struct pmu_hw_events);
- if (!cpu_hw_events)
- return -ENOMEM;
-
- cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
- err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
- if (err)
- goto out_hw_events;
-
- for_each_possible_cpu(cpu) {
- struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
- raw_spin_lock_init(&events->pmu_lock);
- events->percpu_pmu = cpu_pmu;
- }
-
- cpu_pmu->hw_events = cpu_hw_events;
- cpu_pmu->request_irq = cpu_pmu_request_irq;
- cpu_pmu->free_irq = cpu_pmu_free_irq;
-
- /* Ensure the PMU has sane values out of reset. */
- if (cpu_pmu->reset)
- on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
-
- /* If no interrupts available, set the corresponding capability flag */
- if (!platform_get_irq(cpu_pmu->plat_device, 0))
- cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
- return 0;
-
-out_hw_events:
- free_percpu(cpu_hw_events);
- return err;
-}
-
-static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
-{
- unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
- free_percpu(cpu_pmu->hw_events);
-}
-
-/*
- * PMU platform driver and devicetree bindings.
- */
-static const struct of_device_id cpu_pmu_of_device_ids[] = {
- {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init},
- {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init},
- {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init},
- {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init},
- {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init},
- {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
- {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
- {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
- {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
- {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
- {.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
- {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init},
- {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init},
- {},
-};
-
-static struct platform_device_id cpu_pmu_plat_device_ids[] = {
- {.name = "arm-pmu"},
- {.name = "armv6-pmu"},
- {.name = "armv7-pmu"},
- {.name = "xscale-pmu"},
- {},
-};
-
-static const struct pmu_probe_info pmu_probe_table[] = {
- ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
- XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
- XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
- { /* sentinel value */ }
-};
-
-/*
- * CPU PMU identification and probing.
- */
-static int probe_current_pmu(struct arm_pmu *pmu)
-{
- int cpu = get_cpu();
- unsigned int cpuid = read_cpuid_id();
- int ret = -ENODEV;
- const struct pmu_probe_info *info;
-
- pr_info("probing PMU on CPU %d\n", cpu);
-
- for (info = pmu_probe_table; info->init != NULL; info++) {
- if ((cpuid & info->mask) != info->cpuid)
- continue;
- ret = info->init(pmu);
- break;
- }
-
- put_cpu();
- return ret;
-}
-
-static int of_pmu_irq_cfg(struct platform_device *pdev)
-{
- int i, irq;
- int *irqs;
-
- /* Don't bother with PPIs; they're already affine */
- irq = platform_get_irq(pdev, 0);
- if (irq >= 0 && irq_is_percpu(irq))
- return 0;
-
- irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
- if (!irqs)
- return -ENOMEM;
-
- for (i = 0; i < pdev->num_resources; ++i) {
- struct device_node *dn;
- int cpu;
-
- dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
- i);
- if (!dn) {
- pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
- of_node_full_name(pdev->dev.of_node), i);
- break;
- }
-
- for_each_possible_cpu(cpu)
- if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
- break;
-
- of_node_put(dn);
- if (cpu >= nr_cpu_ids) {
- pr_warn("Failed to find logical CPU for %s\n",
- dn->name);
- break;
- }
-
- irqs[i] = cpu;
- }
-
- if (i == pdev->num_resources)
- cpu_pmu->irq_affinity = irqs;
- else
- kfree(irqs);
-
- return 0;
-}
-
-static int cpu_pmu_device_probe(struct platform_device *pdev)
-{
- const struct of_device_id *of_id;
- const int (*init_fn)(struct arm_pmu *);
- struct device_node *node = pdev->dev.of_node;
- struct arm_pmu *pmu;
- int ret = -ENODEV;
-
- if (cpu_pmu) {
- pr_info("attempt to register multiple PMU devices!\n");
- return -ENOSPC;
- }
-
- pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
- if (!pmu) {
- pr_info("failed to allocate PMU device!\n");
- return -ENOMEM;
- }
-
- cpu_pmu = pmu;
- cpu_pmu->plat_device = pdev;
-
- if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
- init_fn = of_id->data;
-
- ret = of_pmu_irq_cfg(pdev);
- if (!ret)
- ret = init_fn(pmu);
- } else {
- ret = probe_current_pmu(pmu);
- }
-
- if (ret) {
- pr_info("failed to probe PMU!\n");
- goto out_free;
- }
-
- ret = cpu_pmu_init(cpu_pmu);
- if (ret)
- goto out_free;
-
- ret = armpmu_register(cpu_pmu, -1);
- if (ret)
- goto out_destroy;
-
- return 0;
-
-out_destroy:
- cpu_pmu_destroy(cpu_pmu);
-out_free:
- pr_info("failed to register PMU devices!\n");
- kfree(pmu);
- return ret;
-}
-
-static struct platform_driver cpu_pmu_driver = {
- .driver = {
- .name = "arm-pmu",
- .pm = &armpmu_dev_pm_ops,
- .of_match_table = cpu_pmu_of_device_ids,
- },
- .probe = cpu_pmu_device_probe,
- .id_table = cpu_pmu_plat_device_ids,
-};
-
-static int __init register_pmu_driver(void)
-{
- return platform_driver_register(&cpu_pmu_driver);
-}
-device_initcall(register_pmu_driver);
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f2ffd5c542ed..09f83e414a72 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -31,6 +31,14 @@
*/
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
enum armv6_perf_types {
ARMV6_PERFCTR_ICACHE_MISS = 0x0,
ARMV6_PERFCTR_IBUF_STALL = 0x1,
@@ -543,24 +551,39 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
-#else
-static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
-static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
+/* Device-tree compatibles handled by the ARMv6 PMU driver; .data is the init hook. */
+static const struct of_device_id armv6_pmu_of_device_ids[] = {
+	{.compatible = "arm,arm11mpcore-pmu",	.data = armv6mpcore_pmu_init},
+	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
+	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},
+	{ /* sentinel value */ }
+};
-static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+static const struct pmu_probe_info armv6_pmu_probe_table[] = {
+ ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
+ ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
+ ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
+ ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
+ { /* sentinel value */ }
+};
+
+static int armv6_pmu_device_probe(struct platform_device *pdev)
{
- return -ENODEV;
+ return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids,
+ armv6_pmu_probe_table);
}
-static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
+static struct platform_driver armv6_pmu_driver = {
+ .driver = {
+ .name = "armv6-pmu",
+ .of_match_table = armv6_pmu_of_device_ids,
+ },
+ .probe = armv6_pmu_device_probe,
+};
+
+static int __init register_armv6_pmu_driver(void)
{
- return -ENODEV;
+ return platform_driver_register(&armv6_pmu_driver);
}
+device_initcall(register_armv6_pmu_driver);
#endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index f4207a4dcb01..f9b37f876e20 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -19,9 +19,15 @@
#ifdef CONFIG_CPU_V7
#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
#include <asm/vfp.h>
#include "../vfp/vfpinstr.h"
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
/*
* Common ARMv7 event types
*
@@ -1056,15 +1062,22 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->max_period = (1LLU << 32) - 1;
};
-static u32 armv7_read_num_pmnc_events(void)
+static void armv7_read_num_pmnc_events(void *info)
{
- u32 nb_cnt;
+ int *nb_cnt = info;
/* Read the nb of CNTx counters supported from PMNC */
- nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+ *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
- /* Add the CPU cycles counter and return */
- return nb_cnt + 1;
+ /* Add the CPU cycles counter */
+ *nb_cnt += 1;
+}
+
+static int armv7_probe_num_events(struct arm_pmu *arm_pmu)
+{
+ return smp_call_function_any(&arm_pmu->supported_cpus,
+ armv7_read_num_pmnc_events,
+ &arm_pmu->num_events, 1);
}
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1072,8 +1085,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a8";
cpu_pmu->map_event = armv7_a8_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1081,8 +1093,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a9";
cpu_pmu->map_event = armv7_a9_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1090,8 +1101,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a5";
cpu_pmu->map_event = armv7_a5_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1099,9 +1109,8 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a15";
cpu_pmu->map_event = armv7_a15_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1109,9 +1118,8 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a7";
cpu_pmu->map_event = armv7_a7_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1119,16 +1127,15 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a12";
cpu_pmu->map_event = armv7_a12_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
{
- armv7_a12_pmu_init(cpu_pmu);
+ int ret = armv7_a12_pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_cortex_a17";
- return 0;
+ return ret;
}
/*
@@ -1508,14 +1515,13 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->map_event = krait_map_event_no_branch;
else
cpu_pmu->map_event = krait_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
cpu_pmu->reset = krait_pmu_reset;
cpu_pmu->enable = krait_pmu_enable_event;
cpu_pmu->disable = krait_pmu_disable_event;
cpu_pmu->get_event_idx = krait_pmu_get_event_idx;
cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
/*
@@ -1833,13 +1839,12 @@ static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_scorpion";
cpu_pmu->map_event = scorpion_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->reset = scorpion_pmu_reset;
cpu_pmu->enable = scorpion_pmu_enable_event;
cpu_pmu->disable = scorpion_pmu_disable_event;
cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
- return 0;
+ return armv7_probe_num_events(cpu_pmu);
}
static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1847,62 +1852,52 @@ static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "armv7_scorpion_mp";
cpu_pmu->map_event = scorpion_map_event;
- cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->reset = scorpion_pmu_reset;
cpu_pmu->enable = scorpion_pmu_enable_event;
cpu_pmu->disable = scorpion_pmu_disable_event;
cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
- return 0;
-}
-#else
-static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
-
-static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
-
-static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
-
-static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
-
-static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
+ return armv7_probe_num_events(cpu_pmu);
+}
+
+static const struct of_device_id armv7_pmu_of_device_ids[] = {
+ {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init},
+ {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init},
+ {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init},
+ {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init},
+ {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init},
+ {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
+ {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
+ {.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
+ {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init},
+ {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init},
+ {},
+};
-static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
+static const struct pmu_probe_info armv7_pmu_probe_table[] = {
+ ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
+ ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
+ { /* sentinel value */ }
+};
-static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
-static inline int krait_pmu_init(struct arm_pmu *cpu_pmu)
+static int armv7_pmu_device_probe(struct platform_device *pdev)
{
- return -ENODEV;
+ return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids,
+ armv7_pmu_probe_table);
}
-static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return -ENODEV;
-}
+static struct platform_driver armv7_pmu_driver = {
+ .driver = {
+ .name = "armv7-pmu",
+ .of_match_table = armv7_pmu_of_device_ids,
+ },
+ .probe = armv7_pmu_device_probe,
+};
-static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
+static int __init register_armv7_pmu_driver(void)
{
- return -ENODEV;
+ return platform_driver_register(&armv7_pmu_driver);
}
+device_initcall(register_armv7_pmu_driver);
#endif /* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 8af9f1f82c68..304d056d5b25 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -13,6 +13,14 @@
*/
#ifdef CONFIG_CPU_XSCALE
+
+#include <asm/cputype.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
enum xscale_perf_types {
XSCALE_PERFCTR_ICACHE_MISS = 0x00,
XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
@@ -740,14 +748,28 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
-#else
-static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu)
+
+static const struct pmu_probe_info xscale_pmu_probe_table[] = {
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
+ XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
+ { /* sentinel value */ }
+};
+
+static int xscale_pmu_device_probe(struct platform_device *pdev)
{
- return -ENODEV;
+ return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table);
}
-static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu)
+static struct platform_driver xscale_pmu_driver = {
+ .driver = {
+ .name = "xscale-pmu",
+ },
+ .probe = xscale_pmu_device_probe,
+};
+
+static int __init register_xscale_pmu_driver(void)
{
- return -ENODEV;
+ return platform_driver_register(&xscale_pmu_driver);
}
+device_initcall(register_xscale_pmu_driver);
#endif /* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 6c777e908a24..36c18b73c1f4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -46,6 +46,7 @@
#include <asm/cacheflush.h>
#include <asm/cachetype.h>
#include <asm/tlbflush.h>
+#include <asm/xen/hypervisor.h>
#include <asm/prom.h>
#include <asm/mach/arch.h>
@@ -75,8 +76,7 @@ __setup("fpe=", fpe_setup);
extern void init_default_cache_policy(unsigned long);
extern void paging_init(const struct machine_desc *desc);
-extern void early_paging_init(const struct machine_desc *,
- struct proc_info_list *);
+extern void early_paging_init(const struct machine_desc *);
extern void sanity_check_meminfo(void);
extern enum reboot_mode reboot_mode;
extern void setup_dma_zone(const struct machine_desc *desc);
@@ -93,6 +93,9 @@ unsigned int __atags_pointer __initdata;
unsigned int system_rev;
EXPORT_SYMBOL(system_rev);
+const char *system_serial;
+EXPORT_SYMBOL(system_serial);
+
unsigned int system_serial_low;
EXPORT_SYMBOL(system_serial_low);
@@ -839,8 +842,25 @@ arch_initcall(customize_machine);
static int __init init_machine_late(void)
{
+ struct device_node *root;
+ int ret;
+
if (machine_desc->init_late)
machine_desc->init_late();
+
+ root = of_find_node_by_path("/");
+ if (root) {
+ ret = of_property_read_string(root, "serial-number",
+ &system_serial);
+ if (ret)
+ system_serial = NULL;
+ }
+
+ if (!system_serial)
+ system_serial = kasprintf(GFP_KERNEL, "%08x%08x",
+ system_serial_high,
+ system_serial_low);
+
return 0;
}
late_initcall(init_machine_late);
@@ -936,7 +956,9 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
- early_paging_init(mdesc, lookup_processor_type(read_cpuid_id()));
+#ifdef CONFIG_MMU
+ early_paging_init(mdesc);
+#endif
setup_dma_zone(mdesc);
sanity_check_meminfo();
arm_memblock_init(mdesc);
@@ -951,6 +973,7 @@ void __init setup_arch(char **cmdline_p)
arm_dt_init_cpu_maps();
psci_init();
+ xen_early_init();
#ifdef CONFIG_SMP
if (is_smp()) {
if (!mdesc->smp_init || !mdesc->smp_init()) {
@@ -1109,8 +1132,7 @@ static int c_show(struct seq_file *m, void *v)
seq_printf(m, "Hardware\t: %s\n", machine_name);
seq_printf(m, "Revision\t: %04x\n", system_rev);
- seq_printf(m, "Serial\t\t: %08x%08x\n",
- system_serial_high, system_serial_low);
+ seq_printf(m, "Serial\t\t: %s\n", system_serial);
return 0;
}
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 6060dbc7844e..0f6c1000582c 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -81,7 +81,7 @@ ENTRY(__cpu_suspend)
mov r1, r4 @ size of save block
add r0, sp, #8 @ pointer to save block
bl __cpu_suspend_save
- adr lr, BSYM(cpu_suspend_abort)
+ badr lr, cpu_suspend_abort
ldmfd sp!, {r0, pc} @ call suspend fn
ENDPROC(__cpu_suspend)
.ltorg
@@ -122,7 +122,7 @@ ENDPROC(cpu_resume_after_mmu)
#ifdef CONFIG_MMU
.arm
ENTRY(cpu_resume_arm)
- THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM.
+ THUMB( badr r9, 1f ) @ Kernel is entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
THUMB( .thumb ) @ switch to Thumb now.
THUMB(1: )
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cca5b8758185..90dfbedfbfb8 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops)
static unsigned long get_arch_pgd(pgd_t *pgd)
{
- phys_addr_t pgdir = virt_to_idmap(pgd);
- BUG_ON(pgdir & ARCH_PGD_MASK);
- return pgdir >> ARCH_PGD_SHIFT;
+#ifdef CONFIG_ARM_LPAE
+ return __phys_to_pfn(virt_to_phys(pgd));
+#else
+ return virt_to_phys(pgd);
+#endif
}
int __cpu_up(unsigned int cpu, struct task_struct *idle)
@@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
#endif
#ifdef CONFIG_MMU
- secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+ secondary_data.pgdir = virt_to_phys(idmap_pgd);
secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
#endif
sync_cache_w(&secondary_data);
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index 7a3be1d4d0b1..b10e1360762e 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -17,6 +17,9 @@
#include <asm/mach/map.h>
#include <asm/memory.h>
#include <asm/system_info.h>
+#include <asm/traps.h>
+
+#define TCMTR_FORMAT_MASK 0xe0000000U
static struct gen_pool *tcm_pool;
static bool dtcm_present;
@@ -176,6 +179,77 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
}
/*
+ * When we are running in the non-secure world and the secure world
+ * has not explicitly given us access to the TCM we will get an
+ * undefined error when reading the TCM region register in the
+ * setup_tcm_bank function (above).
+ *
+ * There are two variants of this register read that we need to trap,
+ * the read for the data TCM and the read for the instruction TCM:
+ * c0370628: ee196f11 mrc 15, 0, r6, cr9, cr1, {0}
+ * c0370674: ee196f31 mrc 15, 0, r6, cr9, cr1, {1}
+ *
+ * Our undef hook mask explicitly matches all fields of the encoded
+ * instruction other than the destination register. The mask also
+ * only allows operand 2 to have the values 0 or 1.
+ *
+ * The undefined hook is defined as __init and __initdata, and therefore
+ * must be removed before tcm_init returns.
+ *
+ * In this particular case (MRC with ARM condition code ALways) the
+ * Thumb-2 and ARM instruction encoding are identical, so this hook
+ * will work on a Thumb-2 kernel.
+ *
+ * See A8.8.107, DDI0406C_C ARM Architecture Reference Manual, Encoding
+ * T1/A1 for the bit-by-bit details.
+ *
+ * mrc p15, 0, XX, c9, c1, 0
+ * mrc p15, 0, XX, c9, c1, 1
+ * | | | | | | | +---- opc2 0|1 = 000|001
+ * | | | | | | +------- CRm 0 = 0001
+ * | | | | | +----------- CRn 0 = 1001
+ * | | | | +--------------- Rt ? = ????
+ * | | | +------------------- opc1 0 = 000
+ * | | +----------------------- coproc 15 = 1111
+ * | +-------------------------- condition ALways = 1110
+ * +----------------------------- instruction MRC = 1110
+ *
+ * Encoding this as per A8.8.107 of DDI0406C, Encoding T1/A1, yields:
+ * 1111 1111 1111 1111 0000 1111 1101 1111 Required Mask
+ * 1110 1110 0001 1001 ???? 1111 0001 0001 mrc p15, 0, XX, c9, c1, 0
+ * 1110 1110 0001 1001 ???? 1111 0011 0001 mrc p15, 0, XX, c9, c1, 1
+ * [ ] [ ] [ ]| [ ] [ ] [ ] [ ]| +--- CRm
+ * | | | | | | | | +----- SBO
+ * | | | | | | | +------- opc2
+ * | | | | | | +----------- coproc
+ * | | | | | +---------------- Rt
+ * | | | | +--------------------- CRn
+ * | | | +------------------------- SBO
+ * | | +--------------------------- opc1
+ * | +------------------------------- instruction
+ * +------------------------------------ condition
+ */
+#define TCM_REGION_READ_MASK 0xffff0fdf
+#define TCM_REGION_READ_INSTR 0xee190f11
+#define DEST_REG_SHIFT 12
+#define DEST_REG_MASK 0xf
+
+static int __init tcm_handler(struct pt_regs *regs, unsigned int instr)
+{
+ regs->uregs[(instr >> DEST_REG_SHIFT) & DEST_REG_MASK] = 0;
+ regs->ARM_pc += 4;
+ return 0;
+}
+
+static struct undef_hook tcm_hook __initdata = {
+ .instr_mask = TCM_REGION_READ_MASK,
+ .instr_val = TCM_REGION_READ_INSTR,
+ .cpsr_mask = MODE_MASK,
+ .cpsr_val = SVC_MODE,
+ .fn = tcm_handler
+};
+
+/*
* This initializes the TCM memory
*/
void __init tcm_init(void)
@@ -204,9 +278,18 @@ void __init tcm_init(void)
}
tcm_status = read_cpuid_tcmstatus();
+
+ /*
+ * This code only supports v6-compatible TCMTR implementations.
+ */
+ if (tcm_status & TCMTR_FORMAT_MASK)
+ return;
+
dtcm_banks = (tcm_status >> 16) & 0x03;
itcm_banks = (tcm_status & 0x03);
+ register_undef_hook(&tcm_hook);
+
/* Values greater than 2 for D/ITCM banks are "reserved" */
if (dtcm_banks > 2)
dtcm_banks = 0;
@@ -218,7 +301,7 @@ void __init tcm_init(void)
for (i = 0; i < dtcm_banks; i++) {
ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end);
if (ret)
- return;
+ goto unregister;
}
/* This means you compiled more code than fits into DTCM */
if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) {
@@ -227,6 +310,12 @@ void __init tcm_init(void)
dtcm_code_sz, (dtcm_end - DTCM_OFFSET));
goto no_dtcm;
}
+ /*
+ * This means that the DTCM sizes were 0 or the DTCM banks
+ * were inaccessible due to TrustZone configuration.
+ */
+ if (!(dtcm_end - DTCM_OFFSET))
+ goto no_dtcm;
dtcm_res.end = dtcm_end - 1;
request_resource(&iomem_resource, &dtcm_res);
dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
@@ -250,15 +339,21 @@ no_dtcm:
for (i = 0; i < itcm_banks; i++) {
ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end);
if (ret)
- return;
+ goto unregister;
}
/* This means you compiled more code than fits into ITCM */
if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) {
pr_info("CPU ITCM: %u bytes of code compiled to "
"ITCM but only %lu bytes of ITCM present\n",
itcm_code_sz, (itcm_end - ITCM_OFFSET));
- return;
+ goto unregister;
}
+ /*
+ * This means that the ITCM sizes were 0 or the ITCM banks
+ * were inaccessible due to TrustZone configuration.
+ */
+ if (!(itcm_end - ITCM_OFFSET))
+ goto unregister;
itcm_res.end = itcm_end - 1;
request_resource(&iomem_resource, &itcm_res);
itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
@@ -275,6 +370,9 @@ no_dtcm:
pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no "
"ITCM banks present in CPU\n", itcm_code_sz);
}
+
+unregister:
+ unregister_undef_hook(&tcm_hook);
}
/*
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3dce1a342030..d358226236f2 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -749,14 +749,6 @@ late_initcall(arm_mrc_hook_init);
#endif
-void __bad_xchg(volatile void *ptr, int size)
-{
- pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
- __builtin_return_address(0), ptr, size);
- BUG();
-}
-EXPORT_SYMBOL(__bad_xchg);
-
/*
* A data abort trap was taken, but we did not handle the instruction.
* Try to abort the user program, or panic if it was the kernel.
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index f1f79d104309..bfb915d05665 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select SRCU
select MMU_NOTIFIER
+ select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 139e46c08b6e..c5eef02c52ba 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d9631ecddd56..bc738d2b8392 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -171,7 +171,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
- case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
@@ -532,6 +531,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vgic_flush_hwstate(vcpu);
kvm_timer_flush_hwstate(vcpu);
+ preempt_disable();
local_irq_disable();
/*
@@ -544,6 +544,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
local_irq_enable();
+ preempt_enable();
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
continue;
@@ -553,14 +554,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Enter the guest
*/
trace_kvm_entry(*vcpu_pc(vcpu));
- kvm_guest_enter();
+ __kvm_guest_enter();
vcpu->mode = IN_GUEST_MODE;
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
- kvm_guest_exit();
- trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+ /*
+ * Back from guest
+ *************************************************************/
+
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
@@ -574,8 +577,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
local_irq_enable();
/*
- * Back from guest
- *************************************************************/
+ * We do local_irq_enable() before calling kvm_guest_exit() so
+ * that if a timer interrupt hits while running the guest we
+ * account that tick as being spent in the guest. We enable
+ * preemption after calling kvm_guest_exit() so that if we get
+ * preempted we make sure ticks after that are not counted as
+ * guest time.
+ */
+ kvm_guest_exit();
+ trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+ preempt_enable();
+
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 79caf79b304a..568494dbbbb5 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -170,13 +170,9 @@ __kvm_vcpu_return:
@ Don't trap coprocessor accesses for host kernel
set_hstr vmexit
set_hdcr vmexit
- set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
+ set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore
#ifdef CONFIG_VFPv3
- @ Save floating point registers we if let guest use them.
- tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
- bne after_vfp_restore
-
@ Switch VFP/NEON hardware state to the host's
add r7, vcpu, #VCPU_VFP_GUEST
store_vfp_state r7
@@ -188,6 +184,8 @@ after_vfp_restore:
@ Restore FPEXC_EN which we clobbered on entry
pop {r2}
VFPFMXR FPEXC, r2
+#else
+after_vfp_restore:
#endif
@ Reset Hyp-role
@@ -309,7 +307,7 @@ ENTRY(kvm_call_hyp)
THUMB( orr r2, r2, #PSR_T_BIT )
msr spsr_cxsf, r2
mrs r1, ELR_hyp
- ldr r2, =BSYM(panic)
+ ldr r2, =panic
msr ELR_hyp, r2
ldr r0, =\panic_str
clrex @ Clear exclusive monitor
@@ -483,7 +481,7 @@ switch_to_guest_vfp:
push {r3-r7}
@ NEON/VFP used. Turn on VFP access.
- set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11))
+ set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11))
@ Switch VFP/NEON hardware state to the guest's
add r7, r0, #VCPU_VFP_HOST
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 35e4a3a0c476..702740d37465 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -412,7 +412,6 @@ vcpu .req r0 @ vcpu pointer always in r0
add r11, vcpu, #VCPU_VGIC_CPU
/* Save all interesting registers */
- ldr r3, [r2, #GICH_HCR]
ldr r4, [r2, #GICH_VMCR]
ldr r5, [r2, #GICH_MISR]
ldr r6, [r2, #GICH_EISR0]
@@ -420,7 +419,6 @@ vcpu .req r0 @ vcpu pointer always in r0
ldr r8, [r2, #GICH_ELRSR0]
ldr r9, [r2, #GICH_ELRSR1]
ldr r10, [r2, #GICH_APR]
-ARM_BE8(rev r3, r3 )
ARM_BE8(rev r4, r4 )
ARM_BE8(rev r5, r5 )
ARM_BE8(rev r6, r6 )
@@ -429,7 +427,6 @@ ARM_BE8(rev r8, r8 )
ARM_BE8(rev r9, r9 )
ARM_BE8(rev r10, r10 )
- str r3, [r11, #VGIC_V2_CPU_HCR]
str r4, [r11, #VGIC_V2_CPU_VMCR]
str r5, [r11, #VGIC_V2_CPU_MISR]
#ifdef CONFIG_CPU_ENDIAN_BE8
@@ -591,8 +588,13 @@ ARM_BE8(rev r6, r6 )
.endm
/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
- * (hardware reset value is 0). Keep previous value in r2. */
-.macro set_hcptr operation, mask
+ * (hardware reset value is 0). Keep previous value in r2.
+ * An ISB is emitted on vmexit/vmtrap, but executed on vmexit only if
+ * VFP wasn't already enabled (always executed on vmtrap).
+ * If a label is specified with vmexit, it is branched to if VFP wasn't
+ * enabled.
+ */
+.macro set_hcptr operation, mask, label = none
mrc p15, 4, r2, c1, c1, 2
ldr r3, =\mask
.if \operation == vmentry
@@ -601,6 +603,17 @@ ARM_BE8(rev r6, r6 )
bic r3, r2, r3 @ Don't trap defined coproc-accesses
.endif
mcr p15, 4, r3, c1, c1, 2
+ .if \operation != vmentry
+ .if \operation == vmexit
+ tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
+ beq 1f
+ .endif
+ isb
+ .if \label != none
+ b \label
+ .endif
+1:
+ .endif
.endm
/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1d5accbd3dcf..7b4201294187 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -691,8 +691,8 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
* work. This is not used by the hardware and we have no
* alignment requirement for this allocation.
*/
- pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
- GFP_KERNEL | __GFP_ZERO);
+ pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+ GFP_KERNEL | __GFP_ZERO);
if (!pgd) {
kvm_free_hwpgd(hwpgd);
@@ -1155,7 +1155,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
*/
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
- struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
@@ -1718,8 +1719,9 @@ out:
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
/*
@@ -1733,7 +1735,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
hva_t hva = mem->userspace_addr;
@@ -1838,7 +1840,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
+void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
{
}
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 02fa8eff6ae1..4b94b513168d 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -24,6 +24,8 @@
#include <asm/kvm_psci.h>
#include <asm/kvm_host.h>
+#include <uapi/linux/psci.h>
+
/*
* This is an implementation of the Power State Coordination Interface
* as described in ARM document number ARM DEN 0022A.
@@ -230,10 +232,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
case PSCI_0_2_FN64_AFFINITY_INFO:
val = kvm_psci_vcpu_affinity_info(vcpu);
break;
- case PSCI_0_2_FN_MIGRATE:
- case PSCI_0_2_FN64_MIGRATE:
- val = PSCI_RET_NOT_SUPPORTED;
- break;
case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
/*
* Trusted OS is MP hence does not require migration
@@ -242,10 +240,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
*/
val = PSCI_0_2_TOS_MP;
break;
- case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
- case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
- val = PSCI_RET_NOT_SUPPORTED;
- break;
case PSCI_0_2_FN_SYSTEM_OFF:
kvm_psci_system_off(vcpu);
/*
@@ -271,7 +265,8 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
ret = 0;
break;
default:
- return -EINVAL;
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
}
*vcpu_reg(vcpu, 0) = val;
@@ -291,12 +286,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
case KVM_PSCI_FN_CPU_ON:
val = kvm_psci_vcpu_on(vcpu);
break;
- case KVM_PSCI_FN_CPU_SUSPEND:
- case KVM_PSCI_FN_MIGRATE:
+ default:
val = PSCI_RET_NOT_SUPPORTED;
break;
- default:
- return -EINVAL;
}
*vcpu_reg(vcpu, 0) = val;
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
index ed1a421813cb..bf3a40889205 100644
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -35,7 +35,7 @@ ENTRY(call_with_stack)
mov r2, r0
mov r0, r1
- adr lr, BSYM(1f)
+ badr lr, 1f
ret r2
1: ldr lr, [sp]
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 947567ff67f9..af2267f6a529 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -167,7 +167,7 @@ Boston, MA 02111-1307, USA. */
#endif
- @ Perform all needed substractions to keep only the reminder.
+ @ Perform all needed subtractions to keep only the remainder.
@ Do comparisons in batch of 4 first.
subs \order, \order, #3 @ yes, 3 is intended here
blt 2f
@@ -189,7 +189,7 @@ Boston, MA 02111-1307, USA. */
teqne \dividend, #0
beq 5f
- @ Either 1, 2 or 3 comparison/substractions are left.
+ @ Either 1, 2 or 3 comparison/subtractions are left.
2: cmn \order, #2
blt 4f
beq 3f
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 1e184767c3be..e24df77abd79 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -369,7 +369,7 @@ static void __init at91_pm_sram_init(void)
return;
}
- sram_pool = dev_get_gen_pool(&pdev->dev);
+ sram_pool = gen_pool_get(&pdev->dev);
if (!sram_pool) {
pr_warn("%s: sram pool unavailable!\n", __func__);
return;
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index 45ce065e7170..3b8740c083c4 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -11,6 +11,7 @@
* is licensed "as is" without any warranty of any kind, whether express
* or implied.
*/
+#include <linux/clkdev.h>
#include <linux/gpio.h>
#include <linux/init.h>
#include <linux/clk.h>
diff --git a/arch/arm/mach-davinci/pm_domain.c b/arch/arm/mach-davinci/pm_domain.c
index 641edc313938..78eac2c0c146 100644
--- a/arch/arm/mach-davinci/pm_domain.c
+++ b/arch/arm/mach-davinci/pm_domain.c
@@ -14,39 +14,9 @@
#include <linux/pm_clock.h>
#include <linux/platform_device.h>
-#ifdef CONFIG_PM
-static int davinci_pm_runtime_suspend(struct device *dev)
-{
- int ret;
-
- dev_dbg(dev, "%s\n", __func__);
-
- ret = pm_generic_runtime_suspend(dev);
- if (ret)
- return ret;
-
- ret = pm_clk_suspend(dev);
- if (ret) {
- pm_generic_runtime_resume(dev);
- return ret;
- }
-
- return 0;
-}
-
-static int davinci_pm_runtime_resume(struct device *dev)
-{
- dev_dbg(dev, "%s\n", __func__);
-
- pm_clk_resume(dev);
- return pm_generic_runtime_resume(dev);
-}
-#endif
-
static struct dev_pm_domain davinci_pm_domain = {
.ops = {
- SET_RUNTIME_PM_OPS(davinci_pm_runtime_suspend,
- davinci_pm_runtime_resume, NULL)
+ USE_PM_CLK_RUNTIME_OPS
USE_PLATFORM_PM_SLEEP_OPS
},
};
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index 4bd8b7653817..5f8ddcdeeacf 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -224,6 +224,25 @@ static void __init exynos_init_irq(void)
exynos_map_pmu();
}
+static const struct of_device_id exynos_cpufreq_matches[] = {
+ { .compatible = "samsung,exynos4210", .data = "cpufreq-dt" },
+ { /* sentinel */ }
+};
+
+static void __init exynos_cpufreq_init(void)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ const struct of_device_id *match;
+
+ match = of_match_node(exynos_cpufreq_matches, root);
+ if (!match) {
+ platform_device_register_simple("exynos-cpufreq", -1, NULL, 0);
+ return;
+ }
+
+ platform_device_register_simple(match->data, -1, NULL, 0);
+}
+
static void __init exynos_dt_machine_init(void)
{
/*
@@ -246,7 +265,7 @@ static void __init exynos_dt_machine_init(void)
of_machine_is_compatible("samsung,exynos5250"))
platform_device_register(&exynos_cpuidle);
- platform_device_register_simple("exynos-cpufreq", -1, NULL, 0);
+ exynos_cpufreq_init();
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 96866d03d281..f572219c7a40 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -311,13 +311,7 @@ static int exynos5420_cpu_suspend(unsigned long arg)
if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM)) {
mcpm_set_entry_vector(cpu, cluster, exynos_cpu_resume);
-
- /*
- * Residency value passed to mcpm_cpu_suspend back-end
- * has to be given clear semantics. Set to 0 as a
- * temporary value.
- */
- mcpm_cpu_suspend(0);
+ mcpm_cpu_suspend();
}
pr_info("Failed to suspend the system\n");
diff --git a/arch/arm/mach-footbridge/dma.c b/arch/arm/mach-footbridge/dma.c
index e2e0df8bcee2..22536b85a81d 100644
--- a/arch/arm/mach-footbridge/dma.c
+++ b/arch/arm/mach-footbridge/dma.c
@@ -13,9 +13,9 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/spinlock.h>
+#include <linux/scatterlist.h>
#include <asm/dma.h>
-#include <asm/scatterlist.h>
#include <asm/mach/dma.h>
#include <asm/hardware/dec21285.h>
diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c
index f8cb5710d6ee..3292f2e6ed6f 100644
--- a/arch/arm/mach-gemini/gpio.c
+++ b/arch/arm/mach-gemini/gpio.c
@@ -223,8 +223,8 @@ void __init gemini_gpio_init(void)
set_irq_flags(j, IRQF_VALID);
}
- irq_set_chained_handler(IRQ_GPIO(i), gpio_irq_handler);
- irq_set_handler_data(IRQ_GPIO(i), (void *)i);
+ irq_set_chained_handler_and_data(IRQ_GPIO(i), gpio_irq_handler,
+ (void *)i);
}
BUG_ON(gpiochip_add(&gemini_gpio_chip));
diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
index 280f3f14f77c..b5f8f5ffda79 100644
--- a/arch/arm/mach-hisi/platmcpm.c
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -6,6 +6,8 @@
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*/
+#include <linux/init.h>
+#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/memblock.h>
@@ -13,7 +15,9 @@
#include <asm/cputype.h>
#include <asm/cp15.h>
-#include <asm/mcpm.h>
+#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/smp_plat.h>
#include "core.h"
@@ -94,11 +98,16 @@ static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
} while (data != readl_relaxed(fabric + FAB_SF_MODE));
}
-static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
+static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
{
+ unsigned int mpidr, cpu, cluster;
unsigned long data;
void __iomem *sys_dreq, *sys_status;
+ mpidr = cpu_logical_map(l_cpu);
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
if (!sysctrl)
return -ENODEV;
if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
@@ -118,6 +127,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
cpu_relax();
data = readl_relaxed(sys_status);
} while (data & CLUSTER_DEBUG_RESET_STATUS);
+ hip04_set_snoop_filter(cluster, 1);
}
data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
@@ -126,11 +136,15 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
do {
cpu_relax();
} while (data == readl_relaxed(sys_status));
+
/*
* We may fail to power up core again without this delay.
* It's not mentioned in document. It's found by test.
*/
udelay(20);
+
+ arch_send_wakeup_ipi_mask(cpumask_of(l_cpu));
+
out:
hip04_cpu_table[cluster][cpu]++;
spin_unlock_irq(&boot_lock);
@@ -138,31 +152,30 @@ out:
return 0;
}
-static void hip04_mcpm_power_down(void)
+#ifdef CONFIG_HOTPLUG_CPU
+static void hip04_cpu_die(unsigned int l_cpu)
{
unsigned int mpidr, cpu, cluster;
- bool skip_wfi = false, last_man = false;
+ bool last_man;
- mpidr = read_cpuid_mpidr();
+ mpidr = cpu_logical_map(l_cpu);
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- __mcpm_cpu_going_down(cpu, cluster);
-
spin_lock(&boot_lock);
- BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
hip04_cpu_table[cluster][cpu]--;
if (hip04_cpu_table[cluster][cpu] == 1) {
/* A power_up request went ahead of us. */
- skip_wfi = true;
+ spin_unlock(&boot_lock);
+ return;
} else if (hip04_cpu_table[cluster][cpu] > 1) {
pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
BUG();
}
last_man = hip04_cluster_is_down(cluster);
- if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
- spin_unlock(&boot_lock);
+ spin_unlock(&boot_lock);
+ if (last_man) {
/* Since it's Cortex A15, disable L2 prefetching. */
asm volatile(
"mcr p15, 1, %0, c15, c0, 3 \n\t"
@@ -170,34 +183,30 @@ static void hip04_mcpm_power_down(void)
"dsb "
: : "r" (0x400) );
v7_exit_coherency_flush(all);
- hip04_set_snoop_filter(cluster, 0);
- __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
} else {
- spin_unlock(&boot_lock);
v7_exit_coherency_flush(louis);
}
- __mcpm_cpu_down(cpu, cluster);
-
- if (!skip_wfi)
+ for (;;)
wfi();
}
-static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
+static int hip04_cpu_kill(unsigned int l_cpu)
{
+ unsigned int mpidr, cpu, cluster;
unsigned int data, tries, count;
- int ret = -ETIMEDOUT;
+ mpidr = cpu_logical_map(l_cpu);
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
BUG_ON(cluster >= HIP04_MAX_CLUSTERS ||
cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
count = TIMEOUT_MSEC / POLL_MSEC;
spin_lock_irq(&boot_lock);
for (tries = 0; tries < count; tries++) {
- if (hip04_cpu_table[cluster][cpu]) {
- ret = -EBUSY;
+ if (hip04_cpu_table[cluster][cpu])
goto err;
- }
cpu_relax();
data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
if (data & CORE_WFI_STATUS(cpu))
@@ -220,64 +229,22 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
}
if (tries >= count)
goto err;
+ if (hip04_cluster_is_down(cluster))
+ hip04_set_snoop_filter(cluster, 0);
spin_unlock_irq(&boot_lock);
- return 0;
+ return 1;
err:
spin_unlock_irq(&boot_lock);
- return ret;
-}
-
-static void hip04_mcpm_powered_up(void)
-{
- unsigned int mpidr, cpu, cluster;
-
- mpidr = read_cpuid_mpidr();
- cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-
- spin_lock(&boot_lock);
- if (!hip04_cpu_table[cluster][cpu])
- hip04_cpu_table[cluster][cpu] = 1;
- spin_unlock(&boot_lock);
-}
-
-static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level)
-{
- asm volatile (" \n"
-" cmp r0, #0 \n"
-" bxeq lr \n"
- /* calculate fabric phys address */
-" adr r2, 2f \n"
-" ldmia r2, {r1, r3} \n"
-" sub r0, r2, r1 \n"
-" ldr r2, [r0, r3] \n"
- /* get cluster id from MPIDR */
-" mrc p15, 0, r0, c0, c0, 5 \n"
-" ubfx r1, r0, #8, #8 \n"
- /* 1 << cluster id */
-" mov r0, #1 \n"
-" mov r3, r0, lsl r1 \n"
-" ldr r0, [r2, #"__stringify(FAB_SF_MODE)"] \n"
-" tst r0, r3 \n"
-" bxne lr \n"
-" orr r1, r0, r3 \n"
-" str r1, [r2, #"__stringify(FAB_SF_MODE)"] \n"
-"1: ldr r0, [r2, #"__stringify(FAB_SF_MODE)"] \n"
-" tst r0, r3 \n"
-" beq 1b \n"
-" bx lr \n"
-
-" .align 2 \n"
-"2: .word . \n"
-" .word fabric_phys_addr \n"
- );
+ return 0;
}
-
-static const struct mcpm_platform_ops hip04_mcpm_ops = {
- .power_up = hip04_mcpm_power_up,
- .power_down = hip04_mcpm_power_down,
- .wait_for_powerdown = hip04_mcpm_wait_for_powerdown,
- .powered_up = hip04_mcpm_powered_up,
+#endif
+
+static struct smp_operations __initdata hip04_smp_ops = {
+ .smp_boot_secondary = hip04_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
+ .cpu_die = hip04_cpu_die,
+ .cpu_kill = hip04_cpu_kill,
+#endif
};
static bool __init hip04_cpu_table_init(void)
@@ -298,7 +265,7 @@ static bool __init hip04_cpu_table_init(void)
return true;
}
-static int __init hip04_mcpm_init(void)
+static int __init hip04_smp_init(void)
{
struct device_node *np, *np_sctl, *np_fab;
struct resource fab_res;
@@ -353,10 +320,6 @@ static int __init hip04_mcpm_init(void)
ret = -EINVAL;
goto err_table;
}
- ret = mcpm_platform_register(&hip04_mcpm_ops);
- if (ret) {
- goto err_table;
- }
/*
* Fill the instruction address that is used after secondary core
@@ -364,13 +327,11 @@ static int __init hip04_mcpm_init(void)
*/
writel_relaxed(hip04_boot_method[0], relocation);
writel_relaxed(0xa5a5a5a5, relocation + 4); /* magic number */
- writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
+ writel_relaxed(virt_to_phys(secondary_startup), relocation + 8);
writel_relaxed(0, relocation + 12);
iounmap(relocation);
- mcpm_sync_init(hip04_mcpm_power_up_setup);
- mcpm_smp_set_ops();
- pr_info("HiP04 MCPM initialized\n");
+ smp_set_ops(&hip04_smp_ops);
return ret;
err_table:
iounmap(fabric);
@@ -383,4 +344,4 @@ err_reloc:
err:
return ret;
}
-early_initcall(hip04_mcpm_init);
+early_initcall(hip04_smp_init);
diff --git a/arch/arm/mach-imx/pm-imx5.c b/arch/arm/mach-imx/pm-imx5.c
index 0309ccda36a9..1885676c23c0 100644
--- a/arch/arm/mach-imx/pm-imx5.c
+++ b/arch/arm/mach-imx/pm-imx5.c
@@ -297,7 +297,7 @@ static int __init imx_suspend_alloc_ocram(
goto put_node;
}
- ocram_pool = dev_get_gen_pool(&pdev->dev);
+ ocram_pool = gen_pool_get(&pdev->dev);
if (!ocram_pool) {
pr_warn("%s: ocram pool unavailable!\n", __func__);
ret = -ENODEV;
diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c
index b01650d94f91..93ecf559d06d 100644
--- a/arch/arm/mach-imx/pm-imx6.c
+++ b/arch/arm/mach-imx/pm-imx6.c
@@ -451,7 +451,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata)
goto put_node;
}
- ocram_pool = dev_get_gen_pool(&pdev->dev);
+ ocram_pool = gen_pool_get(&pdev->dev);
if (!ocram_pool) {
pr_warn("%s: ocram pool unavailable!\n", __func__);
ret = -ENODEV;
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 30003ba447a5..5b0e363fe5ba 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -37,7 +37,6 @@
#include <linux/stat.h>
#include <linux/termios.h>
-#include <asm/hardware/arm_timer.h>
#include <asm/setup.h>
#include <asm/param.h> /* HZ */
#include <asm/mach-types.h>
diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index 06620875813a..e288010522f9 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -27,7 +27,6 @@
#include "keystone.h"
-static struct notifier_block platform_nb;
static unsigned long keystone_dma_pfn_offset __read_mostly;
static int keystone_platform_notifier(struct notifier_block *nb,
@@ -49,11 +48,18 @@ static int keystone_platform_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
+static struct notifier_block platform_nb = {
+ .notifier_call = keystone_platform_notifier,
+};
+
static void __init keystone_init(void)
{
- keystone_pm_runtime_init();
- if (platform_nb.notifier_call)
+ if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) {
+ keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START -
+ KEYSTONE_LOW_PHYS_START);
bus_register_notifier(&platform_bus_type, &platform_nb);
+ }
+ keystone_pm_runtime_init();
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
@@ -62,11 +68,9 @@ static phys_addr_t keystone_virt_to_idmap(unsigned long x)
return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START;
}
-static void __init keystone_init_meminfo(void)
+static long long __init keystone_pv_fixup(void)
{
- bool lpae = IS_ENABLED(CONFIG_ARM_LPAE);
- bool pvpatch = IS_ENABLED(CONFIG_ARM_PATCH_PHYS_VIRT);
- phys_addr_t offset = PHYS_OFFSET - KEYSTONE_LOW_PHYS_START;
+ long long offset;
phys_addr_t mem_start, mem_end;
mem_start = memblock_start_of_DRAM();
@@ -75,32 +79,21 @@ static void __init keystone_init_meminfo(void)
/* nothing to do if we are running out of the <32-bit space */
if (mem_start >= KEYSTONE_LOW_PHYS_START &&
mem_end <= KEYSTONE_LOW_PHYS_END)
- return;
-
- if (!lpae || !pvpatch) {
- pr_crit("Enable %s%s%s to run outside 32-bit space\n",
- !lpae ? __stringify(CONFIG_ARM_LPAE) : "",
- (!lpae && !pvpatch) ? " and " : "",
- !pvpatch ? __stringify(CONFIG_ARM_PATCH_PHYS_VIRT) : "");
- }
+ return 0;
if (mem_start < KEYSTONE_HIGH_PHYS_START ||
mem_end > KEYSTONE_HIGH_PHYS_END) {
pr_crit("Invalid address space for memory (%08llx-%08llx)\n",
- (u64)mem_start, (u64)mem_end);
+ (u64)mem_start, (u64)mem_end);
+ return 0;
}
- offset += KEYSTONE_HIGH_PHYS_START;
- __pv_phys_pfn_offset = PFN_DOWN(offset);
- __pv_offset = (offset - PAGE_OFFSET);
+ offset = KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START;
/* Populate the arch idmap hook */
arch_virt_to_idmap = keystone_virt_to_idmap;
- platform_nb.notifier_call = keystone_platform_notifier;
- keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START -
- KEYSTONE_LOW_PHYS_START);
- pr_info("Switching to high address space at 0x%llx\n", (u64)offset);
+ return offset;
}
static const char *const keystone_match[] __initconst = {
@@ -115,5 +108,5 @@ DT_MACHINE_START(KEYSTONE, "Keystone")
.smp = smp_ops(keystone_smp_ops),
.init_machine = keystone_init,
.dt_compat = keystone_match,
- .init_meminfo = keystone_init_meminfo,
+ .pv_fixup = keystone_pv_fixup,
MACHINE_END
diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
index 5f46a7cf907b..4bbb18463bfd 100644
--- a/arch/arm/mach-keystone/platsmp.c
+++ b/arch/arm/mach-keystone/platsmp.c
@@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu,
return error;
}
-#ifdef CONFIG_ARM_LPAE
-static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{
- pgd_t *pgd0 = pgd_offset_k(0);
- cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
- local_flush_tlb_all();
-}
-#else
-static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{}
-#endif
-
struct smp_operations keystone_smp_ops __initdata = {
.smp_boot_secondary = keystone_smp_boot_secondary,
- .smp_secondary_init = keystone_smp_secondary_initmem,
};
diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c
index 41bebfd296dc..edea697e8253 100644
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c
@@ -19,40 +19,9 @@
#include <linux/clk-provider.h>
#include <linux/of.h>
-#ifdef CONFIG_PM
-static int keystone_pm_runtime_suspend(struct device *dev)
-{
- int ret;
-
- dev_dbg(dev, "%s\n", __func__);
-
- ret = pm_generic_runtime_suspend(dev);
- if (ret)
- return ret;
-
- ret = pm_clk_suspend(dev);
- if (ret) {
- pm_generic_runtime_resume(dev);
- return ret;
- }
-
- return 0;
-}
-
-static int keystone_pm_runtime_resume(struct device *dev)
-{
- dev_dbg(dev, "%s\n", __func__);
-
- pm_clk_resume(dev);
-
- return pm_generic_runtime_resume(dev);
-}
-#endif
-
static struct dev_pm_domain keystone_pm_domain = {
.ops = {
- SET_RUNTIME_PM_OPS(keystone_pm_runtime_suspend,
- keystone_pm_runtime_resume, NULL)
+ USE_PM_CLK_RUNTIME_OPS
USE_PLATFORM_PM_SLEEP_OPS
},
};
diff --git a/arch/arm/mach-lpc32xx/clock.c b/arch/arm/mach-lpc32xx/clock.c
index dd5d6f532e8c..661c8f4b2310 100644
--- a/arch/arm/mach-lpc32xx/clock.c
+++ b/arch/arm/mach-lpc32xx/clock.c
@@ -1238,10 +1238,7 @@ static struct clk_lookup lookups[] = {
static int __init clk_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(lookups); i++)
- clkdev_add(&lookups[i]);
+ clkdev_add_table(lookups, ARRAY_SIZE(lookups));
/*
* Setup muxed SYSCLK for HCLK PLL base -this selects the
diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c
index 9ecb8f9c4ef5..d4f7dc87042b 100644
--- a/arch/arm/mach-lpc32xx/irq.c
+++ b/arch/arm/mach-lpc32xx/irq.c
@@ -283,25 +283,25 @@ static int lpc32xx_set_irq_type(struct irq_data *d, unsigned int type)
case IRQ_TYPE_EDGE_RISING:
/* Rising edge sensitive */
__lpc32xx_set_irq_type(d->hwirq, 1, 1);
- __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+ __irq_set_handler_locked(d->irq, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_FALLING:
/* Falling edge sensitive */
__lpc32xx_set_irq_type(d->hwirq, 0, 1);
- __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+ __irq_set_handler_locked(d->irq, handle_edge_irq);
break;
case IRQ_TYPE_LEVEL_LOW:
/* Low level sensitive */
__lpc32xx_set_irq_type(d->hwirq, 0, 0);
- __irq_set_handler_locked(d->hwirq, handle_level_irq);
+ __irq_set_handler_locked(d->irq, handle_level_irq);
break;
case IRQ_TYPE_LEVEL_HIGH:
/* High level sensitive */
__lpc32xx_set_irq_type(d->hwirq, 1, 0);
- __irq_set_handler_locked(d->hwirq, handle_level_irq);
+ __irq_set_handler_locked(d->irq, handle_level_irq);
break;
/* Other modes are not supported */
diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S
index 48e4c4b3cd1c..b093a196e801 100644
--- a/arch/arm/mach-mvebu/headsmp-a9.S
+++ b/arch/arm/mach-mvebu/headsmp-a9.S
@@ -13,12 +13,9 @@
*/
#include <linux/linkage.h>
-#include <linux/init.h>
#include <asm/assembler.h>
- __CPUINIT
-
ENTRY(mvebu_cortex_a9_secondary_startup)
ARM_BE8(setend be)
bl armada_38x_scu_power_up
diff --git a/arch/arm/mach-mvebu/platsmp-a9.c b/arch/arm/mach-mvebu/platsmp-a9.c
index df0a9cc5da59..3d5000481c11 100644
--- a/arch/arm/mach-mvebu/platsmp-a9.c
+++ b/arch/arm/mach-mvebu/platsmp-a9.c
@@ -24,7 +24,7 @@
extern void mvebu_cortex_a9_secondary_startup(void);
-static int __cpuinit mvebu_cortex_a9_boot_secondary(unsigned int cpu,
+static int mvebu_cortex_a9_boot_secondary(unsigned int cpu,
struct task_struct *idle)
{
int ret, hw_cpu;
diff --git a/arch/arm/mach-nspire/nspire.c b/arch/arm/mach-nspire/nspire.c
index 3445a5686805..34c2a1b32e7d 100644
--- a/arch/arm/mach-nspire/nspire.c
+++ b/arch/arm/mach-nspire/nspire.c
@@ -22,8 +22,6 @@
#include <asm/mach-types.h>
#include <asm/mach/map.h>
-#include <asm/hardware/timer-sp.h>
-
#include "mmio.h"
#include "clcd.h"
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 9f6c7af3a4e7..dd3a3ad797ea 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -7,6 +7,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/clkdev.h>
#include <linux/irq.h>
#include <linux/gpio.h>
#include <linux/kernel.h>
@@ -14,7 +15,6 @@
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/input.h>
-#include <linux/clk.h>
#include <linux/omapfb.h>
#include <linux/spi/spi.h>
diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index c40e209de65c..667c1637ff91 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c
@@ -21,48 +21,15 @@
#include "soc.h"
-#ifdef CONFIG_PM
-static int omap1_pm_runtime_suspend(struct device *dev)
-{
- int ret;
-
- dev_dbg(dev, "%s\n", __func__);
-
- ret = pm_generic_runtime_suspend(dev);
- if (ret)
- return ret;
-
- ret = pm_clk_suspend(dev);
- if (ret) {
- pm_generic_runtime_resume(dev);
- return ret;
- }
-
- return 0;
-}
-
-static int omap1_pm_runtime_resume(struct device *dev)
-{
- dev_dbg(dev, "%s\n", __func__);
-
- pm_clk_resume(dev);
- return pm_generic_runtime_resume(dev);
-}
-
static struct dev_pm_domain default_pm_domain = {
.ops = {
- .runtime_suspend = omap1_pm_runtime_suspend,
- .runtime_resume = omap1_pm_runtime_resume,
+ USE_PM_CLK_RUNTIME_OPS
USE_PLATFORM_PM_SLEEP_OPS
},
};
-#define OMAP1_PM_DOMAIN (&default_pm_domain)
-#else
-#define OMAP1_PM_DOMAIN NULL
-#endif /* CONFIG_PM */
static struct pm_clk_notifier_block platform_bus_notifier = {
- .pm_domain = OMAP1_PM_DOMAIN,
+ .pm_domain = &default_pm_domain,
.con_ids = { "ick", "fck", NULL, },
};
diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
index 85e0b0c06718..b64d717bfab6 100644
--- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
+++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
@@ -232,14 +232,12 @@ void omap2xxx_clkt_vps_init(void)
struct clk_hw_omap *hw = NULL;
struct clk *clk;
const char *parent_name = "mpu_ck";
- struct clk_lookup *lookup = NULL;
omap2xxx_clkt_vps_late_init();
omap2xxx_clkt_vps_check_bootloader_rates();
hw = kzalloc(sizeof(*hw), GFP_KERNEL);
- lookup = kzalloc(sizeof(*lookup), GFP_KERNEL);
- if (!hw || !lookup)
+ if (!hw)
goto cleanup;
init.name = "virt_prcm_set";
init.ops = &virt_prcm_set_ops;
@@ -249,15 +247,9 @@ void omap2xxx_clkt_vps_init(void)
hw->hw.init = &init;
clk = clk_register(NULL, &hw->hw);
-
- lookup->dev_id = NULL;
- lookup->con_id = "cpufreq_ck";
- lookup->clk = clk;
-
- clkdev_add(lookup);
+ clkdev_create(clk, "cpufreq_ck", NULL);
return;
cleanup:
kfree(hw);
- kfree(lookup);
}
#endif
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index f492ae147c6a..6ab13d18c636 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -287,6 +287,8 @@ static enum omapdss_version __init omap_display_get_version(void)
return OMAPDSS_VER_OMAP5;
else if (soc_is_am43xx())
return OMAPDSS_VER_AM43xx;
+ else if (soc_is_dra7xx())
+ return OMAPDSS_VER_DRA7xx;
else
return OMAPDSS_VER_UNKNOWN;
}
@@ -568,25 +570,25 @@ void __init omapdss_early_init_of(void)
}
+static const char * const omapdss_compat_names[] __initconst = {
+ "ti,omap2-dss",
+ "ti,omap3-dss",
+ "ti,omap4-dss",
+ "ti,omap5-dss",
+ "ti,dra7-dss",
+};
+
struct device_node * __init omapdss_find_dss_of_node(void)
{
struct device_node *node;
+ int i;
- node = of_find_compatible_node(NULL, NULL, "ti,omap2-dss");
- if (node)
- return node;
-
- node = of_find_compatible_node(NULL, NULL, "ti,omap3-dss");
- if (node)
- return node;
-
- node = of_find_compatible_node(NULL, NULL, "ti,omap4-dss");
- if (node)
- return node;
-
- node = of_find_compatible_node(NULL, NULL, "ti,omap5-dss");
- if (node)
- return node;
+ for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) {
+ node = of_find_compatible_node(NULL, NULL,
+ omapdss_compat_names[i]);
+ if (node)
+ return node;
+ }
return NULL;
}
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 4a7303cf563e..4cb8fd9f741f 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -47,7 +47,7 @@ static void _add_clkdev(struct omap_device *od, const char *clk_alias,
const char *clk_name)
{
struct clk *r;
- struct clk_lookup *l;
+ int rc;
if (!clk_alias || !clk_name)
return;
@@ -62,21 +62,15 @@ static void _add_clkdev(struct omap_device *od, const char *clk_alias,
return;
}
- r = clk_get(NULL, clk_name);
- if (IS_ERR(r)) {
- dev_err(&od->pdev->dev,
- "clk_get for %s failed\n", clk_name);
- return;
+ rc = clk_add_alias(clk_alias, dev_name(&od->pdev->dev), clk_name, NULL);
+ if (rc) {
+ if (rc == -ENODEV || rc == -ENOMEM)
+ dev_err(&od->pdev->dev,
+ "clkdev_alloc for %s failed\n", clk_alias);
+ else
+ dev_err(&od->pdev->dev,
+ "clk_get for %s failed\n", clk_name);
}
-
- l = clkdev_alloc(r, clk_alias, dev_name(&od->pdev->dev));
- if (!l) {
- dev_err(&od->pdev->dev,
- "clkdev_alloc for %s failed\n", clk_alias);
- return;
- }
-
- clkdev_add(l);
}
/**
@@ -690,11 +684,8 @@ struct dev_pm_domain omap_device_pm_domain = {
SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume,
NULL)
USE_PLATFORM_PM_SLEEP_OPS
- .suspend_noirq = _od_suspend_noirq,
- .resume_noirq = _od_resume_noirq,
- .freeze_noirq = _od_suspend_noirq,
- .thaw_noirq = _od_resume_noirq,
- .restore_noirq = _od_resume_noirq,
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq,
+ _od_resume_noirq)
}
};
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index a0411f32e8b1..2606c6608bd8 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -49,6 +49,27 @@
*/
/*
+ * 'dmm' class
+ * instance(s): dmm
+ */
+static struct omap_hwmod_class dra7xx_dmm_hwmod_class = {
+ .name = "dmm",
+};
+
+/* dmm */
+static struct omap_hwmod dra7xx_dmm_hwmod = {
+ .name = "dmm",
+ .class = &dra7xx_dmm_hwmod_class,
+ .clkdm_name = "emif_clkdm",
+ .prcm = {
+ .omap4 = {
+ .clkctrl_offs = DRA7XX_CM_EMIF_DMM_CLKCTRL_OFFSET,
+ .context_offs = DRA7XX_RM_EMIF_DMM_CONTEXT_OFFSET,
+ },
+ },
+};
+
+/*
* 'l3' class
* instance(s): l3_instr, l3_main_1, l3_main_2
*/
@@ -438,6 +459,7 @@ static struct omap_hwmod_opt_clk dss_opt_clks[] = {
{ .role = "video2_clk", .clk = "dss_video2_clk" },
{ .role = "video1_clk", .clk = "dss_video1_clk" },
{ .role = "hdmi_clk", .clk = "dss_hdmi_clk" },
+ { .role = "hdcp_clk", .clk = "dss_deshdcp_clk" },
};
static struct omap_hwmod dra7xx_dss_hwmod = {
@@ -500,6 +522,7 @@ static struct omap_hwmod dra7xx_dss_dispc_hwmod = {
},
},
.dev_attr = &dss_dispc_dev_attr,
+ .parent_hwmod = &dra7xx_dss_hwmod,
};
/*
@@ -541,6 +564,7 @@ static struct omap_hwmod dra7xx_dss_hdmi_hwmod = {
},
.opt_clks = dss_hdmi_opt_clks,
.opt_clks_cnt = ARRAY_SIZE(dss_hdmi_opt_clks),
+ .parent_hwmod = &dra7xx_dss_hwmod,
};
/*
@@ -2321,6 +2345,14 @@ static struct omap_hwmod dra7xx_wd_timer2_hwmod = {
* Interfaces
*/
+/* l3_main_1 -> dmm */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__dmm = {
+ .master = &dra7xx_l3_main_1_hwmod,
+ .slave = &dra7xx_dmm_hwmod,
+ .clk = "l3_iclk_div",
+ .user = OCP_USER_SDMA,
+};
+
/* l3_main_2 -> l3_instr */
static struct omap_hwmod_ocp_if dra7xx_l3_main_2__l3_instr = {
.master = &dra7xx_l3_main_2_hwmod,
@@ -3289,6 +3321,7 @@ static struct omap_hwmod_ocp_if dra7xx_l4_wkup__wd_timer2 = {
};
static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
+ &dra7xx_l3_main_1__dmm,
&dra7xx_l3_main_2__l3_instr,
&dra7xx_l4_cfg__l3_main_1,
&dra7xx_mpu__l3_main_1,
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index 11863be59066..16dc95f68125 100644
--- a/arch/arm/mach-pxa/eseries.c
+++ b/arch/arm/mach-pxa/eseries.c
@@ -10,6 +10,7 @@
*
*/
+#include <linux/clkdev.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/clk-provider.h>
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 2d4bf1fb7312..6de32fa0e251 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -11,6 +11,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/clkdev.h>
#include <linux/gpio.h>
#include <linux/gpio/machine.h>
#include <linux/module.h>
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 93bf4ef44d2c..e6e27c0468e4 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -12,6 +12,7 @@
*
*/
+#include <linux/clkdev.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/platform_device.h>
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index c309593abdb2..44575edc44b1 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -35,20 +35,19 @@
#include <linux/mtd/physmap.h>
#include <linux/memblock.h>
+#include <clocksource/timer-sp804.h>
+
#include <mach/hardware.h>
#include <asm/irq.h>
#include <asm/mach-types.h>
-#include <asm/hardware/arm_timer.h>
#include <asm/hardware/icst.h>
#include <asm/mach/arch.h>
#include <asm/mach/irq.h>
#include <asm/mach/map.h>
-
#include <mach/platform.h>
#include <mach/irqs.h>
-#include <asm/hardware/timer-sp.h>
#include <plat/sched_clock.h>
@@ -381,10 +380,10 @@ void __init realview_timer_init(unsigned int timer_irq)
/*
* Initialise to a known state (all timers off)
*/
- writel(0, timer0_va_base + TIMER_CTRL);
- writel(0, timer1_va_base + TIMER_CTRL);
- writel(0, timer2_va_base + TIMER_CTRL);
- writel(0, timer3_va_base + TIMER_CTRL);
+ sp804_timer_disable(timer0_va_base);
+ sp804_timer_disable(timer1_va_base);
+ sp804_timer_disable(timer2_va_base);
+ sp804_timer_disable(timer3_va_base);
sp804_clocksource_init(timer3_va_base, "timer3");
sp804_clockevents_init(timer0_va_base, timer_irq, "timer0");
diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c
index 2e6ab67e2284..8fcec1cc101e 100644
--- a/arch/arm/mach-rockchip/platsmp.c
+++ b/arch/arm/mach-rockchip/platsmp.c
@@ -119,8 +119,7 @@ static int pmu_set_power_domain(int pd, bool on)
* Handling of CPU cores
*/
-static int __cpuinit rockchip_boot_secondary(unsigned int cpu,
- struct task_struct *idle)
+static int rockchip_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
int ret;
diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile
index 61ff91e76e0a..ebc4d58e1a32 100644
--- a/arch/arm/mach-sa1100/Makefile
+++ b/arch/arm/mach-sa1100/Makefile
@@ -3,7 +3,7 @@
#
# Common support
-obj-y := clock.o generic.o irq.o #nmi-oopser.o
+obj-y := clock.o generic.o #nmi-oopser.o
# Specific board support
obj-$(CONFIG_SA1100_ASSABET) += assabet.o
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 40e0d8619a2d..345e63f4eb71 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -20,9 +20,12 @@
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
+#include <linux/irqchip/irq-sa11x0.h>
#include <video/sa1100fb.h>
+#include <soc/sa1100/pwer.h>
+
#include <asm/div64.h>
#include <asm/mach/map.h>
#include <asm/mach/flash.h>
@@ -375,6 +378,18 @@ void __init sa1100_timer_init(void)
pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x90000000), 3686400);
}
+static struct resource irq_resource =
+ DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
+
+void __init sa1100_init_irq(void)
+{
+ request_resource(&iomem_resource, &irq_resource);
+
+ sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start);
+
+ sa1100_init_gpio();
+}
+
/*
* Disable the memory bus request/grant signals on the SA1110 to
* ensure that we don't receive spurious memory requests. We set
@@ -416,3 +431,25 @@ void sa1110_mb_enable(void)
local_irq_restore(flags);
}
+int sa11x0_gpio_set_wake(unsigned int gpio, unsigned int on)
+{
+ if (on)
+ PWER |= BIT(gpio);
+ else
+ PWER &= ~BIT(gpio);
+
+ return 0;
+}
+
+int sa11x0_sc_set_wake(unsigned int irq, unsigned int on)
+{
+ if (BIT(irq) != IC_RTCAlrm)
+ return -EINVAL;
+
+ if (on)
+ PWER |= PWER_RTC;
+ else
+ PWER &= ~PWER_RTC;
+
+ return 0;
+}
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
deleted file mode 100644
index 65aebfa66fe5..000000000000
--- a/arch/arm/mach-sa1100/irq.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * linux/arch/arm/mach-sa1100/irq.c
- *
- * Copyright (C) 1999-2001 Nicolas Pitre
- *
- * Generic IRQ handling for the SA11x0, GPIO 11-27 IRQ demultiplexing.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/ioport.h>
-#include <linux/syscore_ops.h>
-
-#include <mach/hardware.h>
-#include <mach/irqs.h>
-#include <asm/mach/irq.h>
-#include <asm/exception.h>
-
-#include "generic.h"
-
-
-/*
- * We don't need to ACK IRQs on the SA1100 unless they're GPIOs
- * this is for internal IRQs i.e. from IRQ LCD to RTCAlrm.
- */
-static void sa1100_mask_irq(struct irq_data *d)
-{
- ICMR &= ~BIT(d->hwirq);
-}
-
-static void sa1100_unmask_irq(struct irq_data *d)
-{
- ICMR |= BIT(d->hwirq);
-}
-
-/*
- * Apart form GPIOs, only the RTC alarm can be a wakeup event.
- */
-static int sa1100_set_wake(struct irq_data *d, unsigned int on)
-{
- if (BIT(d->hwirq) == IC_RTCAlrm) {
- if (on)
- PWER |= PWER_RTC;
- else
- PWER &= ~PWER_RTC;
- return 0;
- }
- return -EINVAL;
-}
-
-static struct irq_chip sa1100_normal_chip = {
- .name = "SC",
- .irq_ack = sa1100_mask_irq,
- .irq_mask = sa1100_mask_irq,
- .irq_unmask = sa1100_unmask_irq,
- .irq_set_wake = sa1100_set_wake,
-};
-
-static int sa1100_normal_irqdomain_map(struct irq_domain *d,
- unsigned int irq, irq_hw_number_t hwirq)
-{
- irq_set_chip_and_handler(irq, &sa1100_normal_chip,
- handle_level_irq);
- set_irq_flags(irq, IRQF_VALID);
-
- return 0;
-}
-
-static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
- .map = sa1100_normal_irqdomain_map,
- .xlate = irq_domain_xlate_onetwocell,
-};
-
-static struct irq_domain *sa1100_normal_irqdomain;
-
-static struct resource irq_resource =
- DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
-
-static struct sa1100irq_state {
- unsigned int saved;
- unsigned int icmr;
- unsigned int iclr;
- unsigned int iccr;
-} sa1100irq_state;
-
-static int sa1100irq_suspend(void)
-{
- struct sa1100irq_state *st = &sa1100irq_state;
-
- st->saved = 1;
- st->icmr = ICMR;
- st->iclr = ICLR;
- st->iccr = ICCR;
-
- /*
- * Disable all GPIO-based interrupts.
- */
- ICMR &= ~(IC_GPIO11_27|IC_GPIO10|IC_GPIO9|IC_GPIO8|IC_GPIO7|
- IC_GPIO6|IC_GPIO5|IC_GPIO4|IC_GPIO3|IC_GPIO2|
- IC_GPIO1|IC_GPIO0);
-
- return 0;
-}
-
-static void sa1100irq_resume(void)
-{
- struct sa1100irq_state *st = &sa1100irq_state;
-
- if (st->saved) {
- ICCR = st->iccr;
- ICLR = st->iclr;
-
- ICMR = st->icmr;
- }
-}
-
-static struct syscore_ops sa1100irq_syscore_ops = {
- .suspend = sa1100irq_suspend,
- .resume = sa1100irq_resume,
-};
-
-static int __init sa1100irq_init_devicefs(void)
-{
- register_syscore_ops(&sa1100irq_syscore_ops);
- return 0;
-}
-
-device_initcall(sa1100irq_init_devicefs);
-
-static asmlinkage void __exception_irq_entry
-sa1100_handle_irq(struct pt_regs *regs)
-{
- uint32_t icip, icmr, mask;
-
- do {
- icip = (ICIP);
- icmr = (ICMR);
- mask = icip & icmr;
-
- if (mask == 0)
- break;
-
- handle_domain_irq(sa1100_normal_irqdomain,
- ffs(mask) - 1, regs);
- } while (1);
-}
-
-void __init sa1100_init_irq(void)
-{
- request_resource(&iomem_resource, &irq_resource);
-
- /* disable all IRQs */
- ICMR = 0;
-
- /* all IRQs are IRQ, not FIQ */
- ICLR = 0;
-
- /*
- * Whatever the doc says, this has to be set for the wait-on-irq
- * instruction to work... on a SA1100 rev 9 at least.
- */
- ICCR = 1;
-
- sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
- 32, IRQ_GPIO0_SC,
- &sa1100_normal_irqdomain_ops, NULL);
-
- set_handle_irq(sa1100_handle_irq);
-
- sa1100_init_gpio();
-}
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index af868d258e66..99d9a3b1bf34 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -327,8 +327,7 @@ static int neponset_probe(struct platform_device *dev)
irq_set_chip(d->irq_base + NEP_IRQ_SA1111, &nochip);
irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
- irq_set_handler_data(irq, d);
- irq_set_chained_handler(irq, neponset_irq_handler);
+ irq_set_chained_handler_and_data(irq, neponset_irq_handler, d);
/*
* We would set IRQ_GPIO25 to be a wake-up IRQ, but unfortunately
diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c
index 9832e48396a4..00291cc1772d 100644
--- a/arch/arm/mach-shmobile/setup-r8a7740.c
+++ b/arch/arm/mach-shmobile/setup-r8a7740.c
@@ -13,7 +13,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
-#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -690,56 +689,6 @@ void __init r8a7740_meram_workaround(void)
}
}
-#define ICCR 0x0004
-#define ICSTART 0x0070
-
-#define i2c_read(reg, offset) ioread8(reg + offset)
-#define i2c_write(reg, offset, data) iowrite8(data, reg + offset)
-
-/*
- * r8a7740 chip has lasting errata on I2C I/O pad reset.
- * this is work-around for it.
- */
-static void r8a7740_i2c_workaround(struct platform_device *pdev)
-{
- struct resource *res;
- void __iomem *reg;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (unlikely(!res)) {
- pr_err("r8a7740 i2c workaround fail (cannot find resource)\n");
- return;
- }
-
- reg = ioremap(res->start, resource_size(res));
- if (unlikely(!reg)) {
- pr_err("r8a7740 i2c workaround fail (cannot map IO)\n");
- return;
- }
-
- i2c_write(reg, ICCR, i2c_read(reg, ICCR) | 0x80);
- i2c_read(reg, ICCR); /* dummy read */
-
- i2c_write(reg, ICSTART, i2c_read(reg, ICSTART) | 0x10);
- i2c_read(reg, ICSTART); /* dummy read */
-
- udelay(10);
-
- i2c_write(reg, ICCR, 0x01);
- i2c_write(reg, ICSTART, 0x00);
-
- udelay(10);
-
- i2c_write(reg, ICCR, 0x10);
- udelay(10);
- i2c_write(reg, ICCR, 0x00);
- udelay(10);
- i2c_write(reg, ICCR, 0x10);
- udelay(10);
-
- iounmap(reg);
-}
-
void __init r8a7740_add_standard_devices(void)
{
static struct pm_domain_device domain_devices[] __initdata = {
@@ -766,10 +715,6 @@ void __init r8a7740_add_standard_devices(void)
{ "A3SP", &usb_dma_device },
};
- /* I2C work-around */
- r8a7740_i2c_workaround(&i2c0_device);
- r8a7740_i2c_workaround(&i2c1_device);
-
r8a7740_init_pm_domains();
/* add devices */
diff --git a/arch/arm/mach-socfpga/pm.c b/arch/arm/mach-socfpga/pm.c
index 1ed89fc2b7a8..6a4199f2bffb 100644
--- a/arch/arm/mach-socfpga/pm.c
+++ b/arch/arm/mach-socfpga/pm.c
@@ -56,7 +56,7 @@ static int socfpga_setup_ocram_self_refresh(void)
goto put_node;
}
- ocram_pool = dev_get_gen_pool(&pdev->dev);
+ ocram_pool = gen_pool_get(&pdev->dev);
if (!ocram_pool) {
pr_warn("%s: ocram pool unavailable!\n", __func__);
ret = -ENODEV;
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 6ea09fe53426..23a04fe5d2ad 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -41,8 +41,9 @@
#include <linux/bitops.h>
#include <linux/reboot.h>
+#include <clocksource/timer-sp804.h>
+
#include <asm/irq.h>
-#include <asm/hardware/arm_timer.h>
#include <asm/hardware/icst.h>
#include <asm/mach-types.h>
@@ -52,7 +53,6 @@
#include <asm/mach/map.h>
#include <mach/hardware.h>
#include <mach/platform.h>
-#include <asm/hardware/timer-sp.h>
#include <plat/sched_clock.h>
@@ -798,10 +798,10 @@ void __init versatile_timer_init(void)
/*
* Initialise to a known state (all timers off)
*/
- writel(0, TIMER0_VA_BASE + TIMER_CTRL);
- writel(0, TIMER1_VA_BASE + TIMER_CTRL);
- writel(0, TIMER2_VA_BASE + TIMER_CTRL);
- writel(0, TIMER3_VA_BASE + TIMER_CTRL);
+ sp804_timer_disable(TIMER0_VA_BASE);
+ sp804_timer_disable(TIMER1_VA_BASE);
+ sp804_timer_disable(TIMER2_VA_BASE);
+ sp804_timer_disable(TIMER3_VA_BASE);
sp804_clocksource_init(TIMER3_VA_BASE, "timer3");
sp804_clockevents_init(TIMER0_VA_BASE, IRQ_TIMERINT0_1, "timer0");
diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
index f61158c6ce71..5766ce2be32b 100644
--- a/arch/arm/mach-vexpress/spc.c
+++ b/arch/arm/mach-vexpress/spc.c
@@ -589,4 +589,4 @@ static int __init ve_spc_clk_init(void)
platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0);
return 0;
}
-module_init(ve_spc_clk_init);
+device_initcall(ve_spc_clk_init);
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index b4f92b9a13ac..7c6b976ab8d3 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -6,7 +6,7 @@ comment "Processor Type"
# ARM7TDMI
config CPU_ARM7TDMI
- bool "Support ARM7TDMI processor"
+ bool
depends on !MMU
select CPU_32v4T
select CPU_ABRT_LV4T
@@ -56,7 +56,7 @@ config CPU_ARM740T
# ARM9TDMI
config CPU_ARM9TDMI
- bool "Support ARM9TDMI processor"
+ bool
depends on !MMU
select CPU_32v4T
select CPU_ABRT_NOMMU
@@ -604,6 +604,22 @@ config CPU_USE_DOMAINS
This option enables or disables the use of domain switching
via the set_fs() function.
+config CPU_V7M_NUM_IRQ
+ int "Number of external interrupts connected to the NVIC"
+ depends on CPU_V7M
+ default 90 if ARCH_STM32
+ default 38 if ARCH_EFM32
+ default 112 if SOC_VF610
+ default 240
+ help
+ This option indicates the number of interrupts connected to the NVIC.
+ The value can be larger than the real number of interrupts supported
+ by the system, but must not be lower.
+ The default value is 240, corresponding to the maximum number of
+ interrupts supported by the NVIC on Cortex-M family.
+
+ If unsure, keep default value.
+
#
# CPU supports 36-bit I/O
#
@@ -624,6 +640,10 @@ config ARM_LPAE
If unsure, say N.
+config ARM_PV_FIXUP
+ def_bool y
+ depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE
+
config ARCH_PHYS_ADDR_T_64BIT
def_bool ARM_LPAE
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d3afdf9eb65a..57c8df500e8c 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o
obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o
obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o
@@ -55,6 +56,8 @@ obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o
obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o
+CFLAGS_copypage-feroceon.o := -march=armv5te
+
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o
obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index e309c8f35af5..71b3d3309024 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -38,10 +38,11 @@ struct l2c_init_data {
unsigned way_size_0;
unsigned num_lock;
void (*of_parse)(const struct device_node *, u32 *, u32 *);
- void (*enable)(void __iomem *, u32, unsigned);
+ void (*enable)(void __iomem *, unsigned);
void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
void (*save)(void __iomem *);
void (*configure)(void __iomem *);
+ void (*unlock)(void __iomem *, unsigned);
struct outer_cache_fns outer_cache;
};
@@ -110,14 +111,6 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
static void l2c_configure(void __iomem *base)
{
- if (outer_cache.configure) {
- outer_cache.configure(&l2x0_saved_regs);
- return;
- }
-
- if (l2x0_data->configure)
- l2x0_data->configure(base);
-
l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
}
@@ -125,18 +118,16 @@ static void l2c_configure(void __iomem *base)
* Enable the L2 cache controller. This function must only be
* called when the cache controller is known to be disabled.
*/
-static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
+static void l2c_enable(void __iomem *base, unsigned num_lock)
{
unsigned long flags;
- /* Do not touch the controller if already enabled. */
- if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)
- return;
-
- l2x0_saved_regs.aux_ctrl = aux;
- l2c_configure(base);
+ if (outer_cache.configure)
+ outer_cache.configure(&l2x0_saved_regs);
+ else
+ l2x0_data->configure(base);
- l2c_unlock(base, num_lock);
+ l2x0_data->unlock(base, num_lock);
local_irq_save(flags);
__l2c_op_way(base + L2X0_INV_WAY);
@@ -163,7 +154,11 @@ static void l2c_save(void __iomem *base)
static void l2c_resume(void)
{
- l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
+ void __iomem *base = l2x0_base;
+
+ /* Do not touch the controller if already enabled. */
+ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
+ l2c_enable(base, l2x0_data->num_lock);
}
/*
@@ -252,6 +247,8 @@ static const struct l2c_init_data l2c210_data __initconst = {
.num_lock = 1,
.enable = l2c_enable,
.save = l2c_save,
+ .configure = l2c_configure,
+ .unlock = l2c_unlock,
.outer_cache = {
.inv_range = l2c210_inv_range,
.clean_range = l2c210_clean_range,
@@ -391,16 +388,22 @@ static void l2c220_sync(void)
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
-static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock)
+static void l2c220_enable(void __iomem *base, unsigned num_lock)
{
/*
* Always enable non-secure access to the lockdown registers -
* we write to them as part of the L2C enable sequence so they
* need to be accessible.
*/
- aux |= L220_AUX_CTRL_NS_LOCKDOWN;
+ l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN;
- l2c_enable(base, aux, num_lock);
+ l2c_enable(base, num_lock);
+}
+
+static void l2c220_unlock(void __iomem *base, unsigned num_lock)
+{
+ if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN)
+ l2c_unlock(base, num_lock);
}
static const struct l2c_init_data l2c220_data = {
@@ -409,6 +412,8 @@ static const struct l2c_init_data l2c220_data = {
.num_lock = 1,
.enable = l2c220_enable,
.save = l2c_save,
+ .configure = l2c_configure,
+ .unlock = l2c220_unlock,
.outer_cache = {
.inv_range = l2c220_inv_range,
.clean_range = l2c220_clean_range,
@@ -569,6 +574,8 @@ static void l2c310_configure(void __iomem *base)
{
unsigned revision;
+ l2c_configure(base);
+
/* restore pl310 setup */
l2c_write_sec(l2x0_saved_regs.tag_latency, base,
L310_TAG_LATENCY_CTRL);
@@ -603,10 +610,11 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v
return NOTIFY_OK;
}
-static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
+static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
{
unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
+ u32 aux = l2x0_saved_regs.aux_ctrl;
if (rev >= L310_CACHE_ID_RTL_R2P0) {
if (cortex_a9) {
@@ -649,9 +657,9 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
* we write to them as part of the L2C enable sequence so they
* need to be accessible.
*/
- aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+ l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN;
- l2c_enable(base, aux, num_lock);
+ l2c_enable(base, num_lock);
/* Read back resulting AUX_CTRL value as it could have been altered. */
aux = readl_relaxed(base + L2X0_AUX_CTRL);
@@ -755,6 +763,12 @@ static void l2c310_resume(void)
set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
}
+static void l2c310_unlock(void __iomem *base, unsigned num_lock)
+{
+ if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN)
+ l2c_unlock(base, num_lock);
+}
+
static const struct l2c_init_data l2c310_init_fns __initconst = {
.type = "L2C-310",
.way_size_0 = SZ_8K,
@@ -763,6 +777,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
.fixup = l2c310_fixup,
.save = l2c310_save,
.configure = l2c310_configure,
+ .unlock = l2c310_unlock,
.outer_cache = {
.inv_range = l2c210_inv_range,
.clean_range = l2c210_clean_range,
@@ -856,8 +871,11 @@ static int __init __l2c_init(const struct l2c_init_data *data,
* Check if l2x0 controller is already enabled. If we are booting
* in non-secure mode accessing the below registers will fault.
*/
- if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
- data->enable(l2x0_base, aux, data->num_lock);
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ l2x0_saved_regs.aux_ctrl = aux;
+
+ data->enable(l2x0_base, data->num_lock);
+ }
outer_cache = fns;
@@ -1066,6 +1084,8 @@ static const struct l2c_init_data of_l2c210_data __initconst = {
.of_parse = l2x0_of_parse,
.enable = l2c_enable,
.save = l2c_save,
+ .configure = l2c_configure,
+ .unlock = l2c_unlock,
.outer_cache = {
.inv_range = l2c210_inv_range,
.clean_range = l2c210_clean_range,
@@ -1084,6 +1104,8 @@ static const struct l2c_init_data of_l2c220_data __initconst = {
.of_parse = l2x0_of_parse,
.enable = l2c220_enable,
.save = l2c_save,
+ .configure = l2c_configure,
+ .unlock = l2c220_unlock,
.outer_cache = {
.inv_range = l2c220_inv_range,
.clean_range = l2c220_clean_range,
@@ -1199,6 +1221,26 @@ static void __init l2c310_of_parse(const struct device_node *np,
pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
}
+ ret = of_property_read_u32(np, "prefetch-data", &val);
+ if (ret == 0) {
+ if (val)
+ prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH;
+ else
+ prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
+ } else if (ret != -EINVAL) {
+ pr_err("L2C-310 OF prefetch-data property value is missing\n");
+ }
+
+ ret = of_property_read_u32(np, "prefetch-instr", &val);
+ if (ret == 0) {
+ if (val)
+ prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
+ else
+ prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
+ } else if (ret != -EINVAL) {
+ pr_err("L2C-310 OF prefetch-instr property value is missing\n");
+ }
+
l2x0_saved_regs.prefetch_ctrl = prefetch;
}
@@ -1211,6 +1253,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = {
.fixup = l2c310_fixup,
.save = l2c310_save,
.configure = l2c310_configure,
+ .unlock = l2c310_unlock,
.outer_cache = {
.inv_range = l2c210_inv_range,
.clean_range = l2c210_clean_range,
@@ -1240,6 +1283,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
.fixup = l2c310_fixup,
.save = l2c310_save,
.configure = l2c310_configure,
+ .unlock = l2c310_unlock,
.outer_cache = {
.inv_range = l2c210_inv_range,
.clean_range = l2c210_clean_range,
@@ -1366,7 +1410,7 @@ static void aurora_save(void __iomem *base)
* For Aurora cache in no outer mode, enable via the CP15 coprocessor
* broadcasting of cache commands to L2.
*/
-static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
+static void __init aurora_enable_no_outer(void __iomem *base,
unsigned num_lock)
{
u32 u;
@@ -1377,7 +1421,7 @@ static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
isb();
- l2c_enable(base, aux, num_lock);
+ l2c_enable(base, num_lock);
}
static void __init aurora_fixup(void __iomem *base, u32 cache_id,
@@ -1416,6 +1460,8 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
.enable = l2c_enable,
.fixup = aurora_fixup,
.save = aurora_save,
+ .configure = l2c_configure,
+ .unlock = l2c_unlock,
.outer_cache = {
.inv_range = aurora_inv_range,
.clean_range = aurora_clean_range,
@@ -1435,6 +1481,8 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
.enable = aurora_enable_no_outer,
.fixup = aurora_fixup,
.save = aurora_save,
+ .configure = l2c_configure,
+ .unlock = l2c_unlock,
.outer_cache = {
.resume = l2c_resume,
},
@@ -1585,6 +1633,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
.enable = l2c310_enable,
.save = l2c310_save,
.configure = l2c310_configure,
+ .unlock = l2c310_unlock,
.outer_cache = {
.inv_range = bcm_inv_range,
.clean_range = bcm_clean_range,
@@ -1608,6 +1657,7 @@ static void __init tauros3_save(void __iomem *base)
static void tauros3_configure(void __iomem *base)
{
+ l2c_configure(base);
writel_relaxed(l2x0_saved_regs.aux2_ctrl,
base + TAUROS3_AUX2_CTRL);
writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
@@ -1621,6 +1671,7 @@ static const struct l2c_init_data of_tauros3_data __initconst = {
.enable = l2c_enable,
.save = tauros3_save,
.configure = tauros3_configure,
+ .unlock = l2c_unlock,
/* Tauros3 broadcasts L1 cache operations to L2 */
.outer_cache = {
.resume = l2c_resume,
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7e7583ddd607..1ced8a0f7a52 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -148,11 +148,14 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t handle, struct dma_attrs *attrs);
+static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs);
struct dma_map_ops arm_coherent_dma_ops = {
.alloc = arm_coherent_dma_alloc,
.free = arm_coherent_dma_free,
- .mmap = arm_dma_mmap,
+ .mmap = arm_coherent_dma_mmap,
.get_sgtable = arm_dma_get_sgtable,
.map_page = arm_coherent_dma_map_page,
.map_sg = arm_dma_map_sg,
@@ -690,10 +693,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
attrs, __builtin_return_address(0));
}
-/*
- * Create userspace mapping for the DMA-coherent memory.
- */
-int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
struct dma_attrs *attrs)
{
@@ -704,8 +704,6 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
unsigned long pfn = dma_to_pfn(dev, dma_addr);
unsigned long off = vma->vm_pgoff;
- vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
-
if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
@@ -721,6 +719,26 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
}
/*
+ * Create userspace mapping for the DMA-coherent memory.
+ */
+static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+#ifdef CONFIG_MMU
+ vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+#endif /* CONFIG_MMU */
+ return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+/*
* Free a buffer as defined by the above mapping.
*/
static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 6333d9c17875..0d629b8f973f 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index b98895d9fe57..ee8dfa793989 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page)
void *kmap;
int type;
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr)
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
@@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
int idx, type;
struct page *page = pfn_to_page(pfn);
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
index c72412415093..fcafb521f14e 100644
--- a/arch/arm/mm/hugetlbpage.c
+++ b/arch/arm/mm/hugetlbpage.c
@@ -41,11 +41,6 @@ int pud_huge(pud_t pud)
return 0;
}
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
- return 0;
-}
-
int pmd_huge(pmd_t pmd)
{
return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index be92fa0f2f35..8a63b4cdc0f2 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -268,6 +268,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
if (mdesc->reserve)
mdesc->reserve();
+ early_init_fdt_reserve_self();
early_init_fdt_scan_reserved_mem();
/* reserve memory for DMA contiguous allocations */
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 7186382672b5..6ca7d9aa896f 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1387,123 +1387,98 @@ static void __init map_lowmem(void)
}
}
-#ifdef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_PV_FIXUP
+extern unsigned long __atags_pointer;
+typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata);
+pgtables_remap lpae_pgtables_remap_asm;
+
/*
* early_paging_init() recreates boot time page table setup, allowing machines
* to switch over to a high (>4G) address space on LPAE systems
*/
-void __init early_paging_init(const struct machine_desc *mdesc,
- struct proc_info_list *procinfo)
+void __init early_paging_init(const struct machine_desc *mdesc)
{
- pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
- unsigned long map_start, map_end;
- pgd_t *pgd0, *pgdk;
- pud_t *pud0, *pudk, *pud_start;
- pmd_t *pmd0, *pmdk;
- phys_addr_t phys;
- int i;
+ pgtables_remap *lpae_pgtables_remap;
+ unsigned long pa_pgd;
+ unsigned int cr, ttbcr;
+ long long offset;
+ void *boot_data;
- if (!(mdesc->init_meminfo))
+ if (!mdesc->pv_fixup)
return;
- /* remap kernel code and data */
- map_start = init_mm.start_code & PMD_MASK;
- map_end = ALIGN(init_mm.brk, PMD_SIZE);
+ offset = mdesc->pv_fixup();
+ if (offset == 0)
+ return;
- /* get a handle on things... */
- pgd0 = pgd_offset_k(0);
- pud_start = pud0 = pud_offset(pgd0, 0);
- pmd0 = pmd_offset(pud0, 0);
+ /*
+ * Get the address of the remap function in the 1:1 identity
+ * mapping set up by the early page table assembly code. We
+ * must get this prior to the pv update. The following barrier
+ * ensures that this is complete before we fixup any P:V offsets.
+ */
+ lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
+ pa_pgd = __pa(swapper_pg_dir);
+ boot_data = __va(__atags_pointer);
+ barrier();
- pgdk = pgd_offset_k(map_start);
- pudk = pud_offset(pgdk, map_start);
- pmdk = pmd_offset(pudk, map_start);
+ pr_info("Switching physical address space to 0x%08llx\n",
+ (u64)PHYS_OFFSET + offset);
- mdesc->init_meminfo();
+ /* Re-set the phys pfn offset, and the pv offset */
+ __pv_offset += offset;
+ __pv_phys_pfn_offset += PFN_DOWN(offset);
/* Run the patch stub to update the constants */
fixup_pv_table(&__pv_table_begin,
(&__pv_table_end - &__pv_table_begin) << 2);
/*
- * Cache cleaning operations for self-modifying code
- * We should clean the entries by MVA but running a
- * for loop over every pv_table entry pointer would
- * just complicate the code.
- */
- flush_cache_louis();
- dsb(ishst);
- isb();
-
- /*
- * FIXME: This code is not architecturally compliant: we modify
- * the mappings in-place, indeed while they are in use by this
- * very same code. This may lead to unpredictable behaviour of
- * the CPU.
- *
- * Even modifying the mappings in a separate page table does
- * not resolve this.
- *
- * The architecture strongly recommends that when a mapping is
- * changed, that it is changed by first going via an invalid
- * mapping and back to the new mapping. This is to ensure that
- * no TLB conflicts (caused by the TLB having more than one TLB
- * entry match a translation) can occur. However, doing that
- * here will result in unmapping the code we are running.
- */
- pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
- add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
-
- /*
- * Remap level 1 table. This changes the physical addresses
- * used to refer to the level 2 page tables to the high
- * physical address alias, leaving everything else the same.
- */
- for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
- set_pud(pud0,
- __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
- pmd0 += PTRS_PER_PMD;
- }
-
- /*
- * Remap the level 2 table, pointing the mappings at the high
- * physical address alias of these pages.
- */
- phys = __pa(map_start);
- do {
- *pmdk++ = __pmd(phys | pmdprot);
- phys += PMD_SIZE;
- } while (phys < map_end);
-
- /*
- * Ensure that the above updates are flushed out of the cache.
- * This is not strictly correct; on a system where the caches
- * are coherent with each other, but the MMU page table walks
- * may not be coherent, flush_cache_all() may be a no-op, and
- * this will fail.
+ * We are changing not only the virtual to physical mapping, but also
+ * the physical addresses used to access memory. We need to flush
+ * all levels of cache in the system with caching disabled to
+ * ensure that all data is written back, and nothing is prefetched
+ * into the caches. We also need to prevent the TLB walkers
+ * allocating into the caches too. Note that this is ARMv7 LPAE
+ * specific.
*/
+ cr = get_cr();
+ set_cr(cr & ~(CR_I | CR_C));
+ asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
+ asm volatile("mcr p15, 0, %0, c2, c0, 2"
+ : : "r" (ttbcr & ~(3 << 8 | 3 << 10)));
flush_cache_all();
/*
- * Re-write the TTBR values to point them at the high physical
- * alias of the page tables. We expect __va() will work on
- * cpu_get_pgd(), which returns the value of TTBR0.
+ * Fixup the page tables - this must be in the idmap region as
+ * we need to disable the MMU to do this safely, and hence it
+ * needs to be assembly. It's fairly simple, as we're using the
+ * temporary tables set up by the initial assembly code.
*/
- cpu_switch_mm(pgd0, &init_mm);
- cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+ lpae_pgtables_remap(offset, pa_pgd, boot_data);
- /* Finally flush any stale TLB values. */
- local_flush_bp_all();
- local_flush_tlb_all();
+ /* Re-enable the caches and cacheable TLB walks */
+ asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr));
+ set_cr(cr);
}
#else
-void __init early_paging_init(const struct machine_desc *mdesc,
- struct proc_info_list *procinfo)
+void __init early_paging_init(const struct machine_desc *mdesc)
{
- if (mdesc->init_meminfo)
- mdesc->init_meminfo();
+ long long offset;
+
+ if (!mdesc->pv_fixup)
+ return;
+
+ offset = mdesc->pv_fixup();
+ if (offset == 0)
+ return;
+
+ pr_crit("Physical address space modification is only to support Keystone2.\n");
+ pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n");
+ pr_crit("feature. Your kernel may crash now, have a good day.\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
}
#endif
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index a014dfacd5ca..afd7e05d95f1 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -304,15 +304,6 @@ void __init sanity_check_meminfo(void)
}
/*
- * early_paging_init() recreates boot time page table setup, allowing machines
- * to switch over to a high (>4G) address space on LPAE systems
- */
-void __init early_paging_init(const struct machine_desc *mdesc,
- struct proc_info_list *procinfo)
-{
-}
-
-/*
* paging_init() sets up the page tables, initialises the zone memory
* maps, and sets up the zero page, bad page and bad page tables.
*/
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 10405b8d31af..c6141a5435c3 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -36,14 +36,16 @@
*
* It is assumed that:
* - we are not using split page tables
+ *
+ * Note that we always need to flush BTAC/BTB if IBE is set
+ * even on Cortex-A8 revisions not affected by 430973.
+ * If IBE is not set, the flush BTAC/BTB won't do anything.
*/
ENTRY(cpu_ca8_switch_mm)
#ifdef CONFIG_MMU
mov r2, #0
-#ifdef CONFIG_ARM_ERRATA_430973
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
#endif
-#endif
ENTRY(cpu_v7_switch_mm)
#ifdef CONFIG_MMU
mmid r1, r1 @ get mm->context.id
@@ -148,10 +150,10 @@ ENDPROC(cpu_v7_set_pte_ext)
* Macro for setting up the TTBRx and TTBCR registers.
* - \ttb0 and \ttb1 updated with the corresponding flags.
*/
- .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+ .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
mcr p15, 0, \zero, c2, c0, 2 @ TTB control register
- ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP)
- ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP)
+ ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP)
+ ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP)
ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP)
ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP)
mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index d3daed0ae0ad..5e5720e8bc5f 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext)
* Macro for setting up the TTBRx and TTBCR registers.
* - \ttbr1 updated.
*/
- .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+ .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address
- mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT
- cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET?
- mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register
+ cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET?
+ mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register
orr \tmp, \tmp, #TTB_EAE
ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP)
ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP)
@@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext)
*/
orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ
mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR
- mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits
- mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits
+ mov \tmp, \ttbr1, lsr #20
+ mov \ttbr1, \ttbr1, lsl #12
addls \ttbr1, \ttbr1, #TTBR1_OFFSET
mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1
- mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits
- mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits
- mcrr p15, 0, \ttbr0, \tmp, c2 @ load TTBR0
.endm
/*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 75ae72160099..0716bbe19872 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -252,6 +252,12 @@ ENDPROC(cpu_pj4b_do_resume)
* Initialise TLB, Caches, and MMU state ready to switch the MMU
* on. Return in r0 the new CP15 C1 control register setting.
*
+ * r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack
+ * r4: TTBR0 (low word)
+ * r5: TTBR0 (high word if LPAE)
+ * r8: TTBR1
+ * r9: Main ID register
+ *
* This should be able to cover all ARMv7 cores.
*
* It is assumed that:
@@ -279,6 +285,78 @@ __v7_ca17mp_setup:
#endif
b __v7_setup
+/*
+ * Errata:
+ * r0, r10 available for use
+ * r1, r2, r4, r5, r9, r13: must be preserved
+ * r3: contains MIDR rX number in bits 23-20
+ * r6: contains MIDR rXpY as 8-bit XY number
+ * r9: MIDR
+ */
+__ca8_errata:
+#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
+ teq r3, #0x00100000 @ only present in r1p*
+ mrceq p15, 0, r0, c1, c0, 1 @ read aux control register
+ orreq r0, r0, #(1 << 6) @ set IBE to 1
+ mcreq p15, 0, r0, c1, c0, 1 @ write aux control register
+#endif
+#ifdef CONFIG_ARM_ERRATA_458693
+ teq r6, #0x20 @ only present in r2p0
+ mrceq p15, 0, r0, c1, c0, 1 @ read aux control register
+ orreq r0, r0, #(1 << 5) @ set L1NEON to 1
+ orreq r0, r0, #(1 << 9) @ set PLDNOP to 1
+ mcreq p15, 0, r0, c1, c0, 1 @ write aux control register
+#endif
+#ifdef CONFIG_ARM_ERRATA_460075
+ teq r6, #0x20 @ only present in r2p0
+ mrceq p15, 1, r0, c9, c0, 2 @ read L2 cache aux ctrl register
+ tsteq r0, #1 << 22
+ orreq r0, r0, #(1 << 22) @ set the Write Allocate disable bit
+ mcreq p15, 1, r0, c9, c0, 2 @ write the L2 cache aux ctrl register
+#endif
+ b __errata_finish
+
+__ca9_errata:
+#ifdef CONFIG_ARM_ERRATA_742230
+ cmp r6, #0x22 @ only present up to r2p2
+ mrcle p15, 0, r0, c15, c0, 1 @ read diagnostic register
+ orrle r0, r0, #1 << 4 @ set bit #4
+ mcrle p15, 0, r0, c15, c0, 1 @ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_742231
+ teq r6, #0x20 @ present in r2p0
+ teqne r6, #0x21 @ present in r2p1
+ teqne r6, #0x22 @ present in r2p2
+ mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register
+ orreq r0, r0, #1 << 12 @ set bit #12
+ orreq r0, r0, #1 << 22 @ set bit #22
+ mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_743622
+ teq r3, #0x00200000 @ only present in r2p*
+ mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register
+ orreq r0, r0, #1 << 6 @ set bit #6
+ mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register
+#endif
+#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
+ ALT_SMP(cmp r6, #0x30) @ present prior to r3p0
+ ALT_UP_B(1f)
+ mrclt p15, 0, r0, c15, c0, 1 @ read diagnostic register
+ orrlt r0, r0, #1 << 11 @ set bit #11
+ mcrlt p15, 0, r0, c15, c0, 1 @ write diagnostic register
+1:
+#endif
+ b __errata_finish
+
+__ca15_errata:
+#ifdef CONFIG_ARM_ERRATA_773022
+ cmp r6, #0x4 @ only present up to r0p4
+ mrcle p15, 0, r0, c1, c0, 1 @ read aux control register
+ orrle r0, r0, #1 << 1 @ disable loop buffer
+ mcrle p15, 0, r0, c1, c0, 1 @ write aux control register
+#endif
+ b __errata_finish
+
__v7_pj4b_setup:
#ifdef CONFIG_CPU_PJ4B
@@ -339,96 +417,38 @@ __v7_setup:
bl v7_invalidate_l1
ldmia r12, {r0-r5, r7, r9, r11, lr}
- mrc p15, 0, r0, c0, c0, 0 @ read main ID register
- and r10, r0, #0xff000000 @ ARM?
- teq r10, #0x41000000
- bne 3f
- and r5, r0, #0x00f00000 @ variant
- and r6, r0, #0x0000000f @ revision
- orr r6, r6, r5, lsr #20-4 @ combine variant and revision
- ubfx r0, r0, #4, #12 @ primary part number
+ and r0, r9, #0xff000000 @ ARM?
+ teq r0, #0x41000000
+ bne __errata_finish
+ and r3, r9, #0x00f00000 @ variant
+ and r6, r9, #0x0000000f @ revision
+ orr r6, r6, r3, lsr #20-4 @ combine variant and revision
+ ubfx r0, r9, #4, #12 @ primary part number
/* Cortex-A8 Errata */
ldr r10, =0x00000c08 @ Cortex-A8 primary part number
teq r0, r10
- bne 2f
-#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
-
- teq r5, #0x00100000 @ only present in r1p*
- mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
- orreq r10, r10, #(1 << 6) @ set IBE to 1
- mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
-#endif
-#ifdef CONFIG_ARM_ERRATA_458693
- teq r6, #0x20 @ only present in r2p0
- mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
- orreq r10, r10, #(1 << 5) @ set L1NEON to 1
- orreq r10, r10, #(1 << 9) @ set PLDNOP to 1
- mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
-#endif
-#ifdef CONFIG_ARM_ERRATA_460075
- teq r6, #0x20 @ only present in r2p0
- mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register
- tsteq r10, #1 << 22
- orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit
- mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register
-#endif
- b 3f
+ beq __ca8_errata
/* Cortex-A9 Errata */
-2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number
+ ldr r10, =0x00000c09 @ Cortex-A9 primary part number
teq r0, r10
- bne 3f
-#ifdef CONFIG_ARM_ERRATA_742230
- cmp r6, #0x22 @ only present up to r2p2
- mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register
- orrle r10, r10, #1 << 4 @ set bit #4
- mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register
-#endif
-#ifdef CONFIG_ARM_ERRATA_742231
- teq r6, #0x20 @ present in r2p0
- teqne r6, #0x21 @ present in r2p1
- teqne r6, #0x22 @ present in r2p2
- mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
- orreq r10, r10, #1 << 12 @ set bit #12
- orreq r10, r10, #1 << 22 @ set bit #22
- mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
-#endif
-#ifdef CONFIG_ARM_ERRATA_743622
- teq r5, #0x00200000 @ only present in r2p*
- mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
- orreq r10, r10, #1 << 6 @ set bit #6
- mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
-#endif
-#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
- ALT_SMP(cmp r6, #0x30) @ present prior to r3p0
- ALT_UP_B(1f)
- mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register
- orrlt r10, r10, #1 << 11 @ set bit #11
- mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register
-1:
-#endif
+ beq __ca9_errata
/* Cortex-A15 Errata */
-3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number
+ ldr r10, =0x00000c0f @ Cortex-A15 primary part number
teq r0, r10
- bne 4f
+ beq __ca15_errata
-#ifdef CONFIG_ARM_ERRATA_773022
- cmp r6, #0x4 @ only present up to r0p4
- mrcle p15, 0, r10, c1, c0, 1 @ read aux control register
- orrle r10, r10, #1 << 1 @ disable loop buffer
- mcrle p15, 0, r10, c1, c0, 1 @ write aux control register
-#endif
-
-4: mov r10, #0
+__errata_finish:
+ mov r10, #0
mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate
#ifdef CONFIG_MMU
mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs
- v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup
- ldr r5, =PRRR @ PRRR
+ v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup
+ ldr r3, =PRRR @ PRRR
ldr r6, =NMRR @ NMRR
- mcr p15, 0, r5, c10, c2, 0 @ write PRRR
+ mcr p15, 0, r3, c10, c2, 0 @ write PRRR
mcr p15, 0, r6, c10, c2, 1 @ write NMRR
#endif
dsb @ Complete invalidations
@@ -437,22 +457,22 @@ __v7_setup:
and r0, r0, #(0xf << 12) @ ThumbEE enabled field
teq r0, #(1 << 12) @ check if ThumbEE is present
bne 1f
- mov r5, #0
- mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0
+ mov r3, #0
+ mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0
mrc p14, 6, r0, c0, c0, 0 @ load TEECR
orr r0, r0, #1 @ set the 1st bit in order to
mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access
1:
#endif
- adr r5, v7_crval
- ldmia r5, {r5, r6}
+ adr r3, v7_crval
+ ldmia r3, {r3, r6}
ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables
#ifdef CONFIG_SWP_EMULATE
- orr r5, r5, #(1 << 10) @ set SW bit in "clear"
+ orr r3, r3, #(1 << 10) @ set SW bit in "clear"
bic r6, r6, #(1 << 10) @ clear it in "mmuset"
#endif
mrc p15, 0, r0, c1, c0, 0 @ read control register
- bic r0, r0, r5 @ clear bits them
+ bic r0, r0, r3 @ clear them
orr r0, r0, r6 @ set them
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
ret lr @ return to head.S:__ret
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index e08e1f2bab76..67d9209077c6 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -98,7 +98,7 @@ __v7m_setup:
str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority
@ SVC to run the kernel in this mode
- adr r1, BSYM(1f)
+ badr r1, 1f
ldr r5, [r12, #11 * 4] @ read the SVC vector entry
str r1, [r12, #11 * 4] @ write the temporary SVC vector entry
mov r6, lr @ save LR
diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S
new file mode 100644
index 000000000000..1867f3e43016
--- /dev/null
+++ b/arch/arm/mm/pv-fixup-asm.S
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2015 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This assembly is required to safely remap the physical address space
+ * for Keystone 2
+ */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/cp15.h>
+#include <asm/memory.h>
+#include <asm/pgtable.h>
+
+ .section ".idmap.text", "ax"
+
+#define L1_ORDER 3
+#define L2_ORDER 3
+
+ENTRY(lpae_pgtables_remap_asm) /* adds the 64-bit value r1:r0 to the table entries at r2 and to TTBR0/TTBR1 */
+ stmfd sp!, {r4-r8, lr} /* save the registers used below and the return address */
+
+ mrc p15, 0, r8, c1, c0, 0 @ read control reg
+ bic ip, r8, #CR_M @ disable caches and MMU
+ mcr p15, 0, ip, c1, c0, 0 /* write the control reg with CR_M cleared; r8 keeps the original value */
+ dsb
+ isb
+
+ /* Update level 2 entries covering the kernel (PAGE_OFFSET up to _end - 1) */
+ ldr r6, =(_end - 1)
+ add r7, r2, #0x1000 /* level 2 entries are addressed from r2 + 0x1000 */
+ add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER /* r6 = address of the entry covering _end - 1 */
+ add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER) /* r7 = address of the entry covering PAGE_OFFSET */
+1: ldrd r4, [r7] /* load one 64-bit entry into r5:r4 */
+ adds r4, r4, r0 /* 64-bit add: low word, sets carry */
+ adc r5, r5, r1 /* high word plus carry */
+ strd r4, [r7], #1 << L2_ORDER /* store it back and step to the next entry */
+ cmp r7, r6
+ bls 1b /* repeat while r7 has not passed r6 (unsigned compare) */
+
+ /* Update level 2 entries for the boot data: the entry selected by r3 and the next one */
+ add r7, r2, #0x1000
+ add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER /* NOTE(review): r3 is presumably the boot data address - confirm against the caller */
+ bic r7, r7, #(1 << L2_ORDER) - 1 /* round down to the start of an entry */
+ ldrd r4, [r7]
+ adds r4, r4, r0
+ adc r5, r5, r1
+ strd r4, [r7], #1 << L2_ORDER /* first entry done, advance to the following entry */
+ ldrd r4, [r7]
+ adds r4, r4, r0
+ adc r5, r5, r1
+ strd r4, [r7]
+
+ /* Update level 1 entries: four entries starting at r2 */
+ mov r6, #4 /* loop counter */
+ mov r7, r2
+2: ldrd r4, [r7]
+ adds r4, r4, r0
+ adc r5, r5, r1
+ strd r4, [r7], #1 << L1_ORDER
+ subs r6, r6, #1
+ bne 2b /* loop until the counter reaches zero */
+
+ mrrc p15, 0, r4, r5, c2 @ read TTBR0
+ adds r4, r4, r0 @ update physical address
+ adc r5, r5, r1
+ mcrr p15, 0, r4, r5, c2 @ write back TTBR0
+ mrrc p15, 1, r4, r5, c2 @ read TTBR1
+ adds r4, r4, r0 @ update physical address
+ adc r5, r5, r1
+ mcrr p15, 1, r4, r5, c2 @ write back TTBR1
+
+ dsb
+
+ mov ip, #0
+ mcr p15, 0, ip, c7, c5, 0 @ I+BTB cache invalidate
+ mcr p15, 0, ip, c8, c7, 0 @ local_flush_tlb_all()
+ dsb
+ isb
+
+ mcr p15, 0, r8, c1, c0, 0 @ re-enable MMU
+ dsb
+ isb
+
+ ldmfd sp!, {r4-r8, pc} /* restore the saved registers and return by loading pc */
+ENDPROC(lpae_pgtables_remap_asm)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index e0e23582c8b4..4550d247e308 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -873,6 +873,16 @@ b_epilogue:
off = offsetof(struct sk_buff, queue_mapping);
emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
break;
+ case BPF_LDX | BPF_W | BPF_ABS:
+ /*
+ * load a 32bit word from struct seccomp_data.
+ * seccomp_check_filter() will already have checked
+ * that k is 32bit aligned and lies within the
+ * struct seccomp_data.
+ */
+ ctx->seen |= SEEN_SKB;
+ emit(ARM_LDR_I(r_A, r_skb, k), ctx);
+ break;
default:
return -1;
}
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index f5b00f41c4f6..2235081a04ee 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -28,11 +28,7 @@
void __init orion_clkdev_add(const char *con_id, const char *dev_id,
struct clk *clk)
{
- struct clk_lookup *cl;
-
- cl = clkdev_alloc(clk, con_id, dev_id);
- if (cl)
- clkdev_add(cl);
+ clkdev_create(clk, con_id, "%s", dev_id);
}
/* Create clkdev entries for all orion platforms except kirkwood.
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 8aa791051029..9d259d94e429 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -6,9 +6,15 @@ obj-vdso := vgettimeofday.o datapage.o
targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING
-ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
+VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+VDSO_LDFLAGS += -nostdlib -shared
+VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id)
+VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd)
obj-$(CONFIG_VDSO) += vdso.o
extra-$(CONFIG_VDSO) += vdso.lds
@@ -40,10 +46,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
# Actual build commands
quiet_cmd_vdsold = VDSO $@
- cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \
- $(call cc-ldoption, -Wl$(comma)--build-id) \
- -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \
- -Wl,-z,common-page-size=4096 -o $@
+ cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \
+ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
quiet_cmd_vdsomunge = MUNGE $@
cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 7d0f07020c80..6c09cc440a2b 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -24,6 +24,7 @@
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
+#include <linux/console.h>
#include <linux/mm.h>
@@ -51,7 +52,9 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
int xen_platform_pci_unplug = XEN_UNPLUG_ALL;
EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
-static __read_mostly int xen_events_irq = -1;
+static __read_mostly unsigned int xen_events_irq;
+
+static __initdata struct device_node *xen_node;
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
@@ -150,40 +153,28 @@ static irqreturn_t xen_arm_callback(int irq, void *arg)
* documentation of the Xen Device Tree format.
*/
#define GRANT_TABLE_PHYSADDR 0
-static int __init xen_guest_init(void)
+void __init xen_early_init(void)
{
- struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page = 0;
- struct device_node *node;
int len;
const char *s = NULL;
const char *version = NULL;
const char *xen_prefix = "xen,xen-";
- struct resource res;
- phys_addr_t grant_frames;
- node = of_find_compatible_node(NULL, NULL, "xen,xen");
- if (!node) {
+ xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
+ if (!xen_node) {
pr_debug("No Xen support\n");
- return 0;
+ return;
}
- s = of_get_property(node, "compatible", &len);
+ s = of_get_property(xen_node, "compatible", &len);
if (strlen(xen_prefix) + 3 < len &&
!strncmp(xen_prefix, s, strlen(xen_prefix)))
version = s + strlen(xen_prefix);
if (version == NULL) {
pr_debug("Xen version not found\n");
- return 0;
+ return;
}
- if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
- return 0;
- grant_frames = res.start;
- xen_events_irq = irq_of_parse_and_map(node, 0);
- pr_info("Xen %s support found, events_irq=%d gnttab_frame=%pa\n",
- version, xen_events_irq, &grant_frames);
- if (xen_events_irq < 0)
- return -ENODEV;
+ pr_info("Xen %s support found\n", version);
xen_domain_type = XEN_HVM_DOMAIN;
@@ -194,9 +185,34 @@ static int __init xen_guest_init(void)
else
xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- get_zeroed_page(GFP_KERNEL);
+ if (!console_set_on_cmdline && !xen_initial_domain())
+ add_preferred_console("hvc", 0, NULL);
+}
+
+static int __init xen_guest_init(void)
+{
+ struct xen_add_to_physmap xatp;
+ struct shared_info *shared_info_page = NULL;
+ struct resource res;
+ phys_addr_t grant_frames;
+
+ if (!xen_domain())
+ return 0;
+
+ if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) {
+ pr_err("Xen grant table base address not found\n");
+ return -ENODEV;
+ }
+ grant_frames = res.start;
+
+ xen_events_irq = irq_of_parse_and_map(xen_node, 0);
+ if (!xen_events_irq) {
+ pr_err("Xen event channel interrupt not found\n");
+ return -ENODEV;
+ }
+
+ shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
+
if (!shared_info_page) {
pr_err("not enough memory\n");
return -ENOMEM;
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 498325074a06..03e75fef15b8 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -15,10 +15,10 @@
#include <xen/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index cb7a14c5cd69..887596c67b12 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -10,10 +10,10 @@
#include <xen/xen.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>