diff options
Diffstat (limited to 'arch/arm')
154 files changed, 5115 insertions, 2965 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 72c4273de003..a750c1425c3a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -15,6 +15,8 @@ config ARM select CLONE_BACKWARDS select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS + select EDAC_SUPPORT + select EDAC_ATOMIC_SCRUB select GENERIC_ALLOCATOR select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP @@ -31,8 +33,8 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL - select HAVE_ARCH_KGDB + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT @@ -43,7 +45,7 @@ config ARM select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if MMU - select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) + select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) @@ -57,10 +59,10 @@ config ARM select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ - select HAVE_KPROBES if !XIP_KERNEL + select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_MEMBLOCK - select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND + select HAVE_MOD_ARCH_SPECIFIC select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_OPTPROBES if !THUMB2_KERNEL select HAVE_PERF_EVENTS @@ -171,7 +173,7 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT bool - default y + default !CPU_V7M config RWSEM_XCHGADD_ALGORITHM bool @@ -1008,11 +1010,6 @@ config PLAT_PXA config PLAT_VERSATILE bool -config ARM_TIMER_SP804 - bool - select CLKSRC_MMIO - select CLKSRC_OF if OF - source "arch/arm/firmware/Kconfig" source arch/arm/mm/Kconfig @@ -1340,6 +1337,7 @@ config SMP depends on GENERIC_CLOCKEVENTS depends on HAVE_SMP depends on MMU || ARM_MPU + select IRQ_WORK help This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more @@ -1715,6 +1713,21 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE config ARCH_WANT_GENERAL_HUGETLB def_bool y +config ARM_MODULE_PLTS + bool "Use PLTs to allow module memory to spill over into vmalloc area" + depends on MODULES + help + Allocate PLTs when loading modules so that jumps and calls whose + targets are too far away for their relative offsets to be encoded + in the instructions themselves can be bounced via veneers in the + module's PLT. This allows modules to be allocated in the generic + vmalloc area after the dedicated module memory area has been + exhausted. The modules will use slightly more memory, but after + rounding up to page size, the actual memory footprint is usually + the same. + + Say y if you are getting out of memory errors while loading modules + source "mm/Kconfig" config FORCE_MAX_ZONEORDER @@ -1985,6 +1998,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) + depends on !CPU_V7M help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index a6b5d0e35968..f1b157971366 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -5,6 +5,7 @@ source "lib/Kconfig.debug" config ARM_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL + depends on MMU select DEBUG_FS ---help--- Say Y here if you want to show the kernel pagetable layout in a diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 2a4fae7e9c44..07ab3d203916 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -19,6 +19,10 @@ LDFLAGS_vmlinux += --be8 LDFLAGS_MODULE += --be8 endif +ifeq ($(CONFIG_ARM_MODULE_PLTS),y) +LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds +endif + OBJCOPYFLAGS :=-O binary -R .comment -S GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 6e1fb2b2ecc7..7a13aebacf81 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -103,6 +103,8 @@ extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \ hyp-stub.S +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING + ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2c45b5709fa4..06e983f59980 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -130,7 +130,7 @@ start: .endr ARM( mov r0, r0 ) ARM( b 1f ) - THUMB( adr r12, BSYM(1f) ) + THUMB( badr r12, 1f ) THUMB( bx r12 ) .word _magic_sig @ Magic numbers to help the loader @@ -447,7 +447,7 @@ dtb_check_done: bl cache_clean_flush - adr r0, BSYM(restart) + badr r0, restart add r0, r0, r6 mov pc, r0 diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index 1f4e71876b00..17ae0f3efac8 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -5,6 +5,10 @@ #include <linux/string.h> #include <asm/byteorder.h> +typedef __be16 fdt16_t; +typedef __be32 fdt32_t; +typedef __be64 fdt64_t; + #define fdt16_to_cpu(x) be16_to_cpu(x) #define cpu_to_fdt16(x) cpu_to_be16(x) #define fdt32_to_cpu(x) be32_to_cpu(x) diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 7128fad991ac..a42cc377a862 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -19,6 +19,7 @@ rtc0 = &mcp_rtc; rtc1 = &tps659038_rtc; rtc2 = &rtc; + display0 = &hdmi0; }; memory { @@ -103,6 +104,51 @@ pinctrl-names = "default"; pinctrl-0 = <&extcon_usb2_pins>; }; + + hdmi0: connector { + compatible = "hdmi-connector"; + label = "hdmi"; + + type = "a"; + + port { + hdmi_connector_in: endpoint { + remote-endpoint = <&tpd12s015_out>; + }; + }; + }; + + tpd12s015: encoder { + compatible = "ti,tpd12s015"; + + pinctrl-names = "default"; + pinctrl-0 = <&tpd12s015_pins>; + + gpios = <&gpio7 10 GPIO_ACTIVE_HIGH>, /* gpio7_10, CT CP HPD */ + <&gpio6 28 GPIO_ACTIVE_HIGH>, /* gpio6_28, LS OE */ + <&gpio7 12 GPIO_ACTIVE_HIGH>; /* gpio7_12/sp1_cs2, HPD */ + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + tpd12s015_in: endpoint { + remote-endpoint = <&hdmi_out>; + }; + }; + + port@1 { + reg = <1>; + + tpd12s015_out: endpoint { + remote-endpoint = <&hdmi_connector_in>; + }; + }; + }; + }; }; &dra7_pmx_core { @@ -122,6 +168,13 @@ >; }; + hdmi_pins: pinmux_hdmi_pins { + pinctrl-single,pins = < + 0x408 (PIN_INPUT | MUX_MODE1) /* i2c2_sda.hdmi1_ddc_scl */ + 0x40c (PIN_INPUT | MUX_MODE1) /* i2c2_scl.hdmi1_ddc_sda */ + >; + }; + i2c3_pins_default: i2c3_pins_default { pinctrl-single,pins = < 0x2a4 (PIN_INPUT| MUX_MODE10) /* mcasp1_aclkx.i2c3_sda */ @@ -278,6 +331,14 @@ 0x3e8 (PIN_INPUT_PULLUP | MUX_MODE14) /* uart1_ctsn.gpio7_24 */ >; }; + + tpd12s015_pins: pinmux_tpd12s015_pins { + pinctrl-single,pins = < + 0x3b0 (PIN_OUTPUT | MUX_MODE14) /* gpio7_10 CT_CP_HPD */ + 0x3b8 (PIN_INPUT_PULLDOWN | MUX_MODE14) /* gpio7_12 HPD */ + 0x370 (PIN_OUTPUT | MUX_MODE14) /* gpio6_28 LS_OE */ + >; + }; }; &i2c1 { @@ -608,3 +669,23 @@ }; }; }; + +&dss { + status = "ok"; + + vdda_video-supply = <&ldoln_reg>; +}; + +&hdmi { + status = "ok"; + vdda-supply = <&ldo3_reg>; + + pinctrl-names = "default"; + pinctrl-0 = <&hdmi_pins>; + + port { + hdmi_out: endpoint { + remote-endpoint = <&tpd12s015_in>; + }; + }; +}; diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 7f0252c580e4..a718866ba52d 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -268,7 +268,6 @@ }; eth0: ethernet@70000 { - compatible = "marvell,armada-370-neta"; reg = <0x70000 0x4000>; interrupts = <8>; clocks = <&gateclk 4>; @@ -284,7 +283,6 @@ }; eth1: ethernet@74000 { - compatible = "marvell,armada-370-neta"; reg = <0x74000 0x4000>; interrupts = <10>; clocks = <&gateclk 3>; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 3f036bd635f4..53a1a5abe147 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -311,6 +311,14 @@ dmacap,memset; }; }; + + ethernet@70000 { + compatible = "marvell,armada-370-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-370-neta"; + }; }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi index 8479fdc9e9c2..c5fdc99f0dbe 100644 --- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi @@ -318,7 +318,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi index 661d54c81580..0e24f1a38540 100644 --- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi @@ -356,7 +356,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 58eaef46efd5..3de9b761cc1a 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -185,7 +185,7 @@ }; eth2: ethernet@30000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x30000 0x4000>; interrupts = <12>; clocks = <&gateclk 2>; @@ -228,6 +228,14 @@ }; }; + ethernet@70000 { + compatible = "marvell,armada-xp-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-xp-neta"; + }; + xor@f0900 { compatible = "marvell,orion-xor"; reg = <0xF0900 0x100 diff --git a/arch/arm/boot/dts/atlas7.dtsi b/arch/arm/boot/dts/atlas7.dtsi index a753178abc85..5dfd3a44bf82 100644 --- a/arch/arm/boot/dts/atlas7.dtsi +++ b/arch/arm/boot/dts/atlas7.dtsi @@ -38,6 +38,21 @@ }; }; + clocks { + xinw { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "xinw"; + }; + xin { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <26000000>; + clock-output-names = "xin"; + }; + }; + noc { compatible = "simple-bus"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index f03a091cd076..8f1e25bcecbd 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -131,6 +131,11 @@ regulator-max-microvolt = <3000000>; }; }; + + scm_conf_clocks: clocks { + #address-cells = <1>; + #size-cells = <0>; + }; }; dra7_pmx_core: pinmux@1400 { @@ -1469,6 +1474,44 @@ clocks = <&sys_clkin1>; status = "disabled"; }; + + dss: dss@58000000 { + compatible = "ti,dra7-dss"; + /* 'reg' defined in dra72x.dtsi and dra74x.dtsi */ + /* 'clocks' defined in dra72x.dtsi and dra74x.dtsi */ + status = "disabled"; + ti,hwmods = "dss_core"; + /* CTRL_CORE_DSS_PLL_CONTROL */ + syscon-pll-ctrl = <&scm_conf 0x538>; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + dispc@58001000 { + compatible = "ti,dra7-dispc"; + reg = <0x58001000 0x1000>; + interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>; + ti,hwmods = "dss_dispc"; + clocks = <&dss_dss_clk>; + clock-names = "fck"; + /* CTRL_CORE_SMA_SW_1 */ + syscon-pol = <&scm_conf 0x534>; + }; + + hdmi: encoder@58060000 { + compatible = "ti,dra7-hdmi"; + reg = <0x58040000 0x200>, + <0x58040200 0x80>, + <0x58040300 0x80>, + <0x58060000 0x19000>; + reg-names = "wp", "pll", "phy", "core"; + interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + ti,hwmods = "dss_hdmi"; + clocks = <&dss_48mhz_clk>, <&dss_hdmi_clk>; + clock-names = "fck", "sys_clk"; + }; + }; }; thermal_zones: thermal-zones { diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index ce0390f081d9..4e1b60581782 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -19,6 +19,10 @@ reg = <0x80000000 0x40000000>; /* 1024 MB */ }; + aliases { + display0 = &hdmi0; + }; + evm_3v3: fixedregulator-evm_3v3 { compatible = "regulator-fixed"; regulator-name = "evm_3v3"; @@ -35,6 +39,51 @@ compatible = "linux,extcon-usb-gpio"; id-gpio = <&pcf_gpio_21 2 GPIO_ACTIVE_HIGH>; }; + + hdmi0: connector { + compatible = "hdmi-connector"; + label = "hdmi"; + + type = "a"; + + port { + hdmi_connector_in: endpoint { + remote-endpoint = <&tpd12s015_out>; + }; + }; + }; + + tpd12s015: encoder { + compatible = "ti,tpd12s015"; + + pinctrl-names = "default"; + pinctrl-0 = <&tpd12s015_pins>; + + gpios = <&pcf_hdmi 4 GPIO_ACTIVE_HIGH>, /* P4, CT CP HPD */ + <&pcf_hdmi 5 GPIO_ACTIVE_HIGH>, /* P5, LS OE */ + <&gpio7 12 GPIO_ACTIVE_HIGH>; /* gpio7_12/sp1_cs2, HPD */ + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + tpd12s015_in: endpoint { + remote-endpoint = <&hdmi_out>; + }; + }; + + port@1 { + reg = <1>; + + tpd12s015_out: endpoint { + remote-endpoint = <&hdmi_connector_in>; + }; + }; + }; + }; }; &dra7_pmx_core { @@ -45,6 +94,13 @@ >; }; + i2c5_pins: pinmux_i2c5_pins { + pinctrl-single,pins = < + 0x2b4 (PIN_INPUT | MUX_MODE10) /* mcasp1_axr0.i2c5_sda */ + 0x2b8 (PIN_INPUT | MUX_MODE10) /* mcasp1_axr1.i2c5_scl */ + >; + }; + nand_default: nand_default { pinctrl-single,pins = < 0x0 (PIN_INPUT | MUX_MODE0) /* gpmc_ad0 */ @@ -142,6 +198,19 @@ 0xb8 (PIN_OUTPUT | MUX_MODE1) /* gpmc_cs2.qspi1_cs0 */ >; }; + + hdmi_pins: pinmux_hdmi_pins { + pinctrl-single,pins = < + 0x408 (PIN_INPUT | MUX_MODE1) /* i2c2_sda.hdmi1_ddc_scl */ + 0x40c (PIN_INPUT | MUX_MODE1) /* i2c2_scl.hdmi1_ddc_sda */ + >; + }; + + tpd12s015_pins: pinmux_tpd12s015_pins { + pinctrl-single,pins = < + 0x3b8 (PIN_INPUT_PULLDOWN | MUX_MODE14) /* gpio7_12 HPD */ + >; + }; }; &i2c1 { @@ -277,6 +346,27 @@ }; }; +&i2c5 { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&i2c5_pins>; + clock-frequency = <400000>; + + pcf_hdmi: pcf8575@26 { + compatible = "nxp,pcf8575"; + reg = <0x26>; + gpio-controller; + #gpio-cells = <2>; + /* + * initial state is used here to keep the mdio interface + * selected on RU89 through SEL_VIN4_MUX_S0, VIN2_S1 and + * VIN2_S0 driven high otherwise Ethernet stops working + * VIN6_SEL_S0 is low, thus selecting McASP3 over VIN6 + */ + lines-initial-states = <0x0f2b>; + }; +}; + &uart1 { status = "okay"; }; @@ -566,3 +656,23 @@ }; }; }; + +&dss { + status = "ok"; + + vdda_video-supply = <&ldo5_reg>; +}; + +&hdmi { + status = "ok"; + vdda-supply = <&ldo3_reg>; + + pinctrl-names = "default"; + pinctrl-0 = <&hdmi_pins>; + + port { + hdmi_out: endpoint { + remote-endpoint = <&tpd12s015_in>; + }; + }; +}; diff --git a/arch/arm/boot/dts/dra72x.dtsi b/arch/arm/boot/dts/dra72x.dtsi index 03d742f8d572..eaca143faa77 100644 --- a/arch/arm/boot/dts/dra72x.dtsi +++ b/arch/arm/boot/dts/dra72x.dtsi @@ -34,3 +34,14 @@ interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>; }; }; + +&dss { + reg = <0x58000000 0x80>, + <0x58004054 0x4>, + <0x58004300 0x20>; + reg-names = "dss", "pll1_clkctrl", "pll1"; + + clocks = <&dss_dss_clk>, + <&dss_video1_clk>; + clock-names = "fck", "video1_clk"; +}; diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi index cc560a70926f..fa995d0ca1f2 100644 --- a/arch/arm/boot/dts/dra74x.dtsi +++ b/arch/arm/boot/dts/dra74x.dtsi @@ -73,3 +73,18 @@ }; }; }; + +&dss { + reg = <0x58000000 0x80>, + <0x58004054 0x4>, + <0x58004300 0x20>, + <0x58005054 0x4>, + <0x58005300 0x20>; + reg-names = "dss", "pll1_clkctrl", "pll1", + "pll2_clkctrl", "pll2"; + + clocks = <&dss_dss_clk>, + <&dss_video1_clk>, + <&dss_video2_clk>; + clock-names = "fck", "video1_clk", "video2_clk"; +}; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 3b933f74d000..357bedeebfac 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -1531,6 +1531,7 @@ clocks = <&dpll_per_h12x2_ck>; ti,bit-shift = <8>; reg = <0x1120>; + ti,set-rate-parent; }; dss_hdmi_clk: dss_hdmi_clk { @@ -2136,3 +2137,13 @@ clocks = <&dpll_usb_ck>; }; }; + +&scm_conf_clocks { + dss_deshdcp_clk: dss_deshdcp_clk { + #clock-cells = <0>; + compatible = "ti,gate-clock"; + clocks = <&l3_iclk_div>; + ti,bit-shift = <0>; + reg = <0x558>; + }; +}; diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index f716e2b7d0b9..b0d52b1a646a 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -166,7 +166,7 @@ phys = <&mipi_phy 1>; phy-names = "dsim"; clocks = <&clock CLK_DSIM0>, <&clock CLK_SCLK_MIPI0>; - clock-names = "bus_clk", "pll_clk"; + clock-names = "bus_clk", "sclk_mipi"; status = "disabled"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index f5bebdd6d1be..4779b07310df 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -548,6 +548,17 @@ status = "disabled"; }; + sdr: sdr@ffc25000 { + compatible = "syscon"; + reg = <0xffcfb100 0x80>; + }; + + sdramedac { + compatible = "altr,sdram-edac-a10"; + altr,sdr-syscon = <&sdr>; + interrupts = <0 2 4>, <0 0 4>; + }; + L2: l2-cache@fffff000 { compatible = "arm,pl310-cache"; reg = <0xfffff000 0x1000>; diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 70b1eff477b3..6ee5959a813b 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_SHARP_LOCOMO) += locomo.o obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o -obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o CFLAGS_REMOVE_mcpm_entry.o = -pg AFLAGS_mcpm_head.o := -march=armv7-a diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 5f8a52ac7edf..a923524d1040 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -20,6 +20,126 @@ #include <asm/cputype.h> #include <asm/suspend.h> +/* + * The public API for this code is documented in arch/arm/include/asm/mcpm.h. + * For a comprehensive description of the main algorithm used here, please + * see Documentation/arm/cluster-pm-race-avoidance.txt. + */ + +struct sync_struct mcpm_sync; + +/* + * __mcpm_cpu_going_down: Indicates that the cpu is being torn down. + * This must be called at the point of committing to teardown of a CPU. + * The CPU cache (SCTRL.C bit) is expected to still be active. + */ +static void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster) +{ + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); +} + +/* + * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the + * cluster can be torn down without disrupting this CPU. + * To avoid deadlocks, this must be called before a CPU is powered down. + * The CPU cache (SCTRL.C bit) is expected to be off. + * However L2 cache might or might not be active. + */ +static void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster) +{ + dmb(); + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); + sev(); +} + +/* + * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section. + * @state: the final state of the cluster: + * CLUSTER_UP: no destructive teardown was done and the cluster has been + * restored to the previous state (CPU cache still active); or + * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off + * (CPU cache disabled, L2 cache either enabled or disabled). + */ +static void __mcpm_outbound_leave_critical(unsigned int cluster, int state) +{ + dmb(); + mcpm_sync.clusters[cluster].cluster = state; + sync_cache_w(&mcpm_sync.clusters[cluster].cluster); + sev(); +} + +/* + * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section. + * This function should be called by the last man, after local CPU teardown + * is complete. CPU cache expected to be active. + * + * Returns: + * false: the critical section was not entered because an inbound CPU was + * observed, or the cluster is already being set up; + * true: the critical section was entered: it is now safe to tear down the + * cluster. + */ +static bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster) +{ + unsigned int i; + struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster]; + + /* Warn inbound CPUs that the cluster is being torn down: */ + c->cluster = CLUSTER_GOING_DOWN; + sync_cache_w(&c->cluster); + + /* Back out if the inbound cluster is already in the critical region: */ + sync_cache_r(&c->inbound); + if (c->inbound == INBOUND_COMING_UP) + goto abort; + + /* + * Wait for all CPUs to get out of the GOING_DOWN state, so that local + * teardown is complete on each CPU before tearing down the cluster. + * + * If any CPU has been woken up again from the DOWN state, then we + * shouldn't be taking the cluster down at all: abort in that case. + */ + sync_cache_r(&c->cpus); + for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) { + int cpustate; + + if (i == cpu) + continue; + + while (1) { + cpustate = c->cpus[i].cpu; + if (cpustate != CPU_GOING_DOWN) + break; + + wfe(); + sync_cache_r(&c->cpus[i].cpu); + } + + switch (cpustate) { + case CPU_DOWN: + continue; + + default: + goto abort; + } + } + + return true; + +abort: + __mcpm_outbound_leave_critical(cluster, CLUSTER_UP); + return false; +} + +static int __mcpm_cluster_state(unsigned int cluster) +{ + sync_cache_r(&mcpm_sync.clusters[cluster].cluster); + return mcpm_sync.clusters[cluster].cluster; +} + extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) @@ -78,16 +198,11 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster) bool cpu_is_down, cluster_is_down; int ret = 0; + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); if (!platform_ops) return -EUNATCH; /* try not to shadow power_up errors */ might_sleep(); - /* backward compatibility callback */ - if (platform_ops->power_up) - return platform_ops->power_up(cpu, cluster); - - pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); - /* * Since this is called with IRQs enabled, and no arch_spin_lock_irq * variant exists, we need to disable IRQs manually here. @@ -128,29 +243,17 @@ void mcpm_cpu_power_down(void) bool cpu_going_down, last_man; phys_reset_t phys_reset; + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); if (WARN_ON_ONCE(!platform_ops)) return; BUG_ON(!irqs_disabled()); - /* - * Do this before calling into the power_down method, - * as it might not always be safe to do afterwards. - */ setup_mm_for_reboot(); - /* backward compatibility callback */ - if (platform_ops->power_down) { - platform_ops->power_down(); - goto not_dead; - } - - mpidr = read_cpuid_mpidr(); - cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); - __mcpm_cpu_going_down(cpu, cluster); - arch_spin_lock(&mcpm_lock); BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); @@ -187,7 +290,6 @@ void mcpm_cpu_power_down(void) if (cpu_going_down) wfi(); -not_dead: /* * It is possible for a power_up request to happen concurrently * with a power_down request for the same CPU. In this case the @@ -219,22 +321,11 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster) return ret; } -void mcpm_cpu_suspend(u64 expected_residency) +void mcpm_cpu_suspend(void) { if (WARN_ON_ONCE(!platform_ops)) return; - /* backward compatibility callback */ - if (platform_ops->suspend) { - phys_reset_t phys_reset; - BUG_ON(!irqs_disabled()); - setup_mm_for_reboot(); - platform_ops->suspend(expected_residency); - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset(virt_to_phys(mcpm_entry_point)); - BUG(); - } - /* Some platforms might have to enable special resume modes, etc. */ if (platform_ops->cpu_suspend_prepare) { unsigned int mpidr = read_cpuid_mpidr(); @@ -256,12 +347,6 @@ int mcpm_cpu_powered_up(void) if (!platform_ops) return -EUNATCH; - /* backward compatibility callback */ - if (platform_ops->powered_up) { - platform_ops->powered_up(); - return 0; - } - mpidr = read_cpuid_mpidr(); cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); @@ -334,120 +419,6 @@ int __init mcpm_loopback(void (*cache_disable)(void)) #endif -struct sync_struct mcpm_sync; - -/* - * __mcpm_cpu_going_down: Indicates that the cpu is being torn down. - * This must be called at the point of committing to teardown of a CPU. - * The CPU cache (SCTRL.C bit) is expected to still be active. - */ -void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster) -{ - mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN; - sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); -} - -/* - * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the - * cluster can be torn down without disrupting this CPU. - * To avoid deadlocks, this must be called before a CPU is powered down. - * The CPU cache (SCTRL.C bit) is expected to be off. - * However L2 cache might or might not be active. - */ -void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster) -{ - dmb(); - mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN; - sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); - sev(); -} - -/* - * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section. - * @state: the final state of the cluster: - * CLUSTER_UP: no destructive teardown was done and the cluster has been - * restored to the previous state (CPU cache still active); or - * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off - * (CPU cache disabled, L2 cache either enabled or disabled). - */ -void __mcpm_outbound_leave_critical(unsigned int cluster, int state) -{ - dmb(); - mcpm_sync.clusters[cluster].cluster = state; - sync_cache_w(&mcpm_sync.clusters[cluster].cluster); - sev(); -} - -/* - * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section. - * This function should be called by the last man, after local CPU teardown - * is complete. CPU cache expected to be active. - * - * Returns: - * false: the critical section was not entered because an inbound CPU was - * observed, or the cluster is already being set up; - * true: the critical section was entered: it is now safe to tear down the - * cluster. - */ -bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster) -{ - unsigned int i; - struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster]; - - /* Warn inbound CPUs that the cluster is being torn down: */ - c->cluster = CLUSTER_GOING_DOWN; - sync_cache_w(&c->cluster); - - /* Back out if the inbound cluster is already in the critical region: */ - sync_cache_r(&c->inbound); - if (c->inbound == INBOUND_COMING_UP) - goto abort; - - /* - * Wait for all CPUs to get out of the GOING_DOWN state, so that local - * teardown is complete on each CPU before tearing down the cluster. - * - * If any CPU has been woken up again from the DOWN state, then we - * shouldn't be taking the cluster down at all: abort in that case. - */ - sync_cache_r(&c->cpus); - for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) { - int cpustate; - - if (i == cpu) - continue; - - while (1) { - cpustate = c->cpus[i].cpu; - if (cpustate != CPU_GOING_DOWN) - break; - - wfe(); - sync_cache_r(&c->cpus[i].cpu); - } - - switch (cpustate) { - case CPU_DOWN: - continue; - - default: - goto abort; - } - } - - return true; - -abort: - __mcpm_outbound_leave_critical(cluster, CLUSTER_UP); - return false; -} - -int __mcpm_cluster_state(unsigned int cluster) -{ - sync_cache_r(&mcpm_sync.clusters[cluster].cluster); - return mcpm_sync.clusters[cluster].cluster; -} - extern unsigned long mcpm_power_up_setup_phys; int __init mcpm_sync_init( diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index e02db4b81a66..08b3bb9bc6a2 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -49,7 +49,7 @@ ENTRY(mcpm_entry_point) ARM_BE8(setend be) - THUMB( adr r12, BSYM(1f) ) + THUMB( badr r12, 1f ) THUMB( bx r12 ) THUMB( .thumb ) 1: diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 5cc779c8e9c6..93ee70dbbdd3 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -501,8 +501,8 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) * Register SA1111 interrupt */ irq_set_irq_type(sachip->irq, IRQ_TYPE_EDGE_RISING); - irq_set_handler_data(sachip->irq, sachip); - irq_set_chained_handler(sachip->irq, sa1111_irq_handler); + irq_set_chained_handler_and_data(sachip->irq, sa1111_irq_handler, + sachip); dev_info(sachip->dev, "Providing IRQ%u-%u\n", sachip->irq_base, sachip->irq_base + SA1111_IRQ_NR - 1); @@ -836,8 +836,7 @@ static void __sa1111_remove(struct sa1111 *sachip) clk_unprepare(sachip->clk); if (sachip->irq != NO_IRQ) { - irq_set_chained_handler(sachip->irq, NULL); - irq_set_handler_data(sachip->irq, NULL); + irq_set_chained_handler_and_data(sachip->irq, NULL, NULL); irq_free_descs(sachip->irq_base, SA1111_IRQ_NR); release_mem_region(sachip->phys + SA1111_INTC, 512); diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c deleted file mode 100644 index 19211324772f..000000000000 --- a/arch/arm/common/timer-sp.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * linux/arch/arm/common/timer-sp.c - * - * Copyright (C) 1999 - 2003 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/clk.h> -#include <linux/clocksource.h> -#include <linux/clockchips.h> -#include <linux/err.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/sched_clock.h> - -#include <asm/hardware/arm_timer.h> -#include <asm/hardware/timer-sp.h> - -static long __init sp804_get_clock_rate(struct clk *clk) -{ - long rate; - int err; - - err = clk_prepare(clk); - if (err) { - pr_err("sp804: clock failed to prepare: %d\n", err); - clk_put(clk); - return err; - } - - err = clk_enable(clk); - if (err) { - pr_err("sp804: clock failed to enable: %d\n", err); - clk_unprepare(clk); - clk_put(clk); - return err; - } - - rate = clk_get_rate(clk); - if (rate < 0) { - pr_err("sp804: clock failed to get rate: %ld\n", rate); - clk_disable(clk); - clk_unprepare(clk); - clk_put(clk); - } - - return rate; -} - -static void __iomem *sched_clock_base; - -static u64 notrace sp804_read(void) -{ - return ~readl_relaxed(sched_clock_base + TIMER_VALUE); -} - -void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, - const char *name, - struct clk *clk, - int use_sched_clock) -{ - long rate; - - if (!clk) { - clk = clk_get_sys("sp804", name); - if (IS_ERR(clk)) { - pr_err("sp804: clock not found: %d\n", - (int)PTR_ERR(clk)); - return; - } - } - - rate = sp804_get_clock_rate(clk); - - if (rate < 0) - return; - - /* setup timer 0 as free-running clocksource */ - writel(0, base + TIMER_CTRL); - writel(0xffffffff, base + TIMER_LOAD); - writel(0xffffffff, base + TIMER_VALUE); - writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, - base + TIMER_CTRL); - - clocksource_mmio_init(base + TIMER_VALUE, name, - rate, 200, 32, clocksource_mmio_readl_down); - - if (use_sched_clock) { - sched_clock_base = base; - sched_clock_register(sp804_read, 32, rate); - } -} - - -static void __iomem *clkevt_base; -static unsigned long clkevt_reload; - -/* - * IRQ handler for the timer - */ -static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - - /* clear the interrupt */ - writel(1, clkevt_base + TIMER_INTCLR); - - evt->event_handler(evt); - - return IRQ_HANDLED; -} - -static void sp804_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long ctrl = TIMER_CTRL_32BIT | TIMER_CTRL_IE; - - writel(ctrl, clkevt_base + TIMER_CTRL); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - writel(clkevt_reload, clkevt_base + TIMER_LOAD); - ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE; - break; - - case CLOCK_EVT_MODE_ONESHOT: - /* period set, and timer enabled in 'next_event' hook */ - ctrl |= TIMER_CTRL_ONESHOT; - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - default: - break; - } - - writel(ctrl, clkevt_base + TIMER_CTRL); -} - -static int sp804_set_next_event(unsigned long next, - struct clock_event_device *evt) -{ - unsigned long ctrl = readl(clkevt_base + TIMER_CTRL); - - writel(next, clkevt_base + TIMER_LOAD); - writel(ctrl | TIMER_CTRL_ENABLE, clkevt_base + TIMER_CTRL); - - return 0; -} - -static struct clock_event_device sp804_clockevent = { - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_DYNIRQ, - .set_mode = sp804_set_mode, - .set_next_event = sp804_set_next_event, - .rating = 300, -}; - -static struct irqaction sp804_timer_irq = { - .name = "timer", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = sp804_timer_interrupt, - .dev_id = &sp804_clockevent, -}; - -void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name) -{ - struct clock_event_device *evt = &sp804_clockevent; - long rate; - - if (!clk) - clk = clk_get_sys("sp804", name); - if (IS_ERR(clk)) { - pr_err("sp804: %s clock not found: %d\n", name, - (int)PTR_ERR(clk)); - return; - } - - rate = sp804_get_clock_rate(clk); - if (rate < 0) - return; - - clkevt_base = base; - clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ); - evt->name = name; - evt->irq = irq; - evt->cpumask = cpu_possible_mask; - - writel(0, base + TIMER_CTRL); - - setup_irq(irq, &sp804_timer_irq); - clockevents_config_and_register(evt, rate, 0xf, 0xffffffff); -} - -static void __init sp804_of_init(struct device_node *np) -{ - static bool initialized = false; - void __iomem *base; - int irq; - u32 irq_num = 0; - struct clk *clk1, *clk2; - const char *name = of_get_property(np, "compatible", NULL); - - base = of_iomap(np, 0); - if (WARN_ON(!base)) - return; - - /* Ensure timers are disabled */ - writel(0, base + TIMER_CTRL); - writel(0, base + TIMER_2_BASE + TIMER_CTRL); - - if (initialized || !of_device_is_available(np)) - goto err; - - clk1 = of_clk_get(np, 0); - if (IS_ERR(clk1)) - clk1 = NULL; - - /* Get the 2nd clock if the timer has 3 timer clocks */ - if (of_count_phandle_with_args(np, "clocks", "#clock-cells") == 3) { - clk2 = of_clk_get(np, 1); - if (IS_ERR(clk2)) { - pr_err("sp804: %s clock not found: %d\n", np->name, - (int)PTR_ERR(clk2)); - clk2 = NULL; - } - } else - clk2 = clk1; - - irq = irq_of_parse_and_map(np, 0); - if (irq <= 0) - goto err; - - of_property_read_u32(np, "arm,sp804-has-irq", &irq_num); - if (irq_num == 2) { - __sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name); - __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); - } else { - __sp804_clockevents_init(base, irq, clk1 , name); - __sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, - name, clk2, 1); - } - initialized = true; - - return; -err: - iounmap(base); -} -CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init); - -static void __init integrator_cp_of_init(struct device_node *np) -{ - static int init_count = 0; - void __iomem *base; - int irq; - const char *name = of_get_property(np, "compatible", NULL); - struct clk *clk; - - base = of_iomap(np, 0); - if (WARN_ON(!base)) - return; - clk = of_clk_get(np, 0); - if (WARN_ON(IS_ERR(clk))) - return; - - /* Ensure timer is disabled */ - writel(0, base + TIMER_CTRL); - - if (init_count == 2 || !of_device_is_available(np)) - goto err; - - if (!init_count) - __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); - else { - irq = irq_of_parse_and_map(np, 0); - if (irq <= 0) - goto err; - - __sp804_clockevents_init(base, irq, clk, name); - } - - init_count++; - return; -err: - iounmap(base); -} -CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 8da2207b0072..27ed1b1cd1d7 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -53,20 +53,13 @@ config CRYPTO_SHA256_ARM SHA-256 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. -config CRYPTO_SHA512_ARM_NEON - tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" - depends on KERNEL_MODE_NEON - select CRYPTO_SHA512 +config CRYPTO_SHA512_ARM + tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)" select CRYPTO_HASH + depends on !CPU_V7M help SHA-512 secure hash standard (DFIPS 180-2) implemented - using ARM NEON instructions, when available. - - This version of SHA implements a 512 bit hash with 256 bits of - security against collision attacks. - - This code also includes SHA-384, a 384 bit hash with 192 bits - of security against collision attacks. + using optimized ARM assembler and NEON, when available. config CRYPTO_AES_ARM tristate "AES cipher algorithms (ARM-asm)" diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 6ea828241fcb..fc5150702b64 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o -obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -30,7 +30,8 @@ sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) -sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o +sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o +sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o @@ -45,4 +46,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S +$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 8cfa468ee570..987aa632c9f0 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -101,15 +101,14 @@ \dround q10, q11 blo 0f @ AES-128: 10 rounds vld1.8 {q10-q11}, [ip]! - beq 1f @ AES-192: 12 rounds \dround q12, q13 + beq 1f @ AES-192: 12 rounds vld1.8 {q12-q13}, [ip] \dround q10, q11 0: \fround q12, q13, q14 bx lr -1: \dround q12, q13 - \fround q10, q11, q14 +1: \fround q10, q11, q14 bx lr .endm @@ -122,8 +121,8 @@ * q2 : third in/output block (_3x version only) * q8 : first round key * q9 : secound round key - * ip : address of 3rd round key * q14 : final round key + * r2 : address of round key array * r3 : number of rounds */ .align 6 diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl new file mode 100644 index 000000000000..a2b11a844357 --- /dev/null +++ b/arch/arm/crypto/sha512-armv4.pl @@ -0,0 +1,649 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. +# ==================================================================== + +# SHA512 block procedure for ARMv4. September 2007. + +# This code is ~4.5 (four and a half) times faster than code generated +# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue +# Xscale PXA250 core]. +# +# July 2010. +# +# Rescheduling for dual-issue pipeline resulted in 6% improvement on +# Cortex A8 core and ~40 cycles per processed byte. + +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 7% +# improvement on Coxtex A8 core and ~38 cycles per byte. + +# March 2011. +# +# Add NEON implementation. On Cortex A8 it was measured to process +# one byte in 23.3 cycles or ~60% faster than integer-only code. + +# August 2012. +# +# Improve NEON performance by 12% on Snapdragon S4. In absolute +# terms it's 22.6 cycles per byte, which is disappointing result. +# Technical writers asserted that 3-way S4 pipeline can sustain +# multiple NEON instructions per cycle, but dual NEON issue could +# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html +# for further details. On side note Cortex-A15 processes one byte in +# 16 cycles. + +# Byte order [in]dependence. ========================================= +# +# Originally caller was expected to maintain specific *dword* order in +# h[0-7], namely with most significant dword at *lower* address, which +# was reflected in below two parameters as 0 and 4. Now caller is +# expected to maintain native byte order for whole 64-bit values. +$hi="HI"; +$lo="LO"; +# ==================================================================== + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$ctx="r0"; # parameter block +$inp="r1"; +$len="r2"; + +$Tlo="r3"; +$Thi="r4"; +$Alo="r5"; +$Ahi="r6"; +$Elo="r7"; +$Ehi="r8"; +$t0="r9"; +$t1="r10"; +$t2="r11"; +$t3="r12"; +############ r13 is stack pointer +$Ktbl="r14"; +############ r15 is program counter + +$Aoff=8*0; +$Boff=8*1; +$Coff=8*2; +$Doff=8*3; +$Eoff=8*4; +$Foff=8*5; +$Goff=8*6; +$Hoff=8*7; +$Xoff=8*8; + +sub BODY_00_15() { +my $magic = shift; +$code.=<<___; + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov $t0,$Elo,lsr#14 + str $Tlo,[sp,#$Xoff+0] + mov $t1,$Ehi,lsr#14 + str $Thi,[sp,#$Xoff+4] + eor $t0,$t0,$Ehi,lsl#18 + ldr $t2,[sp,#$Hoff+0] @ h.lo + eor $t1,$t1,$Elo,lsl#18 + ldr $t3,[sp,#$Hoff+4] @ h.hi + eor $t0,$t0,$Elo,lsr#18 + eor $t1,$t1,$Ehi,lsr#18 + eor $t0,$t0,$Ehi,lsl#14 + eor $t1,$t1,$Elo,lsl#14 + eor $t0,$t0,$Ehi,lsr#9 + eor $t1,$t1,$Elo,lsr#9 + eor $t0,$t0,$Elo,lsl#23 + eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e) + adds $Tlo,$Tlo,$t0 + ldr $t0,[sp,#$Foff+0] @ f.lo + adc $Thi,$Thi,$t1 @ T += Sigma1(e) + ldr $t1,[sp,#$Foff+4] @ f.hi + adds $Tlo,$Tlo,$t2 + ldr $t2,[sp,#$Goff+0] @ g.lo + adc $Thi,$Thi,$t3 @ T += h + ldr $t3,[sp,#$Goff+4] @ g.hi + + eor $t0,$t0,$t2 + str $Elo,[sp,#$Eoff+0] + eor $t1,$t1,$t3 + str $Ehi,[sp,#$Eoff+4] + and $t0,$t0,$Elo + str $Alo,[sp,#$Aoff+0] + and $t1,$t1,$Ehi + str $Ahi,[sp,#$Aoff+4] + eor $t0,$t0,$t2 + ldr $t2,[$Ktbl,#$lo] @ K[i].lo + eor $t1,$t1,$t3 @ Ch(e,f,g) + ldr $t3,[$Ktbl,#$hi] @ K[i].hi + + adds $Tlo,$Tlo,$t0 + ldr $Elo,[sp,#$Doff+0] @ d.lo + adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) + ldr $Ehi,[sp,#$Doff+4] @ d.hi + adds $Tlo,$Tlo,$t2 + and $t0,$t2,#0xff + adc $Thi,$Thi,$t3 @ T += K[i] + adds $Elo,$Elo,$Tlo + ldr $t2,[sp,#$Boff+0] @ b.lo + adc $Ehi,$Ehi,$Thi @ d += T + teq $t0,#$magic + + ldr $t3,[sp,#$Coff+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq $Ktbl,$Ktbl,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov $t0,$Alo,lsr#28 + mov $t1,$Ahi,lsr#28 + eor $t0,$t0,$Ahi,lsl#4 + eor $t1,$t1,$Alo,lsl#4 + eor $t0,$t0,$Ahi,lsr#2 + eor $t1,$t1,$Alo,lsr#2 + eor $t0,$t0,$Alo,lsl#30 + eor $t1,$t1,$Ahi,lsl#30 + eor $t0,$t0,$Ahi,lsr#7 + eor $t1,$t1,$Alo,lsr#7 + eor $t0,$t0,$Alo,lsl#25 + eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a) + adds $Tlo,$Tlo,$t0 + and $t0,$Alo,$t2 + adc $Thi,$Thi,$t1 @ T += Sigma0(a) + + ldr $t1,[sp,#$Boff+4] @ b.hi + orr $Alo,$Alo,$t2 + ldr $t2,[sp,#$Coff+4] @ c.hi + and $Alo,$Alo,$t3 + and $t3,$Ahi,$t1 + orr $Ahi,$Ahi,$t1 + orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo + and $Ahi,$Ahi,$t2 + adds $Alo,$Alo,$Tlo + orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc $Ahi,$Ahi,$Thi @ h += T + tst $Ktbl,#1 + add $Ktbl,$Ktbl,#8 +___ +} +$code=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K512,%object +.align 5 +K512: +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) +WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 +#else +.skip 32 +#endif + +.global sha512_block_data_order +.type sha512_block_data_order,%function +sha512_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha512_block_data_order +#else + adr r3,sha512_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif + add $len,$inp,$len,lsl#7 @ len to point at the end of inp + stmdb sp!,{r4-r12,lr} + sub $Ktbl,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr $Elo,[$ctx,#$Eoff+$lo] + ldr $Ehi,[$ctx,#$Eoff+$hi] + ldr $t0, [$ctx,#$Goff+$lo] + ldr $t1, [$ctx,#$Goff+$hi] + ldr $t2, [$ctx,#$Hoff+$lo] + ldr $t3, [$ctx,#$Hoff+$hi] +.Loop: + str $t0, [sp,#$Goff+0] + str $t1, [sp,#$Goff+4] + str $t2, [sp,#$Hoff+0] + str $t3, [sp,#$Hoff+4] + ldr $Alo,[$ctx,#$Aoff+$lo] + ldr $Ahi,[$ctx,#$Aoff+$hi] + ldr $Tlo,[$ctx,#$Boff+$lo] + ldr $Thi,[$ctx,#$Boff+$hi] + ldr $t0, [$ctx,#$Coff+$lo] + ldr $t1, [$ctx,#$Coff+$hi] + ldr $t2, [$ctx,#$Doff+$lo] + ldr $t3, [$ctx,#$Doff+$hi] + str $Tlo,[sp,#$Boff+0] + str $Thi,[sp,#$Boff+4] + str $t0, [sp,#$Coff+0] + str $t1, [sp,#$Coff+4] + str $t2, [sp,#$Doff+0] + str $t3, [sp,#$Doff+4] + ldr $Tlo,[$ctx,#$Foff+$lo] + ldr $Thi,[$ctx,#$Foff+$hi] + str $Tlo,[sp,#$Foff+0] + str $Thi,[sp,#$Foff+4] + +.L00_15: +#if __ARM_ARCH__<7 + ldrb $Tlo,[$inp,#7] + ldrb $t0, [$inp,#6] + ldrb $t1, [$inp,#5] + ldrb $t2, [$inp,#4] + ldrb $Thi,[$inp,#3] + ldrb $t3, [$inp,#2] + orr $Tlo,$Tlo,$t0,lsl#8 + ldrb $t0, [$inp,#1] + orr $Tlo,$Tlo,$t1,lsl#16 + ldrb $t1, [$inp],#8 + orr $Tlo,$Tlo,$t2,lsl#24 + orr $Thi,$Thi,$t3,lsl#8 + orr $Thi,$Thi,$t0,lsl#16 + orr $Thi,$Thi,$t1,lsl#24 +#else + ldr $Tlo,[$inp,#4] + ldr $Thi,[$inp],#8 +#ifdef __ARMEL__ + rev $Tlo,$Tlo + rev $Thi,$Thi +#endif +#endif +___ + &BODY_00_15(0x94); +$code.=<<___; + tst $Ktbl,#1 + beq .L00_15 + ldr $t0,[sp,#`$Xoff+8*(16-1)`+0] + ldr $t1,[sp,#`$Xoff+8*(16-1)`+4] + bic $Ktbl,$Ktbl,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov $Tlo,$t0,lsr#1 + ldr $t2,[sp,#`$Xoff+8*(16-14)`+0] + mov $Thi,$t1,lsr#1 + ldr $t3,[sp,#`$Xoff+8*(16-14)`+4] + eor $Tlo,$Tlo,$t1,lsl#31 + eor $Thi,$Thi,$t0,lsl#31 + eor $Tlo,$Tlo,$t0,lsr#8 + eor $Thi,$Thi,$t1,lsr#8 + eor $Tlo,$Tlo,$t1,lsl#24 + eor $Thi,$Thi,$t0,lsl#24 + eor $Tlo,$Tlo,$t0,lsr#7 + eor $Thi,$Thi,$t1,lsr#7 + eor $Tlo,$Tlo,$t1,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov $t0,$t2,lsr#19 + mov $t1,$t3,lsr#19 + eor $t0,$t0,$t3,lsl#13 + eor $t1,$t1,$t2,lsl#13 + eor $t0,$t0,$t3,lsr#29 + eor $t1,$t1,$t2,lsr#29 + eor $t0,$t0,$t2,lsl#3 + eor $t1,$t1,$t3,lsl#3 + eor $t0,$t0,$t2,lsr#6 + eor $t1,$t1,$t3,lsr#6 + ldr $t2,[sp,#`$Xoff+8*(16-9)`+0] + eor $t0,$t0,$t3,lsl#26 + + ldr $t3,[sp,#`$Xoff+8*(16-9)`+4] + adds $Tlo,$Tlo,$t0 + ldr $t0,[sp,#`$Xoff+8*16`+0] + adc $Thi,$Thi,$t1 + + ldr $t1,[sp,#`$Xoff+8*16`+4] + adds $Tlo,$Tlo,$t2 + adc $Thi,$Thi,$t3 + adds $Tlo,$Tlo,$t0 + adc $Thi,$Thi,$t1 +___ + &BODY_00_15(0x17); +$code.=<<___; +#if __ARM_ARCH__>=7 + ittt eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0] + ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4] + beq .L16_79 + bic $Ktbl,$Ktbl,#1 + + ldr $Tlo,[sp,#$Boff+0] + ldr $Thi,[sp,#$Boff+4] + ldr $t0, [$ctx,#$Aoff+$lo] + ldr $t1, [$ctx,#$Aoff+$hi] + ldr $t2, [$ctx,#$Boff+$lo] + ldr $t3, [$ctx,#$Boff+$hi] + adds $t0,$Alo,$t0 + str $t0, [$ctx,#$Aoff+$lo] + adc $t1,$Ahi,$t1 + str $t1, [$ctx,#$Aoff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Boff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Boff+$hi] + + ldr $Alo,[sp,#$Coff+0] + ldr $Ahi,[sp,#$Coff+4] + ldr $Tlo,[sp,#$Doff+0] + ldr $Thi,[sp,#$Doff+4] + ldr $t0, [$ctx,#$Coff+$lo] + ldr $t1, [$ctx,#$Coff+$hi] + ldr $t2, [$ctx,#$Doff+$lo] + ldr $t3, [$ctx,#$Doff+$hi] + adds $t0,$Alo,$t0 + str $t0, [$ctx,#$Coff+$lo] + adc $t1,$Ahi,$t1 + str $t1, [$ctx,#$Coff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Doff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Doff+$hi] + + ldr $Tlo,[sp,#$Foff+0] + ldr $Thi,[sp,#$Foff+4] + ldr $t0, [$ctx,#$Eoff+$lo] + ldr $t1, [$ctx,#$Eoff+$hi] + ldr $t2, [$ctx,#$Foff+$lo] + ldr $t3, [$ctx,#$Foff+$hi] + adds $Elo,$Elo,$t0 + str $Elo,[$ctx,#$Eoff+$lo] + adc $Ehi,$Ehi,$t1 + str $Ehi,[$ctx,#$Eoff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Foff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Foff+$hi] + + ldr $Alo,[sp,#$Goff+0] + ldr $Ahi,[sp,#$Goff+4] + ldr $Tlo,[sp,#$Hoff+0] + ldr $Thi,[sp,#$Hoff+4] + ldr $t0, [$ctx,#$Goff+$lo] + ldr $t1, [$ctx,#$Goff+$hi] + ldr $t2, [$ctx,#$Hoff+$lo] + ldr $t3, [$ctx,#$Hoff+$hi] + adds $t0,$Alo,$t0 + str $t0, [$ctx,#$Goff+$lo] + adc $t1,$Ahi,$t1 + str $t1, [$ctx,#$Goff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Hoff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Hoff+$hi] + + add sp,sp,#640 + sub $Ktbl,$Ktbl,#640 + + teq $inp,$len + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size sha512_block_data_order,.-sha512_block_data_order +___ + +{ +my @Sigma0=(28,34,39); +my @Sigma1=(14,18,41); +my @sigma0=(1, 8, 7); +my @sigma1=(19,61,6); + +my $Ktbl="r3"; +my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch + +my @X=map("d$_",(0..15)); +my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23)); + +sub NEON_00_15() { +my $i=shift; +my ($a,$b,$c,$d,$e,$f,$g,$h)=@_; +my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps + +$code.=<<___ if ($i<16 || $i&1); + vshr.u64 $t0,$e,#@Sigma1[0] @ $i +#if $i<16 + vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned +#endif + vshr.u64 $t1,$e,#@Sigma1[1] +#if $i>0 + vadd.i64 $a,$Maj @ h+=Maj from the past +#endif + vshr.u64 $t2,$e,#@Sigma1[2] +___ +$code.=<<___; + vld1.64 {$K},[$Ktbl,:64]! @ K[i++] + vsli.64 $t0,$e,#`64-@Sigma1[0]` + vsli.64 $t1,$e,#`64-@Sigma1[1]` + vmov $Ch,$e + vsli.64 $t2,$e,#`64-@Sigma1[2]` +#if $i<16 && defined(__ARMEL__) + vrev64.8 @X[$i],@X[$i] +#endif + veor $t1,$t0 + vbsl $Ch,$f,$g @ Ch(e,f,g) + vshr.u64 $t0,$a,#@Sigma0[0] + veor $t2,$t1 @ Sigma1(e) + vadd.i64 $T1,$Ch,$h + vshr.u64 $t1,$a,#@Sigma0[1] + vsli.64 $t0,$a,#`64-@Sigma0[0]` + vadd.i64 $T1,$t2 + vshr.u64 $t2,$a,#@Sigma0[2] + vadd.i64 $K,@X[$i%16] + vsli.64 $t1,$a,#`64-@Sigma0[1]` + veor $Maj,$a,$b + vsli.64 $t2,$a,#`64-@Sigma0[2]` + veor $h,$t0,$t1 + vadd.i64 $T1,$K + vbsl $Maj,$c,$b @ Maj(a,b,c) + veor $h,$t2 @ Sigma0(a) + vadd.i64 $d,$T1 + vadd.i64 $Maj,$T1 + @ vadd.i64 $h,$Maj +___ +} + +sub NEON_16_79() { +my $i=shift; + +if ($i&1) { &NEON_00_15($i,@_); return; } + +# 2x-vectorized, therefore runs every 2nd round +my @X=map("q$_",(0..7)); # view @X as 128-bit vector +my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps +my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15 +my $e=@_[4]; # $e from NEON_00_15 +$i /= 2; +$code.=<<___; + vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0] + vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1] + vadd.i64 @_[0],d30 @ h+=Maj from the past + vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2] + vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]` + vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1] + vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]` + veor $s1,$t0 + vshr.u64 $t0,$s0,#@sigma0[0] + veor $s1,$t1 @ sigma1(X[i+14]) + vshr.u64 $t1,$s0,#@sigma0[1] + vadd.i64 @X[$i%8],$s1 + vshr.u64 $s1,$s0,#@sigma0[2] + vsli.64 $t0,$s0,#`64-@sigma0[0]` + vsli.64 $t1,$s0,#`64-@sigma0[1]` + vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9] + veor $s1,$t0 + vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15 + vadd.i64 @X[$i%8],$s0 + vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15 + veor $s1,$t1 @ sigma0(X[i+1]) + vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15 + vadd.i64 @X[$i%8],$s1 +___ + &NEON_00_15(2*$i,@_); +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha512_block_data_order_neon +.type sha512_block_data_order_neon,%function +.align 4 +sha512_block_data_order_neon: +.LNEON: + dmb @ errata #451034 on early Cortex A8 + add $len,$inp,$len,lsl#7 @ len to point at the end of inp + VFP_ABI_PUSH + adrl $Ktbl,K512 + vldmia $ctx,{$A-$H} @ load context +.Loop_neon: +___ +for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mov $cnt,#4 +.L16_79_neon: + subs $cnt,#1 +___ +for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bne .L16_79_neon + + vadd.i64 $A,d30 @ h+=Maj from the past + vldmia $ctx,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia $ctx,{$A-$H} @ save context + teq $inp,$len + sub $Ktbl,#640 @ rewind K512 + bne .Loop_neon + + VFP_ABI_POP + ret @ bx lr +.size sha512_block_data_order_neon,.-sha512_block_data_order_neon +#endif +___ +} +$code.=<<___; +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx lr/gm; + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +print $code; +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S deleted file mode 100644 index fe99472e507c..000000000000 --- a/arch/arm/crypto/sha512-armv7-neon.S +++ /dev/null @@ -1,455 +0,0 @@ -/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform - * - * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include <linux/linkage.h> - - -.syntax unified -.code 32 -.fpu neon - -.text - -/* structure of SHA512_CONTEXT */ -#define hd_a 0 -#define hd_b ((hd_a) + 8) -#define hd_c ((hd_b) + 8) -#define hd_d ((hd_c) + 8) -#define hd_e ((hd_d) + 8) -#define hd_f ((hd_e) + 8) -#define hd_g ((hd_f) + 8) - -/* register macros */ -#define RK %r2 - -#define RA d0 -#define RB d1 -#define RC d2 -#define RD d3 -#define RE d4 -#define RF d5 -#define RG d6 -#define RH d7 - -#define RT0 d8 -#define RT1 d9 -#define RT2 d10 -#define RT3 d11 -#define RT4 d12 -#define RT5 d13 -#define RT6 d14 -#define RT7 d15 - -#define RT01q q4 -#define RT23q q5 -#define RT45q q6 -#define RT67q q7 - -#define RW0 d16 -#define RW1 d17 -#define RW2 d18 -#define RW3 d19 -#define RW4 d20 -#define RW5 d21 -#define RW6 d22 -#define RW7 d23 -#define RW8 d24 -#define RW9 d25 -#define RW10 d26 -#define RW11 d27 -#define RW12 d28 -#define RW13 d29 -#define RW14 d30 -#define RW15 d31 - -#define RW01q q8 -#define RW23q q9 -#define RW45q q10 -#define RW67q q11 -#define RW89q q12 -#define RW1011q q13 -#define RW1213q q14 -#define RW1415q q15 - -/*********************************************************************** - * ARM assembly implementation of sha512 transform - ***********************************************************************/ -#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \ - rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \ - /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ - vshr.u64 RT2, re, #14; \ - vshl.u64 RT3, re, #64 - 14; \ - interleave_op(arg1); \ - vshr.u64 RT4, re, #18; \ - vshl.u64 RT5, re, #64 - 18; \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, re, #41; \ - vshl.u64 RT5, re, #64 - 41; \ - vadd.u64 RT0, RT0, rw0; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, re; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, rf, rg; \ - \ - vadd.u64 RT1, RT1, rh; \ - vshr.u64 RT2, ra, #28; \ - vshl.u64 RT3, ra, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ - vshr.u64 RT4, ra, #34; \ - vshl.u64 RT5, ra, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* h = Sum0 (a) + Maj (a, b, c); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, ra, #39; \ - vshl.u64 RT5, ra, #64 - 39; \ - veor.64 RT0, ra, rb; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rc, rb; \ - vadd.u64 rd, rd, RT1; /* d+=t1; */ \ - veor.64 rh, RT2, RT3; \ - \ - /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ - vshr.u64 RT2, rd, #14; \ - vshl.u64 RT3, rd, #64 - 14; \ - vadd.u64 rh, rh, RT0; \ - vshr.u64 RT4, rd, #18; \ - vshl.u64 RT5, rd, #64 - 18; \ - vadd.u64 rh, rh, RT1; /* h+=t1; */ \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rd, #41; \ - vshl.u64 RT5, rd, #64 - 41; \ - vadd.u64 RT0, RT0, rw1; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, rd; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, re, rf; \ - \ - vadd.u64 RT1, RT1, rg; \ - vshr.u64 RT2, rh, #28; \ - vshl.u64 RT3, rh, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ - vshr.u64 RT4, rh, #34; \ - vshl.u64 RT5, rh, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* g = Sum0 (h) + Maj (h, a, b); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rh, #39; \ - vshl.u64 RT5, rh, #64 - 39; \ - veor.64 RT0, rh, ra; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rb, ra; \ - vadd.u64 rc, rc, RT1; /* c+=t1; */ \ - veor.64 rg, RT2, RT3; \ - \ - /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \ - /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \ - \ - /**** S0(w[1:2]) */ \ - \ - /* w[0:1] += w[9:10] */ \ - /* RT23q = rw1:rw2 */ \ - vext.u64 RT23q, rw01q, rw23q, #1; \ - vadd.u64 rw0, rw9; \ - vadd.u64 rg, rg, RT0; \ - vadd.u64 rw1, rw10;\ - vadd.u64 rg, rg, RT1; /* g+=t1; */ \ - \ - vshr.u64 RT45q, RT23q, #1; \ - vshl.u64 RT67q, RT23q, #64 - 1; \ - vshr.u64 RT01q, RT23q, #8; \ - veor.u64 RT45q, RT45q, RT67q; \ - vshl.u64 RT67q, RT23q, #64 - 8; \ - veor.u64 RT45q, RT45q, RT01q; \ - vshr.u64 RT01q, RT23q, #7; \ - veor.u64 RT45q, RT45q, RT67q; \ - \ - /**** S1(w[14:15]) */ \ - vshr.u64 RT23q, rw1415q, #6; \ - veor.u64 RT01q, RT01q, RT45q; \ - vshr.u64 RT45q, rw1415q, #19; \ - vshl.u64 RT67q, rw1415q, #64 - 19; \ - veor.u64 RT23q, RT23q, RT45q; \ - vshr.u64 RT45q, rw1415q, #61; \ - veor.u64 RT23q, RT23q, RT67q; \ - vshl.u64 RT67q, rw1415q, #64 - 61; \ - veor.u64 RT23q, RT23q, RT45q; \ - vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \ - veor.u64 RT01q, RT23q, RT67q; -#define vadd_RT01q(rw01q) \ - /* w[0:1] += S(w[14:15]) */ \ - vadd.u64 rw01q, RT01q; - -#define dummy(_) /*_*/ - -#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \ - interleave_op1, arg1, interleave_op2, arg2) \ - /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ - vshr.u64 RT2, re, #14; \ - vshl.u64 RT3, re, #64 - 14; \ - interleave_op1(arg1); \ - vshr.u64 RT4, re, #18; \ - vshl.u64 RT5, re, #64 - 18; \ - interleave_op2(arg2); \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, re, #41; \ - vshl.u64 RT5, re, #64 - 41; \ - vadd.u64 RT0, RT0, rw0; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, re; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, rf, rg; \ - \ - vadd.u64 RT1, RT1, rh; \ - vshr.u64 RT2, ra, #28; \ - vshl.u64 RT3, ra, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ - vshr.u64 RT4, ra, #34; \ - vshl.u64 RT5, ra, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* h = Sum0 (a) + Maj (a, b, c); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, ra, #39; \ - vshl.u64 RT5, ra, #64 - 39; \ - veor.64 RT0, ra, rb; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rc, rb; \ - vadd.u64 rd, rd, RT1; /* d+=t1; */ \ - veor.64 rh, RT2, RT3; \ - \ - /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ - vshr.u64 RT2, rd, #14; \ - vshl.u64 RT3, rd, #64 - 14; \ - vadd.u64 rh, rh, RT0; \ - vshr.u64 RT4, rd, #18; \ - vshl.u64 RT5, rd, #64 - 18; \ - vadd.u64 rh, rh, RT1; /* h+=t1; */ \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rd, #41; \ - vshl.u64 RT5, rd, #64 - 41; \ - vadd.u64 RT0, RT0, rw1; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, rd; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, re, rf; \ - \ - vadd.u64 RT1, RT1, rg; \ - vshr.u64 RT2, rh, #28; \ - vshl.u64 RT3, rh, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ - vshr.u64 RT4, rh, #34; \ - vshl.u64 RT5, rh, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* g = Sum0 (h) + Maj (h, a, b); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rh, #39; \ - vshl.u64 RT5, rh, #64 - 39; \ - veor.64 RT0, rh, ra; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rb, ra; \ - vadd.u64 rc, rc, RT1; /* c+=t1; */ \ - veor.64 rg, RT2, RT3; -#define vadd_rg_RT0(rg) \ - vadd.u64 rg, rg, RT0; -#define vadd_rg_RT1(rg) \ - vadd.u64 rg, rg, RT1; /* g+=t1; */ - -.align 3 -ENTRY(sha512_transform_neon) - /* Input: - * %r0: SHA512_CONTEXT - * %r1: data - * %r2: u64 k[] constants - * %r3: nblks - */ - push {%lr}; - - mov %lr, #0; - - /* Load context to d0-d7 */ - vld1.64 {RA-RD}, [%r0]!; - vld1.64 {RE-RH}, [%r0]; - sub %r0, #(4*8); - - /* Load input to w[16], d16-d31 */ - /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */ - vld1.64 {RW0-RW3}, [%r1]!; - vld1.64 {RW4-RW7}, [%r1]!; - vld1.64 {RW8-RW11}, [%r1]!; - vld1.64 {RW12-RW15}, [%r1]!; -#ifdef __ARMEL__ - /* byteswap */ - vrev64.8 RW01q, RW01q; - vrev64.8 RW23q, RW23q; - vrev64.8 RW45q, RW45q; - vrev64.8 RW67q, RW67q; - vrev64.8 RW89q, RW89q; - vrev64.8 RW1011q, RW1011q; - vrev64.8 RW1213q, RW1213q; - vrev64.8 RW1415q, RW1415q; -#endif - - /* EABI says that d8-d15 must be preserved by callee. */ - /*vpush {RT0-RT7};*/ - -.Loop: - rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, - RW23q, RW1415q, RW9, RW10, dummy, _); - b .Lenter_rounds; - -.Loop_rounds: - rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, - RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q); -.Lenter_rounds: - rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, - RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q); - rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, - RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q); - rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, - RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q); - rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, - RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q); - rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, - RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q); - add %lr, #16; - rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, - RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q); - cmp %lr, #64; - rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, - RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q); - bne .Loop_rounds; - - subs %r3, #1; - - rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, - vadd_RT01q, RW1415q, dummy, _); - rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, - vadd_rg_RT0, RG, vadd_rg_RT1, RG); - beq .Lhandle_tail; - vld1.64 {RW0-RW3}, [%r1]!; - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); -#ifdef __ARMEL__ - vrev64.8 RW01q, RW01q; - vrev64.8 RW23q, RW23q; -#endif - vld1.64 {RW4-RW7}, [%r1]!; - rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, - vadd_rg_RT0, RA, vadd_rg_RT1, RA); - rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, - vadd_rg_RT0, RG, vadd_rg_RT1, RG); -#ifdef __ARMEL__ - vrev64.8 RW45q, RW45q; - vrev64.8 RW67q, RW67q; -#endif - vld1.64 {RW8-RW11}, [%r1]!; - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); -#ifdef __ARMEL__ - vrev64.8 RW89q, RW89q; - vrev64.8 RW1011q, RW1011q; -#endif - vld1.64 {RW12-RW15}, [%r1]!; - vadd_rg_RT0(RA); - vadd_rg_RT1(RA); - - /* Load context */ - vld1.64 {RT0-RT3}, [%r0]!; - vld1.64 {RT4-RT7}, [%r0]; - sub %r0, #(4*8); - -#ifdef __ARMEL__ - vrev64.8 RW1213q, RW1213q; - vrev64.8 RW1415q, RW1415q; -#endif - - vadd.u64 RA, RT0; - vadd.u64 RB, RT1; - vadd.u64 RC, RT2; - vadd.u64 RD, RT3; - vadd.u64 RE, RT4; - vadd.u64 RF, RT5; - vadd.u64 RG, RT6; - vadd.u64 RH, RT7; - - /* Store the first half of context */ - vst1.64 {RA-RD}, [%r0]!; - sub RK, $(8*80); - vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ - mov %lr, #0; - sub %r0, #(4*8); - - b .Loop; - -.Lhandle_tail: - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); - rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, - vadd_rg_RT0, RA, vadd_rg_RT1, RA); - rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, - vadd_rg_RT0, RG, vadd_rg_RT1, RG); - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); - - /* Load context to d16-d23 */ - vld1.64 {RW0-RW3}, [%r0]!; - vadd_rg_RT0(RA); - vld1.64 {RW4-RW7}, [%r0]; - vadd_rg_RT1(RA); - sub %r0, #(4*8); - - vadd.u64 RA, RW0; - vadd.u64 RB, RW1; - vadd.u64 RC, RW2; - vadd.u64 RD, RW3; - vadd.u64 RE, RW4; - vadd.u64 RF, RW5; - vadd.u64 RG, RW6; - vadd.u64 RH, RW7; - - /* Store the first half of context */ - vst1.64 {RA-RD}, [%r0]!; - - /* Clear used registers */ - /* d16-d31 */ - veor.u64 RW01q, RW01q; - veor.u64 RW23q, RW23q; - veor.u64 RW45q, RW45q; - veor.u64 RW67q, RW67q; - vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ - veor.u64 RW89q, RW89q; - veor.u64 RW1011q, RW1011q; - veor.u64 RW1213q, RW1213q; - veor.u64 RW1415q, RW1415q; - /* d8-d15 */ - /*vpop {RT0-RT7};*/ - /* d0-d7 (q0-q3) */ - veor.u64 %q0, %q0; - veor.u64 %q1, %q1; - veor.u64 %q2, %q2; - veor.u64 %q3, %q3; - - pop {%pc}; -ENDPROC(sha512_transform_neon) diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped new file mode 100644 index 000000000000..3694c4d4ca2b --- /dev/null +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -0,0 +1,1861 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA512 block procedure for ARMv4. September 2007. + +@ This code is ~4.5 (four and a half) times faster than code generated +@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue +@ Xscale PXA250 core]. +@ +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 6% improvement on +@ Cortex A8 core and ~40 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 7% +@ improvement on Coxtex A8 core and ~38 cycles per byte. + +@ March 2011. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process +@ one byte in 23.3 cycles or ~60% faster than integer-only code. + +@ August 2012. +@ +@ Improve NEON performance by 12% on Snapdragon S4. In absolute +@ terms it's 22.6 cycles per byte, which is disappointing result. +@ Technical writers asserted that 3-way S4 pipeline can sustain +@ multiple NEON instructions per cycle, but dual NEON issue could +@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html +@ for further details. On side note Cortex-A15 processes one byte in +@ 16 cycles. + +@ Byte order [in]dependence. ========================================= +@ +@ Originally caller was expected to maintain specific *dword* order in +@ h[0-7], namely with most significant dword at *lower* address, which +@ was reflected in below two parameters as 0 and 4. Now caller is +@ expected to maintain native byte order for whole 64-bit values. +#ifndef __KERNEL__ +# include "arm_arch.h" +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K512,%object +.align 5 +K512: +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) +WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 +#else +.skip 32 +#endif + +.global sha512_block_data_order +.type sha512_block_data_order,%function +sha512_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha512_block_data_order +#else + adr r3,sha512_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif + add r2,r1,r2,lsl#7 @ len to point at the end of inp + stmdb sp!,{r4-r12,lr} + sub r14,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] +.Loop: + str r9, [sp,#48+0] + str r10, [sp,#48+4] + str r11, [sp,#56+0] + str r12, [sp,#56+4] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + str r3,[sp,#8+0] + str r4,[sp,#8+4] + str r9, [sp,#16+0] + str r10, [sp,#16+4] + str r11, [sp,#24+0] + str r12, [sp,#24+4] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] + str r3,[sp,#40+0] + str r4,[sp,#40+4] + +.L00_15: +#if __ARM_ARCH__<7 + ldrb r3,[r1,#7] + ldrb r9, [r1,#6] + ldrb r10, [r1,#5] + ldrb r11, [r1,#4] + ldrb r4,[r1,#3] + ldrb r12, [r1,#2] + orr r3,r3,r9,lsl#8 + ldrb r9, [r1,#1] + orr r3,r3,r10,lsl#16 + ldrb r10, [r1],#8 + orr r3,r3,r11,lsl#24 + orr r4,r4,r12,lsl#8 + orr r4,r4,r9,lsl#16 + orr r4,r4,r10,lsl#24 +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#148 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + tst r14,#1 + beq .L00_15 + ldr r9,[sp,#184+0] + ldr r10,[sp,#184+4] + bic r14,r14,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] + mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] + eor r3,r3,r10,lsl#31 + eor r4,r4,r9,lsl#31 + eor r3,r3,r9,lsr#8 + eor r4,r4,r10,lsr#8 + eor r3,r3,r10,lsl#24 + eor r4,r4,r9,lsl#24 + eor r3,r3,r9,lsr#7 + eor r4,r4,r10,lsr#7 + eor r3,r3,r10,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov r9,r11,lsr#19 + mov r10,r12,lsr#19 + eor r9,r9,r12,lsl#13 + eor r10,r10,r11,lsl#13 + eor r9,r9,r12,lsr#29 + eor r10,r10,r11,lsr#29 + eor r9,r9,r11,lsl#3 + eor r10,r10,r12,lsl#3 + eor r9,r9,r11,lsr#6 + eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] + eor r9,r9,r12,lsl#26 + + ldr r12,[sp,#120+4] + adds r3,r3,r9 + ldr r9,[sp,#192+0] + adc r4,r4,r10 + + ldr r10,[sp,#192+4] + adds r3,r3,r11 + adc r4,r4,r12 + adds r3,r3,r9 + adc r4,r4,r10 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#23 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 +#if __ARM_ARCH__>=7 + ittt eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] + beq .L16_79 + bic r14,r14,#1 + + ldr r3,[sp,#8+0] + ldr r4,[sp,#8+4] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] + adds r9,r5,r9 + str r9, [r0,#0+LO] + adc r10,r6,r10 + str r10, [r0,#0+HI] + adds r11,r3,r11 + str r11, [r0,#8+LO] + adc r12,r4,r12 + str r12, [r0,#8+HI] + + ldr r5,[sp,#16+0] + ldr r6,[sp,#16+4] + ldr r3,[sp,#24+0] + ldr r4,[sp,#24+4] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + adds r9,r5,r9 + str r9, [r0,#16+LO] + adc r10,r6,r10 + str r10, [r0,#16+HI] + adds r11,r3,r11 + str r11, [r0,#24+LO] + adc r12,r4,r12 + str r12, [r0,#24+HI] + + ldr r3,[sp,#40+0] + ldr r4,[sp,#40+4] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] + adds r7,r7,r9 + str r7,[r0,#32+LO] + adc r8,r8,r10 + str r8,[r0,#32+HI] + adds r11,r3,r11 + str r11, [r0,#40+LO] + adc r12,r4,r12 + str r12, [r0,#40+HI] + + ldr r5,[sp,#48+0] + ldr r6,[sp,#48+4] + ldr r3,[sp,#56+0] + ldr r4,[sp,#56+4] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] + adds r9,r5,r9 + str r9, [r0,#48+LO] + adc r10,r6,r10 + str r10, [r0,#48+HI] + adds r11,r3,r11 + str r11, [r0,#56+LO] + adc r12,r4,r12 + str r12, [r0,#56+HI] + + add sp,sp,#640 + sub r14,r14,#640 + + teq r1,r2 + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha512_block_data_order,.-sha512_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha512_block_data_order_neon +.type sha512_block_data_order_neon,%function +.align 4 +sha512_block_data_order_neon: +.LNEON: + dmb @ errata #451034 on early Cortex A8 + add r2,r1,r2,lsl#7 @ len to point at the end of inp + VFP_ABI_PUSH + adrl r3,K512 + vldmia r0,{d16-d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 0>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 1>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 2>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 3>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 4>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 5>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 6>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 7>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 8>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 9>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 10>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 11>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 12>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 13>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 14>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 15>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 17>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 19>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 21>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 23>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 25>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 27>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 29>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 31>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + bne .L16_79_neon + + vadd.i64 d16,d30 @ h+=Maj from the past + vldmia r0,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16-d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + VFP_ABI_POP + bx lr @ .word 0xe12fff1e +.size sha512_block_data_order_neon,.-sha512_block_data_order_neon +#endif +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c new file mode 100644 index 000000000000..269a394e4a53 --- /dev/null +++ b/arch/arm/crypto/sha512-glue.c @@ -0,0 +1,121 @@ +/* + * sha512-glue.c - accelerated SHA-384/512 for ARM + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/sha512_base.h> +#include <linux/crypto.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> + +#include "sha512.h" + +MODULE_DESCRIPTION("Accelerated SHA-384/SHA-512 secure hash for ARM"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha384-arm"); +MODULE_ALIAS_CRYPTO("sha512-arm"); + +asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks); + +int sha512_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order); +} + +int sha512_arm_final(struct shash_desc *desc, u8 *out) +{ + sha512_base_do_finalize(desc, + (sha512_block_fn *)sha512_block_data_order); + return sha512_base_finish(desc, out); +} + +int sha512_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order); + return sha512_arm_final(desc, out); +} + +static struct shash_alg sha512_arm_algs[] = { { + .init = sha384_base_init, + .update = sha512_arm_update, + .final = sha512_arm_final, + .finup = sha512_arm_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA384_DIGEST_SIZE, + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-arm", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .init = sha512_base_init, + .update = sha512_arm_update, + .final = sha512_arm_final, + .finup = sha512_arm_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA512_DIGEST_SIZE, + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-arm", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha512_arm_mod_init(void) +{ + int err; + + err = crypto_register_shashes(sha512_arm_algs, + ARRAY_SIZE(sha512_arm_algs)); + if (err) + return err; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { + err = crypto_register_shashes(sha512_neon_algs, + ARRAY_SIZE(sha512_neon_algs)); + if (err) + goto err_unregister; + } + return 0; + +err_unregister: + crypto_unregister_shashes(sha512_arm_algs, + ARRAY_SIZE(sha512_arm_algs)); + + return err; +} + +static void __exit sha512_arm_mod_fini(void) +{ + crypto_unregister_shashes(sha512_arm_algs, + ARRAY_SIZE(sha512_arm_algs)); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) + crypto_unregister_shashes(sha512_neon_algs, + ARRAY_SIZE(sha512_neon_algs)); +} + +module_init(sha512_arm_mod_init); +module_exit(sha512_arm_mod_fini); diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c new file mode 100644 index 000000000000..32693684a3ab --- /dev/null +++ b/arch/arm/crypto/sha512-neon-glue.c @@ -0,0 +1,98 @@ +/* + * sha512-neon-glue.c - accelerated SHA-384/512 for ARM NEON + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/sha512_base.h> +#include <linux/crypto.h> +#include <linux/module.h> + +#include <asm/simd.h> +#include <asm/neon.h> + +#include "sha512.h" + +MODULE_ALIAS_CRYPTO("sha384-neon"); +MODULE_ALIAS_CRYPTO("sha512-neon"); + +asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src, + int blocks); + +static int sha512_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + if (!may_use_simd() || + (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) + return sha512_arm_update(desc, data, len); + + kernel_neon_begin(); + sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order_neon); + kernel_neon_end(); + + return 0; +} + +static int sha512_neon_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!may_use_simd()) + return sha512_arm_finup(desc, data, len, out); + + kernel_neon_begin(); + if (len) + sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order_neon); + sha512_base_do_finalize(desc, + (sha512_block_fn *)sha512_block_data_order_neon); + kernel_neon_end(); + + return sha512_base_finish(desc, out); +} + +static int sha512_neon_final(struct shash_desc *desc, u8 *out) +{ + return sha512_neon_finup(desc, NULL, 0, out); +} + +struct shash_alg sha512_neon_algs[] = { { + .init = sha384_base_init, + .update = sha512_neon_update, + .final = sha512_neon_final, + .finup = sha512_neon_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA384_DIGEST_SIZE, + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-neon", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + + } +}, { + .init = sha512_base_init, + .update = sha512_neon_update, + .final = sha512_neon_final, + .finup = sha512_neon_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA512_DIGEST_SIZE, + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-neon", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; diff --git a/arch/arm/crypto/sha512.h b/arch/arm/crypto/sha512.h new file mode 100644 index 000000000000..a75d9a82988a --- /dev/null +++ b/arch/arm/crypto/sha512.h @@ -0,0 +1,8 @@ + +int sha512_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +int sha512_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out); + +extern struct shash_alg sha512_neon_algs[2]; diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c deleted file mode 100644 index b124dce838d6..000000000000 --- a/arch/arm/crypto/sha512_neon_glue.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Glue code for the SHA512 Secure Hash Algorithm assembly implementation - * using NEON instructions. - * - * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This file is based on sha512_ssse3_glue.c: - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <linux/string.h> -#include <crypto/sha.h> -#include <asm/byteorder.h> -#include <asm/simd.h> -#include <asm/neon.h> - - -static const u64 sha512_k[] = { - 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, - 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, - 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, - 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, - 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, - 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, - 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, - 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, - 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, - 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, - 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, - 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, - 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, - 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, - 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, - 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, - 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, - 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, - 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, - 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, - 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, - 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, - 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, - 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, - 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, - 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, - 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, - 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, - 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, - 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, - 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, - 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, - 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, - 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, - 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, - 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, - 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, - 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, - 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, - 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL -}; - - -asmlinkage void sha512_transform_neon(u64 *digest, const void *data, - const u64 k[], unsigned int num_blks); - - -static int sha512_neon_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA512_H0; - sctx->state[1] = SHA512_H1; - sctx->state[2] = SHA512_H2; - sctx->state[3] = SHA512_H3; - sctx->state[4] = SHA512_H4; - sctx->state[5] = SHA512_H5; - sctx->state[6] = SHA512_H6; - sctx->state[7] = SHA512_H7; - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static int __sha512_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - - if (partial) { - done = SHA512_BLOCK_SIZE - partial; - memcpy(sctx->buf + partial, data, done); - sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1); - } - - if (len - done >= SHA512_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; - - sha512_transform_neon(sctx->state, data + done, sha512_k, - rounds); - - done += rounds * SHA512_BLOCK_SIZE; - } - - memcpy(sctx->buf, data + done, len - done); - - return 0; -} - -static int sha512_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len < SHA512_BLOCK_SIZE) { - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - memcpy(sctx->buf + partial, data, len); - - return 0; - } - - if (!may_use_simd()) { - res = crypto_sha512_update(desc, data, len); - } else { - kernel_neon_begin(); - res = __sha512_neon_update(desc, data, len, partial); - kernel_neon_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha512_neon_final(struct shash_desc *desc, u8 *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be64 *dst = (__be64 *)out; - __be64 bits[2]; - static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits[1] = cpu_to_be64(sctx->count[0] << 3); - bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); - - /* Pad out to 112 mod 128 and append length */ - index = sctx->count[0] & 0x7f; - padlen = (index < 112) ? (112 - index) : ((128+112) - index); - - if (!may_use_simd()) { - crypto_sha512_update(desc, padding, padlen); - crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); - } else { - kernel_neon_begin(); - /* We need to fill a whole block for __sha512_neon_update() */ - if (padlen <= 112) { - sctx->count[0] += padlen; - if (sctx->count[0] < padlen) - sctx->count[1]++; - memcpy(sctx->buf + index, padding, padlen); - } else { - __sha512_neon_update(desc, padding, padlen, index); - } - __sha512_neon_update(desc, (const u8 *)&bits, - sizeof(bits), 112); - kernel_neon_end(); - } - - /* Store state in digest */ - for (i = 0; i < 8; i++) - dst[i] = cpu_to_be64(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha512_neon_export(struct shash_desc *desc, void *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha512_neon_import(struct shash_desc *desc, const void *in) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static int sha384_neon_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA384_H0; - sctx->state[1] = SHA384_H1; - sctx->state[2] = SHA384_H2; - sctx->state[3] = SHA384_H3; - sctx->state[4] = SHA384_H4; - sctx->state[5] = SHA384_H5; - sctx->state[6] = SHA384_H6; - sctx->state[7] = SHA384_H7; - - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static int sha384_neon_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA512_DIGEST_SIZE]; - - sha512_neon_final(desc, D); - - memcpy(hash, D, SHA384_DIGEST_SIZE); - memzero_explicit(D, SHA512_DIGEST_SIZE); - - return 0; -} - -static struct shash_alg algs[] = { { - .digestsize = SHA512_DIGEST_SIZE, - .init = sha512_neon_init, - .update = sha512_neon_update, - .final = sha512_neon_final, - .export = sha512_neon_export, - .import = sha512_neon_import, - .descsize = sizeof(struct sha512_state), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha512", - .cra_driver_name = "sha512-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA384_DIGEST_SIZE, - .init = sha384_neon_init, - .update = sha512_neon_update, - .final = sha384_neon_final, - .export = sha512_neon_export, - .import = sha512_neon_import, - .descsize = sizeof(struct sha512_state), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha384", - .cra_driver_name = "sha384-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha512_neon_mod_init(void) -{ - if (!cpu_has_neon()) - return -ENODEV; - - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha512_neon_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_init(sha512_neon_mod_init); -module_exit(sha512_neon_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated"); - -MODULE_ALIAS_CRYPTO("sha512"); -MODULE_ALIAS_CRYPTO("sha384"); diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 3c4596d0ce6c..83c50193626c 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += scatterlist.h generic-y += seccomp.h generic-y += sections.h generic-y += segment.h diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 186270b3e194..4abe57279c66 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -178,6 +178,21 @@ .endm /* + * Assembly version of "adr rd, BSYM(sym)". This should only be used to + * reference local symbols in the same assembly file which are to be + * resolved by the assembler. Other usage is undefined. + */ + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo + .macro badr\c, rd, sym +#ifdef CONFIG_THUMB2_KERNEL + adr\c \rd, \sym + 1 +#else + adr\c \rd, \sym +#endif + .endm + .endr + +/* * Get current thread_info. */ .macro get_thread_info, rd @@ -326,7 +341,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) bne 1f orr \reg, \reg, #PSR_A_BIT - adr lr, BSYM(2f) + badr lr, 2f msr spsr_cxsf, \reg __MSR_ELR_HYP(14) __ERET diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index d2f81e6b8c1c..6c2327e1c732 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2d46862e7bef..4812cda8fd17 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -482,10 +482,17 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ "r9","r10","lr","memory" ) +#ifdef CONFIG_MMU int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +#else +static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +#endif #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index abb2c3769b01..1692a05d3207 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -94,6 +94,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; #endif default: + /* Cause a link-time error, the xchg() size is not supported */ __bad_xchg(ptr, size), ret = 0; break; } @@ -102,8 +103,10 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size return ret; } -#define xchg(ptr,x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define xchg(ptr, x) ({ \ + (__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \ + sizeof(*(ptr))); \ +}) #include <asm-generic/cmpxchg-local.h> @@ -118,14 +121,16 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make * them available. */ -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ - (unsigned long)(n), sizeof(*(ptr)))) +#define cmpxchg_local(ptr, o, n) ({ \ + (__typeof(*ptr))__cmpxchg_local_generic((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ +}) + #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) -#ifndef CONFIG_SMP #include <asm-generic/cmpxchg.h> -#endif #else /* min ARCH >= ARMv6 */ @@ -201,11 +206,12 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, return ret; } -#define cmpxchg(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) +#define cmpxchg(ptr,o,n) ({ \ + (__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ +}) static inline unsigned long __cmpxchg_local(volatile void *ptr, unsigned long old, @@ -227,6 +233,13 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, return ret; } +#define cmpxchg_local(ptr, o, n) ({ \ + (__typeof(*ptr))__cmpxchg_local((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ +}) + static inline unsigned long long __cmpxchg64(unsigned long long *ptr, unsigned long long old, unsigned long long new) @@ -252,6 +265,14 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr, return oldval; } +#define cmpxchg64_relaxed(ptr, o, n) ({ \ + (__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ +}) + +#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n)) + static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr, unsigned long long old, unsigned long long new) @@ -265,23 +286,11 @@ static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr, return ret; } -#define cmpxchg_local(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) - -#define cmpxchg64(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n))) - -#define cmpxchg64_relaxed(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n))) - -#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n)) +#define cmpxchg64(ptr, o, n) ({ \ + (__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ +}) #endif /* __LINUX_ARM_ARCH__ >= 6 */ diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index 99084431d6ae..bb4fa67da541 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -19,7 +19,7 @@ * It should not be re-used except for that purpose. */ #include <linux/spinlock.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <mach/isa-dma.h> diff --git a/arch/arm/include/asm/edac.h b/arch/arm/include/asm/edac.h index 0df7a2c1fc3d..5189fa819b60 100644 --- a/arch/arm/include/asm/edac.h +++ b/arch/arm/include/asm/edac.h @@ -18,11 +18,12 @@ #define ASM_EDAC_H /* * ECC atomic, DMA, SMP and interrupt safe scrub function. - * Implements the per arch atomic_scrub() that EDAC use for software + * Implements the per arch edac_atomic_scrub() that EDAC use for software * ECC scrubbing. It reads memory and then writes back the original * value, allowing the hardware to detect and correct memory errors. */ -static inline void atomic_scrub(void *va, u32 size) + +static inline void edac_atomic_scrub(void *va, u32 size) { #if __LINUX_ARM_ARCH__ >= 6 unsigned int *virt_addr = va; diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S index 469a2b30fa27..609184f522ee 100644 --- a/arch/arm/include/asm/entry-macro-multi.S +++ b/arch/arm/include/asm/entry-macro-multi.S @@ -10,7 +10,7 @@ @ @ routine called with r0 = irq number, r1 = struct pt_regs * @ - adrne lr, BSYM(1b) + badrne lr, 1b bne asm_do_IRQ #ifdef CONFIG_SMP @@ -23,7 +23,7 @@ ALT_SMP(test_for_ipi r0, r2, r6, lr) ALT_UP_B(9997f) movne r1, sp - adrne lr, BSYM(1b) + badrne lr, 1b bne do_IPI #endif 9997: diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 4e78065a16aa..5eed82809d82 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + preempt_disable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" @@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = val; + preempt_enable(); + return ret; } @@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ +#ifndef CONFIG_SMP + preempt_disable(); +#endif + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); +#ifndef CONFIG_SMP + preempt_enable(); +#endif if (!ret) { switch (cmp) { diff --git a/arch/arm/include/asm/hardware/arm_timer.h b/arch/arm/include/asm/hardware/arm_timer.h deleted file mode 100644 index d6030ff599db..000000000000 --- a/arch/arm/include/asm/hardware/arm_timer.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef __ASM_ARM_HARDWARE_ARM_TIMER_H -#define __ASM_ARM_HARDWARE_ARM_TIMER_H - -/* - * ARM timer implementation, found in Integrator, Versatile and Realview - * platforms. Not all platforms support all registers and bits in these - * registers, so we mark them with A for Integrator AP, C for Integrator - * CP, V for Versatile and R for Realview. - * - * Integrator AP has 16-bit timers, Integrator CP, Versatile and Realview - * can have 16-bit or 32-bit selectable via a bit in the control register. - * - * Every SP804 contains two identical timers. - */ -#define TIMER_1_BASE 0x00 -#define TIMER_2_BASE 0x20 - -#define TIMER_LOAD 0x00 /* ACVR rw */ -#define TIMER_VALUE 0x04 /* ACVR ro */ -#define TIMER_CTRL 0x08 /* ACVR rw */ -#define TIMER_CTRL_ONESHOT (1 << 0) /* CVR */ -#define TIMER_CTRL_32BIT (1 << 1) /* CVR */ -#define TIMER_CTRL_DIV1 (0 << 2) /* ACVR */ -#define TIMER_CTRL_DIV16 (1 << 2) /* ACVR */ -#define TIMER_CTRL_DIV256 (2 << 2) /* ACVR */ -#define TIMER_CTRL_IE (1 << 5) /* VR */ -#define TIMER_CTRL_PERIODIC (1 << 6) /* ACVR */ -#define TIMER_CTRL_ENABLE (1 << 7) /* ACVR */ - -#define TIMER_INTCLR 0x0c /* ACVR wo */ -#define TIMER_RIS 0x10 /* CVR ro */ -#define TIMER_MIS 0x14 /* CVR ro */ -#define TIMER_BGLOAD 0x18 /* CVR rw */ - -#endif diff --git a/arch/arm/include/asm/hardware/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h deleted file mode 100644 index bb28af7c32de..000000000000 --- a/arch/arm/include/asm/hardware/timer-sp.h +++ /dev/null @@ -1,23 +0,0 @@ -struct clk; - -void __sp804_clocksource_and_sched_clock_init(void __iomem *, - const char *, struct clk *, int); -void __sp804_clockevents_init(void __iomem *, unsigned int, - struct clk *, const char *); - -static inline void sp804_clocksource_init(void __iomem *base, const char *name) -{ - __sp804_clocksource_and_sched_clock_init(base, name, NULL, 0); -} - -static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base, - const char *name) -{ - __sp804_clocksource_and_sched_clock_init(base, name, NULL, 1); -} - -static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name) -{ - __sp804_clockevents_init(base, irq, NULL, name); - -} diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index 1f1b1cd112f3..7d26f6c4f0f5 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -53,10 +53,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline int huge_pte_none(pte_t pte) { return pte_none(pte); @@ -67,15 +63,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) return pte_wrprotect(pte); } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index db58deb00aa7..1c3938f26beb 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -23,6 +23,7 @@ #ifdef __KERNEL__ +#include <linux/string.h> #include <linux/types.h> #include <linux/blk_types.h> #include <asm/byteorder.h> @@ -73,17 +74,16 @@ void __raw_readsl(const volatile void __iomem *addr, void *data, int longlen); static inline void __raw_writew(u16 val, volatile void __iomem *addr) { asm volatile("strh %1, %0" - : "+Q" (*(volatile u16 __force *)addr) - : "r" (val)); + : : "Q" (*(volatile u16 __force *)addr), "r" (val)); } #define __raw_readw __raw_readw static inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; - asm volatile("ldrh %1, %0" - : "+Q" (*(volatile u16 __force *)addr), - "=r" (val)); + asm volatile("ldrh %0, %1" + : "=r" (val) + : "Q" (*(volatile u16 __force *)addr)); return val; } #endif @@ -92,25 +92,23 @@ static inline u16 __raw_readw(const volatile void __iomem *addr) static inline void __raw_writeb(u8 val, volatile void __iomem *addr) { asm volatile("strb %1, %0" - : "+Qo" (*(volatile u8 __force *)addr) - : "r" (val)); + : : "Qo" (*(volatile u8 __force *)addr), "r" (val)); } #define __raw_writel __raw_writel static inline void __raw_writel(u32 val, volatile void __iomem *addr) { asm volatile("str %1, %0" - : "+Qo" (*(volatile u32 __force *)addr) - : "r" (val)); + : : "Qo" (*(volatile u32 __force *)addr), "r" (val)); } #define __raw_readb __raw_readb static inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; - asm volatile("ldrb %1, %0" - : "+Qo" (*(volatile u8 __force *)addr), - "=r" (val)); + asm volatile("ldrb %0, %1" + : "=r" (val) + : "Qo" (*(volatile u8 __force *)addr)); return val; } @@ -118,9 +116,9 @@ static inline u8 __raw_readb(const volatile void __iomem *addr) static inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; - asm volatile("ldr %1, %0" - : "+Qo" (*(volatile u32 __force *)addr), - "=r" (val)); + asm volatile("ldr %0, %1" + : "=r" (val) + : "Qo" (*(volatile u32 __force *)addr)); return val; } @@ -319,9 +317,33 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define writesw(p,d,l) __raw_writesw(p,d,l) #define writesl(p,d,l) __raw_writesl(p,d,l) +#ifndef __ARMBE__ +static inline void memset_io(volatile void __iomem *dst, unsigned c, + size_t count) +{ + memset((void __force *)dst, c, count); +} +#define memset_io(dst,c,count) memset_io(dst,c,count) + +static inline void memcpy_fromio(void *to, const volatile void __iomem *from, + size_t count) +{ + memcpy(to, (const void __force *)from, count); +} +#define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count) + +static inline void memcpy_toio(volatile void __iomem *to, const void *from, + size_t count) +{ + memcpy((void __force *)to, from, count); +} +#define memcpy_toio(to,from,count) memcpy_toio(to,from,count) + +#else #define memset_io(c,v,l) _memset_io(c,(v),(l)) #define memcpy_fromio(a,c,l) _memcpy_fromio((a),c,(l)) #define memcpy_toio(c,a,l) _memcpy_toio(c,(a),(l)) +#endif #endif /* readl */ @@ -336,6 +358,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) +#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define iounmap __arm_iounmap /* diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h index 3b763d6652a0..43908146a5cf 100644 --- a/arch/arm/include/asm/irqflags.h +++ b/arch/arm/include/asm/irqflags.h @@ -20,6 +20,7 @@ #if __LINUX_ARM_ARCH__ >= 6 +#define arch_local_irq_save arch_local_irq_save static inline unsigned long arch_local_irq_save(void) { unsigned long flags; @@ -31,6 +32,7 @@ static inline unsigned long arch_local_irq_save(void) return flags; } +#define arch_local_irq_enable arch_local_irq_enable static inline void arch_local_irq_enable(void) { asm volatile( @@ -40,6 +42,7 @@ static inline void arch_local_irq_enable(void) : "memory", "cc"); } +#define arch_local_irq_disable arch_local_irq_disable static inline void arch_local_irq_disable(void) { asm volatile( @@ -56,6 +59,7 @@ static inline void arch_local_irq_disable(void) /* * Save the current interrupt enable state & disable IRQs */ +#define arch_local_irq_save arch_local_irq_save static inline unsigned long arch_local_irq_save(void) { unsigned long flags, temp; @@ -73,6 +77,7 @@ static inline unsigned long arch_local_irq_save(void) /* * Enable IRQs */ +#define arch_local_irq_enable arch_local_irq_enable static inline void arch_local_irq_enable(void) { unsigned long temp; @@ -88,6 +93,7 @@ static inline void arch_local_irq_enable(void) /* * Disable IRQs */ +#define arch_local_irq_disable arch_local_irq_disable static inline void arch_local_irq_disable(void) { unsigned long temp; @@ -135,6 +141,7 @@ static inline void arch_local_irq_disable(void) /* * Save the current interrupt enable state. */ +#define arch_local_save_flags arch_local_save_flags static inline unsigned long arch_local_save_flags(void) { unsigned long flags; @@ -147,6 +154,7 @@ static inline unsigned long arch_local_save_flags(void) /* * restore saved IRQ & FIQ state */ +#define arch_local_irq_restore arch_local_irq_restore static inline void arch_local_irq_restore(unsigned long flags) { asm volatile( @@ -156,10 +164,13 @@ static inline void arch_local_irq_restore(unsigned long flags) : "memory", "cc"); } +#define arch_irqs_disabled_flags arch_irqs_disabled_flags static inline int arch_irqs_disabled_flags(unsigned long flags) { return flags & IRQMASK_I_BIT; } +#include <asm-generic/irqflags.h> + #endif /* ifdef __KERNEL__ */ #endif /* ifndef __ASM_ARM_IRQFLAGS_H */ diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 25410b2d8bc1..194c91b610ff 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -23,7 +23,7 @@ #define c0_MPIDR 1 /* MultiProcessor ID Register */ #define c0_CSSELR 2 /* Cache Size Selection Register */ #define c1_SCTLR 3 /* System Control Register */ -#define c1_ACTLR 4 /* Auxilliary Control Register */ +#define c1_ACTLR 4 /* Auxiliary Control Register */ #define c1_CPACR 5 /* Coprocessor Access Control */ #define c2_TTBR0 6 /* Translation Table Base Register 0 */ #define c2_TTBR0_high 7 /* TTBR0 top 32 bits */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index d71607c16601..e896d2c196e6 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -218,11 +218,6 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) return 0; } -static inline void vgic_arch_setup(const struct vgic_params *vgic) -{ - BUG_ON(vgic->type != VGIC_V2); -} - int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 0406cb3f1af7..cb3a40717edd 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -51,7 +51,7 @@ struct machine_desc { bool (*smp_init)(void); void (*fixup)(struct tag *, char **); void (*dt_fixup)(void); - void (*init_meminfo)(void); + long long (*pv_fixup)(void); void (*reserve)(void);/* reserve mem blocks */ void (*map_io)(void);/* IO mapping function */ void (*init_early)(void); diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 50b378f59e08..acd4983d9b1f 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -137,17 +137,12 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster); /** * mcpm_cpu_suspend - bring the calling CPU in a suspended state * - * @expected_residency: duration in microseconds the CPU is expected - * to remain suspended, or 0 if unknown/infinity. - * - * The calling CPU is suspended. The expected residency argument is used - * as a hint by the platform specific backend to implement the appropriate - * sleep state level according to the knowledge it has on wake-up latency - * for the given hardware. + * The calling CPU is suspended. This is similar to mcpm_cpu_power_down() + * except for possible extra platform specific configuration steps to allow + * an asynchronous wake-up e.g. with a pending interrupt. * * If this CPU is found to be the "last man standing" in the cluster - * then the cluster may be prepared for power-down too, if the expected - * residency makes it worthwhile. + * then the cluster may be prepared for power-down too. * * This must be called with interrupts disabled. * @@ -157,7 +152,7 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster); * This will return if mcpm_platform_register() has not been called * previously in which case the caller should take appropriate action. */ -void mcpm_cpu_suspend(u64 expected_residency); +void mcpm_cpu_suspend(void); /** * mcpm_cpu_powered_up - housekeeping workafter a CPU has been powered up @@ -234,12 +229,6 @@ struct mcpm_platform_ops { void (*cpu_is_up)(unsigned int cpu, unsigned int cluster); void (*cluster_is_up)(unsigned int cluster); int (*wait_for_powerdown)(unsigned int cpu, unsigned int cluster); - - /* deprecated callbacks */ - int (*power_up)(unsigned int cpu, unsigned int cluster); - void (*power_down)(void); - void (*suspend)(u64); - void (*powered_up)(void); }; /** @@ -251,35 +240,6 @@ struct mcpm_platform_ops { */ int __init mcpm_platform_register(const struct mcpm_platform_ops *ops); -/* Synchronisation structures for coordinating safe cluster setup/teardown: */ - -/* - * When modifying this structure, make sure you update the MCPM_SYNC_ defines - * to match. - */ -struct mcpm_sync_struct { - /* individual CPU states */ - struct { - s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE); - } cpus[MAX_CPUS_PER_CLUSTER]; - - /* cluster state */ - s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE); - - /* inbound-side state */ - s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE); -}; - -struct sync_struct { - struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS]; -}; - -void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster); -void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster); -void __mcpm_outbound_leave_critical(unsigned int cluster, int state); -bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster); -int __mcpm_cluster_state(unsigned int cluster); - /** * mcpm_sync_init - Initialize the cluster synchronization support * @@ -318,6 +278,29 @@ int __init mcpm_loopback(void (*cache_disable)(void)); void __init mcpm_smp_set_ops(void); +/* + * Synchronisation structures for coordinating safe cluster setup/teardown. + * This is private to the MCPM core code and shared between C and assembly. + * When modifying this structure, make sure you update the MCPM_SYNC_ defines + * to match. + */ +struct mcpm_sync_struct { + /* individual CPU states */ + struct { + s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE); + } cpus[MAX_CPUS_PER_CLUSTER]; + + /* cluster state */ + s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE); + + /* inbound-side state */ + s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE); +}; + +struct sync_struct { + struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS]; +}; + #else /* diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 184def0e1652..3a72d69b3255 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,8 +18,6 @@ #include <linux/types.h> #include <linux/sizes.h> -#include <asm/cache.h> - #ifdef CONFIG_NEED_MACH_MEMORY_H #include <mach/memory.h> #endif @@ -133,20 +131,6 @@ #define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) /* - * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed - * around in head.S and proc-*.S are shifted by this amount, in order to - * leave spare high bits for systems with physical address extension. This - * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but - * gives us about 38-bits or so. - */ -#ifdef CONFIG_ARM_LPAE -#define ARCH_PGD_SHIFT L1_CACHE_SHIFT -#else -#define ARCH_PGD_SHIFT 0 -#endif -#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1) - -/* * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical * memory. This is used for XIP and NoMMU kernels, and on platforms that don't * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use diff --git a/arch/arm/include/asm/mm-arch-hooks.h b/arch/arm/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..7056660c7cc4 --- /dev/null +++ b/arch/arm/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ARM_MM_ARCH_HOOKS_H +#define _ASM_ARM_MM_ARCH_HOOKS_H + +#endif /* _ASM_ARM_MM_ARCH_HOOKS_H */ diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index ed690c49ef93..e358b7966c06 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -16,11 +16,21 @@ enum { ARM_SEC_UNLIKELY, ARM_SEC_MAX, }; +#endif struct mod_arch_specific { +#ifdef CONFIG_ARM_UNWIND struct unwind_table *unwind[ARM_SEC_MAX]; -}; #endif +#ifdef CONFIG_ARM_MODULE_PLTS + struct elf32_shdr *core_plt; + struct elf32_shdr *init_plt; + int core_plt_count; + int init_plt_count; +#endif +}; + +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val); /* * Add the ARM architecture version to the version magic string diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index 585dc33a7a24..a5635444ca41 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -31,16 +31,6 @@ static inline int pci_proc_domain(struct pci_bus *bus) */ #define PCI_DMA_BUS_IS_PHYS (1) -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - #define HAVE_PCI_MMAP extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index d9cf138fd7d4..4f9dec489931 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -19,4 +19,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #endif +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ARM_pc = (__ip); \ + (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + (regs)->ARM_sp = current_stack_pointer; \ + (regs)->ARM_cpsr = SVC_MODE; \ +} + #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 675e4ab79f68..3fc87dfd77e6 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -24,22 +24,10 @@ * interrupt and passed the address of the low level handler, * and can be used to implement any platform specific handling * before or after calling it. - * @runtime_resume: an optional handler which will be called by the - * runtime PM framework following a call to pm_runtime_get(). - * Note that if pm_runtime_get() is called more than once in - * succession this handler will only be called once. - * @runtime_suspend: an optional handler which will be called by the - * runtime PM framework following a call to pm_runtime_put(). - * Note that if pm_runtime_get() is called more than once in - * succession this handler will only be called following the - * final call to pm_runtime_put() that actually disables the - * hardware. */ struct arm_pmu_platdata { irqreturn_t (*handle_irq)(int irq, void *dev, irq_handler_t pmu_handler); - int (*runtime_resume)(struct device *dev); - int (*runtime_suspend)(struct device *dev); }; #ifdef CONFIG_HW_PERF_EVENTS @@ -92,6 +80,7 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + cpumask_t supported_cpus; int *irq_affinity; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); @@ -122,8 +111,6 @@ struct arm_pmu { #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) -extern const struct dev_pm_ops armpmu_dev_pm_ops; - int armpmu_register(struct arm_pmu *armpmu, int type); u64 armpmu_event_update(struct perf_event *event); @@ -158,6 +145,10 @@ struct pmu_probe_info { #define XSCALE_PMU_PROBE(_version, _fn) \ PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table); + #endif /* CONFIG_HW_PERF_EVENTS */ #endif /* __ARM_PMU_H__ */ diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 5324c1112f3a..8877ad5ffe10 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -125,13 +125,6 @@ extern void cpu_resume(void); ttbr; \ }) -#define cpu_set_ttbr(nr, val) \ - do { \ - u64 ttbr = val; \ - __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \ - : : "r" (ttbr)); \ - } while (0) - #define cpu_get_pgd() \ ({ \ u64 pg = cpu_get_ttbr(0); \ diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 18f5a554134f..2f3ac1ba6fb4 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void); struct secondary_data { union { unsigned long mpu_rgn_szr; - unsigned long pgdir; + u64 pgdir; }; unsigned long swapper_pg_dir; void *stack; @@ -69,6 +69,7 @@ struct secondary_data { extern struct secondary_data secondary_data; extern volatile int pen_release; extern void secondary_startup(void); +extern void secondary_startup_arm(void); extern int __cpu_disable(void); diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h index 720ea0320a6d..3860cbd401ec 100644 --- a/arch/arm/include/asm/system_info.h +++ b/arch/arm/include/asm/system_info.h @@ -17,6 +17,7 @@ /* information about the system we're running on */ extern unsigned int system_rev; +extern const char *system_serial; extern unsigned int system_serial_low; extern unsigned int system_serial_high; extern unsigned int mem_fclk_21285; diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 2fe85fff5cca..370f7a732900 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -18,7 +18,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) -#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index 200f9a7cd623..a91ae499614c 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -45,7 +45,6 @@ #define THUMB(x...) x #ifdef __ASSEMBLY__ #define W(instr) instr.w -#define BSYM(sym) sym + 1 #else #define WASM(instr) #instr ".w" #endif @@ -59,7 +58,6 @@ #define THUMB(x...) #ifdef __ASSEMBLY__ #define W(instr) instr -#define BSYM(sym) sym #else #define WASM(instr) #instr #endif diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h index 1317ee40f4df..04ff8e7b37df 100644 --- a/arch/arm/include/asm/xen/hypervisor.h +++ b/arch/arm/include/asm/xen/hypervisor.h @@ -1,6 +1,8 @@ #ifndef _ASM_ARM_XEN_HYPERVISOR_H #define _ASM_ARM_XEN_HYPERVISOR_H +#include <linux/init.h> + extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; @@ -18,4 +20,10 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) extern struct dma_map_ops *xen_dma_ops; +#ifdef CONFIG_XEN +void __init xen_early_init(void); +#else +static inline void xen_early_init(void) { return; } +#endif + #endif /* _ASM_ARM_XEN_HYPERVISOR_H */ diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 0b579b2f4e0e..1bee8ca12494 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -12,7 +12,6 @@ #include <xen/interface/grant_table.h> #define phys_to_machine_mapping_valid(pfn) (1) -#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) #define pte_mfn pte_pfn #define mfn_pte pfn_pte diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 752725dcbf42..e69f7a19735d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o +obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o @@ -70,7 +71,9 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \ + perf_event_xscale.o perf_event_v6.o \ + perf_event_v7.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 570306c49406..7dac3086e361 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -15,6 +15,8 @@ * that causes it to save wrong values... Be aware! */ +#include <linux/init.h> + #include <asm/assembler.h> #include <asm/memory.h> #include <asm/glue-df.h> @@ -40,7 +42,7 @@ #ifdef CONFIG_MULTI_IRQ_HANDLER ldr r1, =handle_arch_irq mov r0, sp - adr lr, BSYM(9997f) + badr lr, 9997f ldr pc, [r1] #else arch_irq_handler_default @@ -273,7 +275,7 @@ __und_svc: str r4, [sp, #S_PC] orr r0, r9, r0, lsl #16 #endif - adr r9, BSYM(__und_svc_finish) + badr r9, __und_svc_finish mov r2, r4 bl call_fpe @@ -469,7 +471,7 @@ __und_usr: @ instruction, or the more conventional lr if we are to treat @ this as a real undefined instruction @ - adr r9, BSYM(ret_from_exception) + badr r9, ret_from_exception @ IRQs must be enabled before attempting to read the instruction from @ user space since that could cause a page/translation fault if the @@ -486,7 +488,7 @@ __und_usr: @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @ lr = 32-bit undefined instruction function - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 b call_fpe __und_usr_thumb: @@ -522,7 +524,7 @@ ARM_BE8(rev16 r0, r0) @ little endian instruction add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 @ r0 = the two 16-bit Thumb instructions which caused the exception @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) @ r4 = PC value for the first 16-bit Thumb instruction @@ -716,7 +718,7 @@ __und_usr_fault_32: __und_usr_fault_16: mov r1, #2 1: mov r0, sp - adr lr, BSYM(ret_from_exception) + badr lr, ret_from_exception b __und_fault ENDPROC(__und_usr_fault_32) ENDPROC(__und_usr_fault_16) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 4e7f40c577e6..92828a1dec80 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -90,7 +90,7 @@ ENTRY(ret_from_fork) bl schedule_tail cmp r5, #0 movne r0, r4 - adrne lr, BSYM(1f) + badrne lr, 1f retne r5 1: get_thread_info tsk b ret_slow_syscall @@ -198,7 +198,7 @@ local_restart: bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit - adr lr, BSYM(ret_fast_syscall) @ return address + badr lr, ret_fast_syscall @ return address ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF @@ -233,7 +233,7 @@ __sys_trace: add r0, sp, #S_OFF bl syscall_trace_enter - adr lr, BSYM(__sys_trace_return) @ return address + badr lr, __sys_trace_return @ return address mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index fe57c73e70a4..c73c4030ca5d 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -87,7 +87,7 @@ 1: mcount_get_lr r1 @ lr of instrumented func mcount_adjust_addr r0, lr @ instrumented function - adr lr, BSYM(2f) + badr lr, 2f mov pc, r2 2: mcount_exit .endm diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 8944f4991c3c..b6c8bb9315e7 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -117,9 +117,14 @@ ENTRY(__switch_to) ENDPROC(__switch_to) .data - .align 8 +#if CONFIG_CPU_V7M_NUM_IRQ <= 112 + .align 9 +#else + .align 10 +#endif + /* - * Vector table (64 words => 256 bytes natural alignment) + * Vector table (Natural alignment need to be ensured) */ ENTRY(vector_table) .long 0 @ 0 - Reset stack pointer @@ -138,6 +143,6 @@ ENTRY(vector_table) .long __invalid_entry @ 13 - Reserved .long __pendsv_entry @ 14 - PendSV .long __invalid_entry @ 15 - SysTick - .rept 64 - 16 - .long __irq_entry @ 16..64 - External Interrupts + .rept CONFIG_CPU_V7M_NUM_IRQ + .long __irq_entry @ External Interrupts .endr diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index aebfbf79a1a3..9b8c5a113434 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -46,7 +46,7 @@ ENTRY(stext) .arm ENTRY(stext) - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -77,13 +77,13 @@ ENTRY(stext) orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit bl __setup_mpu #endif - ldr r13, =__mmap_switched @ address to jump to after - @ initialising sctlr - adr lr, BSYM(1f) @ return (PIC) address + + badr lr, 1f @ return (PIC) address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 - 1: b __after_proc_init +1: bl __after_proc_init + b __mmap_switched ENDPROC(stext) #ifdef CONFIG_SMP @@ -106,8 +106,7 @@ ENTRY(secondary_startup) movs r10, r5 @ invalid processor? beq __error_p @ yes, error 'p' - adr r4, __secondary_data - ldmia r4, {r7, r12} + ldr r7, __secondary_data #ifdef CONFIG_ARM_MPU /* Use MPU region info supplied by __cpu_up */ @@ -115,23 +114,19 @@ ENTRY(secondary_startup) bl __setup_mpu @ Initialize the MPU #endif - adr lr, BSYM(__after_proc_init) @ return address - mov r13, r12 @ __secondary_switched address + badr lr, 1f @ return (PIC) address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 -ENDPROC(secondary_startup) - -ENTRY(__secondary_switched) - ldr sp, [r7, #8] @ set up the stack pointer +1: bl __after_proc_init + ldr sp, [r7, #12] @ set up the stack pointer mov fp, #0 b secondary_start_kernel -ENDPROC(__secondary_switched) +ENDPROC(secondary_startup) .type __secondary_data, %object __secondary_data: .long secondary_data - .long __secondary_switched #endif /* CONFIG_SMP */ /* @@ -164,7 +159,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - ret r13 + ret lr ENDPROC(__after_proc_init) .ltorg diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3637973a9708..bd755d97e459 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -80,7 +80,7 @@ ENTRY(stext) ARM_BE8(setend be ) @ ensure we are in BE8 mode - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -131,13 +131,30 @@ ENTRY(stext) * The following calls CPU specific code in a position independent * manner. See arch/arm/mm/proc-*.S for details. r10 = base of * xxx_proc_info structure selected by __lookup_processor_type - * above. On return, the CPU will be ready for the MMU to be - * turned on, and r0 will hold the CPU control register value. + * above. + * + * The processor init function will be called with: + * r1 - machine type + * r2 - boot data (atags/dt) pointer + * r4 - translation table base (low word) + * r5 - translation table base (high word, if LPAE) + * r8 - translation table base 1 (pfn if LPAE) + * r9 - cpuid + * r13 - virtual address for __enable_mmu -> __turn_mmu_on + * + * On return, the CPU will be ready for the MMU to be turned on, + * r0 will hold the CPU control register value, r1, r2, r4, and + * r9 will be preserved. r5 will also be preserved if LPAE. */ ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled - adr lr, BSYM(1f) @ return (PIC) address + badr lr, 1f @ return (PIC) address +#ifdef CONFIG_ARM_LPAE + mov r5, #0 @ high TTBR0 + mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn +#else mov r8, r4 @ set TTBR1 to swapper_pg_dir +#endif ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 @@ -158,7 +175,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = physical page table address */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -333,7 +350,6 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table - mov r4, r4, lsr #ARCH_PGD_SHIFT #endif ret lr ENDPROC(__create_page_tables) @@ -346,9 +362,9 @@ __turn_mmu_on_loc: #if defined(CONFIG_SMP) .text -ENTRY(secondary_startup_arm) .arm - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. +ENTRY(secondary_startup_arm) + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -381,10 +397,10 @@ ENTRY(secondary_startup) adr r4, __secondary_data ldmia r4, {r5, r7, r12} @ address to jump to after sub lr, r4, r5 @ mmu has been enabled - ldr r4, [r7, lr] @ get secondary_data.pgdir - add r7, r7, #4 - ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir - adr lr, BSYM(__enable_mmu) @ return address + add r3, r7, lr + ldrd r4, [r3, #0] @ get secondary_data.pgdir + ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir + badr lr, __enable_mmu @ return address mov r13, r12 @ __secondary_switched address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 @ initialise processor @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm) * r6 = &secondary_data */ ENTRY(__secondary_switched) - ldr sp, [r7, #4] @ get secondary_data.stack + ldr sp, [r7, #12] @ get secondary_data.stack mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) @@ -416,12 +432,14 @@ __secondary_data: /* * Setup common bits before finally enabling the MMU. Essentially * this is just loading the page table pointer and domain access - * registers. + * registers. All these registers need to be preserved by the + * processor setup function (or set in the case of r0) * * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = TTBR pointer (low word) + * r5 = TTBR pointer (high word if LPAE) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -440,7 +458,9 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r4, r5, c2 @ load TTBR0 +#else mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c new file mode 100644 index 000000000000..097e2e201b9f --- /dev/null +++ b/arch/arm/kernel/module-plts.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cache.h> +#include <asm/opcodes.h> + +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +#ifdef CONFIG_THUMB2_KERNEL +#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ + (PLT_ENT_STRIDE - 4)) +#else +#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \ + (PLT_ENT_STRIDE - 8)) +#endif + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + +static bool in_init(const struct module *mod, u32 addr) +{ + return addr - (u32)mod->module_init < mod->init_size; +} + +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) +{ + struct plt_entries *plt, *plt_end; + int c, *count; + + if (in_init(mod, loc)) { + plt = (void *)mod->arch.init_plt->sh_addr; + plt_end = (void *)plt + mod->arch.init_plt->sh_size; + count = &mod->arch.init_plt_count; + } else { + plt = (void *)mod->arch.core_plt->sh_addr; + plt_end = (void *)plt + mod->arch.core_plt->sh_size; + count = &mod->arch.core_plt_count; + } + + /* Look for an existing entry pointing to 'val' */ + for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { + int i; + + if (!c) { + /* Populate a new set of entries */ + *plt = (struct plt_entries){ + { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, + { val, } + }; + ++*count; + return (u32)plt->ldr; + } + for (i = 0; i < PLT_ENT_COUNT; i++) { + if (!plt->lit[i]) { + plt->lit[i] = val; + ++*count; + } + if (plt->lit[i] == val) + return (u32)&plt->ldr[i]; + } + } + BUG(); +} + +static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, + u32 mask) +{ + u32 *loc1, *loc2; + int i; + + for (i = 0; i < num; i++) { + if (rel[i].r_info != rel[num].r_info) + continue; + + /* + * Identical relocation types against identical symbols can + * still result in different PLT entries if the addend in the + * place is different. So resolve the target of the relocation + * to compare the values. + */ + loc1 = (u32 *)(base + rel[i].r_offset); + loc2 = (u32 *)(base + rel[num].r_offset); + if (((*loc1 ^ *loc2) & mask) == 0) + return 1; + } + return 0; +} + +/* Count how many PLT entries we may need */ +static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) +{ + unsigned int ret = 0; + int i; + + /* + * Sure, this is order(n^2), but it's usually short, and not + * time critical + */ + for (i = 0; i < num; i++) + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_ARM_CALL: + case R_ARM_PC24: + case R_ARM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_arm(0x00ffffff))) + ret++; + break; +#ifdef CONFIG_THUMB2_KERNEL + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_thumb32(0x07ff2fff))) + ret++; +#endif + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long core_plts = 0, init_plts = 0; + Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + + /* + * To store the PLTs, we expand the .text section for core module code + * and the .init.text section for initialization code. + */ + for (s = sechdrs; s < sechdrs_end; ++s) + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) + mod->arch.core_plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init_plt = s; + + if (!mod->arch.core_plt || !mod->arch.init_plt) { + pr_err("%s: sections missing\n", mod->name); + return -ENOEXEC; + } + + for (s = sechdrs + 1; s < sechdrs_end; ++s) { + const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; + int numrels = s->sh_size / sizeof(Elf32_Rel); + Elf32_Shdr *dstsec = sechdrs + s->sh_info; + + if (s->sh_type != SHT_REL) + continue; + + if (strstr(secstrings + s->sh_name, ".init")) + init_plts += count_plts(dstsec->sh_addr, rels, numrels); + else + core_plts += count_plts(dstsec->sh_addr, rels, numrels); + } + + mod->arch.core_plt->sh_type = SHT_NOBITS; + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.core_plt_count = 0; + + mod->arch.init_plt->sh_type = SHT_NOBITS; + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.init_plt_count = 0; + pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, + mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); + return 0; +} diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index af791f4a6205..efdddcb97dd1 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -40,7 +40,12 @@ #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) + return p; + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -110,6 +115,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x04000000; offset += sym->st_value - loc; + + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. Note that 'offset + loc + 8' + * contains the absolute jump target, i.e., + * @sym + addend, corrected for the +8 PC bias. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xfe000000 || + offset >= (s32)0x02000000)) + offset = get_module_plt(module, loc, + offset + loc + 8) + - loc - 8; + if (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", @@ -203,6 +222,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x02000000; offset += sym->st_value - loc; + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xff000000 || + offset >= (s32)0x01000000)) + offset = get_module_plt(module, loc, + offset + loc + 4) + - loc - 4; + if (offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds new file mode 100644 index 000000000000..3682fa107918 --- /dev/null +++ b/arch/arm/kernel/module.lds @@ -0,0 +1,4 @@ +SECTIONS { + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } +} diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 4a86a0133ac3..357f57ea83f4 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -11,12 +11,18 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/bitmap.h> +#include <linux/cpumask.h> +#include <linux/export.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/platform_device.h> -#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/irq.h> #include <linux/irqdesc.h> +#include <asm/cputype.h> #include <asm/irq_regs.h> #include <asm/pmu.h> @@ -229,6 +235,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return -ENOENT; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -344,20 +354,12 @@ static void armpmu_release_hardware(struct arm_pmu *armpmu) { armpmu->free_irq(armpmu); - pm_runtime_put_sync(&armpmu->plat_device->dev); } static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int err; - struct platform_device *pmu_device = armpmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); + int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -454,6 +456,17 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + /* + * Reject CPU-affine events for CPUs that are of a different class to + * that which this PMU handles. Process-following events (where + * event->cpu == -1) can be migrated between CPUs, and thus we have to + * reject them later (in armpmu_add) if they're scheduled on a + * different class of CPU. + */ + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; @@ -489,6 +502,10 @@ static void armpmu_enable(struct pmu *pmu) struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + if (enabled) armpmu->start(armpmu); } @@ -496,34 +513,25 @@ static void armpmu_enable(struct pmu *pmu) static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(armpmu); -} - -#ifdef CONFIG_PM -static int armpmu_runtime_resume(struct device *dev) -{ - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - if (plat && plat->runtime_resume) - return plat->runtime_resume(dev); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; - return 0; + armpmu->stop(armpmu); } -static int armpmu_runtime_suspend(struct device *dev) +/* + * In heterogeneous systems, events are specific to a particular + * microarchitecture, and aren't suitable for another. Thus, only match CPUs of + * the same microarchitecture. + */ +static int armpmu_filter_match(struct perf_event *event) { - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - - if (plat && plat->runtime_suspend) - return plat->runtime_suspend(dev); - - return 0; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + return cpumask_test_cpu(cpu, &armpmu->supported_cpus); } -#endif - -const struct dev_pm_ops armpmu_dev_pm_ops = { - SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) -}; static void armpmu_init(struct arm_pmu *armpmu) { @@ -539,15 +547,349 @@ static void armpmu_init(struct arm_pmu *armpmu) .start = armpmu_start, .stop = armpmu_stop, .read = armpmu_read, + .filter_match = armpmu_filter_match, }; } int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); - pm_runtime_enable(&armpmu->plat_device->dev); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *__oprofile_cpu_pmu; + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. + */ +const char *perf_pmu_name(void) +{ + if (!__oprofile_cpu_pmu) + return NULL; + + return __oprofile_cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &hw_events->percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. + */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. + */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int i, irq, *irqs; + struct platform_device *pdev = pmu->plat_device; + + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(pdev->dev.of_node), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + of_node_put(dn); + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + break; + } + + irqs[i] = cpu; + cpumask_set_cpu(cpu, &pmu->supported_cpus); + } + + if (i == pdev->num_resources) { + pmu->irq_affinity = irqs; + } else { + kfree(irqs); + cpumask_setall(&pmu->supported_cpus); + } + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c deleted file mode 100644 index 3b8c2833c537..000000000000 --- a/arch/arm/kernel/perf_event_cpu.c +++ /dev/null @@ -1,421 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> - */ -#define pr_fmt(fmt) "CPU PMU: " fmt - -#include <linux/bitmap.h> -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/irq.h> -#include <linux/irqdesc.h> - -#include <asm/cputype.h> -#include <asm/irq_regs.h> -#include <asm/pmu.h> - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. - */ -const char *perf_pmu_name(void) -{ - if (!cpu_pmu) - return NULL; - - return cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) -{ - int max_events = 0; - - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -/* Include the PMU-specific implementations. */ -#include "perf_event_xscale.c" -#include "perf_event_v6.c" -#include "perf_event_v7.c" - -static void cpu_pmu_enable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static void cpu_pmu_disable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - disable_percpu_irq(irq); -} - -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &hw_events->percpu_pmu); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - } - } -} - -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) -{ - int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); - return 0; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - - cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); - } - } - - return 0; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. - */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); - - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - - if (pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; - - return NOTIFY_OK; -} - -static int cpu_pmu_init(struct arm_pmu *cpu_pmu) -{ - int err; - int cpu; - struct pmu_hw_events __percpu *cpu_hw_events; - - cpu_hw_events = alloc_percpu(struct pmu_hw_events); - if (!cpu_hw_events) - return -ENOMEM; - - cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; - err = register_cpu_notifier(&cpu_pmu->hotplug_nb); - if (err) - goto out_hw_events; - - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); - events->percpu_pmu = cpu_pmu; - } - - cpu_pmu->hw_events = cpu_hw_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); - - /* If no interrupts available, set the corresponding capability flag */ - if (!platform_get_irq(cpu_pmu->plat_device, 0)) - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - - return 0; - -out_hw_events: - free_percpu(cpu_hw_events); - return err; -} - -static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) -{ - unregister_cpu_notifier(&cpu_pmu->hotplug_nb); - free_percpu(cpu_pmu->hw_events); -} - -/* - * PMU platform driver and devicetree bindings. - */ -static const struct of_device_id cpu_pmu_of_device_ids[] = { - {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, - {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, - {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, - {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, - {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, - {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, - {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, - {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, - {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, - {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, - {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, - {}, -}; - -static struct platform_device_id cpu_pmu_plat_device_ids[] = { - {.name = "arm-pmu"}, - {.name = "armv6-pmu"}, - {.name = "armv7-pmu"}, - {.name = "xscale-pmu"}, - {}, -}; - -static const struct pmu_probe_info pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), - { /* sentinel value */ } -}; - -/* - * CPU PMU identification and probing. - */ -static int probe_current_pmu(struct arm_pmu *pmu) -{ - int cpu = get_cpu(); - unsigned int cpuid = read_cpuid_id(); - int ret = -ENODEV; - const struct pmu_probe_info *info; - - pr_info("probing PMU on CPU %d\n", cpu); - - for (info = pmu_probe_table; info->init != NULL; info++) { - if ((cpuid & info->mask) != info->cpuid) - continue; - ret = info->init(pmu); - break; - } - - put_cpu(); - return ret; -} - -static int of_pmu_irq_cfg(struct platform_device *pdev) -{ - int i, irq; - int *irqs; - - /* Don't bother with PPIs; they're already affine */ - irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) - return 0; - - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - - for (i = 0; i < pdev->num_resources; ++i) { - struct device_node *dn; - int cpu; - - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", - i); - if (!dn) { - pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(pdev->dev.of_node), i); - break; - } - - for_each_possible_cpu(cpu) - if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) - break; - - of_node_put(dn); - if (cpu >= nr_cpu_ids) { - pr_warn("Failed to find logical CPU for %s\n", - dn->name); - break; - } - - irqs[i] = cpu; - } - - if (i == pdev->num_resources) - cpu_pmu->irq_affinity = irqs; - else - kfree(irqs); - - return 0; -} - -static int cpu_pmu_device_probe(struct platform_device *pdev) -{ - const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); - struct device_node *node = pdev->dev.of_node; - struct arm_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!\n"); - return -ENOSPC; - } - - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); - return -ENOMEM; - } - - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - - if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - - ret = of_pmu_irq_cfg(pdev); - if (!ret) - ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu); - } - - if (ret) { - pr_info("failed to probe PMU!\n"); - goto out_free; - } - - ret = cpu_pmu_init(cpu_pmu); - if (ret) - goto out_free; - - ret = armpmu_register(cpu_pmu, -1); - if (ret) - goto out_destroy; - - return 0; - -out_destroy: - cpu_pmu_destroy(cpu_pmu); -out_free: - pr_info("failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} - -static struct platform_driver cpu_pmu_driver = { - .driver = { - .name = "arm-pmu", - .pm = &armpmu_dev_pm_ops, - .of_match_table = cpu_pmu_of_device_ids, - }, - .probe = cpu_pmu_device_probe, - .id_table = cpu_pmu_plat_device_ids, -}; - -static int __init register_pmu_driver(void) -{ - return platform_driver_register(&cpu_pmu_driver); -} -device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index f2ffd5c542ed..09f83e414a72 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -31,6 +31,14 @@ */ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum armv6_perf_types { ARMV6_PERFCTR_ICACHE_MISS = 0x0, ARMV6_PERFCTR_IBUF_STALL = 0x1, @@ -543,24 +551,39 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct of_device_id armv6_pmu_of_device_ids[] = { + {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, + { /* sentinel value */ } +}; -static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +static const struct pmu_probe_info armv6_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), + { /* sentinel value */ } +}; + +static int armv6_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, + armv6_pmu_probe_table); } -static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver armv6_pmu_driver = { + .driver = { + .name = "armv6-pmu", + .of_match_table = armv6_pmu_of_device_ids, + }, + .probe = armv6_pmu_device_probe, +}; + +static int __init register_armv6_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv6_pmu_driver); } +device_initcall(register_armv6_pmu_driver); #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index f4207a4dcb01..f9b37f876e20 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -19,9 +19,15 @@ #ifdef CONFIG_CPU_V7 #include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> #include <asm/vfp.h> #include "../vfp/vfpinstr.h" +#include <linux/of.h> +#include <linux/platform_device.h> + /* * Common ARMv7 event types * @@ -1056,15 +1062,22 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->max_period = (1LLU << 32) - 1; }; -static u32 armv7_read_num_pmnc_events(void) +static void armv7_read_num_pmnc_events(void *info) { - u32 nb_cnt; + int *nb_cnt = info; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; + /* Add the CPU cycles counter */ + *nb_cnt += 1; +} + +static int armv7_probe_num_events(struct arm_pmu *arm_pmu) +{ + return smp_call_function_any(&arm_pmu->supported_cpus, + armv7_read_num_pmnc_events, + &arm_pmu->num_events, 1); } static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) @@ -1072,8 +1085,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) @@ -1081,8 +1093,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) @@ -1090,8 +1101,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) @@ -1099,9 +1109,8 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) @@ -1109,9 +1118,8 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) @@ -1119,16 +1127,15 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { - armv7_a12_pmu_init(cpu_pmu); + int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - return 0; + return ret; } /* @@ -1508,14 +1515,13 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->map_event = krait_map_event_no_branch; else cpu_pmu->map_event = krait_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; cpu_pmu->reset = krait_pmu_reset; cpu_pmu->enable = krait_pmu_enable_event; cpu_pmu->disable = krait_pmu_disable_event; cpu_pmu->get_event_idx = krait_pmu_get_event_idx; cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } /* @@ -1833,13 +1839,12 @@ static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_scorpion"; cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->reset = scorpion_pmu_reset; cpu_pmu->enable = scorpion_pmu_enable_event; cpu_pmu->disable = scorpion_pmu_disable_event; cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) @@ -1847,62 +1852,52 @@ static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_scorpion_mp"; cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->reset = scorpion_pmu_reset; cpu_pmu->enable = scorpion_pmu_enable_event; cpu_pmu->disable = scorpion_pmu_disable_event; cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return 0; -} -#else -static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} + return armv7_probe_num_events(cpu_pmu); +} + +static const struct of_device_id armv7_pmu_of_device_ids[] = { + {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, + {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, + {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, + {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, + {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, + {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, + {}, +}; -static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static const struct pmu_probe_info armv7_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), + { /* sentinel value */ } +}; -static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) +static int armv7_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, + armv7_pmu_probe_table); } -static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct platform_driver armv7_pmu_driver = { + .driver = { + .name = "armv7-pmu", + .of_match_table = armv7_pmu_of_device_ids, + }, + .probe = armv7_pmu_device_probe, +}; -static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +static int __init register_armv7_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv7_pmu_driver); } +device_initcall(register_armv7_pmu_driver); #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 8af9f1f82c68..304d056d5b25 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -13,6 +13,14 @@ */ #ifdef CONFIG_CPU_XSCALE + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum xscale_perf_types { XSCALE_PERFCTR_ICACHE_MISS = 0x00, XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, @@ -740,14 +748,28 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) + +static const struct pmu_probe_info xscale_pmu_probe_table[] = { + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), + { /* sentinel value */ } +}; + +static int xscale_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); } -static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver xscale_pmu_driver = { + .driver = { + .name = "xscale-pmu", + }, + .probe = xscale_pmu_device_probe, +}; + +static int __init register_xscale_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&xscale_pmu_driver); } +device_initcall(register_xscale_pmu_driver); #endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6c777e908a24..36c18b73c1f4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -46,6 +46,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/tlbflush.h> +#include <asm/xen/hypervisor.h> #include <asm/prom.h> #include <asm/mach/arch.h> @@ -75,8 +76,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); -extern void early_paging_init(const struct machine_desc *, - struct proc_info_list *); +extern void early_paging_init(const struct machine_desc *); extern void sanity_check_meminfo(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -93,6 +93,9 @@ unsigned int __atags_pointer __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); +const char *system_serial; +EXPORT_SYMBOL(system_serial); + unsigned int system_serial_low; EXPORT_SYMBOL(system_serial_low); @@ -839,8 +842,25 @@ arch_initcall(customize_machine); static int __init init_machine_late(void) { + struct device_node *root; + int ret; + if (machine_desc->init_late) machine_desc->init_late(); + + root = of_find_node_by_path("/"); + if (root) { + ret = of_property_read_string(root, "serial-number", + &system_serial); + if (ret) + system_serial = NULL; + } + + if (!system_serial) + system_serial = kasprintf(GFP_KERNEL, "%08x%08x", + system_serial_high, + system_serial_low); + return 0; } late_initcall(init_machine_late); @@ -936,7 +956,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); +#ifdef CONFIG_MMU + early_paging_init(mdesc); +#endif setup_dma_zone(mdesc); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -951,6 +973,7 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_init(); + xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { @@ -1109,8 +1132,7 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); - seq_printf(m, "Serial\t\t: %08x%08x\n", - system_serial_high, system_serial_low); + seq_printf(m, "Serial\t\t: %s\n", system_serial); return 0; } diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 6060dbc7844e..0f6c1000582c 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -81,7 +81,7 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block add r0, sp, #8 @ pointer to save block bl __cpu_suspend_save - adr lr, BSYM(cpu_suspend_abort) + badr lr, cpu_suspend_abort ldmfd sp!, {r0, pc} @ call suspend fn ENDPROC(__cpu_suspend) .ltorg @@ -122,7 +122,7 @@ ENDPROC(cpu_resume_after_mmu) #ifdef CONFIG_MMU .arm ENTRY(cpu_resume_arm) - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b8758185..90dfbedfbfb8 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops) static unsigned long get_arch_pgd(pgd_t *pgd) { - phys_addr_t pgdir = virt_to_idmap(pgd); - BUG_ON(pgdir & ARCH_PGD_MASK); - return pgdir >> ARCH_PGD_SHIFT; +#ifdef CONFIG_ARM_LPAE + return __phys_to_pfn(virt_to_phys(pgd)); +#else + return virt_to_phys(pgd); +#endif } int __cpu_up(unsigned int cpu, struct task_struct *idle) @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) #endif #ifdef CONFIG_MMU - secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif sync_cache_w(&secondary_data); diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 7a3be1d4d0b1..b10e1360762e 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,6 +17,9 @@ #include <asm/mach/map.h> #include <asm/memory.h> #include <asm/system_info.h> +#include <asm/traps.h> + +#define TCMTR_FORMAT_MASK 0xe0000000U static struct gen_pool *tcm_pool; static bool dtcm_present; @@ -176,6 +179,77 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, } /* + * When we are running in the non-secure world and the secure world + * has not explicitly given us access to the TCM we will get an + * undefined error when reading the TCM region register in the + * setup_tcm_bank function (above). + * + * There are two variants of this register read that we need to trap, + * the read for the data TCM and the read for the instruction TCM: + * c0370628: ee196f11 mrc 15, 0, r6, cr9, cr1, {0} + * c0370674: ee196f31 mrc 15, 0, r6, cr9, cr1, {1} + * + * Our undef hook mask explicitly matches all fields of the encoded + * instruction other than the destination register. The mask also + * only allows operand 2 to have the values 0 or 1. + * + * The undefined hook is defined as __init and __initdata, and therefore + * must be removed before tcm_init returns. + * + * In this particular case (MRC with ARM condition code ALways) the + * Thumb-2 and ARM instruction encoding are identical, so this hook + * will work on a Thumb-2 kernel. + * + * See A8.8.107, DDI0406C_C ARM Architecture Reference Manual, Encoding + * T1/A1 for the bit-by-bit details. + * + * mrc p15, 0, XX, c9, c1, 0 + * mrc p15, 0, XX, c9, c1, 1 + * | | | | | | | +---- opc2 0|1 = 000|001 + * | | | | | | +------- CRm 0 = 0001 + * | | | | | +----------- CRn 0 = 1001 + * | | | | +--------------- Rt ? = ???? + * | | | +------------------- opc1 0 = 000 + * | | +----------------------- coproc 15 = 1111 + * | +-------------------------- condition ALways = 1110 + * +----------------------------- instruction MRC = 1110 + * + * Encoding this as per A8.8.107 of DDI0406C, Encoding T1/A1, yields: + * 1111 1111 1111 1111 0000 1111 1101 1111 Required Mask + * 1110 1110 0001 1001 ???? 1111 0001 0001 mrc p15, 0, XX, c9, c1, 0 + * 1110 1110 0001 1001 ???? 1111 0011 0001 mrc p15, 0, XX, c9, c1, 1 + * [ ] [ ] [ ]| [ ] [ ] [ ] [ ]| +--- CRm + * | | | | | | | | +----- SBO + * | | | | | | | +------- opc2 + * | | | | | | +----------- coproc + * | | | | | +---------------- Rt + * | | | | +--------------------- CRn + * | | | +------------------------- SBO + * | | +--------------------------- opc1 + * | +------------------------------- instruction + * +------------------------------------ condition + */ +#define TCM_REGION_READ_MASK 0xffff0fdf +#define TCM_REGION_READ_INSTR 0xee190f11 +#define DEST_REG_SHIFT 12 +#define DEST_REG_MASK 0xf + +static int __init tcm_handler(struct pt_regs *regs, unsigned int instr) +{ + regs->uregs[(instr >> DEST_REG_SHIFT) & DEST_REG_MASK] = 0; + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook tcm_hook __initdata = { + .instr_mask = TCM_REGION_READ_MASK, + .instr_val = TCM_REGION_READ_INSTR, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = tcm_handler +}; + +/* * This initializes the TCM memory */ void __init tcm_init(void) @@ -204,9 +278,18 @@ void __init tcm_init(void) } tcm_status = read_cpuid_tcmstatus(); + + /* + * This code only supports v6-compatible TCMTR implementations. + */ + if (tcm_status & TCMTR_FORMAT_MASK) + return; + dtcm_banks = (tcm_status >> 16) & 0x03; itcm_banks = (tcm_status & 0x03); + register_undef_hook(&tcm_hook); + /* Values greater than 2 for D/ITCM banks are "reserved" */ if (dtcm_banks > 2) dtcm_banks = 0; @@ -218,7 +301,7 @@ void __init tcm_init(void) for (i = 0; i < dtcm_banks; i++) { ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into DTCM */ if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) { @@ -227,6 +310,12 @@ void __init tcm_init(void) dtcm_code_sz, (dtcm_end - DTCM_OFFSET)); goto no_dtcm; } + /* + * This means that the DTCM sizes were 0 or the DTCM banks + * were inaccessible due to TrustZone configuration. + */ + if (!(dtcm_end - DTCM_OFFSET)) + goto no_dtcm; dtcm_res.end = dtcm_end - 1; request_resource(&iomem_resource, &dtcm_res); dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET; @@ -250,15 +339,21 @@ no_dtcm: for (i = 0; i < itcm_banks; i++) { ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into ITCM */ if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) { pr_info("CPU ITCM: %u bytes of code compiled to " "ITCM but only %lu bytes of ITCM present\n", itcm_code_sz, (itcm_end - ITCM_OFFSET)); - return; + goto unregister; } + /* + * This means that the ITCM sizes were 0 or the ITCM banks + * were inaccessible due to TrustZone configuration. + */ + if (!(itcm_end - ITCM_OFFSET)) + goto unregister; itcm_res.end = itcm_end - 1; request_resource(&iomem_resource, &itcm_res); itcm_iomap[0].length = itcm_end - ITCM_OFFSET; @@ -275,6 +370,9 @@ no_dtcm: pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no " "ITCM banks present in CPU\n", itcm_code_sz); } + +unregister: + unregister_undef_hook(&tcm_hook); } /* diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3dce1a342030..d358226236f2 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -749,14 +749,6 @@ late_initcall(arm_mrc_hook_init); #endif -void __bad_xchg(volatile void *ptr, int size) -{ - pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", - __builtin_return_address(0), ptr, size); - BUG(); -} -EXPORT_SYMBOL(__bad_xchg); - /* * A data abort trap was taken, but we did not handle the instruction. * Try to abort the user program, or panic if it was the kernel. diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index f1f79d104309..bfb915d05665 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU select MMU_NOTIFIER + select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 139e46c08b6e..c5eef02c52ba 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm -kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d9631ecddd56..bc738d2b8392 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -171,7 +171,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: - case KVM_CAP_IRQFD: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: @@ -532,6 +531,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vgic_flush_hwstate(vcpu); kvm_timer_flush_hwstate(vcpu); + preempt_disable(); local_irq_disable(); /* @@ -544,6 +544,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { local_irq_enable(); + preempt_enable(); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); continue; @@ -553,14 +554,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Enter the guest */ trace_kvm_entry(*vcpu_pc(vcpu)); - kvm_guest_enter(); + __kvm_guest_enter(); vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; - kvm_guest_exit(); - trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + /* + * Back from guest + *************************************************************/ + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still @@ -574,8 +577,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_enable(); /* - * Back from guest - *************************************************************/ + * We do local_irq_enable() before calling kvm_guest_exit() so + * that if a timer interrupt hits while running the guest we + * account that tick as being spent in the guest. We enable + * preemption after calling kvm_guest_exit() so that if we get + * preempted we make sure ticks after that is not counted as + * guest time. + */ + kvm_guest_exit(); + trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + preempt_enable(); + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 79caf79b304a..568494dbbbb5 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -170,13 +170,9 @@ __kvm_vcpu_return: @ Don't trap coprocessor accesses for host kernel set_hstr vmexit set_hdcr vmexit - set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore #ifdef CONFIG_VFPv3 - @ Save floating point registers we if let guest use them. - tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) - bne after_vfp_restore - @ Switch VFP/NEON hardware state to the host's add r7, vcpu, #VCPU_VFP_GUEST store_vfp_state r7 @@ -188,6 +184,8 @@ after_vfp_restore: @ Restore FPEXC_EN which we clobbered on entry pop {r2} VFPFMXR FPEXC, r2 +#else +after_vfp_restore: #endif @ Reset Hyp-role @@ -309,7 +307,7 @@ ENTRY(kvm_call_hyp) THUMB( orr r2, r2, #PSR_T_BIT ) msr spsr_cxsf, r2 mrs r1, ELR_hyp - ldr r2, =BSYM(panic) + ldr r2, =panic msr ELR_hyp, r2 ldr r0, =\panic_str clrex @ Clear exclusive monitor @@ -483,7 +481,7 @@ switch_to_guest_vfp: push {r3-r7} @ NEON/VFP used. Turn on VFP access. - set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11)) @ Switch VFP/NEON hardware state to the guest's add r7, r0, #VCPU_VFP_HOST diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 35e4a3a0c476..702740d37465 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -412,7 +412,6 @@ vcpu .req r0 @ vcpu pointer always in r0 add r11, vcpu, #VCPU_VGIC_CPU /* Save all interesting registers */ - ldr r3, [r2, #GICH_HCR] ldr r4, [r2, #GICH_VMCR] ldr r5, [r2, #GICH_MISR] ldr r6, [r2, #GICH_EISR0] @@ -420,7 +419,6 @@ vcpu .req r0 @ vcpu pointer always in r0 ldr r8, [r2, #GICH_ELRSR0] ldr r9, [r2, #GICH_ELRSR1] ldr r10, [r2, #GICH_APR] -ARM_BE8(rev r3, r3 ) ARM_BE8(rev r4, r4 ) ARM_BE8(rev r5, r5 ) ARM_BE8(rev r6, r6 ) @@ -429,7 +427,6 @@ ARM_BE8(rev r8, r8 ) ARM_BE8(rev r9, r9 ) ARM_BE8(rev r10, r10 ) - str r3, [r11, #VGIC_V2_CPU_HCR] str r4, [r11, #VGIC_V2_CPU_VMCR] str r5, [r11, #VGIC_V2_CPU_MISR] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -591,8 +588,13 @@ ARM_BE8(rev r6, r6 ) .endm /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return - * (hardware reset value is 0). Keep previous value in r2. */ -.macro set_hcptr operation, mask + * (hardware reset value is 0). Keep previous value in r2. + * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if + * VFP wasn't already enabled (always executed on vmtrap). + * If a label is specified with vmexit, it is branched to if VFP wasn't + * enabled. + */ +.macro set_hcptr operation, mask, label = none mrc p15, 4, r2, c1, c1, 2 ldr r3, =\mask .if \operation == vmentry @@ -601,6 +603,17 @@ ARM_BE8(rev r6, r6 ) bic r3, r2, r3 @ Don't trap defined coproc-accesses .endif mcr p15, 4, r3, c1, c1, 2 + .if \operation != vmentry + .if \operation == vmexit + tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) + beq 1f + .endif + isb + .if \label != none + b \label + .endif +1: + .endif .endm /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1d5accbd3dcf..7b4201294187 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -691,8 +691,8 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) * work. This is not used by the hardware and we have no * alignment requirement for this allocation. */ - pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), - GFP_KERNEL | __GFP_ZERO); + pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), + GFP_KERNEL | __GFP_ZERO); if (!pgd) { kvm_free_hwpgd(hwpgd); @@ -1155,7 +1155,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) */ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) { - struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; @@ -1718,8 +1719,9 @@ out: } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { /* @@ -1733,7 +1735,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { hva_t hva = mem->userspace_addr; @@ -1838,7 +1840,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) { } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 02fa8eff6ae1..4b94b513168d 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -24,6 +24,8 @@ #include <asm/kvm_psci.h> #include <asm/kvm_host.h> +#include <uapi/linux/psci.h> + /* * This is an implementation of the Power State Coordination Interface * as described in ARM document number ARM DEN 0022A. @@ -230,10 +232,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_AFFINITY_INFO: val = kvm_psci_vcpu_affinity_info(vcpu); break; - case PSCI_0_2_FN_MIGRATE: - case PSCI_0_2_FN64_MIGRATE: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_MIGRATE_INFO_TYPE: /* * Trusted OS is MP hence does not require migration @@ -242,10 +240,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) */ val = PSCI_0_2_TOS_MP; break; - case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: - case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -271,7 +265,8 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; default: - return -EINVAL; + val = PSCI_RET_NOT_SUPPORTED; + break; } *vcpu_reg(vcpu, 0) = val; @@ -291,12 +286,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) case KVM_PSCI_FN_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; - case KVM_PSCI_FN_CPU_SUSPEND: - case KVM_PSCI_FN_MIGRATE: + default: val = PSCI_RET_NOT_SUPPORTED; break; - default: - return -EINVAL; } *vcpu_reg(vcpu, 0) = val; diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S index ed1a421813cb..bf3a40889205 100644 --- a/arch/arm/lib/call_with_stack.S +++ b/arch/arm/lib/call_with_stack.S @@ -35,7 +35,7 @@ ENTRY(call_with_stack) mov r2, r0 mov r0, r1 - adr lr, BSYM(1f) + badr lr, 1f ret r2 1: ldr lr, [sp] diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 947567ff67f9..af2267f6a529 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -167,7 +167,7 @@ Boston, MA 02111-1307, USA. */ #endif - @ Perform all needed substractions to keep only the reminder. + @ Perform all needed subtractions to keep only the reminder. @ Do comparisons in batch of 4 first. subs \order, \order, #3 @ yes, 3 is intended here blt 2f @@ -189,7 +189,7 @@ Boston, MA 02111-1307, USA. */ teqne \dividend, #0 beq 5f - @ Either 1, 2 or 3 comparison/substractions are left. + @ Either 1, 2 or 3 comparison/subtractions are left. 2: cmn \order, #2 blt 4f beq 3f diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 1e184767c3be..e24df77abd79 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -369,7 +369,7 @@ static void __init at91_pm_sram_init(void) return; } - sram_pool = dev_get_gen_pool(&pdev->dev); + sram_pool = gen_pool_get(&pdev->dev); if (!sram_pool) { pr_warn("%s: sram pool unavailable!\n", __func__); return; diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index 45ce065e7170..3b8740c083c4 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -11,6 +11,7 @@ * is licensed "as is" without any warranty of any kind, whether express * or implied. */ +#include <linux/clkdev.h> #include <linux/gpio.h> #include <linux/init.h> #include <linux/clk.h> diff --git a/arch/arm/mach-davinci/pm_domain.c b/arch/arm/mach-davinci/pm_domain.c index 641edc313938..78eac2c0c146 100644 --- a/arch/arm/mach-davinci/pm_domain.c +++ b/arch/arm/mach-davinci/pm_domain.c @@ -14,39 +14,9 @@ #include <linux/pm_clock.h> #include <linux/platform_device.h> -#ifdef CONFIG_PM -static int davinci_pm_runtime_suspend(struct device *dev) -{ - int ret; - - dev_dbg(dev, "%s\n", __func__); - - ret = pm_generic_runtime_suspend(dev); - if (ret) - return ret; - - ret = pm_clk_suspend(dev); - if (ret) { - pm_generic_runtime_resume(dev); - return ret; - } - - return 0; -} - -static int davinci_pm_runtime_resume(struct device *dev) -{ - dev_dbg(dev, "%s\n", __func__); - - pm_clk_resume(dev); - return pm_generic_runtime_resume(dev); -} -#endif - static struct dev_pm_domain davinci_pm_domain = { .ops = { - SET_RUNTIME_PM_OPS(davinci_pm_runtime_suspend, - davinci_pm_runtime_resume, NULL) + USE_PM_CLK_RUNTIME_OPS USE_PLATFORM_PM_SLEEP_OPS }, }; diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 4bd8b7653817..5f8ddcdeeacf 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -224,6 +224,25 @@ static void __init exynos_init_irq(void) exynos_map_pmu(); } +static const struct of_device_id exynos_cpufreq_matches[] = { + { .compatible = "samsung,exynos4210", .data = "cpufreq-dt" }, + { /* sentinel */ } +}; + +static void __init exynos_cpufreq_init(void) +{ + struct device_node *root = of_find_node_by_path("/"); + const struct of_device_id *match; + + match = of_match_node(exynos_cpufreq_matches, root); + if (!match) { + platform_device_register_simple("exynos-cpufreq", -1, NULL, 0); + return; + } + + platform_device_register_simple(match->data, -1, NULL, 0); +} + static void __init exynos_dt_machine_init(void) { /* @@ -246,7 +265,7 @@ static void __init exynos_dt_machine_init(void) of_machine_is_compatible("samsung,exynos5250")) platform_device_register(&exynos_cpuidle); - platform_device_register_simple("exynos-cpufreq", -1, NULL, 0); + exynos_cpufreq_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 96866d03d281..f572219c7a40 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -311,13 +311,7 @@ static int exynos5420_cpu_suspend(unsigned long arg) if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM)) { mcpm_set_entry_vector(cpu, cluster, exynos_cpu_resume); - - /* - * Residency value passed to mcpm_cpu_suspend back-end - * has to be given clear semantics. Set to 0 as a - * temporary value. - */ - mcpm_cpu_suspend(0); + mcpm_cpu_suspend(); } pr_info("Failed to suspend the system\n"); diff --git a/arch/arm/mach-footbridge/dma.c b/arch/arm/mach-footbridge/dma.c index e2e0df8bcee2..22536b85a81d 100644 --- a/arch/arm/mach-footbridge/dma.c +++ b/arch/arm/mach-footbridge/dma.c @@ -13,9 +13,9 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/spinlock.h> +#include <linux/scatterlist.h> #include <asm/dma.h> -#include <asm/scatterlist.h> #include <asm/mach/dma.h> #include <asm/hardware/dec21285.h> diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c index f8cb5710d6ee..3292f2e6ed6f 100644 --- a/arch/arm/mach-gemini/gpio.c +++ b/arch/arm/mach-gemini/gpio.c @@ -223,8 +223,8 @@ void __init gemini_gpio_init(void) set_irq_flags(j, IRQF_VALID); } - irq_set_chained_handler(IRQ_GPIO(i), gpio_irq_handler); - irq_set_handler_data(IRQ_GPIO(i), (void *)i); + irq_set_chained_handler_and_data(IRQ_GPIO(i), gpio_irq_handler, + (void *)i); } BUG_ON(gpiochip_add(&gemini_gpio_chip)); diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 280f3f14f77c..b5f8f5ffda79 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -6,6 +6,8 @@ * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. */ +#include <linux/init.h> +#include <linux/smp.h> #include <linux/delay.h> #include <linux/io.h> #include <linux/memblock.h> @@ -13,7 +15,9 @@ #include <asm/cputype.h> #include <asm/cp15.h> -#include <asm/mcpm.h> +#include <asm/cacheflush.h> +#include <asm/smp.h> +#include <asm/smp_plat.h> #include "core.h" @@ -94,11 +98,16 @@ static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on) } while (data != readl_relaxed(fabric + FAB_SF_MODE)); } -static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) +static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) { + unsigned int mpidr, cpu, cluster; unsigned long data; void __iomem *sys_dreq, *sys_status; + mpidr = cpu_logical_map(l_cpu); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + if (!sysctrl) return -ENODEV; if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) @@ -118,6 +127,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) cpu_relax(); data = readl_relaxed(sys_status); } while (data & CLUSTER_DEBUG_RESET_STATUS); + hip04_set_snoop_filter(cluster, 1); } data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \ @@ -126,11 +136,15 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) do { cpu_relax(); } while (data == readl_relaxed(sys_status)); + /* * We may fail to power up core again without this delay. * It's not mentioned in document. It's found by test. */ udelay(20); + + arch_send_wakeup_ipi_mask(cpumask_of(l_cpu)); + out: hip04_cpu_table[cluster][cpu]++; spin_unlock_irq(&boot_lock); @@ -138,31 +152,30 @@ out: return 0; } -static void hip04_mcpm_power_down(void) +#ifdef CONFIG_HOTPLUG_CPU +static void hip04_cpu_die(unsigned int l_cpu) { unsigned int mpidr, cpu, cluster; - bool skip_wfi = false, last_man = false; + bool last_man; - mpidr = read_cpuid_mpidr(); + mpidr = cpu_logical_map(l_cpu); cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - __mcpm_cpu_going_down(cpu, cluster); - spin_lock(&boot_lock); - BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); hip04_cpu_table[cluster][cpu]--; if (hip04_cpu_table[cluster][cpu] == 1) { /* A power_up request went ahead of us. */ - skip_wfi = true; + spin_unlock(&boot_lock); + return; } else if (hip04_cpu_table[cluster][cpu] > 1) { pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); BUG(); } last_man = hip04_cluster_is_down(cluster); - if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { - spin_unlock(&boot_lock); + spin_unlock(&boot_lock); + if (last_man) { /* Since it's Cortex A15, disable L2 prefetching. */ asm volatile( "mcr p15, 1, %0, c15, c0, 3 \n\t" @@ -170,34 +183,30 @@ static void hip04_mcpm_power_down(void) "dsb " : : "r" (0x400) ); v7_exit_coherency_flush(all); - hip04_set_snoop_filter(cluster, 0); - __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); } else { - spin_unlock(&boot_lock); v7_exit_coherency_flush(louis); } - __mcpm_cpu_down(cpu, cluster); - - if (!skip_wfi) + for (;;) wfi(); } -static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) +static int hip04_cpu_kill(unsigned int l_cpu) { + unsigned int mpidr, cpu, cluster; unsigned int data, tries, count; - int ret = -ETIMEDOUT; + mpidr = cpu_logical_map(l_cpu); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); BUG_ON(cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER); count = TIMEOUT_MSEC / POLL_MSEC; spin_lock_irq(&boot_lock); for (tries = 0; tries < count; tries++) { - if (hip04_cpu_table[cluster][cpu]) { - ret = -EBUSY; + if (hip04_cpu_table[cluster][cpu]) goto err; - } cpu_relax(); data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); if (data & CORE_WFI_STATUS(cpu)) @@ -220,64 +229,22 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) } if (tries >= count) goto err; + if (hip04_cluster_is_down(cluster)) + hip04_set_snoop_filter(cluster, 0); spin_unlock_irq(&boot_lock); - return 0; + return 1; err: spin_unlock_irq(&boot_lock); - return ret; -} - -static void hip04_mcpm_powered_up(void) -{ - unsigned int mpidr, cpu, cluster; - - mpidr = read_cpuid_mpidr(); - cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - - spin_lock(&boot_lock); - if (!hip04_cpu_table[cluster][cpu]) - hip04_cpu_table[cluster][cpu] = 1; - spin_unlock(&boot_lock); -} - -static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level) -{ - asm volatile (" \n" -" cmp r0, #0 \n" -" bxeq lr \n" - /* calculate fabric phys address */ -" adr r2, 2f \n" -" ldmia r2, {r1, r3} \n" -" sub r0, r2, r1 \n" -" ldr r2, [r0, r3] \n" - /* get cluster id from MPIDR */ -" mrc p15, 0, r0, c0, c0, 5 \n" -" ubfx r1, r0, #8, #8 \n" - /* 1 << cluster id */ -" mov r0, #1 \n" -" mov r3, r0, lsl r1 \n" -" ldr r0, [r2, #"__stringify(FAB_SF_MODE)"] \n" -" tst r0, r3 \n" -" bxne lr \n" -" orr r1, r0, r3 \n" -" str r1, [r2, #"__stringify(FAB_SF_MODE)"] \n" -"1: ldr r0, [r2, #"__stringify(FAB_SF_MODE)"] \n" -" tst r0, r3 \n" -" beq 1b \n" -" bx lr \n" - -" .align 2 \n" -"2: .word . \n" -" .word fabric_phys_addr \n" - ); + return 0; } - -static const struct mcpm_platform_ops hip04_mcpm_ops = { - .power_up = hip04_mcpm_power_up, - .power_down = hip04_mcpm_power_down, - .wait_for_powerdown = hip04_mcpm_wait_for_powerdown, - .powered_up = hip04_mcpm_powered_up, +#endif + +static struct smp_operations __initdata hip04_smp_ops = { + .smp_boot_secondary = hip04_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_die = hip04_cpu_die, + .cpu_kill = hip04_cpu_kill, +#endif }; static bool __init hip04_cpu_table_init(void) @@ -298,7 +265,7 @@ static bool __init hip04_cpu_table_init(void) return true; } -static int __init hip04_mcpm_init(void) +static int __init hip04_smp_init(void) { struct device_node *np, *np_sctl, *np_fab; struct resource fab_res; @@ -353,10 +320,6 @@ static int __init hip04_mcpm_init(void) ret = -EINVAL; goto err_table; } - ret = mcpm_platform_register(&hip04_mcpm_ops); - if (ret) { - goto err_table; - } /* * Fill the instruction address that is used after secondary core @@ -364,13 +327,11 @@ static int __init hip04_mcpm_init(void) */ writel_relaxed(hip04_boot_method[0], relocation); writel_relaxed(0xa5a5a5a5, relocation + 4); /* magic number */ - writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8); + writel_relaxed(virt_to_phys(secondary_startup), relocation + 8); writel_relaxed(0, relocation + 12); iounmap(relocation); - mcpm_sync_init(hip04_mcpm_power_up_setup); - mcpm_smp_set_ops(); - pr_info("HiP04 MCPM initialized\n"); + smp_set_ops(&hip04_smp_ops); return ret; err_table: iounmap(fabric); @@ -383,4 +344,4 @@ err_reloc: err: return ret; } -early_initcall(hip04_mcpm_init); +early_initcall(hip04_smp_init); diff --git a/arch/arm/mach-imx/pm-imx5.c b/arch/arm/mach-imx/pm-imx5.c index 0309ccda36a9..1885676c23c0 100644 --- a/arch/arm/mach-imx/pm-imx5.c +++ b/arch/arm/mach-imx/pm-imx5.c @@ -297,7 +297,7 @@ static int __init imx_suspend_alloc_ocram( goto put_node; } - ocram_pool = dev_get_gen_pool(&pdev->dev); + ocram_pool = gen_pool_get(&pdev->dev); if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index b01650d94f91..93ecf559d06d 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -451,7 +451,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) goto put_node; } - ocram_pool = dev_get_gen_pool(&pdev->dev); + ocram_pool = gen_pool_get(&pdev->dev); if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 30003ba447a5..5b0e363fe5ba 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -37,7 +37,6 @@ #include <linux/stat.h> #include <linux/termios.h> -#include <asm/hardware/arm_timer.h> #include <asm/setup.h> #include <asm/param.h> /* HZ */ #include <asm/mach-types.h> diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index 06620875813a..e288010522f9 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -27,7 +27,6 @@ #include "keystone.h" -static struct notifier_block platform_nb; static unsigned long keystone_dma_pfn_offset __read_mostly; static int keystone_platform_notifier(struct notifier_block *nb, @@ -49,11 +48,18 @@ static int keystone_platform_notifier(struct notifier_block *nb, return NOTIFY_OK; } +static struct notifier_block platform_nb = { + .notifier_call = keystone_platform_notifier, +}; + static void __init keystone_init(void) { - keystone_pm_runtime_init(); - if (platform_nb.notifier_call) + if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) { + keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - + KEYSTONE_LOW_PHYS_START); bus_register_notifier(&platform_bus_type, &platform_nb); + } + keystone_pm_runtime_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } @@ -62,11 +68,9 @@ static phys_addr_t keystone_virt_to_idmap(unsigned long x) return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START; } -static void __init keystone_init_meminfo(void) +static long long __init keystone_pv_fixup(void) { - bool lpae = IS_ENABLED(CONFIG_ARM_LPAE); - bool pvpatch = IS_ENABLED(CONFIG_ARM_PATCH_PHYS_VIRT); - phys_addr_t offset = PHYS_OFFSET - KEYSTONE_LOW_PHYS_START; + long long offset; phys_addr_t mem_start, mem_end; mem_start = memblock_start_of_DRAM(); @@ -75,32 +79,21 @@ static void __init keystone_init_meminfo(void) /* nothing to do if we are running out of the <32-bit space */ if (mem_start >= KEYSTONE_LOW_PHYS_START && mem_end <= KEYSTONE_LOW_PHYS_END) - return; - - if (!lpae || !pvpatch) { - pr_crit("Enable %s%s%s to run outside 32-bit space\n", - !lpae ? __stringify(CONFIG_ARM_LPAE) : "", - (!lpae && !pvpatch) ? " and " : "", - !pvpatch ? __stringify(CONFIG_ARM_PATCH_PHYS_VIRT) : ""); - } + return 0; if (mem_start < KEYSTONE_HIGH_PHYS_START || mem_end > KEYSTONE_HIGH_PHYS_END) { pr_crit("Invalid address space for memory (%08llx-%08llx)\n", - (u64)mem_start, (u64)mem_end); + (u64)mem_start, (u64)mem_end); + return 0; } - offset += KEYSTONE_HIGH_PHYS_START; - __pv_phys_pfn_offset = PFN_DOWN(offset); - __pv_offset = (offset - PAGE_OFFSET); + offset = KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START; /* Populate the arch idmap hook */ arch_virt_to_idmap = keystone_virt_to_idmap; - platform_nb.notifier_call = keystone_platform_notifier; - keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - - KEYSTONE_LOW_PHYS_START); - pr_info("Switching to high address space at 0x%llx\n", (u64)offset); + return offset; } static const char *const keystone_match[] __initconst = { @@ -115,5 +108,5 @@ DT_MACHINE_START(KEYSTONE, "Keystone") .smp = smp_ops(keystone_smp_ops), .init_machine = keystone_init, .dt_compat = keystone_match, - .init_meminfo = keystone_init_meminfo, + .pv_fixup = keystone_pv_fixup, MACHINE_END diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c index 5f46a7cf907b..4bbb18463bfd 100644 --- a/arch/arm/mach-keystone/platsmp.c +++ b/arch/arm/mach-keystone/platsmp.c @@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu, return error; } -#ifdef CONFIG_ARM_LPAE -static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu) -{ - pgd_t *pgd0 = pgd_offset_k(0); - cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); - local_flush_tlb_all(); -} -#else -static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu) -{} -#endif - struct smp_operations keystone_smp_ops __initdata = { .smp_boot_secondary = keystone_smp_boot_secondary, - .smp_secondary_init = keystone_smp_secondary_initmem, }; diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c index 41bebfd296dc..edea697e8253 100644 --- a/arch/arm/mach-keystone/pm_domain.c +++ b/arch/arm/mach-keystone/pm_domain.c @@ -19,40 +19,9 @@ #include <linux/clk-provider.h> #include <linux/of.h> -#ifdef CONFIG_PM -static int keystone_pm_runtime_suspend(struct device *dev) -{ - int ret; - - dev_dbg(dev, "%s\n", __func__); - - ret = pm_generic_runtime_suspend(dev); - if (ret) - return ret; - - ret = pm_clk_suspend(dev); - if (ret) { - pm_generic_runtime_resume(dev); - return ret; - } - - return 0; -} - -static int keystone_pm_runtime_resume(struct device *dev) -{ - dev_dbg(dev, "%s\n", __func__); - - pm_clk_resume(dev); - - return pm_generic_runtime_resume(dev); -} -#endif - static struct dev_pm_domain keystone_pm_domain = { .ops = { - SET_RUNTIME_PM_OPS(keystone_pm_runtime_suspend, - keystone_pm_runtime_resume, NULL) + USE_PM_CLK_RUNTIME_OPS USE_PLATFORM_PM_SLEEP_OPS }, }; diff --git a/arch/arm/mach-lpc32xx/clock.c b/arch/arm/mach-lpc32xx/clock.c index dd5d6f532e8c..661c8f4b2310 100644 --- a/arch/arm/mach-lpc32xx/clock.c +++ b/arch/arm/mach-lpc32xx/clock.c @@ -1238,10 +1238,7 @@ static struct clk_lookup lookups[] = { static int __init clk_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(lookups); i++) - clkdev_add(&lookups[i]); + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); /* * Setup muxed SYSCLK for HCLK PLL base -this selects the diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c index 9ecb8f9c4ef5..d4f7dc87042b 100644 --- a/arch/arm/mach-lpc32xx/irq.c +++ b/arch/arm/mach-lpc32xx/irq.c @@ -283,25 +283,25 @@ static int lpc32xx_set_irq_type(struct irq_data *d, unsigned int type) case IRQ_TYPE_EDGE_RISING: /* Rising edge sensitive */ __lpc32xx_set_irq_type(d->hwirq, 1, 1); - __irq_set_handler_locked(d->hwirq, handle_edge_irq); + __irq_set_handler_locked(d->irq, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: /* Falling edge sensitive */ __lpc32xx_set_irq_type(d->hwirq, 0, 1); - __irq_set_handler_locked(d->hwirq, handle_edge_irq); + __irq_set_handler_locked(d->irq, handle_edge_irq); break; case IRQ_TYPE_LEVEL_LOW: /* Low level sensitive */ __lpc32xx_set_irq_type(d->hwirq, 0, 0); - __irq_set_handler_locked(d->hwirq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); break; case IRQ_TYPE_LEVEL_HIGH: /* High level sensitive */ __lpc32xx_set_irq_type(d->hwirq, 1, 0); - __irq_set_handler_locked(d->hwirq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); break; /* Other modes are not supported */ diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 48e4c4b3cd1c..b093a196e801 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -13,12 +13,9 @@ */ #include <linux/linkage.h> -#include <linux/init.h> #include <asm/assembler.h> - __CPUINIT - ENTRY(mvebu_cortex_a9_secondary_startup) ARM_BE8(setend be) bl armada_38x_scu_power_up diff --git a/arch/arm/mach-mvebu/platsmp-a9.c b/arch/arm/mach-mvebu/platsmp-a9.c index df0a9cc5da59..3d5000481c11 100644 --- a/arch/arm/mach-mvebu/platsmp-a9.c +++ b/arch/arm/mach-mvebu/platsmp-a9.c @@ -24,7 +24,7 @@ extern void mvebu_cortex_a9_secondary_startup(void); -static int __cpuinit mvebu_cortex_a9_boot_secondary(unsigned int cpu, +static int mvebu_cortex_a9_boot_secondary(unsigned int cpu, struct task_struct *idle) { int ret, hw_cpu; diff --git a/arch/arm/mach-nspire/nspire.c b/arch/arm/mach-nspire/nspire.c index 3445a5686805..34c2a1b32e7d 100644 --- a/arch/arm/mach-nspire/nspire.c +++ b/arch/arm/mach-nspire/nspire.c @@ -22,8 +22,6 @@ #include <asm/mach-types.h> #include <asm/mach/map.h> -#include <asm/hardware/timer-sp.h> - #include "mmio.h" #include "clcd.h" diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c index 9f6c7af3a4e7..dd3a3ad797ea 100644 --- a/arch/arm/mach-omap1/board-nokia770.c +++ b/arch/arm/mach-omap1/board-nokia770.c @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/clkdev.h> #include <linux/irq.h> #include <linux/gpio.h> #include <linux/kernel.h> @@ -14,7 +15,6 @@ #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/input.h> -#include <linux/clk.h> #include <linux/omapfb.h> #include <linux/spi/spi.h> diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c index c40e209de65c..667c1637ff91 100644 --- a/arch/arm/mach-omap1/pm_bus.c +++ b/arch/arm/mach-omap1/pm_bus.c @@ -21,48 +21,15 @@ #include "soc.h" -#ifdef CONFIG_PM -static int omap1_pm_runtime_suspend(struct device *dev) -{ - int ret; - - dev_dbg(dev, "%s\n", __func__); - - ret = pm_generic_runtime_suspend(dev); - if (ret) - return ret; - - ret = pm_clk_suspend(dev); - if (ret) { - pm_generic_runtime_resume(dev); - return ret; - } - - return 0; -} - -static int omap1_pm_runtime_resume(struct device *dev) -{ - dev_dbg(dev, "%s\n", __func__); - - pm_clk_resume(dev); - return pm_generic_runtime_resume(dev); -} - static struct dev_pm_domain default_pm_domain = { .ops = { - .runtime_suspend = omap1_pm_runtime_suspend, - .runtime_resume = omap1_pm_runtime_resume, + USE_PM_CLK_RUNTIME_OPS USE_PLATFORM_PM_SLEEP_OPS }, }; -#define OMAP1_PM_DOMAIN (&default_pm_domain) -#else -#define OMAP1_PM_DOMAIN NULL -#endif /* CONFIG_PM */ static struct pm_clk_notifier_block platform_bus_notifier = { - .pm_domain = OMAP1_PM_DOMAIN, + .pm_domain = &default_pm_domain, .con_ids = { "ick", "fck", NULL, }, }; diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c index 85e0b0c06718..b64d717bfab6 100644 --- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c +++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c @@ -232,14 +232,12 @@ void omap2xxx_clkt_vps_init(void) struct clk_hw_omap *hw = NULL; struct clk *clk; const char *parent_name = "mpu_ck"; - struct clk_lookup *lookup = NULL; omap2xxx_clkt_vps_late_init(); omap2xxx_clkt_vps_check_bootloader_rates(); hw = kzalloc(sizeof(*hw), GFP_KERNEL); - lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); - if (!hw || !lookup) + if (!hw) goto cleanup; init.name = "virt_prcm_set"; init.ops = &virt_prcm_set_ops; @@ -249,15 +247,9 @@ void omap2xxx_clkt_vps_init(void) hw->hw.init = &init; clk = clk_register(NULL, &hw->hw); - - lookup->dev_id = NULL; - lookup->con_id = "cpufreq_ck"; - lookup->clk = clk; - - clkdev_add(lookup); + clkdev_create(clk, "cpufreq_ck", NULL); return; cleanup: kfree(hw); - kfree(lookup); } #endif diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index f492ae147c6a..6ab13d18c636 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -287,6 +287,8 @@ static enum omapdss_version __init omap_display_get_version(void) return OMAPDSS_VER_OMAP5; else if (soc_is_am43xx()) return OMAPDSS_VER_AM43xx; + else if (soc_is_dra7xx()) + return OMAPDSS_VER_DRA7xx; else return OMAPDSS_VER_UNKNOWN; } @@ -568,25 +570,25 @@ void __init omapdss_early_init_of(void) } +static const char * const omapdss_compat_names[] __initconst = { + "ti,omap2-dss", + "ti,omap3-dss", + "ti,omap4-dss", + "ti,omap5-dss", + "ti,dra7-dss", +}; + struct device_node * __init omapdss_find_dss_of_node(void) { struct device_node *node; + int i; - node = of_find_compatible_node(NULL, NULL, "ti,omap2-dss"); - if (node) - return node; - - node = of_find_compatible_node(NULL, NULL, "ti,omap3-dss"); - if (node) - return node; - - node = of_find_compatible_node(NULL, NULL, "ti,omap4-dss"); - if (node) - return node; - - node = of_find_compatible_node(NULL, NULL, "ti,omap5-dss"); - if (node) - return node; + for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { + node = of_find_compatible_node(NULL, NULL, + omapdss_compat_names[i]); + if (node) + return node; + } return NULL; } diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index 4a7303cf563e..4cb8fd9f741f 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -47,7 +47,7 @@ static void _add_clkdev(struct omap_device *od, const char *clk_alias, const char *clk_name) { struct clk *r; - struct clk_lookup *l; + int rc; if (!clk_alias || !clk_name) return; @@ -62,21 +62,15 @@ static void _add_clkdev(struct omap_device *od, const char *clk_alias, return; } - r = clk_get(NULL, clk_name); - if (IS_ERR(r)) { - dev_err(&od->pdev->dev, - "clk_get for %s failed\n", clk_name); - return; + rc = clk_add_alias(clk_alias, dev_name(&od->pdev->dev), clk_name, NULL); + if (rc) { + if (rc == -ENODEV || rc == -ENOMEM) + dev_err(&od->pdev->dev, + "clkdev_alloc for %s failed\n", clk_alias); + else + dev_err(&od->pdev->dev, + "clk_get for %s failed\n", clk_name); } - - l = clkdev_alloc(r, clk_alias, dev_name(&od->pdev->dev)); - if (!l) { - dev_err(&od->pdev->dev, - "clkdev_alloc for %s failed\n", clk_alias); - return; - } - - clkdev_add(l); } /** @@ -690,11 +684,8 @@ struct dev_pm_domain omap_device_pm_domain = { SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume, NULL) USE_PLATFORM_PM_SLEEP_OPS - .suspend_noirq = _od_suspend_noirq, - .resume_noirq = _od_resume_noirq, - .freeze_noirq = _od_suspend_noirq, - .thaw_noirq = _od_resume_noirq, - .restore_noirq = _od_resume_noirq, + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq, + _od_resume_noirq) } }; diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index a0411f32e8b1..2606c6608bd8 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -49,6 +49,27 @@ */ /* + * 'dmm' class + * instance(s): dmm + */ +static struct omap_hwmod_class dra7xx_dmm_hwmod_class = { + .name = "dmm", +}; + +/* dmm */ +static struct omap_hwmod dra7xx_dmm_hwmod = { + .name = "dmm", + .class = &dra7xx_dmm_hwmod_class, + .clkdm_name = "emif_clkdm", + .prcm = { + .omap4 = { + .clkctrl_offs = DRA7XX_CM_EMIF_DMM_CLKCTRL_OFFSET, + .context_offs = DRA7XX_RM_EMIF_DMM_CONTEXT_OFFSET, + }, + }, +}; + +/* * 'l3' class * instance(s): l3_instr, l3_main_1, l3_main_2 */ @@ -438,6 +459,7 @@ static struct omap_hwmod_opt_clk dss_opt_clks[] = { { .role = "video2_clk", .clk = "dss_video2_clk" }, { .role = "video1_clk", .clk = "dss_video1_clk" }, { .role = "hdmi_clk", .clk = "dss_hdmi_clk" }, + { .role = "hdcp_clk", .clk = "dss_deshdcp_clk" }, }; static struct omap_hwmod dra7xx_dss_hwmod = { @@ -500,6 +522,7 @@ static struct omap_hwmod dra7xx_dss_dispc_hwmod = { }, }, .dev_attr = &dss_dispc_dev_attr, + .parent_hwmod = &dra7xx_dss_hwmod, }; /* @@ -541,6 +564,7 @@ static struct omap_hwmod dra7xx_dss_hdmi_hwmod = { }, .opt_clks = dss_hdmi_opt_clks, .opt_clks_cnt = ARRAY_SIZE(dss_hdmi_opt_clks), + .parent_hwmod = &dra7xx_dss_hwmod, }; /* @@ -2321,6 +2345,14 @@ static struct omap_hwmod dra7xx_wd_timer2_hwmod = { * Interfaces */ +/* l3_main_1 -> dmm */ +static struct omap_hwmod_ocp_if dra7xx_l3_main_1__dmm = { + .master = &dra7xx_l3_main_1_hwmod, + .slave = &dra7xx_dmm_hwmod, + .clk = "l3_iclk_div", + .user = OCP_USER_SDMA, +}; + /* l3_main_2 -> l3_instr */ static struct omap_hwmod_ocp_if dra7xx_l3_main_2__l3_instr = { .master = &dra7xx_l3_main_2_hwmod, @@ -3289,6 +3321,7 @@ static struct omap_hwmod_ocp_if dra7xx_l4_wkup__wd_timer2 = { }; static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = { + &dra7xx_l3_main_1__dmm, &dra7xx_l3_main_2__l3_instr, &dra7xx_l4_cfg__l3_main_1, &dra7xx_mpu__l3_main_1, diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c index 11863be59066..16dc95f68125 100644 --- a/arch/arm/mach-pxa/eseries.c +++ b/arch/arm/mach-pxa/eseries.c @@ -10,6 +10,7 @@ * */ +#include <linux/clkdev.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/clk-provider.h> diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 2d4bf1fb7312..6de32fa0e251 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -11,6 +11,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/clkdev.h> #include <linux/gpio.h> #include <linux/gpio/machine.h> #include <linux/module.h> diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 93bf4ef44d2c..e6e27c0468e4 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -12,6 +12,7 @@ * */ +#include <linux/clkdev.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/platform_device.h> diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index c309593abdb2..44575edc44b1 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -35,20 +35,19 @@ #include <linux/mtd/physmap.h> #include <linux/memblock.h> +#include <clocksource/timer-sp804.h> + #include <mach/hardware.h> #include <asm/irq.h> #include <asm/mach-types.h> -#include <asm/hardware/arm_timer.h> #include <asm/hardware/icst.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> #include <asm/mach/map.h> - #include <mach/platform.h> #include <mach/irqs.h> -#include <asm/hardware/timer-sp.h> #include <plat/sched_clock.h> @@ -381,10 +380,10 @@ void __init realview_timer_init(unsigned int timer_irq) /* * Initialise to a known state (all timers off) */ - writel(0, timer0_va_base + TIMER_CTRL); - writel(0, timer1_va_base + TIMER_CTRL); - writel(0, timer2_va_base + TIMER_CTRL); - writel(0, timer3_va_base + TIMER_CTRL); + sp804_timer_disable(timer0_va_base); + sp804_timer_disable(timer1_va_base); + sp804_timer_disable(timer2_va_base); + sp804_timer_disable(timer3_va_base); sp804_clocksource_init(timer3_va_base, "timer3"); sp804_clockevents_init(timer0_va_base, timer_irq, "timer0"); diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 2e6ab67e2284..8fcec1cc101e 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -119,8 +119,7 @@ static int pmu_set_power_domain(int pd, bool on) * Handling of CPU cores */ -static int __cpuinit rockchip_boot_secondary(unsigned int cpu, - struct task_struct *idle) +static int rockchip_boot_secondary(unsigned int cpu, struct task_struct *idle) { int ret; diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile index 61ff91e76e0a..ebc4d58e1a32 100644 --- a/arch/arm/mach-sa1100/Makefile +++ b/arch/arm/mach-sa1100/Makefile @@ -3,7 +3,7 @@ # # Common support -obj-y := clock.o generic.o irq.o #nmi-oopser.o +obj-y := clock.o generic.o #nmi-oopser.o # Specific board support obj-$(CONFIG_SA1100_ASSABET) += assabet.o diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 40e0d8619a2d..345e63f4eb71 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -20,9 +20,12 @@ #include <linux/ioport.h> #include <linux/platform_device.h> #include <linux/reboot.h> +#include <linux/irqchip/irq-sa11x0.h> #include <video/sa1100fb.h> +#include <soc/sa1100/pwer.h> + #include <asm/div64.h> #include <asm/mach/map.h> #include <asm/mach/flash.h> @@ -375,6 +378,18 @@ void __init sa1100_timer_init(void) pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x90000000), 3686400); } +static struct resource irq_resource = + DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs"); + +void __init sa1100_init_irq(void) +{ + request_resource(&iomem_resource, &irq_resource); + + sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start); + + sa1100_init_gpio(); +} + /* * Disable the memory bus request/grant signals on the SA1110 to * ensure that we don't receive spurious memory requests. We set @@ -416,3 +431,25 @@ void sa1110_mb_enable(void) local_irq_restore(flags); } +int sa11x0_gpio_set_wake(unsigned int gpio, unsigned int on) +{ + if (on) + PWER |= BIT(gpio); + else + PWER &= ~BIT(gpio); + + return 0; +} + +int sa11x0_sc_set_wake(unsigned int irq, unsigned int on) +{ + if (BIT(irq) != IC_RTCAlrm) + return -EINVAL; + + if (on) + PWER |= PWER_RTC; + else + PWER &= ~PWER_RTC; + + return 0; +} diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c deleted file mode 100644 index 65aebfa66fe5..000000000000 --- a/arch/arm/mach-sa1100/irq.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * linux/arch/arm/mach-sa1100/irq.c - * - * Copyright (C) 1999-2001 Nicolas Pitre - * - * Generic IRQ handling for the SA11x0, GPIO 11-27 IRQ demultiplexing. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/irqdomain.h> -#include <linux/ioport.h> -#include <linux/syscore_ops.h> - -#include <mach/hardware.h> -#include <mach/irqs.h> -#include <asm/mach/irq.h> -#include <asm/exception.h> - -#include "generic.h" - - -/* - * We don't need to ACK IRQs on the SA1100 unless they're GPIOs - * this is for internal IRQs i.e. from IRQ LCD to RTCAlrm. - */ -static void sa1100_mask_irq(struct irq_data *d) -{ - ICMR &= ~BIT(d->hwirq); -} - -static void sa1100_unmask_irq(struct irq_data *d) -{ - ICMR |= BIT(d->hwirq); -} - -/* - * Apart form GPIOs, only the RTC alarm can be a wakeup event. - */ -static int sa1100_set_wake(struct irq_data *d, unsigned int on) -{ - if (BIT(d->hwirq) == IC_RTCAlrm) { - if (on) - PWER |= PWER_RTC; - else - PWER &= ~PWER_RTC; - return 0; - } - return -EINVAL; -} - -static struct irq_chip sa1100_normal_chip = { - .name = "SC", - .irq_ack = sa1100_mask_irq, - .irq_mask = sa1100_mask_irq, - .irq_unmask = sa1100_unmask_irq, - .irq_set_wake = sa1100_set_wake, -}; - -static int sa1100_normal_irqdomain_map(struct irq_domain *d, - unsigned int irq, irq_hw_number_t hwirq) -{ - irq_set_chip_and_handler(irq, &sa1100_normal_chip, - handle_level_irq); - set_irq_flags(irq, IRQF_VALID); - - return 0; -} - -static struct irq_domain_ops sa1100_normal_irqdomain_ops = { - .map = sa1100_normal_irqdomain_map, - .xlate = irq_domain_xlate_onetwocell, -}; - -static struct irq_domain *sa1100_normal_irqdomain; - -static struct resource irq_resource = - DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs"); - -static struct sa1100irq_state { - unsigned int saved; - unsigned int icmr; - unsigned int iclr; - unsigned int iccr; -} sa1100irq_state; - -static int sa1100irq_suspend(void) -{ - struct sa1100irq_state *st = &sa1100irq_state; - - st->saved = 1; - st->icmr = ICMR; - st->iclr = ICLR; - st->iccr = ICCR; - - /* - * Disable all GPIO-based interrupts. - */ - ICMR &= ~(IC_GPIO11_27|IC_GPIO10|IC_GPIO9|IC_GPIO8|IC_GPIO7| - IC_GPIO6|IC_GPIO5|IC_GPIO4|IC_GPIO3|IC_GPIO2| - IC_GPIO1|IC_GPIO0); - - return 0; -} - -static void sa1100irq_resume(void) -{ - struct sa1100irq_state *st = &sa1100irq_state; - - if (st->saved) { - ICCR = st->iccr; - ICLR = st->iclr; - - ICMR = st->icmr; - } -} - -static struct syscore_ops sa1100irq_syscore_ops = { - .suspend = sa1100irq_suspend, - .resume = sa1100irq_resume, -}; - -static int __init sa1100irq_init_devicefs(void) -{ - register_syscore_ops(&sa1100irq_syscore_ops); - return 0; -} - -device_initcall(sa1100irq_init_devicefs); - -static asmlinkage void __exception_irq_entry -sa1100_handle_irq(struct pt_regs *regs) -{ - uint32_t icip, icmr, mask; - - do { - icip = (ICIP); - icmr = (ICMR); - mask = icip & icmr; - - if (mask == 0) - break; - - handle_domain_irq(sa1100_normal_irqdomain, - ffs(mask) - 1, regs); - } while (1); -} - -void __init sa1100_init_irq(void) -{ - request_resource(&iomem_resource, &irq_resource); - - /* disable all IRQs */ - ICMR = 0; - - /* all IRQs are IRQ, not FIQ */ - ICLR = 0; - - /* - * Whatever the doc says, this has to be set for the wait-on-irq - * instruction to work... on a SA1100 rev 9 at least. - */ - ICCR = 1; - - sa1100_normal_irqdomain = irq_domain_add_simple(NULL, - 32, IRQ_GPIO0_SC, - &sa1100_normal_irqdomain_ops, NULL); - - set_handle_irq(sa1100_handle_irq); - - sa1100_init_gpio(); -} diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index af868d258e66..99d9a3b1bf34 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -327,8 +327,7 @@ static int neponset_probe(struct platform_device *dev) irq_set_chip(d->irq_base + NEP_IRQ_SA1111, &nochip); irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING); - irq_set_handler_data(irq, d); - irq_set_chained_handler(irq, neponset_irq_handler); + irq_set_chained_handler_and_data(irq, neponset_irq_handler, d); /* * We would set IRQ_GPIO25 to be a wake-up IRQ, but unfortunately diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c index 9832e48396a4..00291cc1772d 100644 --- a/arch/arm/mach-shmobile/setup-r8a7740.c +++ b/arch/arm/mach-shmobile/setup-r8a7740.c @@ -13,7 +13,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/init.h> @@ -690,56 +689,6 @@ void __init r8a7740_meram_workaround(void) } } -#define ICCR 0x0004 -#define ICSTART 0x0070 - -#define i2c_read(reg, offset) ioread8(reg + offset) -#define i2c_write(reg, offset, data) iowrite8(data, reg + offset) - -/* - * r8a7740 chip has lasting errata on I2C I/O pad reset. - * this is work-around for it. - */ -static void r8a7740_i2c_workaround(struct platform_device *pdev) -{ - struct resource *res; - void __iomem *reg; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (unlikely(!res)) { - pr_err("r8a7740 i2c workaround fail (cannot find resource)\n"); - return; - } - - reg = ioremap(res->start, resource_size(res)); - if (unlikely(!reg)) { - pr_err("r8a7740 i2c workaround fail (cannot map IO)\n"); - return; - } - - i2c_write(reg, ICCR, i2c_read(reg, ICCR) | 0x80); - i2c_read(reg, ICCR); /* dummy read */ - - i2c_write(reg, ICSTART, i2c_read(reg, ICSTART) | 0x10); - i2c_read(reg, ICSTART); /* dummy read */ - - udelay(10); - - i2c_write(reg, ICCR, 0x01); - i2c_write(reg, ICSTART, 0x00); - - udelay(10); - - i2c_write(reg, ICCR, 0x10); - udelay(10); - i2c_write(reg, ICCR, 0x00); - udelay(10); - i2c_write(reg, ICCR, 0x10); - udelay(10); - - iounmap(reg); -} - void __init r8a7740_add_standard_devices(void) { static struct pm_domain_device domain_devices[] __initdata = { @@ -766,10 +715,6 @@ void __init r8a7740_add_standard_devices(void) { "A3SP", &usb_dma_device }, }; - /* I2C work-around */ - r8a7740_i2c_workaround(&i2c0_device); - r8a7740_i2c_workaround(&i2c1_device); - r8a7740_init_pm_domains(); /* add devices */ diff --git a/arch/arm/mach-socfpga/pm.c b/arch/arm/mach-socfpga/pm.c index 1ed89fc2b7a8..6a4199f2bffb 100644 --- a/arch/arm/mach-socfpga/pm.c +++ b/arch/arm/mach-socfpga/pm.c @@ -56,7 +56,7 @@ static int socfpga_setup_ocram_self_refresh(void) goto put_node; } - ocram_pool = dev_get_gen_pool(&pdev->dev); + ocram_pool = gen_pool_get(&pdev->dev); if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 6ea09fe53426..23a04fe5d2ad 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -41,8 +41,9 @@ #include <linux/bitops.h> #include <linux/reboot.h> +#include <clocksource/timer-sp804.h> + #include <asm/irq.h> -#include <asm/hardware/arm_timer.h> #include <asm/hardware/icst.h> #include <asm/mach-types.h> @@ -52,7 +53,6 @@ #include <asm/mach/map.h> #include <mach/hardware.h> #include <mach/platform.h> -#include <asm/hardware/timer-sp.h> #include <plat/sched_clock.h> @@ -798,10 +798,10 @@ void __init versatile_timer_init(void) /* * Initialise to a known state (all timers off) */ - writel(0, TIMER0_VA_BASE + TIMER_CTRL); - writel(0, TIMER1_VA_BASE + TIMER_CTRL); - writel(0, TIMER2_VA_BASE + TIMER_CTRL); - writel(0, TIMER3_VA_BASE + TIMER_CTRL); + sp804_timer_disable(TIMER0_VA_BASE); + sp804_timer_disable(TIMER1_VA_BASE); + sp804_timer_disable(TIMER2_VA_BASE); + sp804_timer_disable(TIMER3_VA_BASE); sp804_clocksource_init(TIMER3_VA_BASE, "timer3"); sp804_clockevents_init(TIMER0_VA_BASE, IRQ_TIMERINT0_1, "timer0"); diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index f61158c6ce71..5766ce2be32b 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -589,4 +589,4 @@ static int __init ve_spc_clk_init(void) platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); return 0; } -module_init(ve_spc_clk_init); +device_initcall(ve_spc_clk_init); diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index b4f92b9a13ac..7c6b976ab8d3 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -6,7 +6,7 @@ comment "Processor Type" # ARM7TDMI config CPU_ARM7TDMI - bool "Support ARM7TDMI processor" + bool depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T @@ -56,7 +56,7 @@ config CPU_ARM740T # ARM9TDMI config CPU_ARM9TDMI - bool "Support ARM9TDMI processor" + bool depends on !MMU select CPU_32v4T select CPU_ABRT_NOMMU @@ -604,6 +604,22 @@ config CPU_USE_DOMAINS This option enables or disables the use of domain switching via the set_fs() function. +config CPU_V7M_NUM_IRQ + int "Number of external interrupts connected to the NVIC" + depends on CPU_V7M + default 90 if ARCH_STM32 + default 38 if ARCH_EFM32 + default 112 if SOC_VF610 + default 240 + help + This option indicates the number of interrupts connected to the NVIC. + The value can be larger than the real number of interrupts supported + by the system, but must not be lower. + The default value is 240, corresponding to the maximum number of + interrupts supported by the NVIC on Cortex-M family. + + If unsure, keep default value. + # # CPU supports 36-bit I/O # @@ -624,6 +640,10 @@ config ARM_LPAE If unsure, say N. +config ARM_PV_FIXUP + def_bool y + depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE + config ARCH_PHYS_ADDR_T_64BIT def_bool ARM_LPAE diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index d3afdf9eb65a..57c8df500e8c 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o @@ -55,6 +56,8 @@ obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o +CFLAGS_copypage-feroceon.o := -march=armv5te + obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index e309c8f35af5..71b3d3309024 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -38,10 +38,11 @@ struct l2c_init_data { unsigned way_size_0; unsigned num_lock; void (*of_parse)(const struct device_node *, u32 *, u32 *); - void (*enable)(void __iomem *, u32, unsigned); + void (*enable)(void __iomem *, unsigned); void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); void (*save)(void __iomem *); void (*configure)(void __iomem *); + void (*unlock)(void __iomem *, unsigned); struct outer_cache_fns outer_cache; }; @@ -110,14 +111,6 @@ static inline void l2c_unlock(void __iomem *base, unsigned num) static void l2c_configure(void __iomem *base) { - if (outer_cache.configure) { - outer_cache.configure(&l2x0_saved_regs); - return; - } - - if (l2x0_data->configure) - l2x0_data->configure(base); - l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL); } @@ -125,18 +118,16 @@ static void l2c_configure(void __iomem *base) * Enable the L2 cache controller. This function must only be * called when the cache controller is known to be disabled. */ -static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void l2c_enable(void __iomem *base, unsigned num_lock) { unsigned long flags; - /* Do not touch the controller if already enabled. */ - if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN) - return; - - l2x0_saved_regs.aux_ctrl = aux; - l2c_configure(base); + if (outer_cache.configure) + outer_cache.configure(&l2x0_saved_regs); + else + l2x0_data->configure(base); - l2c_unlock(base, num_lock); + l2x0_data->unlock(base, num_lock); local_irq_save(flags); __l2c_op_way(base + L2X0_INV_WAY); @@ -163,7 +154,11 @@ static void l2c_save(void __iomem *base) static void l2c_resume(void) { - l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock); + void __iomem *base = l2x0_base; + + /* Do not touch the controller if already enabled. */ + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_data->num_lock); } /* @@ -252,6 +247,8 @@ static const struct l2c_init_data l2c210_data __initconst = { .num_lock = 1, .enable = l2c_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -391,16 +388,22 @@ static void l2c220_sync(void) raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void l2c220_enable(void __iomem *base, unsigned num_lock) { /* * Always enable non-secure access to the lockdown registers - * we write to them as part of the L2C enable sequence so they * need to be accessible. */ - aux |= L220_AUX_CTRL_NS_LOCKDOWN; + l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN; - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); +} + +static void l2c220_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); } static const struct l2c_init_data l2c220_data = { @@ -409,6 +412,8 @@ static const struct l2c_init_data l2c220_data = { .num_lock = 1, .enable = l2c220_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, .outer_cache = { .inv_range = l2c220_inv_range, .clean_range = l2c220_clean_range, @@ -569,6 +574,8 @@ static void l2c310_configure(void __iomem *base) { unsigned revision; + l2c_configure(base); + /* restore pl310 setup */ l2c_write_sec(l2x0_saved_regs.tag_latency, base, L310_TAG_LATENCY_CTRL); @@ -603,10 +610,11 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v return NOTIFY_OK; } -static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void __init l2c310_enable(void __iomem *base, unsigned num_lock) { unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; + u32 aux = l2x0_saved_regs.aux_ctrl; if (rev >= L310_CACHE_ID_RTL_R2P0) { if (cortex_a9) { @@ -649,9 +657,9 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) * we write to them as part of the L2C enable sequence so they * need to be accessible. */ - aux |= L310_AUX_CTRL_NS_LOCKDOWN; + l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN; - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); /* Read back resulting AUX_CTRL value as it could have been altered. */ aux = readl_relaxed(base + L2X0_AUX_CTRL); @@ -755,6 +763,12 @@ static void l2c310_resume(void) set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); } +static void l2c310_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); +} + static const struct l2c_init_data l2c310_init_fns __initconst = { .type = "L2C-310", .way_size_0 = SZ_8K, @@ -763,6 +777,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -856,8 +871,11 @@ static int __init __l2c_init(const struct l2c_init_data *data, * Check if l2x0 controller is already enabled. If we are booting * in non-secure mode accessing the below registers will fault. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) - data->enable(l2x0_base, aux, data->num_lock); + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + l2x0_saved_regs.aux_ctrl = aux; + + data->enable(l2x0_base, data->num_lock); + } outer_cache = fns; @@ -1066,6 +1084,8 @@ static const struct l2c_init_data of_l2c210_data __initconst = { .of_parse = l2x0_of_parse, .enable = l2c_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1084,6 +1104,8 @@ static const struct l2c_init_data of_l2c220_data __initconst = { .of_parse = l2x0_of_parse, .enable = l2c220_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, .outer_cache = { .inv_range = l2c220_inv_range, .clean_range = l2c220_clean_range, @@ -1199,6 +1221,26 @@ static void __init l2c310_of_parse(const struct device_node *np, pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n"); } + ret = of_property_read_u32(np, "prefetch-data", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH; + else + prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-data property value is missing\n"); + } + + ret = of_property_read_u32(np, "prefetch-instr", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH; + else + prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-instr property value is missing\n"); + } + l2x0_saved_regs.prefetch_ctrl = prefetch; } @@ -1211,6 +1253,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1240,6 +1283,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1366,7 +1410,7 @@ static void aurora_save(void __iomem *base) * For Aurora cache in no outer mode, enable via the CP15 coprocessor * broadcasting of cache commands to L2. */ -static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, +static void __init aurora_enable_no_outer(void __iomem *base, unsigned num_lock) { u32 u; @@ -1377,7 +1421,7 @@ static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, isb(); - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); } static void __init aurora_fixup(void __iomem *base, u32 cache_id, @@ -1416,6 +1460,8 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = { .enable = l2c_enable, .fixup = aurora_fixup, .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = aurora_inv_range, .clean_range = aurora_clean_range, @@ -1435,6 +1481,8 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = { .enable = aurora_enable_no_outer, .fixup = aurora_fixup, .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .resume = l2c_resume, }, @@ -1585,6 +1633,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = { .enable = l2c310_enable, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = bcm_inv_range, .clean_range = bcm_clean_range, @@ -1608,6 +1657,7 @@ static void __init tauros3_save(void __iomem *base) static void tauros3_configure(void __iomem *base) { + l2c_configure(base); writel_relaxed(l2x0_saved_regs.aux2_ctrl, base + TAUROS3_AUX2_CTRL); writel_relaxed(l2x0_saved_regs.prefetch_ctrl, @@ -1621,6 +1671,7 @@ static const struct l2c_init_data of_tauros3_data __initconst = { .enable = l2c_enable, .save = tauros3_save, .configure = tauros3_configure, + .unlock = l2c_unlock, /* Tauros3 broadcasts L1 cache operations to L2 */ .outer_cache = { .resume = l2c_resume, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7e7583ddd607..1ced8a0f7a52 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -148,11 +148,14 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs); +static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs); struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, .free = arm_coherent_dma_free, - .mmap = arm_dma_mmap, + .mmap = arm_coherent_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, @@ -690,10 +693,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, attrs, __builtin_return_address(0)); } -/* - * Create userspace mapping for the DMA-coherent memory. - */ -int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, +static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { @@ -704,8 +704,6 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_pfn(dev, dma_addr); unsigned long off = vma->vm_pgoff; - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -721,6 +719,26 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, } /* + * Create userspace mapping for the DMA-coherent memory. + */ +static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ +#ifdef CONFIG_MMU + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); +#endif /* CONFIG_MMU */ + return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +/* * Free a buffer as defined by the above mapping. */ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 6333d9c17875..0d629b8f973f 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index b98895d9fe57..ee8dfa793989 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page) void *kmap; int type; + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr) kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn) int idx, type; struct page *page = pfn_to_page(pfn); + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index c72412415093..fcafb521f14e 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -41,11 +41,6 @@ int pud_huge(pud_t pud) return 0; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - int pmd_huge(pmd_t pmd) { return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index be92fa0f2f35..8a63b4cdc0f2 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -268,6 +268,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); + early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* reserve memory for DMA contiguous allocations */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 7186382672b5..6ca7d9aa896f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1387,123 +1387,98 @@ static void __init map_lowmem(void) } } -#ifdef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_PV_FIXUP +extern unsigned long __atags_pointer; +typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); +pgtables_remap lpae_pgtables_remap_asm; + /* * early_paging_init() recreates boot time page table setup, allowing machines * to switch over to a high (>4G) address space on LPAE systems */ -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) +void __init early_paging_init(const struct machine_desc *mdesc) { - pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; - unsigned long map_start, map_end; - pgd_t *pgd0, *pgdk; - pud_t *pud0, *pudk, *pud_start; - pmd_t *pmd0, *pmdk; - phys_addr_t phys; - int i; + pgtables_remap *lpae_pgtables_remap; + unsigned long pa_pgd; + unsigned int cr, ttbcr; + long long offset; + void *boot_data; - if (!(mdesc->init_meminfo)) + if (!mdesc->pv_fixup) return; - /* remap kernel code and data */ - map_start = init_mm.start_code & PMD_MASK; - map_end = ALIGN(init_mm.brk, PMD_SIZE); + offset = mdesc->pv_fixup(); + if (offset == 0) + return; - /* get a handle on things... */ - pgd0 = pgd_offset_k(0); - pud_start = pud0 = pud_offset(pgd0, 0); - pmd0 = pmd_offset(pud0, 0); + /* + * Get the address of the remap function in the 1:1 identity + * mapping setup by the early page table assembly code. We + * must get this prior to the pv update. The following barrier + * ensures that this is complete before we fixup any P:V offsets. + */ + lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); + pa_pgd = __pa(swapper_pg_dir); + boot_data = __va(__atags_pointer); + barrier(); - pgdk = pgd_offset_k(map_start); - pudk = pud_offset(pgdk, map_start); - pmdk = pmd_offset(pudk, map_start); + pr_info("Switching physical address space to 0x%08llx\n", + (u64)PHYS_OFFSET + offset); - mdesc->init_meminfo(); + /* Re-set the phys pfn offset, and the pv offset */ + __pv_offset += offset; + __pv_phys_pfn_offset += PFN_DOWN(offset); /* Run the patch stub to update the constants */ fixup_pv_table(&__pv_table_begin, (&__pv_table_end - &__pv_table_begin) << 2); /* - * Cache cleaning operations for self-modifying code - * We should clean the entries by MVA but running a - * for loop over every pv_table entry pointer would - * just complicate the code. - */ - flush_cache_louis(); - dsb(ishst); - isb(); - - /* - * FIXME: This code is not architecturally compliant: we modify - * the mappings in-place, indeed while they are in use by this - * very same code. This may lead to unpredictable behaviour of - * the CPU. - * - * Even modifying the mappings in a separate page table does - * not resolve this. - * - * The architecture strongly recommends that when a mapping is - * changed, that it is changed by first going via an invalid - * mapping and back to the new mapping. This is to ensure that - * no TLB conflicts (caused by the TLB having more than one TLB - * entry match a translation) can occur. However, doing that - * here will result in unmapping the code we are running. - */ - pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n"); - add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); - - /* - * Remap level 1 table. This changes the physical addresses - * used to refer to the level 2 page tables to the high - * physical address alias, leaving everything else the same. - */ - for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { - set_pud(pud0, - __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); - pmd0 += PTRS_PER_PMD; - } - - /* - * Remap the level 2 table, pointing the mappings at the high - * physical address alias of these pages. - */ - phys = __pa(map_start); - do { - *pmdk++ = __pmd(phys | pmdprot); - phys += PMD_SIZE; - } while (phys < map_end); - - /* - * Ensure that the above updates are flushed out of the cache. - * This is not strictly correct; on a system where the caches - * are coherent with each other, but the MMU page table walks - * may not be coherent, flush_cache_all() may be a no-op, and - * this will fail. + * We changing not only the virtual to physical mapping, but also + * the physical addresses used to access memory. We need to flush + * all levels of cache in the system with caching disabled to + * ensure that all data is written back, and nothing is prefetched + * into the caches. We also need to prevent the TLB walkers + * allocating into the caches too. Note that this is ARMv7 LPAE + * specific. */ + cr = get_cr(); + set_cr(cr & ~(CR_I | CR_C)); + asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr)); + asm volatile("mcr p15, 0, %0, c2, c0, 2" + : : "r" (ttbcr & ~(3 << 8 | 3 << 10))); flush_cache_all(); /* - * Re-write the TTBR values to point them at the high physical - * alias of the page tables. We expect __va() will work on - * cpu_get_pgd(), which returns the value of TTBR0. + * Fixup the page tables - this must be in the idmap region as + * we need to disable the MMU to do this safely, and hence it + * needs to be assembly. It's fairly simple, as we're using the + * temporary tables setup by the initial assembly code. */ - cpu_switch_mm(pgd0, &init_mm); - cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + lpae_pgtables_remap(offset, pa_pgd, boot_data); - /* Finally flush any stale TLB values. */ - local_flush_bp_all(); - local_flush_tlb_all(); + /* Re-enable the caches and cacheable TLB walks */ + asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); + set_cr(cr); } #else -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) +void __init early_paging_init(const struct machine_desc *mdesc) { - if (mdesc->init_meminfo) - mdesc->init_meminfo(); + long long offset; + + if (!mdesc->pv_fixup) + return; + + offset = mdesc->pv_fixup(); + if (offset == 0) + return; + + pr_crit("Physical address space modification is only to support Keystone2.\n"); + pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n"); + pr_crit("feature. Your kernel may crash now, have a good day.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); } #endif diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index a014dfacd5ca..afd7e05d95f1 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -304,15 +304,6 @@ void __init sanity_check_meminfo(void) } /* - * early_paging_init() recreates boot time page table setup, allowing machines - * to switch over to a high (>4G) address space on LPAE systems - */ -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) -{ -} - -/* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. */ diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 10405b8d31af..c6141a5435c3 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -36,14 +36,16 @@ * * It is assumed that: * - we are not using split page tables + * + * Note that we always need to flush BTAC/BTB if IBE is set + * even on Cortex-A8 revisions not affected by 430973. + * If IBE is not set, the flush BTAC/BTB won't do anything. */ ENTRY(cpu_ca8_switch_mm) #ifdef CONFIG_MMU mov r2, #0 -#ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif -#endif ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r1, r1 @ get mm->context.id @@ -148,10 +150,10 @@ ENDPROC(cpu_v7_set_pte_ext) * Macro for setting up the TTBRx and TTBCR registers. * - \ttb0 and \ttb1 updated with the corresponding flags. */ - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp mcr p15, 0, \zero, c2, c0, 2 @ TTB control register - ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) - ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) + ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP) ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index d3daed0ae0ad..5e5720e8bc5f 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext) * Macro for setting up the TTBRx and TTBCR registers. * - \ttbr1 updated. */ - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? - mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register + cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET? + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control egister orr \tmp, \tmp, #TTB_EAE ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) @@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext) */ orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR - mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits - mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + mov \tmp, \ttbr1, lsr #20 + mov \ttbr1, \ttbr1, lsl #12 addls \ttbr1, \ttbr1, #TTBR1_OFFSET mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 - mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits - mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits - mcrr p15, 0, \ttbr0, \tmp, c2 @ load TTBR0 .endm /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 75ae72160099..0716bbe19872 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -252,6 +252,12 @@ ENDPROC(cpu_pj4b_do_resume) * Initialise TLB, Caches, and MMU state ready to switch the MMU * on. Return in r0 the new CP15 C1 control register setting. * + * r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack + * r4: TTBR0 (low word) + * r5: TTBR0 (high word if LPAE) + * r8: TTBR1 + * r9: Main ID register + * * This should be able to cover all ARMv7 cores. * * It is assumed that: @@ -279,6 +285,78 @@ __v7_ca17mp_setup: #endif b __v7_setup +/* + * Errata: + * r0, r10 available for use + * r1, r2, r4, r5, r9, r13: must be preserved + * r3: contains MIDR rX number in bits 23-20 + * r6: contains MIDR rXpY as 8-bit XY number + * r9: MIDR + */ +__ca8_errata: +#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) + teq r3, #0x00100000 @ only present in r1p* + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_458693 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 5) @ set L1NEON to 1 + orreq r0, r0, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_460075 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 1, r0, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r0, #1 << 22 + orreq r0, r0, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r0, c9, c0, 2 @ write the L2 cache aux ctrl register +#endif + b __errata_finish + +__ca9_errata: +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrle r0, r0, #1 << 4 @ set bit #4 + mcrle p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 12 @ set bit #12 + orreq r0, r0, #1 << 22 @ set bit #22 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r3, #0x00200000 @ only present in r2p* + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 6 @ set bit #6 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) + mrclt p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrlt r0, r0, #1 << 11 @ set bit #11 + mcrlt p15, 0, r0, c15, c0, 1 @ write diagnostic register +1: +#endif + b __errata_finish + +__ca15_errata: +#ifdef CONFIG_ARM_ERRATA_773022 + cmp r6, #0x4 @ only present up to r0p4 + mrcle p15, 0, r0, c1, c0, 1 @ read aux control register + orrle r0, r0, #1 << 1 @ disable loop buffer + mcrle p15, 0, r0, c1, c0, 1 @ write aux control register +#endif + b __errata_finish + __v7_pj4b_setup: #ifdef CONFIG_CPU_PJ4B @@ -339,96 +417,38 @@ __v7_setup: bl v7_invalidate_l1 ldmia r12, {r0-r5, r7, r9, r11, lr} - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - and r10, r0, #0xff000000 @ ARM? - teq r10, #0x41000000 - bne 3f - and r5, r0, #0x00f00000 @ variant - and r6, r0, #0x0000000f @ revision - orr r6, r6, r5, lsr #20-4 @ combine variant and revision - ubfx r0, r0, #4, #12 @ primary part number + and r0, r9, #0xff000000 @ ARM? + teq r0, #0x41000000 + bne __errata_finish + and r3, r9, #0x00f00000 @ variant + and r6, r9, #0x0000000f @ revision + orr r6, r6, r3, lsr #20-4 @ combine variant and revision + ubfx r0, r9, #4, #12 @ primary part number /* Cortex-A8 Errata */ ldr r10, =0x00000c08 @ Cortex-A8 primary part number teq r0, r10 - bne 2f -#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) - - teq r5, #0x00100000 @ only present in r1p* - mrceq p15, 0, r10, c1, c0, 1 @ read aux control register - orreq r10, r10, #(1 << 6) @ set IBE to 1 - mcreq p15, 0, r10, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_458693 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 0, r10, c1, c0, 1 @ read aux control register - orreq r10, r10, #(1 << 5) @ set L1NEON to 1 - orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 - mcreq p15, 0, r10, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_460075 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register - tsteq r10, #1 << 22 - orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit - mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register -#endif - b 3f + beq __ca8_errata /* Cortex-A9 Errata */ -2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + ldr r10, =0x00000c09 @ Cortex-A9 primary part number teq r0, r10 - bne 3f -#ifdef CONFIG_ARM_ERRATA_742230 - cmp r6, #0x22 @ only present up to r2p2 - mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrle r10, r10, #1 << 4 @ set bit #4 - mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_742231 - teq r6, #0x20 @ present in r2p0 - teqne r6, #0x21 @ present in r2p1 - teqne r6, #0x22 @ present in r2p2 - mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register - orreq r10, r10, #1 << 12 @ set bit #12 - orreq r10, r10, #1 << 22 @ set bit #22 - mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_743622 - teq r5, #0x00200000 @ only present in r2p* - mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register - orreq r10, r10, #1 << 6 @ set bit #6 - mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) - ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 - ALT_UP_B(1f) - mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrlt r10, r10, #1 << 11 @ set bit #11 - mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register -1: -#endif + beq __ca9_errata /* Cortex-A15 Errata */ -3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number + ldr r10, =0x00000c0f @ Cortex-A15 primary part number teq r0, r10 - bne 4f + beq __ca15_errata -#ifdef CONFIG_ARM_ERRATA_773022 - cmp r6, #0x4 @ only present up to r0p4 - mrcle p15, 0, r10, c1, c0, 1 @ read aux control register - orrle r10, r10, #1 << 1 @ disable loop buffer - mcrle p15, 0, r10, c1, c0, 1 @ write aux control register -#endif - -4: mov r10, #0 +__errata_finish: + mov r10, #0 mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs - v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup - ldr r5, =PRRR @ PRRR + v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup + ldr r3, =PRRR @ PRRR ldr r6, =NMRR @ NMRR - mcr p15, 0, r5, c10, c2, 0 @ write PRRR + mcr p15, 0, r3, c10, c2, 0 @ write PRRR mcr p15, 0, r6, c10, c2, 1 @ write NMRR #endif dsb @ Complete invalidations @@ -437,22 +457,22 @@ __v7_setup: and r0, r0, #(0xf << 12) @ ThumbEE enabled field teq r0, #(1 << 12) @ check if ThumbEE is present bne 1f - mov r5, #0 - mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0 + mov r3, #0 + mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0 mrc p14, 6, r0, c0, c0, 0 @ load TEECR orr r0, r0, #1 @ set the 1st bit in order to mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access 1: #endif - adr r5, v7_crval - ldmia r5, {r5, r6} + adr r3, v7_crval + ldmia r3, {r3, r6} ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables #ifdef CONFIG_SWP_EMULATE - orr r5, r5, #(1 << 10) @ set SW bit in "clear" + orr r3, r3, #(1 << 10) @ set SW bit in "clear" bic r6, r6, #(1 << 10) @ clear it in "mmuset" #endif mrc p15, 0, r0, c1, c0, 0 @ read control register - bic r0, r0, r5 @ clear bits them + bic r0, r0, r3 @ clear bits them orr r0, r0, r6 @ set them THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions ret lr @ return to head.S:__ret diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index e08e1f2bab76..67d9209077c6 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -98,7 +98,7 @@ __v7m_setup: str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority @ SVC to run the kernel in this mode - adr r1, BSYM(1f) + badr r1, 1f ldr r5, [r12, #11 * 4] @ read the SVC vector entry str r1, [r12, #11 * 4] @ write the temporary SVC vector entry mov r6, lr @ save LR diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S new file mode 100644 index 000000000000..1867f3e43016 --- /dev/null +++ b/arch/arm/mm/pv-fixup-asm.S @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This assembly is required to safely remap the physical address space + * for Keystone 2 + */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/cp15.h> +#include <asm/memory.h> +#include <asm/pgtable.h> + + .section ".idmap.text", "ax" + +#define L1_ORDER 3 +#define L2_ORDER 3 + +ENTRY(lpae_pgtables_remap_asm) + stmfd sp!, {r4-r8, lr} + + mrc p15, 0, r8, c1, c0, 0 @ read control reg + bic ip, r8, #CR_M @ disable caches and MMU + mcr p15, 0, ip, c1, c0, 0 + dsb + isb + + /* Update level 2 entries covering the kernel */ + ldr r6, =(_end - 1) + add r7, r2, #0x1000 + add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER + add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER) +1: ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L2_ORDER + cmp r7, r6 + bls 1b + + /* Update level 2 entries for the boot data */ + add r7, r2, #0x1000 + add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER + bic r7, r7, #(1 << L2_ORDER) - 1 + ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L2_ORDER + ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7] + + /* Update level 1 entries */ + mov r6, #4 + mov r7, r2 +2: ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L1_ORDER + subs r6, r6, #1 + bne 2b + + mrrc p15, 0, r4, r5, c2 @ read TTBR0 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 0, r4, r5, c2 @ write back TTBR0 + mrrc p15, 1, r4, r5, c2 @ read TTBR1 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 1, r4, r5, c2 @ write back TTBR1 + + dsb + + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ I+BTB cache invalidate + mcr p15, 0, ip, c8, c7, 0 @ local_flush_tlb_all() + dsb + isb + + mcr p15, 0, r8, c1, c0, 0 @ re-enable MMU + dsb + isb + + ldmfd sp!, {r4-r8, pc} +ENDPROC(lpae_pgtables_remap_asm) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e0e23582c8b4..4550d247e308 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -873,6 +873,16 @@ b_epilogue: off = offsetof(struct sk_buff, queue_mapping); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); break; + case BPF_LDX | BPF_W | BPF_ABS: + /* + * load a 32bit word from struct seccomp_data. + * seccomp_check_filter() will already have checked + * that k is 32bit aligned and lies within the + * struct seccomp_data. + */ + ctx->seen |= SEEN_SKB; + emit(ARM_LDR_I(r_A, r_skb, k), ctx); + break; default: return -1; } diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index f5b00f41c4f6..2235081a04ee 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -28,11 +28,7 @@ void __init orion_clkdev_add(const char *con_id, const char *dev_id, struct clk *clk) { - struct clk_lookup *cl; - - cl = clkdev_alloc(clk, con_id, dev_id); - if (cl) - clkdev_add(cl); + clkdev_create(clk, con_id, "%s", dev_id); } /* Create clkdev entries for all orion platforms except kirkwood. diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8aa791051029..9d259d94e429 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -6,9 +6,15 @@ obj-vdso := vgettimeofday.o datapage.o targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING -ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds @@ -40,10 +46,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Actual build commands quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ - $(call cc-ldoption, -Wl$(comma)--build-id) \ - -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ - -Wl,-z,common-page-size=4096 -o $@ + cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7d0f07020c80..6c09cc440a2b 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -24,6 +24,7 @@ #include <linux/cpuidle.h> #include <linux/cpufreq.h> #include <linux/cpu.h> +#include <linux/console.h> #include <linux/mm.h> @@ -51,7 +52,9 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); int xen_platform_pci_unplug = XEN_UNPLUG_ALL; EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); -static __read_mostly int xen_events_irq = -1; +static __read_mostly unsigned int xen_events_irq; + +static __initdata struct device_node *xen_node; int xen_remap_domain_mfn_array(struct vm_area_struct *vma, unsigned long addr, @@ -150,40 +153,28 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) * documentation of the Xen Device Tree format. */ #define GRANT_TABLE_PHYSADDR 0 -static int __init xen_guest_init(void) +void __init xen_early_init(void) { - struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page = 0; - struct device_node *node; int len; const char *s = NULL; const char *version = NULL; const char *xen_prefix = "xen,xen-"; - struct resource res; - phys_addr_t grant_frames; - node = of_find_compatible_node(NULL, NULL, "xen,xen"); - if (!node) { + xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); + if (!xen_node) { pr_debug("No Xen support\n"); - return 0; + return; } - s = of_get_property(node, "compatible", &len); + s = of_get_property(xen_node, "compatible", &len); if (strlen(xen_prefix) + 3 < len && !strncmp(xen_prefix, s, strlen(xen_prefix))) version = s + strlen(xen_prefix); if (version == NULL) { pr_debug("Xen version not found\n"); - return 0; + return; } - if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res)) - return 0; - grant_frames = res.start; - xen_events_irq = irq_of_parse_and_map(node, 0); - pr_info("Xen %s support found, events_irq=%d gnttab_frame=%pa\n", - version, xen_events_irq, &grant_frames); - if (xen_events_irq < 0) - return -ENODEV; + pr_info("Xen %s support found\n", version); xen_domain_type = XEN_HVM_DOMAIN; @@ -194,9 +185,34 @@ static int __init xen_guest_init(void) else xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED); - if (!shared_info_page) - shared_info_page = (struct shared_info *) - get_zeroed_page(GFP_KERNEL); + if (!console_set_on_cmdline && !xen_initial_domain()) + add_preferred_console("hvc", 0, NULL); +} + +static int __init xen_guest_init(void) +{ + struct xen_add_to_physmap xatp; + struct shared_info *shared_info_page = NULL; + struct resource res; + phys_addr_t grant_frames; + + if (!xen_domain()) + return 0; + + if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) { + pr_err("Xen grant table base address not found\n"); + return -ENODEV; + } + grant_frames = res.start; + + xen_events_irq = irq_of_parse_and_map(xen_node, 0); + if (!xen_events_irq) { + pr_err("Xen event channel interrupt not found\n"); + return -ENODEV; + } + + shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL); + if (!shared_info_page) { pr_err("not enough memory\n"); return -ENOMEM; diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 498325074a06..03e75fef15b8 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -15,10 +15,10 @@ #include <xen/xen.h> #include <xen/interface/grant_table.h> #include <xen/interface/memory.h> +#include <xen/page.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> -#include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/interface.h> diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index cb7a14c5cd69..887596c67b12 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -10,10 +10,10 @@ #include <xen/xen.h> #include <xen/interface/memory.h> +#include <xen/page.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> -#include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/interface.h> |