diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2013-10-07 16:43:04 +0200 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2013-10-07 16:43:04 +0200 |
commit | a56e74f546b64be93731e42d83baf5b538cc1b11 (patch) | |
tree | 18f6dee45d801e57ac9db2a31664b0d5c0762c50 /arch | |
parent | ARM: bL_switcher: Add query interface to discover CPU affinities (diff) | |
parent | ARM: add support for bit sliced AES using NEON instructions (diff) | |
download | linux-a56e74f546b64be93731e42d83baf5b538cc1b11.tar.xz linux-a56e74f546b64be93731e42d83baf5b538cc1b11.zip |
Merge branch 'arm-aesbs' of git://git.linaro.org/people/ardbiesheuvel/linux-arm into devel-stable
Diffstat (limited to 'arch')
144 files changed, 6844 insertions, 858 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 1feb169274fe..af2cc6eabcc7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP config HAVE_ARCH_JUMP_LABEL bool -config HAVE_ARCH_MUTEX_CPU_RELAX - bool - config HAVE_RCU_TABLE_FREE bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b3135378ac39..22efc5d9c952 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2243,8 +2243,7 @@ config NEON config KERNEL_MODE_NEON bool "Support for NEON in kernel mode" - default n - depends on NEON + depends on NEON && AEABI help Say Y to include support for NEON in kernel mode. diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index cc0f1fb61753..e95af3f5433b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -183,6 +183,7 @@ dtb-$(CONFIG_ARCH_OMAP2PLUS) += omap2420-h4.dtb \ am335x-evm.dtb \ am335x-evmsk.dtb \ am335x-bone.dtb \ + am335x-boneblack.dtb \ am3517-evm.dtb \ am3517_mt_ventoux.dtb \ am43x-epos-evm.dtb diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi new file mode 100644 index 000000000000..2f66deda9f5c --- /dev/null +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/ { + model = "TI AM335x BeagleBone"; + compatible = "ti,am335x-bone", "ti,am33xx"; + + cpus { + cpu@0 { + cpu0-supply = <&dcdc2_reg>; + }; + }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x10000000>; /* 256 MB */ + }; + + am33xx_pinmux: pinmux@44e10800 { + pinctrl-names = "default"; + pinctrl-0 = <&clkout2_pin>; + + user_leds_s0: user_leds_s0 { + pinctrl-single,pins = < + 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */ + 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */ + 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */ + 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */ + >; + }; + + i2c0_pins: pinmux_i2c0_pins { + pinctrl-single,pins = < + 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */ + 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */ + >; + }; + + uart0_pins: pinmux_uart0_pins { + pinctrl-single,pins = < + 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */ + 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */ + >; + }; + + clkout2_pin: pinmux_clkout2_pin { + pinctrl-single,pins = < + 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */ + >; + }; + + cpsw_default: cpsw_default { + pinctrl-single,pins = < + /* Slave 1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */ + 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */ + 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */ + 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */ + 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */ + 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */ + 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */ + 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */ + 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */ + 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */ + 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */ + 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */ + 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */ + >; + }; + + cpsw_sleep: cpsw_sleep { + pinctrl-single,pins = < + /* Slave 1 reset value */ + 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + + davinci_mdio_default: davinci_mdio_default { + pinctrl-single,pins = < + /* MDIO */ + 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */ + 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */ + >; + }; + + davinci_mdio_sleep: davinci_mdio_sleep { + pinctrl-single,pins = < + /* MDIO reset value */ + 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + }; + + ocp { + uart0: serial@44e09000 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_pins>; + + status = "okay"; + }; + + musb: usb@47400000 { + status = "okay"; + + control@44e10000 { + status = "okay"; + }; + + usb-phy@47401300 { + status = "okay"; + }; + + usb-phy@47401b00 { + status = "okay"; + }; + + usb@47401000 { + status = "okay"; + }; + + usb@47401800 { + status = "okay"; + dr_mode = "host"; + }; + + dma-controller@07402000 { + status = "okay"; + }; + }; + + i2c0: i2c@44e0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + + status = "okay"; + clock-frequency = <400000>; + + tps: tps@24 { + reg = <0x24>; + }; + + }; + }; + + leds { + pinctrl-names = "default"; + pinctrl-0 = <&user_leds_s0>; + + compatible = "gpio-leds"; + + led@2 { + label = "beaglebone:green:heartbeat"; + gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + default-state = "off"; + }; + + led@3 { + label = "beaglebone:green:mmc0"; + gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "mmc0"; + default-state = "off"; + }; + + led@4 { + label = "beaglebone:green:usr2"; + gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + led@5 { + label = "beaglebone:green:usr3"; + gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + }; +}; + +/include/ "tps65217.dtsi" + +&tps { + regulators { + dcdc1_reg: regulator@0 { + regulator-always-on; + }; + + dcdc2_reg: regulator@1 { + /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ + regulator-name = "vdd_mpu"; + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <1325000>; + regulator-boot-on; + regulator-always-on; + }; + + dcdc3_reg: regulator@2 { + /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ + regulator-name = "vdd_core"; + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <1150000>; + regulator-boot-on; + regulator-always-on; + }; + + ldo1_reg: regulator@3 { + regulator-always-on; + }; + + ldo2_reg: regulator@4 { + regulator-always-on; + }; + + ldo3_reg: regulator@5 { + regulator-always-on; + }; + + ldo4_reg: regulator@6 { + regulator-always-on; + }; + }; +}; + +&cpsw_emac0 { + phy_id = <&davinci_mdio>, <0>; + phy-mode = "mii"; +}; + +&cpsw_emac1 { + phy_id = <&davinci_mdio>, <1>; + phy-mode = "mii"; +}; + +&mac { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&cpsw_default>; + pinctrl-1 = <&cpsw_sleep>; + +}; + +&davinci_mdio { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&davinci_mdio_default>; + pinctrl-1 = <&davinci_mdio_sleep>; +}; diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts index d318987d44a1..7993c489982c 100644 --- a/arch/arm/boot/dts/am335x-bone.dts +++ b/arch/arm/boot/dts/am335x-bone.dts @@ -8,258 +8,4 @@ /dts-v1/; #include "am33xx.dtsi" - -/ { - model = "TI AM335x BeagleBone"; - compatible = "ti,am335x-bone", "ti,am33xx"; - - cpus { - cpu@0 { - cpu0-supply = <&dcdc2_reg>; - }; - }; - - memory { - device_type = "memory"; - reg = <0x80000000 0x10000000>; /* 256 MB */ - }; - - am33xx_pinmux: pinmux@44e10800 { - pinctrl-names = "default"; - pinctrl-0 = <&clkout2_pin>; - - user_leds_s0: user_leds_s0 { - pinctrl-single,pins = < - 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */ - 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */ - 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */ - 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */ - >; - }; - - i2c0_pins: pinmux_i2c0_pins { - pinctrl-single,pins = < - 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */ - 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */ - >; - }; - - uart0_pins: pinmux_uart0_pins { - pinctrl-single,pins = < - 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */ - 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */ - >; - }; - - clkout2_pin: pinmux_clkout2_pin { - pinctrl-single,pins = < - 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */ - >; - }; - - cpsw_default: cpsw_default { - pinctrl-single,pins = < - /* Slave 1 */ - 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */ - 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */ - 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */ - 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */ - 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */ - 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */ - 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */ - 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */ - 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */ - 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */ - 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */ - 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */ - 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */ - >; - }; - - cpsw_sleep: cpsw_sleep { - pinctrl-single,pins = < - /* Slave 1 reset value */ - 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7) - >; - }; - - davinci_mdio_default: davinci_mdio_default { - pinctrl-single,pins = < - /* MDIO */ - 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */ - 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */ - >; - }; - - davinci_mdio_sleep: davinci_mdio_sleep { - pinctrl-single,pins = < - /* MDIO reset value */ - 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7) - >; - }; - }; - - ocp { - uart0: serial@44e09000 { - pinctrl-names = "default"; - pinctrl-0 = <&uart0_pins>; - - status = "okay"; - }; - - musb: usb@47400000 { - status = "okay"; - - control@44e10000 { - status = "okay"; - }; - - usb-phy@47401300 { - status = "okay"; - }; - - usb-phy@47401b00 { - status = "okay"; - }; - - usb@47401000 { - status = "okay"; - }; - - usb@47401800 { - status = "okay"; - dr_mode = "host"; - }; - - dma-controller@07402000 { - status = "okay"; - }; - }; - - i2c0: i2c@44e0b000 { - pinctrl-names = "default"; - pinctrl-0 = <&i2c0_pins>; - - status = "okay"; - clock-frequency = <400000>; - - tps: tps@24 { - reg = <0x24>; - }; - - }; - }; - - leds { - pinctrl-names = "default"; - pinctrl-0 = <&user_leds_s0>; - - compatible = "gpio-leds"; - - led@2 { - label = "beaglebone:green:heartbeat"; - gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "heartbeat"; - default-state = "off"; - }; - - led@3 { - label = "beaglebone:green:mmc0"; - gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "mmc0"; - default-state = "off"; - }; - - led@4 { - label = "beaglebone:green:usr2"; - gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; - default-state = "off"; - }; - - led@5 { - label = "beaglebone:green:usr3"; - gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>; - default-state = "off"; - }; - }; -}; - -/include/ "tps65217.dtsi" - -&tps { - regulators { - dcdc1_reg: regulator@0 { - regulator-always-on; - }; - - dcdc2_reg: regulator@1 { - /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ - regulator-name = "vdd_mpu"; - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1325000>; - regulator-boot-on; - regulator-always-on; - }; - - dcdc3_reg: regulator@2 { - /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ - regulator-name = "vdd_core"; - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1150000>; - regulator-boot-on; - regulator-always-on; - }; - - ldo1_reg: regulator@3 { - regulator-always-on; - }; - - ldo2_reg: regulator@4 { - regulator-always-on; - }; - - ldo3_reg: regulator@5 { - regulator-always-on; - }; - - ldo4_reg: regulator@6 { - regulator-always-on; - }; - }; -}; - -&cpsw_emac0 { - phy_id = <&davinci_mdio>, <0>; - phy-mode = "mii"; -}; - -&cpsw_emac1 { - phy_id = <&davinci_mdio>, <1>; - phy-mode = "mii"; -}; - -&mac { - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&cpsw_default>; - pinctrl-1 = <&cpsw_sleep>; - -}; - -&davinci_mdio { - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&davinci_mdio_default>; - pinctrl-1 = <&davinci_mdio_sleep>; -}; +#include "am335x-bone-common.dtsi" diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts new file mode 100644 index 000000000000..197cadf72d2c --- /dev/null +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +#include "am33xx.dtsi" +#include "am335x-bone-common.dtsi" + +&ldo3_reg { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; +}; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index c037c223619a..b7a1c6d950b9 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -187,7 +187,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x1000e000 0x1000>; interrupts = <16>; - clocks = <&clks 53>, <&clks 53>; + clocks = <&clks 53>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; @@ -198,7 +198,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x1000f000 0x1000>; interrupts = <15>; - clocks = <&clks 52>, <&clks 52>; + clocks = <&clks 52>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; @@ -309,7 +309,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x10017000 0x1000>; interrupts = <6>; - clocks = <&clks 51>, <&clks 51>; + clocks = <&clks 51>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index a85abb424c34..54cee6517902 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -474,7 +474,7 @@ compatible = "fsl,imx51-pata", "fsl,imx27-pata"; reg = <0x83fe0000 0x4000>; interrupts = <70>; - clocks = <&clks 161>; + clocks = <&clks 172>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6q-pinfunc.h b/arch/arm/boot/dts/imx6q-pinfunc.h index c0e38a45e4bb..9bbe82bdee41 100644 --- a/arch/arm/boot/dts/imx6q-pinfunc.h +++ b/arch/arm/boot/dts/imx6q-pinfunc.h @@ -207,8 +207,8 @@ #define MX6QDL_PAD_EIM_D29__ECSPI4_SS0 0x0c8 0x3dc 0x824 0x2 0x1 #define MX6QDL_PAD_EIM_D29__UART2_RTS_B 0x0c8 0x3dc 0x924 0x4 0x1 #define MX6QDL_PAD_EIM_D29__UART2_CTS_B 0x0c8 0x3dc 0x000 0x4 0x0 -#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c4 0x3dc 0x000 0x4 0x0 -#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c4 0x3dc 0x924 0x4 0x1 +#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c8 0x3dc 0x000 0x4 0x0 +#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c8 0x3dc 0x924 0x4 0x1 #define MX6QDL_PAD_EIM_D29__GPIO3_IO29 0x0c8 0x3dc 0x000 0x5 0x0 #define MX6QDL_PAD_EIM_D29__IPU2_CSI1_VSYNC 0x0c8 0x3dc 0x8e4 0x6 0x0 #define MX6QDL_PAD_EIM_D29__IPU1_DI0_PIN14 0x0c8 0x3dc 0x000 0x7 0x0 diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts index afdb16417d4e..0c514dc8460c 100644 --- a/arch/arm/boot/dts/omap3-beagle-xm.dts +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts @@ -11,7 +11,7 @@ / { model = "TI OMAP3 BeagleBoard xM"; - compatible = "ti,omap3-beagle-xm, ti,omap3-beagle", "ti,omap3"; + compatible = "ti,omap3-beagle-xm", "ti,omap3-beagle", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi index bc48b114eae6..2326d11462a5 100644 --- a/arch/arm/boot/dts/omap3-igep.dtsi +++ b/arch/arm/boot/dts/omap3-igep.dtsi @@ -48,6 +48,15 @@ >; }; + mcbsp2_pins: pinmux_mcbsp2_pins { + pinctrl-single,pins = < + 0x10c (PIN_INPUT | MUX_MODE0) /* mcbsp2_fsx.mcbsp2_fsx */ + 0x10e (PIN_INPUT | MUX_MODE0) /* mcbsp2_clkx.mcbsp2_clkx */ + 0x110 (PIN_INPUT | MUX_MODE0) /* mcbsp2_dr.mcbsp2.dr */ + 0x112 (PIN_OUTPUT | MUX_MODE0) /* mcbsp2_dx.mcbsp2_dx */ + >; + }; + mmc1_pins: pinmux_mmc1_pins { pinctrl-single,pins = < 0x114 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc1_clk.sdmmc1_clk */ @@ -93,6 +102,11 @@ clock-frequency = <400000>; }; +&mcbsp2 { + pinctrl-names = "default"; + pinctrl-0 = <&mcbsp2_pins>; +}; + &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index faa95b5b242e..814ab67c8c29 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -107,6 +107,19 @@ */ clock-frequency = <19200000>; }; + + /* regulator for wl12xx on sdio5 */ + wl12xx_vmmc: wl12xx_vmmc { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_gpio>; + compatible = "regulator-fixed"; + regulator-name = "vwl1271"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio2 11 0>; + startup-delay-us = <70000>; + enable-active-high; + }; }; &omap4_pmx_wkup { @@ -235,6 +248,33 @@ 0x1c (PIN_OUTPUT | MUX_MODE3) /* gpio_wk8 */ >; }; + + /* + * wl12xx GPIO outputs for WLAN_EN, BT_EN, FM_EN, BT_WAKEUP + * REVISIT: Are the pull-ups needed for GPIO 48 and 49? + */ + wl12xx_gpio: pinmux_wl12xx_gpio { + pinctrl-single,pins = < + 0x26 (PIN_OUTPUT | MUX_MODE3) /* gpmc_a19.gpio_43 */ + 0x2c (PIN_OUTPUT | MUX_MODE3) /* gpmc_a22.gpio_46 */ + 0x30 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a24.gpio_48 */ + 0x32 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a25.gpio_49 */ + >; + }; + + /* wl12xx GPIO inputs and SDIO pins */ + wl12xx_pins: pinmux_wl12xx_pins { + pinctrl-single,pins = < + 0x38 (PIN_INPUT | MUX_MODE3) /* gpmc_ncs2.gpio_52 */ + 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */ + 0x108 (PIN_OUTPUT | MUX_MODE0) /* sdmmc5_clk.sdmmc5_clk */ + 0x10a (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_cmd.sdmmc5_cmd */ + 0x10c (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat0.sdmmc5_dat0 */ + 0x10e (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat1.sdmmc5_dat1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat2.sdmmc5_dat2 */ + 0x112 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat3.sdmmc5_dat3 */ + >; + }; }; &i2c1 { @@ -314,8 +354,12 @@ }; &mmc5 { - ti,non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_pins>; + vmmc-supply = <&wl12xx_vmmc>; + non-removable; bus-width = <4>; + cap-power-off-card; }; &emif1 { diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 7951b4ea500a..4f78380ecdb8 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -140,6 +140,19 @@ "DMic", "Digital Mic", "Digital Mic", "Digital Mic1 Bias"; }; + + /* regulator for wl12xx on sdio5 */ + wl12xx_vmmc: wl12xx_vmmc { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_gpio>; + compatible = "regulator-fixed"; + regulator-name = "vwl1271"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio2 22 0>; + startup-delay-us = <70000>; + enable-active-high; + }; }; &omap4_pmx_wkup { @@ -295,6 +308,26 @@ 0xf0 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c4_sda */ >; }; + + /* wl12xx GPIO output for WLAN_EN */ + wl12xx_gpio: pinmux_wl12xx_gpio { + pinctrl-single,pins = < + 0x3c (PIN_OUTPUT | MUX_MODE3) /* gpmc_nwp.gpio_54 */ + >; + }; + + /* wl12xx GPIO inputs and SDIO pins */ + wl12xx_pins: pinmux_wl12xx_pins { + pinctrl-single,pins = < + 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */ + 0x108 (PIN_OUTPUT | MUX_MODE3) /* sdmmc5_clk.sdmmc5_clk */ + 0x10a (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_cmd.sdmmc5_cmd */ + 0x10c (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat0.sdmmc5_dat0 */ + 0x10e (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat1.sdmmc5_dat1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat2.sdmmc5_dat2 */ + 0x112 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat3.sdmmc5_dat3 */ + >; + }; }; &i2c1 { @@ -420,8 +453,12 @@ }; &mmc5 { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_pins>; + vmmc-supply = <&wl12xx_vmmc>; + non-removable; bus-width = <4>; - ti,non-removable; + cap-power-off-card; }; &emif1 { diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 07be2cd7b318..7cdea1bfea09 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -637,7 +637,7 @@ omap_dwc3@4a020000 { compatible = "ti,dwc3"; ti,hwmods = "usb_otg_ss"; - reg = <0x4a020000 0x1000>; + reg = <0x4a020000 0x10000>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <1>; @@ -645,17 +645,18 @@ ranges; dwc3@4a030000 { compatible = "snps,dwc3"; - reg = <0x4a030000 0x1000>; + reg = <0x4a030000 0x10000>; interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>; usb-phy = <&usb2_phy>, <&usb3_phy>; tx-fifo-resize; }; }; - ocp2scp { + ocp2scp@4a080000 { compatible = "ti,omap-ocp2scp"; #address-cells = <1>; #size-cells = <1>; + reg = <0x4a080000 0x20>; ranges; ti,hwmods = "ocp2scp1"; usb2_phy: usb2phy@4a084000 { diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 6e572c64cf5a..f3935b46df29 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -36,6 +36,7 @@ CONFIG_ARCH_TEGRA_114_SOC=y CONFIG_TEGRA_PCI=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_ARCH_U8500=y +CONFIG_MACH_HREFV60=y CONFIG_MACH_SNOWBALL=y CONFIG_MACH_UX500_DT=y CONFIG_ARCH_VEXPRESS=y @@ -46,6 +47,7 @@ CONFIG_ARCH_ZYNQ=y CONFIG_SMP=y CONFIG_HIGHPTE=y CONFIG_ARM_APPENDED_DTB=y +CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_NET=y CONFIG_UNIX=y CONFIG_INET=y diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index a2c83851bc90..81cda39860c5 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -3,7 +3,17 @@ # obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o +obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o -aes-arm-y := aes-armv4.o aes_glue.o -sha1-arm-y := sha1-armv4-large.o sha1_glue.o +aes-arm-y := aes-armv4.o aes_glue.o +aes-arm-bs-y := aesbs-core.o aesbs-glue.o +sha1-arm-y := sha1-armv4-large.o sha1_glue.o + +quiet_cmd_perl = PERL $@ + cmd_perl = $(PERL) $(<) > $(@) + +$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S index 19d6cd6f29f9..3a14ea8fe97e 100644 --- a/arch/arm/crypto/aes-armv4.S +++ b/arch/arm/crypto/aes-armv4.S @@ -148,7 +148,7 @@ AES_Te: @ const AES_KEY *key) { .align 5 ENTRY(AES_encrypt) - sub r3,pc,#8 @ AES_encrypt + adr r3,AES_encrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -381,7 +381,7 @@ _armv4_AES_encrypt: .align 5 ENTRY(private_AES_set_encrypt_key) _armv4_AES_set_encrypt_key: - sub r3,pc,#8 @ AES_set_encrypt_key + adr r3,_armv4_AES_set_encrypt_key teq r0,#0 moveq r0,#-1 beq .Labrt @@ -843,7 +843,7 @@ AES_Td: @ const AES_KEY *key) { .align 5 ENTRY(AES_decrypt) - sub r3,pc,#8 @ AES_decrypt + adr r3,AES_decrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c index 59f7877ead6a..3003fa1f6fb4 100644 --- a/arch/arm/crypto/aes_glue.c +++ b/arch/arm/crypto/aes_glue.c @@ -6,22 +6,12 @@ #include <linux/crypto.h> #include <crypto/aes.h> -#define AES_MAXNR 14 +#include "aes_glue.h" -typedef struct { - unsigned int rd_key[4 *(AES_MAXNR + 1)]; - int rounds; -} AES_KEY; - -struct AES_CTX { - AES_KEY enc_key; - AES_KEY dec_key; -}; - -asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx); -asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx); -asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); -asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); +EXPORT_SYMBOL(AES_encrypt); +EXPORT_SYMBOL(AES_decrypt); +EXPORT_SYMBOL(private_AES_set_encrypt_key); +EXPORT_SYMBOL(private_AES_set_decrypt_key); static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { @@ -81,7 +71,7 @@ static struct crypto_alg aes_alg = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, + .cia_setkey = aes_set_key, .cia_encrypt = aes_encrypt, .cia_decrypt = aes_decrypt } diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h new file mode 100644 index 000000000000..cca3e51eb606 --- /dev/null +++ b/arch/arm/crypto/aes_glue.h @@ -0,0 +1,19 @@ + +#define AES_MAXNR 14 + +struct AES_KEY { + unsigned int rd_key[4 * (AES_MAXNR + 1)]; + int rounds; +}; + +struct AES_CTX { + struct AES_KEY enc_key; + struct AES_KEY dec_key; +}; + +asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); +asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); +asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, + const int bits, struct AES_KEY *key); +asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, + const int bits, struct AES_KEY *key); diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped new file mode 100644 index 000000000000..64205d453260 --- /dev/null +++ b/arch/arm/crypto/aesbs-core.S_shipped @@ -0,0 +1,2544 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel +@ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is +@ granted. +@ ==================================================================== + +@ Bit-sliced AES for ARM NEON +@ +@ February 2012. +@ +@ This implementation is direct adaptation of bsaes-x86_64 module for +@ ARM NEON. Except that this module is endian-neutral [in sense that +@ it can be compiled for either endianness] by courtesy of vld1.8's +@ neutrality. Initial version doesn't implement interface to OpenSSL, +@ only low-level primitives and unsupported entry points, just enough +@ to collect performance results, which for Cortex-A8 core are: +@ +@ encrypt 19.5 cycles per byte processed with 128-bit key +@ decrypt 22.1 cycles per byte processed with 128-bit key +@ key conv. 440 cycles per 128-bit key/0.18 of 8x block +@ +@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +@ which is [much] worse than anticipated (for further details see +@ http://www.openssl.org/~appro/Snapdragon-S4.html). +@ +@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +@ manages in 20.0 cycles]. +@ +@ When comparing to x86_64 results keep in mind that NEON unit is +@ [mostly] single-issue and thus can't [fully] benefit from +@ instruction-level parallelism. And when comparing to aes-armv4 +@ results keep in mind key schedule conversion overhead (see +@ bsaes-x86_64.pl for further details)... +@ +@ <appro@openssl.org> + +@ April-August 2013 +@ +@ Add CBC, CTR and XTS subroutines, adapt for kernel use. +@ +@ <ard.biesheuvel@linaro.org> + +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,_bsaes_decrypt8 + vldmia r4!, {q9} @ round 0 key + add r6,r6,#.LM0ISR-_bsaes_decrypt8 + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>" +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,_bsaes_encrypt8 + vldmia r4!, {q9} @ round 0 key + sub r6,r6,#_bsaes_encrypt8-.LM0SR + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,_bsaes_key_convert + vld1.8 {q7}, [r4]! @ load round 0 key + sub r6,r6,#_bsaes_key_convert-.LM0 + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! @ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0-q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp r2, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0-q1}, [r0]! @ load input + vld1.8 {q2-q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4-q5}, [r0]! + mov r5, r10 + vld1.8 {q6-q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14-q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0-q1}, [r1]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + mov r10, r1 @ save original out pointer + mov r1, r9 @ use the iv scratch space as out buffer + mov r2, r3 + vmov q4,q15 @ just in case ensure that IV + vmov q5,q0 @ and input are preserved + bl AES_decrypt + vld1.8 {q0}, [r9,:64] @ load result + veor q0, q0, q4 @ ^= IV + vmov q15, q5 @ q5 holds input + vst1.8 {q0}, [r10] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp r2, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 +0: add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter + sub r6, r8, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8-q9}, [r0]! @ load input + vld1.8 {q10-q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14-q15}, [r0]! + veor q3, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! + vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0-q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, r0 @ copy arguments + mov r5, r1 + mov r6, r2 + mov r7, r3 + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {q1}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {q1}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {q0}, [r4]! @ load input + vld1.8 {q1}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor q0,q0,q1 + vst1.8 {q0}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} @ save last round key +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + subs r9, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds r9, #0x70 + bmi .Lxts_enc_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + + vmov q8, q9 @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_enc_ret + sub r6, r8, #0x10 + +.Lxts_enc_steal: + ldrb r0, [r7], #1 + ldrb r1, [r8, #-0x10] + strb r0, [r8, #-0x10] + strb r1, [r8], #1 + + subs r9, #1 + bhi .Lxts_enc_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_enc_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r10, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + tst r9, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne r9, #0x10 @ subtract another 16 bytes + subs r9, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds r9, #0x70 + bmi .Lxts_dec_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + mov r5, r2 @ preserve magic + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + mov r2, r5 + + vmov q8, q9 @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia r2, {q5} + vshr.s64 q6, q8, #63 + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vswp d13,d12 + veor q9, q9, q6 + + @ perform the final decryption with the last tweak value + vld1.8 {q0}, [r7]! + mov r0, sp + veor q0, q0, q9 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q9 + vst1.8 {q0}, [r8] + + mov r6, r8 +.Lxts_dec_steal: + ldrb r1, [r8] + ldrb r0, [r7], #1 + strb r1, [r8, #0x10] + strb r0, [r8], #1 + + subs r9, #1 + bhi .Lxts_dec_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_dec_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +#endif diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c new file mode 100644 index 000000000000..4522366da759 --- /dev/null +++ b/arch/arm/crypto/aesbs-glue.c @@ -0,0 +1,434 @@ +/* + * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <crypto/aes.h> +#include <crypto/ablk_helper.h> +#include <crypto/algapi.h> +#include <linux/module.h> + +#include "aes_glue.h" + +#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE) + +struct BS_KEY { + struct AES_KEY rk; + int converted; + u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE]; +} __aligned(8); + +asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in); +asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in); + +asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 iv[]); + +asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks, + struct BS_KEY *key, u8 const iv[]); + +asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 tweak[]); + +asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 tweak[]); + +struct aesbs_cbc_ctx { + struct AES_KEY enc; + struct BS_KEY dec; +}; + +struct aesbs_ctr_ctx { + struct BS_KEY enc; +}; + +struct aesbs_xts_ctx { + struct BS_KEY enc; + struct BS_KEY dec; + struct AES_KEY twkey; +}; + +static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 8; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->dec.rk = ctx->enc; + private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); + ctx->dec.converted = 0; + return 0; +} + +static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 8; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->enc.converted = 0; + return 0; +} + +static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 4; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->dec.rk = ctx->enc.rk; + private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); + private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey); + ctx->enc.converted = ctx->dec.converted = 0; + return 0; +} + +static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while (walk.nbytes) { + u32 blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr; + + if (walk.dst.virt.addr == walk.src.virt.addr) { + u8 *iv = walk.iv; + + do { + crypto_xor(src, iv, AES_BLOCK_SIZE); + AES_encrypt(src, src, &ctx->enc); + iv = src; + src += AES_BLOCK_SIZE; + } while (--blocks); + memcpy(walk.iv, iv, AES_BLOCK_SIZE); + } else { + u8 *dst = walk.dst.virt.addr; + + do { + crypto_xor(walk.iv, src, AES_BLOCK_SIZE); + AES_encrypt(walk.iv, dst, &ctx->enc); + memcpy(walk.iv, dst, AES_BLOCK_SIZE); + src += AES_BLOCK_SIZE; + dst += AES_BLOCK_SIZE; + } while (--blocks); + } + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) { + kernel_neon_begin(); + bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->dec, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + while (walk.nbytes) { + u32 blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *dst = walk.dst.virt.addr; + u8 *src = walk.src.virt.addr; + u8 bk[2][AES_BLOCK_SIZE]; + u8 *iv = walk.iv; + + do { + if (walk.dst.virt.addr == walk.src.virt.addr) + memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE); + + AES_decrypt(src, dst, &ctx->dec.rk); + crypto_xor(dst, iv, AES_BLOCK_SIZE); + + if (walk.dst.virt.addr == walk.src.virt.addr) + iv = bk[blocks & 1]; + else + iv = src; + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + } while (--blocks); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static void inc_be128_ctr(__be32 ctr[], u32 addend) +{ + int i; + + for (i = 3; i >= 0; i--, addend = 1) { + u32 n = be32_to_cpu(ctr[i]) + addend; + + ctr[i] = cpu_to_be32(n); + if (n >= addend) + break; + } +} + +static int aesbs_ctr_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u32 blocks; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + __be32 *ctr = (__be32 *)walk.iv; + u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]); + + /* avoid 32 bit counter overflow in the NEON code */ + if (unlikely(headroom < blocks)) { + blocks = headroom + 1; + tail = walk.nbytes - blocks * AES_BLOCK_SIZE; + } + kernel_neon_begin(); + bsaes_ctr32_encrypt_blocks(walk.src.virt.addr, + walk.dst.virt.addr, blocks, + &ctx->enc, walk.iv); + kernel_neon_end(); + inc_be128_ctr(ctr, blocks); + + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE) + break; + + err = blkcipher_walk_done(desc, &walk, tail); + } + if (walk.nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 ks[AES_BLOCK_SIZE]; + + AES_encrypt(walk.iv, ks, &ctx->enc.rk); + if (tdst != tsrc) + memcpy(tdst, tsrc, nbytes); + crypto_xor(tdst, ks, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + /* generate the initial tweak */ + AES_encrypt(walk.iv, walk.iv, &ctx->twkey); + + while (walk.nbytes) { + kernel_neon_begin(); + bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->enc, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + /* generate the initial tweak */ + AES_encrypt(walk.iv, walk.iv, &ctx->twkey); + + while (walk.nbytes) { + kernel_neon_begin(); + bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->dec, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static struct crypto_alg aesbs_algs[] = { { + .cra_name = "__cbc-aes-neonbs", + .cra_driver_name = "__driver-cbc-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_set_key, + .encrypt = aesbs_cbc_encrypt, + .decrypt = aesbs_cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-neonbs", + .cra_driver_name = "__driver-ctr-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_ctr_set_key, + .encrypt = aesbs_ctr_encrypt, + .decrypt = aesbs_ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-neonbs", + .cra_driver_name = "__driver-xts-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_set_key, + .encrypt = aesbs_xts_encrypt, + .decrypt = aesbs_xts_decrypt, + }, +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aesbs_mod_init(void) +{ + if (!cpu_has_neon()) + return -ENODEV; + + return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); +} + +static void __exit aesbs_mod_exit(void) +{ + crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); +} + +module_init(aesbs_mod_init); +module_exit(aesbs_mod_exit); + +MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL"); diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl new file mode 100644 index 000000000000..f3d96d932573 --- /dev/null +++ b/arch/arm/crypto/bsaes-armv7.pl @@ -0,0 +1,2467 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Specific modes and adaptation for Linux kernel by Ard Biesheuvel +# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is +# granted. +# ==================================================================== + +# Bit-sliced AES for ARM NEON +# +# February 2012. +# +# This implementation is direct adaptation of bsaes-x86_64 module for +# ARM NEON. Except that this module is endian-neutral [in sense that +# it can be compiled for either endianness] by courtesy of vld1.8's +# neutrality. Initial version doesn't implement interface to OpenSSL, +# only low-level primitives and unsupported entry points, just enough +# to collect performance results, which for Cortex-A8 core are: +# +# encrypt 19.5 cycles per byte processed with 128-bit key +# decrypt 22.1 cycles per byte processed with 128-bit key +# key conv. 440 cycles per 128-bit key/0.18 of 8x block +# +# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +# which is [much] worse than anticipated (for further details see +# http://www.openssl.org/~appro/Snapdragon-S4.html). +# +# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +# manages in 20.0 cycles]. +# +# When comparing to x86_64 results keep in mind that NEON unit is +# [mostly] single-issue and thus can't [fully] benefit from +# instruction-level parallelism. And when comparing to aes-armv4 +# results keep in mind key schedule conversion overhead (see +# bsaes-x86_64.pl for further details)... +# +# <appro@openssl.org> + +# April-August 2013 +# +# Add CBC, CTR and XTS subroutines, adapt for kernel use. +# +# <ard.biesheuvel@linaro.org> + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); +my @XMM=map("q$_",(0..15)); + +{ +my ($key,$rounds,$const)=("r4","r5","r6"); + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub Sbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InBasisChange (@b); + &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); + &OutBasisChange (@b[7,1,4,2,6,5,0,3]); +} + +sub InBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[2], @b[2], @b[1] + veor @b[5], @b[5], @b[6] + veor @b[3], @b[3], @b[0] + veor @b[6], @b[6], @b[2] + veor @b[5], @b[5], @b[0] + + veor @b[6], @b[6], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[4], @b[4], @b[5] + + veor @b[2], @b[2], @b[7] + veor @b[3], @b[3], @b[1] + veor @b[1], @b[1], @b[5] +___ +} + +sub OutBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] + veor @b[4], @b[4], @b[6] + veor @b[2], @b[2], @b[0] + veor @b[6], @b[6], @b[1] + + veor @b[1], @b[1], @b[5] + veor @b[5], @b[5], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[2], @b[2], @b[5] + + veor @b[4], @b[4], @b[7] +___ +} + +sub InvSbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InvInBasisChange (@b); + &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); + &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); +} + +sub InvInBasisChange { # OutBasisChange in reverse (with twist) +my @b=@_[5,1,2,6,3,7,0,4]; +$code.=<<___ + veor @b[1], @b[1], @b[7] + veor @b[4], @b[4], @b[7] + + veor @b[7], @b[7], @b[5] + veor @b[1], @b[1], @b[3] + veor @b[2], @b[2], @b[5] + veor @b[3], @b[3], @b[7] + + veor @b[6], @b[6], @b[1] + veor @b[2], @b[2], @b[0] + veor @b[5], @b[5], @b[3] + veor @b[4], @b[4], @b[6] + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] +___ +} + +sub InvOutBasisChange { # InBasisChange in reverse +my @b=@_[2,5,7,3,6,1,0,4]; +$code.=<<___; + veor @b[1], @b[1], @b[5] + veor @b[2], @b[2], @b[7] + + veor @b[3], @b[3], @b[1] + veor @b[4], @b[4], @b[5] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[5], @b[5], @b[0] + veor @b[3], @b[3], @b[7] + veor @b[6], @b[6], @b[2] + veor @b[2], @b[2], @b[1] + veor @b[6], @b[6], @b[3] + + veor @b[3], @b[3], @b[0] + veor @b[5], @b[5], @b[6] +___ +} + +sub Mul_GF4 { +#;************************************************************* +#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * +#;************************************************************* +my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $t1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $t1, $t0 + veor $x0, $x0, $t1 +___ +} + +sub Mul_GF4_N { # not used, see next subroutine +# multiply and scale by N +my ($x0,$x1,$y0,$y1,$t0)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $x1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $x1, $x0 + veor $x0, $x0, $t0 +___ +} + +sub Mul_GF4_N_GF4 { +# interleaved Mul_GF4_N and Mul_GF4 +my ($x0,$x1,$y0,$y1,$t0, + $x2,$x3,$y2,$y3,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + veor $t1, $y2, $y3 + vand $t0, $t0, $x0 + vand $t1, $t1, $x2 + veor $x0, $x0, $x1 + veor $x2, $x2, $x3 + vand $x1, $x1, $y0 + vand $x3, $x3, $y2 + vand $x0, $x0, $y1 + vand $x2, $x2, $y3 + veor $x1, $x1, $x0 + veor $x2, $x2, $x3 + veor $x0, $x0, $t0 + veor $x3, $x3, $t1 +___ +} +sub Mul_GF16_2 { +my @x=@_[0..7]; +my @y=@_[8..11]; +my @t=@_[12..15]; +$code.=<<___; + veor @t[0], @x[0], @x[2] + veor @t[1], @x[1], @x[3] +___ + &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[2], @x[3], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @x[0], @x[0], @t[0] + veor @x[2], @x[2], @t[0] + veor @x[1], @x[1], @t[1] + veor @x[3], @x[3], @t[1] + + veor @t[0], @x[4], @x[6] + veor @t[1], @x[5], @x[7] +___ + &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[6], @x[7], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @x[4], @x[4], @t[0] + veor @x[6], @x[6], @t[0] + veor @x[5], @x[5], @t[1] + veor @x[7], @x[7], @t[1] +___ +} +sub Inv_GF256 { +#;******************************************************************** +#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * +#;******************************************************************** +my @x=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; +# direct optimizations from hardware +$code.=<<___; + veor @t[3], @x[4], @x[6] + veor @t[2], @x[5], @x[7] + veor @t[1], @x[1], @x[3] + veor @s[1], @x[7], @x[6] + vmov @t[0], @t[2] + veor @s[0], @x[0], @x[2] + + vorr @t[2], @t[2], @t[1] + veor @s[3], @t[3], @t[0] + vand @s[2], @t[3], @s[0] + vorr @t[3], @t[3], @s[0] + veor @s[0], @s[0], @t[1] + vand @t[0], @t[0], @t[1] + veor @t[1], @x[3], @x[2] + vand @s[3], @s[3], @s[0] + vand @s[1], @s[1], @t[1] + veor @t[1], @x[4], @x[5] + veor @s[0], @x[1], @x[0] + veor @t[3], @t[3], @s[1] + veor @t[2], @t[2], @s[1] + vand @s[1], @t[1], @s[0] + vorr @t[1], @t[1], @s[0] + veor @t[3], @t[3], @s[3] + veor @t[0], @t[0], @s[1] + veor @t[2], @t[2], @s[2] + veor @t[1], @t[1], @s[3] + veor @t[0], @t[0], @s[2] + vand @s[0], @x[7], @x[3] + veor @t[1], @t[1], @s[2] + vand @s[1], @x[6], @x[2] + vand @s[2], @x[5], @x[1] + vorr @s[3], @x[4], @x[0] + veor @t[3], @t[3], @s[0] + veor @t[1], @t[1], @s[2] + veor @t[0], @t[0], @s[3] + veor @t[2], @t[2], @s[1] + + @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + + @ new smaller inversion + + vand @s[2], @t[3], @t[1] + vmov @s[0], @t[0] + + veor @s[1], @t[2], @s[2] + veor @s[3], @t[0], @s[2] + veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] + + vbsl @s[1], @t[1], @t[0] + vbsl @s[3], @t[3], @t[2] + veor @t[3], @t[3], @t[2] + + vbsl @s[0], @s[1], @s[2] + vbsl @t[0], @s[2], @s[1] + + vand @s[2], @s[0], @s[3] + veor @t[1], @t[1], @t[0] + + veor @s[2], @s[2], @t[3] +___ +# output in s3, s2, s1, t1 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); + +### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb +} + +# AES linear components + +sub ShiftRows { +my @x=@_[0..7]; +my @t=@_[8..11]; +my $mask=pop; +$code.=<<___; + vldmia $key!, {@t[0]-@t[3]} + veor @t[0], @t[0], @x[0] + veor @t[1], @t[1], @x[1] + vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` + vldmia $key!, {@t[0]} + veor @t[2], @t[2], @x[2] + vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` + vldmia $key!, {@t[1]} + veor @t[3], @t[3], @x[3] + vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` + vldmia $key!, {@t[2]} + vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` + vldmia $key!, {@t[3]} + veor @t[0], @t[0], @x[4] + veor @t[1], @t[1], @x[5] + vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` + veor @t[2], @t[2], @x[6] + vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` + veor @t[3], @t[3], @x[7] + vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` + vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` +___ +} + +sub MixColumns { +# modified to emit output in order suitable for feeding back to aesenc[last] +my @x=@_[0..7]; +my @t=@_[8..15]; +my $inv=@_[16]; # optional +$code.=<<___; + vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) + vext.8 @t[2], @x[2], @x[2], #12 + veor @x[1], @x[1], @t[1] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[2], @x[2], @t[2] + vext.8 @t[4], @x[4], @x[4], #12 + veor @x[3], @x[3], @t[3] + vext.8 @t[5], @x[5], @x[5], #12 + veor @x[4], @x[4], @t[4] + vext.8 @t[6], @x[6], @x[6], #12 + veor @x[5], @x[5], @t[5] + vext.8 @t[7], @x[7], @x[7], #12 + veor @x[6], @x[6], @t[6] + + veor @t[1], @t[1], @x[0] + veor @x[7], @x[7], @t[7] + vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor @t[2], @t[2], @x[1] + veor @t[0], @t[0], @x[7] + veor @t[1], @t[1], @x[7] + vext.8 @x[1], @x[1], @x[1], #8 + veor @t[5], @t[5], @x[4] + veor @x[0], @x[0], @t[0] + veor @t[6], @t[6], @x[5] + veor @x[1], @x[1], @t[1] + vext.8 @t[0], @x[4], @x[4], #8 + veor @t[4], @t[4], @x[3] + vext.8 @t[1], @x[5], @x[5], #8 + veor @t[7], @t[7], @x[6] + vext.8 @x[4], @x[3], @x[3], #8 + veor @t[3], @t[3], @x[2] + vext.8 @x[5], @x[7], @x[7], #8 + veor @t[4], @t[4], @x[7] + vext.8 @x[3], @x[6], @x[6], #8 + veor @t[3], @t[3], @x[7] + vext.8 @x[6], @x[2], @x[2], #8 + veor @x[7], @t[1], @t[5] +___ +$code.=<<___ if (!$inv); + veor @x[2], @t[0], @t[4] + veor @x[4], @x[4], @t[3] + veor @x[5], @x[5], @t[7] + veor @x[3], @x[3], @t[6] + @ vmov @x[2], @t[0] + veor @x[6], @x[6], @t[2] + @ vmov @x[7], @t[1] +___ +$code.=<<___ if ($inv); + veor @t[3], @t[3], @x[4] + veor @x[5], @x[5], @t[7] + veor @x[2], @x[3], @t[6] + veor @x[3], @t[0], @t[4] + veor @x[4], @x[6], @t[2] + vmov @x[6], @t[3] + @ vmov @x[7], @t[1] +___ +} + +sub InvMixColumns_orig { +my @x=@_[0..7]; +my @t=@_[8..15]; + +$code.=<<___; + @ multiplication by 0x0e + vext.8 @t[7], @x[7], @x[7], #12 + vmov @t[2], @x[2] + veor @x[2], @x[2], @x[5] @ 2 5 + veor @x[7], @x[7], @x[5] @ 7 5 + vext.8 @t[0], @x[0], @x[0], #12 + vmov @t[5], @x[5] + veor @x[5], @x[5], @x[0] @ 5 0 [1] + veor @x[0], @x[0], @x[1] @ 0 1 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[1], @x[1], @x[2] @ 1 25 + veor @x[0], @x[0], @x[6] @ 01 6 [2] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[1], @x[1], @x[3] @ 125 3 [4] + veor @x[2], @x[2], @x[0] @ 25 016 [3] + veor @x[3], @x[3], @x[7] @ 3 75 + veor @x[7], @x[7], @x[6] @ 75 6 [0] + vext.8 @t[6], @x[6], @x[6], #12 + vmov @t[4], @x[4] + veor @x[6], @x[6], @x[4] @ 6 4 + veor @x[4], @x[4], @x[3] @ 4 375 [6] + veor @x[3], @x[3], @x[7] @ 375 756=36 + veor @x[6], @x[6], @t[5] @ 64 5 [7] + veor @x[3], @x[3], @t[2] @ 36 2 + vext.8 @t[5], @t[5], @t[5], #12 + veor @x[3], @x[3], @t[4] @ 362 4 [5] +___ + my @y = @x[7,5,0,2,1,3,4,6]; +$code.=<<___; + @ multiplication by 0x0b + veor @y[1], @y[1], @y[0] + veor @y[0], @y[0], @t[0] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[5] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[1], @y[1], @t[6] + veor @y[0], @y[0], @t[7] + veor @t[7], @t[7], @t[6] @ clobber t[7] + + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @y[0] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[2], @y[2], @t[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + veor @y[2], @y[2], @t[2] + veor @y[3], @y[3], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[7] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[3], @y[3], @t[3] + veor @y[6], @y[6], @t[3] + veor @y[4], @y[4], @t[3] + veor @y[7], @y[7], @t[4] + vext.8 @t[3], @t[3], @t[3], #12 + veor @y[5], @y[5], @t[4] + veor @y[7], @y[7], @t[7] + veor @t[7], @t[7], @t[5] @ clobber t[7] even more + veor @y[3], @y[3], @t[5] + veor @y[4], @y[4], @t[4] + + veor @y[5], @y[5], @t[7] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[6], @y[6], @t[7] + veor @y[4], @y[4], @t[7] + + veor @t[7], @t[7], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + + @ multiplication by 0x0d + veor @y[4], @y[4], @y[7] + veor @t[7], @t[7], @t[6] @ restore t[7] + veor @y[7], @y[7], @t[4] + vext.8 @t[6], @t[6], @t[6], #12 + veor @y[2], @y[2], @t[0] + veor @y[7], @y[7], @t[5] + vext.8 @t[7], @t[7], @t[7], #12 + veor @y[2], @y[2], @t[2] + + veor @y[3], @y[3], @y[1] + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[0] + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @t[5] + veor @y[0], @y[0], @t[5] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[1], @y[1], @t[7] + veor @y[0], @y[0], @t[6] + veor @y[3], @y[3], @y[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + + veor @y[7], @y[7], @t[7] + veor @y[4], @y[4], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[6] + veor @t[6], @t[6], @t[3] @ clobber t[6] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[4], @y[4], @y[7] + veor @y[3], @y[3], @t[6] + + veor @y[6], @y[6], @t[6] + veor @y[5], @y[5], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + veor @y[6], @y[6], @t[4] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[5], @y[5], @t[6] + veor @y[6], @y[6], @t[7] + vext.8 @t[7], @t[7], @t[7], #12 + veor @t[6], @t[6], @t[3] @ restore t[6] + vext.8 @t[3], @t[3], @t[3], #12 + + @ multiplication by 0x09 + veor @y[4], @y[4], @y[1] + veor @t[1], @t[1], @y[1] @ t[1]=y[1] + veor @t[0], @t[0], @t[5] @ clobber t[0] + vext.8 @t[6], @t[6], @t[6], #12 + veor @t[1], @t[1], @t[5] + veor @y[3], @y[3], @t[0] + veor @t[0], @t[0], @y[0] @ t[0]=y[0] + veor @t[1], @t[1], @t[6] + veor @t[6], @t[6], @t[7] @ clobber t[6] + veor @y[4], @y[4], @t[1] + veor @y[7], @y[7], @t[4] + veor @y[6], @y[6], @t[3] + veor @y[5], @y[5], @t[2] + veor @t[4], @t[4], @y[4] @ t[4]=y[4] + veor @t[3], @t[3], @y[3] @ t[3]=y[3] + veor @t[5], @t[5], @y[5] @ t[5]=y[5] + veor @t[2], @t[2], @y[2] @ t[2]=y[2] + veor @t[3], @t[3], @t[7] + veor @XMM[5], @t[5], @t[6] + veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] + veor @XMM[2], @t[2], @t[6] + veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] + + vmov @XMM[0], @t[0] + vmov @XMM[1], @t[1] + @ vmov @XMM[2], @t[2] + vmov @XMM[3], @t[3] + vmov @XMM[4], @t[4] + @ vmov @XMM[5], @t[5] + @ vmov @XMM[6], @t[6] + @ vmov @XMM[7], @t[7] +___ +} + +sub InvMixColumns { +my @x=@_[0..7]; +my @t=@_[8..15]; + +# Thanks to Jussi Kivilinna for providing pointer to +# +# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | +# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | +# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | +# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + +$code.=<<___; + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 @t[0], @x[0], @x[0], #8 + vext.8 @t[6], @x[6], @x[6], #8 + vext.8 @t[7], @x[7], @x[7], #8 + veor @t[0], @t[0], @x[0] + vext.8 @t[1], @x[1], @x[1], #8 + veor @t[6], @t[6], @x[6] + vext.8 @t[2], @x[2], @x[2], #8 + veor @t[7], @t[7], @x[7] + vext.8 @t[3], @x[3], @x[3], #8 + veor @t[1], @t[1], @x[1] + vext.8 @t[4], @x[4], @x[4], #8 + veor @t[2], @t[2], @x[2] + vext.8 @t[5], @x[5], @x[5], #8 + veor @t[3], @t[3], @x[3] + veor @t[4], @t[4], @x[4] + veor @t[5], @t[5], @x[5] + + veor @x[0], @x[0], @t[6] + veor @x[1], @x[1], @t[6] + veor @x[2], @x[2], @t[0] + veor @x[4], @x[4], @t[2] + veor @x[3], @x[3], @t[1] + veor @x[1], @x[1], @t[7] + veor @x[2], @x[2], @t[7] + veor @x[4], @x[4], @t[6] + veor @x[5], @x[5], @t[3] + veor @x[3], @x[3], @t[6] + veor @x[6], @x[6], @t[4] + veor @x[4], @x[4], @t[7] + veor @x[5], @x[5], @t[7] + veor @x[7], @x[7], @t[5] +___ + &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 +} + +sub swapmove { +my ($a,$b,$n,$mask,$t)=@_; +$code.=<<___; + vshr.u64 $t, $b, #$n + veor $t, $t, $a + vand $t, $t, $mask + veor $a, $a, $t + vshl.u64 $t, $t, #$n + veor $b, $b, $t +___ +} +sub swapmove2x { +my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; +$code.=<<___; + vshr.u64 $t0, $b0, #$n + vshr.u64 $t1, $b1, #$n + veor $t0, $t0, $a0 + veor $t1, $t1, $a1 + vand $t0, $t0, $mask + vand $t1, $t1, $mask + veor $a0, $a0, $t0 + vshl.u64 $t0, $t0, #$n + veor $a1, $a1, $t1 + vshl.u64 $t1, $t1, #$n + veor $b0, $b0, $t0 + veor $b1, $b1, $t1 +___ +} + +sub bitslice { +my @x=reverse(@_[0..7]); +my ($t0,$t1,$t2,$t3)=@_[8..11]; +$code.=<<___; + vmov.i8 $t0,#0x55 @ compose .LBS0 + vmov.i8 $t1,#0x33 @ compose .LBS1 +___ + &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); + &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); +$code.=<<___; + vmov.i8 $t0,#0x0f @ compose .LBS2 +___ + &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); + &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + + &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); + &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr $const,_bsaes_decrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + add $const,$const,#.LM0ISR-_bsaes_decrypt8 + + vldmia $const!, {@XMM[8]} @ .LM0ISR + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Ldec_sbox:\n"; + &InvSbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Ldec_done +___ + &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq $const,$const,#0x10 + bne .Ldec_loop + vldmia $const, {@XMM[12]} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: +___ + &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr $const,_bsaes_encrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + sub $const,$const,#_bsaes_encrypt8-.LM0SR + + vldmia $const!, {@XMM[8]} @ .LM0SR +_bsaes_encrypt8_alt: + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +_bsaes_encrypt8_bitslice: +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Lenc_sbox:\n"; + &Sbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Lenc_done +___ + &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq $const,$const,#0x10 + bne .Lenc_loop + vldmia $const, {@XMM[12]} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: +___ + # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb + &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +___ +} +{ +my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); + +sub bitslice_key { +my @x=reverse(@_[0..7]); +my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; + + &swapmove (@x[0,1],1,$bs0,$t2,$t3); +$code.=<<___; + @ &swapmove(@x[2,3],1,$t0,$t2,$t3); + vmov @x[2], @x[0] + vmov @x[3], @x[1] +___ + #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); + + &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); +$code.=<<___; + @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + vmov @x[4], @x[0] + vmov @x[6], @x[2] + vmov @x[5], @x[1] + vmov @x[7], @x[3] +___ + &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); + &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); +} + +$code.=<<___; +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr $const,_bsaes_key_convert + vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key + sub $const,$const,#_bsaes_key_convert-.LM0 + vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key + + vmov.i8 @XMM[8], #0x01 @ bit masks + vmov.i8 @XMM[9], #0x02 + vmov.i8 @XMM[10], #0x04 + vmov.i8 @XMM[11], #0x08 + vmov.i8 @XMM[12], #0x10 + vmov.i8 @XMM[13], #0x20 + vldmia $const, {@XMM[14]} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 @XMM[7], @XMM[7] + vrev32.8 @XMM[15], @XMM[15] +#endif + sub $rounds,$rounds,#1 + vstmia $out!, {@XMM[7]} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` + vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` + vmov.i8 @XMM[6], #0x40 + vmov.i8 @XMM[15], #0x80 + + vtst.8 @XMM[0], @XMM[7], @XMM[8] + vtst.8 @XMM[1], @XMM[7], @XMM[9] + vtst.8 @XMM[2], @XMM[7], @XMM[10] + vtst.8 @XMM[3], @XMM[7], @XMM[11] + vtst.8 @XMM[4], @XMM[7], @XMM[12] + vtst.8 @XMM[5], @XMM[7], @XMM[13] + vtst.8 @XMM[6], @XMM[7], @XMM[6] + vtst.8 @XMM[7], @XMM[7], @XMM[15] + vld1.8 {@XMM[15]}, [$inp]! @ load next round key + vmvn @XMM[0], @XMM[0] @ "pnot" + vmvn @XMM[1], @XMM[1] + vmvn @XMM[5], @XMM[5] + vmvn @XMM[6], @XMM[6] +#ifdef __ARMEL__ + vrev32.8 @XMM[15], @XMM[15] +#endif + subs $rounds,$rounds,#1 + vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 @XMM[7],#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +___ +} + +if (0) { # following four functions are unsupported interface + # used for benchmarking... +$code.=<<___; +.globl bsaes_enc_key_convert +.type bsaes_enc_key_convert,%function +.align 4 +bsaes_enc_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_enc_key_convert,.-bsaes_enc_key_convert + +.globl bsaes_encrypt_128 +.type bsaes_encrypt_128,%function +.align 4 +bsaes_encrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Lenc128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_encrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Lenc128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_encrypt_128,.-bsaes_encrypt_128 + +.globl bsaes_dec_key_convert +.type bsaes_dec_key_convert,%function +.align 4 +bsaes_dec_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + vldmia $out, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $out, {@XMM[7]} + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_dec_key_convert,.-bsaes_dec_key_convert + +.globl bsaes_decrypt_128 +.type bsaes_decrypt_128,%function +.align 4 +bsaes_decrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Ldec128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_decrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Ldec128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_decrypt_128,.-bsaes_decrypt_128 +___ +} +{ +my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); +my ($keysched)=("sp"); + +$code.=<<___; +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp $len, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ivp, [ip] @ IV is 1st arg on the stack + mov $len, $len, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + vldmia $keysched, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $keysched, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: +#endif + + vld1.8 {@XMM[15]}, [$ivp] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs $len, $len, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5, $rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp] + sub $inp, $inp, #0x60 + vstmia $fp, {@XMM[15]} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + veor @XMM[5], @XMM[5], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[5]}, [$out]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds $len, $len, #8 + beq .Lcbc_dec_done + + vld1.8 {@XMM[0]}, [$inp]! @ load input + cmp $len, #2 + blo .Lcbc_dec_one + vld1.8 {@XMM[1]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + mov r5, $rounds + vstmia $fp, {@XMM[15]} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {@XMM[2]}, [$inp]! + cmp $len, #4 + blo .Lcbc_dec_three + vld1.8 {@XMM[3]}, [$inp]! + beq .Lcbc_dec_four + vld1.8 {@XMM[4]}, [$inp]! + cmp $len, #6 + blo .Lcbc_dec_five + vld1.8 {@XMM[5]}, [$inp]! + beq .Lcbc_dec_six + vld1.8 {@XMM[6]}, [$inp]! + sub $inp, $inp, #0x70 + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub $inp, $inp, #0x60 + bl _bsaes_decrypt8 + vldmia $fp,{@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub $inp, $inp, #0x50 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[2], @XMM[2], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub $inp, $inp, #0x40 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub $inp, $inp, #0x30 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub $inp, $inp, #0x20 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! @ reload input + veor @XMM[1], @XMM[1], @XMM[8] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub $inp, $inp, #0x10 + mov $rounds, $out @ save original out pointer + mov $out, $fp @ use the iv scratch space as out buffer + mov r2, $key + vmov @XMM[4],@XMM[15] @ just in case ensure that IV + vmov @XMM[5],@XMM[0] @ and input are preserved + bl AES_decrypt + vld1.8 {@XMM[0]}, [$fp,:64] @ load result + veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV + vmov @XMM[15], @XMM[5] @ @XMM[5] holds input + vst1.8 {@XMM[0]}, [$rounds] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lcbc_dec_bzero +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + vst1.8 {@XMM[15]}, [$ivp] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +___ +} +{ +my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); +my $const = "r6"; # shared with _bsaes_encrypt8_alt +my $keysched = "sp"; + +$code.=<<___; +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp $len, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ctr, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vld1.8 {@XMM[0]}, [$ctr] @ load counter + add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr + vldmia $keysched, {@XMM[4]} @ load round0 key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + +.align 2 +0: add r12, $key, #248 + vld1.8 {@XMM[0]}, [$ctr] @ load counter + adrl $ctr, .LREVM0SR @ borrow $ctr + vldmia r12, {@XMM[4]} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 @XMM[8],#1 @ compose 1<<96 + veor @XMM[9],@XMM[9],@XMM[9] + vrev32.8 @XMM[0],@XMM[0] + vext.8 @XMM[8],@XMM[9],@XMM[8],#4 + vrev32.8 @XMM[4],@XMM[4] + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vstmia $keysched, {@XMM[4]} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 + vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 + vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 + vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 + vadd.u32 @XMM[4], @XMM[1], @XMM[10] + vadd.u32 @XMM[5], @XMM[2], @XMM[10] + vadd.u32 @XMM[6], @XMM[3], @XMM[10] + vadd.u32 @XMM[7], @XMM[4], @XMM[10] + vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia $keysched, {@XMM[9]} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, $keysched, #0x10 @ pass next round key +#else + add r4, $key, #`248+16` +#endif + vldmia $ctr, {@XMM[8]} @ .LREVM0SR + mov r5, $rounds @ pass rounds + vstmia $fp, {@XMM[10]} @ save next counter + sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs $len, $len, #8 + blo .Lctr_enc_loop_done + + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[0], @XMM[8] + veor @XMM[1], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[10] + veor @XMM[6], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[7], @XMM[13] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + veor @XMM[5], @XMM[15] + vst1.8 {@XMM[6]}, [$out]! + vmov.i32 @XMM[8], #1 @ compose 1<<96 + vst1.8 {@XMM[3]}, [$out]! + veor @XMM[9], @XMM[9], @XMM[9] + vst1.8 {@XMM[7]}, [$out]! + vext.8 @XMM[8], @XMM[9], @XMM[8], #4 + vst1.8 {@XMM[2]}, [$out]! + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vst1.8 {@XMM[5]}, [$out]! + vldmia $fp, {@XMM[0]} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add $len, $len, #8 + vld1.8 {@XMM[8]}, [$inp]! @ load input + veor @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! @ write output + cmp $len, #2 + blo .Lctr_enc_done + vld1.8 {@XMM[9]}, [$inp]! + veor @XMM[1], @XMM[9] + vst1.8 {@XMM[1]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[4], @XMM[10] + vst1.8 {@XMM[4]}, [$out]! + cmp $len, #4 + blo .Lctr_enc_done + vld1.8 {@XMM[11]}, [$inp]! + veor @XMM[6], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[3]}, [$out]! + cmp $len, #6 + blo .Lctr_enc_done + vld1.8 {@XMM[13]}, [$inp]! + veor @XMM[7], @XMM[13] + vst1.8 {@XMM[7]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[14]}, [$inp] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[2]}, [$out]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lctr_enc_bzero +#else + vstmia $keysched, {q0-q1} +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, $inp @ copy arguments + mov r5, $out + mov r6, $len + mov r7, $key + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {@XMM[1]}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [r4]! @ load input + vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor @XMM[0],@XMM[0],@XMM[1] + vst1.8 {@XMM[0]}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +___ +} +{ +###################################################################### +# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); +my $const="r6"; # returned by _bsaes_key_convert +my $twmask=@XMM[5]; +my @T=@XMM[6..7]; + +$code.=<<___; +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + subs $len, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds $len, #0x70 + bmi .Lxts_enc_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_enc_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_enc_ret + sub r6, $out, #0x10 + +.Lxts_enc_steal: + ldrb r0, [$inp], #1 + ldrb r1, [$out, #-0x10] + strb r0, [$out, #-0x10] + strb r1, [$out], #1 + + subs $len, #1 + bhi .Lxts_enc_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_enc_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + tst $len, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne $len, #0x10 @ subtract another 16 bytes + subs $len, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds $len, #0x70 + bmi .Lxts_dec_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_dec_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + mov r5, $magic @ preserve magic + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + mov $magic, r5 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia $magic, {$twmask} + vshr.s64 @XMM[6], @XMM[8], #63 + vand @XMM[6], @XMM[6], $twmask + vadd.u64 @XMM[9], @XMM[8], @XMM[8] + vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` + veor @XMM[9], @XMM[9], @XMM[6] + + @ perform the final decryption with the last tweak value + vld1.8 {@XMM[0]}, [$inp]! + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[9] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[9] + vst1.8 {@XMM[0]}, [$out] + + mov r6, $out +.Lxts_dec_steal: + ldrb r1, [$out] + ldrb r0, [$inp], #1 + strb r1, [$out, #0x10] + strb r0, [$out], #1 + + subs $len, #1 + bhi .Lxts_dec_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_dec_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +___ +} +$code.=<<___; +#endif +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +print $code; + +close STDOUT; diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index d3db39860b9c..6577b8aeb711 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h generic-y += siginfo.h +generic-y += simd.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7e1f76027f66..72abdc541f38 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -19,6 +19,13 @@ #include <asm/unified.h> #include <asm/compiler.h> +#if __LINUX_ARM_ARCH__ < 6 +#include <asm-generic/uaccess-unaligned.h> +#else +#define __get_user_unaligned __get_user +#define __put_user_unaligned __put_user +#endif + #define VERIFY_READ 0 #define VERIFY_WRITE 1 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 74ad15d1a065..bc6bd9683ba4 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -442,10 +442,10 @@ local_restart: ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF - cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) +2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall -2: mov why, #0 @ no longer a real syscall + mov why, #0 @ no longer a real syscall b sys_ni_syscall @ not private func #if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index de23a9beed13..39f89fbd5111 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -329,10 +329,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_exit + bl context_tracking_user_exit ldmia sp!, {r0-r3, ip, lr} .else - bl user_exit + bl context_tracking_user_exit .endif #endif .endm @@ -341,10 +341,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_enter + bl context_tracking_user_enter ldmia sp!, {r0-r3, ip, lr} .else - bl user_enter + bl context_tracking_user_enter .endif #endif .endm diff --git a/arch/arm/mach-imx/clk-fixup-mux.c b/arch/arm/mach-imx/clk-fixup-mux.c index deb4b8093b30..0d40b35c557c 100644 --- a/arch/arm/mach-imx/clk-fixup-mux.c +++ b/arch/arm/mach-imx/clk-fixup-mux.c @@ -90,6 +90,7 @@ struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg, init.ops = &clk_fixup_mux_ops; init.parent_names = parents; init.num_parents = num_parents; + init.flags = 0; fixup_mux->mux.reg = reg; fixup_mux->mux.shift = shift; diff --git a/arch/arm/mach-imx/clk-imx27.c b/arch/arm/mach-imx/clk-imx27.c index c3cfa4116dc0..c6b40f386786 100644 --- a/arch/arm/mach-imx/clk-imx27.c +++ b/arch/arm/mach-imx/clk-imx27.c @@ -285,7 +285,7 @@ int __init mx27_clocks_init(unsigned long fref) clk_register_clkdev(clk[ata_ahb_gate], "ata", NULL); clk_register_clkdev(clk[rtc_ipg_gate], NULL, "imx21-rtc"); clk_register_clkdev(clk[scc_ipg_gate], "scc", NULL); - clk_register_clkdev(clk[cpu_div], NULL, "cpufreq-cpu0.0"); + clk_register_clkdev(clk[cpu_div], NULL, "cpu0"); clk_register_clkdev(clk[emi_ahb_gate], "emi_ahb" , NULL); mxc_timer_init(MX27_IO_ADDRESS(MX27_GPT1_BASE_ADDR), MX27_INT_GPT1); diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c index 1a56a3319997..7c0dc4540aa4 100644 --- a/arch/arm/mach-imx/clk-imx51-imx53.c +++ b/arch/arm/mach-imx/clk-imx51-imx53.c @@ -328,7 +328,7 @@ static void __init mx5_clocks_common_init(unsigned long rate_ckil, clk_register_clkdev(clk[ssi2_ipg_gate], NULL, "imx-ssi.1"); clk_register_clkdev(clk[ssi3_ipg_gate], NULL, "imx-ssi.2"); clk_register_clkdev(clk[sdma_gate], NULL, "imx35-sdma"); - clk_register_clkdev(clk[cpu_podf], NULL, "cpufreq-cpu0.0"); + clk_register_clkdev(clk[cpu_podf], NULL, "cpu0"); clk_register_clkdev(clk[iim_gate], "iim", NULL); clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.0"); clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.1"); @@ -397,7 +397,7 @@ int __init mx51_clocks_init(unsigned long rate_ckil, unsigned long rate_osc, mx51_spdif_xtal_sel, ARRAY_SIZE(mx51_spdif_xtal_sel)); clk[spdif1_sel] = imx_clk_mux("spdif1_sel", MXC_CCM_CSCMR2, 2, 2, spdif_sel, ARRAY_SIZE(spdif_sel)); - clk[spdif1_pred] = imx_clk_divider("spdif1_podf", "spdif1_sel", MXC_CCM_CDCDR, 16, 3); + clk[spdif1_pred] = imx_clk_divider("spdif1_pred", "spdif1_sel", MXC_CCM_CDCDR, 16, 3); clk[spdif1_podf] = imx_clk_divider("spdif1_podf", "spdif1_pred", MXC_CCM_CDCDR, 9, 6); clk[spdif1_com_sel] = imx_clk_mux("spdif1_com_sel", MXC_CCM_CSCMR2, 5, 1, mx51_spdif1_com_sel, ARRAY_SIZE(mx51_spdif1_com_sel)); diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 85a1b51346c8..90372a21087f 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -233,10 +233,15 @@ put_node: of_node_put(np); } -static void __init imx6q_opp_init(struct device *cpu_dev) +static void __init imx6q_opp_init(void) { struct device_node *np; + struct device *cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_warn("failed to get cpu0 device\n"); + return; + } np = of_node_get(cpu_dev->of_node); if (!np) { pr_warn("failed to find cpu0 node\n"); @@ -268,7 +273,7 @@ static void __init imx6q_init_late(void) imx6q_cpuidle_init(); if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) { - imx6q_opp_init(&imx6q_cpufreq_pdev.dev); + imx6q_opp_init(); platform_device_register(&imx6q_cpufreq_pdev); } } diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c index 64ff37ea72b1..80c177c36c5f 100644 --- a/arch/arm/mach-imx/system.c +++ b/arch/arm/mach-imx/system.c @@ -117,6 +117,17 @@ void __init imx_init_l2cache(void) /* Configure the L2 PREFETCH and POWER registers */ val = readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL); val |= 0x70800000; + /* + * The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0 + * The L2 cache controller(PL310) version on the i.MX6DL/SOLO/SL is r3p2 + * But according to ARM PL310 errata: 752271 + * ID: 752271: Double linefill feature can cause data corruption + * Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in r3p2 + * Workaround: The only workaround to this erratum is to disable the + * double linefill feature. This is the default behavior. + */ + if (cpu_is_imx6q()) + val &= ~(1 << 30 | 1 << 23); writel_relaxed(val, l2x0_base + L2X0_PREFETCH_CTRL); val = L2X0_DYNAMIC_CLK_GATING_EN | L2X0_STNDBY_MODE_EN; writel_relaxed(val, l2x0_base + L2X0_POWER_CTRL); diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c index 1d5b5290d2af..b237950eb8a3 100644 --- a/arch/arm/mach-omap2/cclock44xx_data.c +++ b/arch/arm/mach-omap2/cclock44xx_data.c @@ -1632,7 +1632,7 @@ static struct omap_clk omap44xx_clks[] = { CLK(NULL, "auxclk5_src_ck", &auxclk5_src_ck), CLK(NULL, "auxclk5_ck", &auxclk5_ck), CLK(NULL, "auxclkreq5_ck", &auxclkreq5_ck), - CLK("omap-gpmc", "fck", &dummy_ck), + CLK("50000000.gpmc", "fck", &dummy_ck), CLK("omap_i2c.1", "ick", &dummy_ck), CLK("omap_i2c.2", "ick", &dummy_ck), CLK("omap_i2c.3", "ick", &dummy_ck), diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index c443f2e97e10..4c8982ae9529 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -143,7 +143,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, * Call idle CPU cluster PM exit notifier chain * to restore GIC and wakeupgen context. */ - if ((cx->mpu_state == PWRDM_POWER_RET) && + if (dev->cpu == 0 && (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF)) cpu_cluster_pm_exit(); diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 9f4795aff48a..579697adaae7 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -1491,8 +1491,8 @@ static int gpmc_probe_generic_child(struct platform_device *pdev, */ ret = gpmc_cs_remap(cs, res.start); if (ret < 0) { - dev_err(&pdev->dev, "cannot remap GPMC CS %d to 0x%x\n", - cs, res.start); + dev_err(&pdev->dev, "cannot remap GPMC CS %d to %pa\n", + cs, &res.start); goto err; } diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c index c53609f46294..be271f1d585b 100644 --- a/arch/arm/mach-omap2/mux34xx.c +++ b/arch/arm/mach-omap2/mux34xx.c @@ -620,7 +620,7 @@ static struct omap_mux __initdata omap3_muxmodes[] = { "uart1_rts", "ssi1_flag_tx", NULL, NULL, "gpio_149", NULL, NULL, "safe_mode"), _OMAP3_MUXENTRY(UART1_RX, 151, - "uart1_rx", "ss1_wake_tx", "mcbsp1_clkr", "mcspi4_clk", + "uart1_rx", "ssi1_wake_tx", "mcbsp1_clkr", "mcspi4_clk", "gpio_151", NULL, NULL, "safe_mode"), _OMAP3_MUXENTRY(UART1_TX, 148, "uart1_tx", "ssi1_dat_tx", NULL, NULL, diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 8708b2a9da45..891211093295 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -1,5 +1,5 @@ /* - * OMAP4 SMP source file. It contains platform specific fucntions + * OMAP4 SMP source file. It contains platform specific functions * needed for the linux smp kernel. * * Copyright (C) 2009 Texas Instruments, Inc. diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index f99f68e1e85b..b69dd9abb50a 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -158,7 +158,7 @@ static int omap_device_build_from_dt(struct platform_device *pdev) } od = omap_device_alloc(pdev, hwmods, oh_cnt); - if (!od) { + if (IS_ERR(od)) { dev_err(&pdev->dev, "Cannot allocate omap_device for :%s\n", oh_name); ret = PTR_ERR(od); diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index 612a45689770..7fb96ebdc0fb 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -289,7 +289,7 @@ static void collie_flash_exit(void) } static struct flash_platform_data collie_flash_data = { - .map_name = "cfi_probe", + .map_name = "jedec_probe", .init = collie_flash_init, .set_vpp = collie_set_vpp, .exit = collie_flash_exit, diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c index 8ea5ef6c79cc..5bd2e851e3c7 100644 --- a/arch/arm/mach-shmobile/clock-r8a73a4.c +++ b/arch/arm/mach-shmobile/clock-r8a73a4.c @@ -555,7 +555,7 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("pll2h", &pll2h_clk), /* CPU clock */ - CLKDEV_DEV_ID("cpufreq-cpu0", &z_clk), + CLKDEV_DEV_ID("cpu0", &z_clk), /* DIV6 */ CLKDEV_CON_ID("zb", &div6_clks[DIV6_ZB]), diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 1942eaef5181..c92c023f0d27 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -616,7 +616,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("smp_twd", &twd_clk), /* smp_twd */ /* DIV4 clocks */ - CLKDEV_DEV_ID("cpufreq-cpu0", &div4_clks[DIV4_Z]), + CLKDEV_DEV_ID("cpu0", &div4_clks[DIV4_Z]), /* DIV6 clocks */ CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]), diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig index a85adcd00882..a1659863bfd5 100644 --- a/arch/arm/mach-u300/Kconfig +++ b/arch/arm/mach-u300/Kconfig @@ -1,7 +1,3 @@ -menu "ST-Ericsson AB U300/U335 Platform" - -comment "ST-Ericsson Mobile Platform Products" - config ARCH_U300 bool "ST-Ericsson U300 Series" if ARCH_MULTI_V5 depends on MMU @@ -25,7 +21,9 @@ config ARCH_U300 help Support for ST-Ericsson U300 series mobile platforms. -comment "ST-Ericsson U300/U335 Feature Selections" +if ARCH_U300 + +menu "ST-Ericsson AB U300/U335 Platform" config MACH_U300 depends on ARCH_U300 @@ -53,3 +51,5 @@ config MACH_U300_SPIDUMMY SPI framework and ARM PL022 support. endmenu + +endif diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c index 82ccf1d98735..264f894c0e3d 100644 --- a/arch/arm/mach-ux500/cache-l2x0.c +++ b/arch/arm/mach-ux500/cache-l2x0.c @@ -69,6 +69,7 @@ static int __init ux500_l2x0_init(void) * some SMI service available. */ outer_cache.disable = NULL; + outer_cache.set_debug = NULL; return 0; } diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d4482fa35bc..e2950b098e76 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -43,6 +43,6 @@ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) -extern unsigned int elf_hwcap; +extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 57fb55c44c90..7ae8a1f00c3c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -143,15 +143,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; + + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 12ad8f3d0cfd..055cfb80e05c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -57,7 +57,7 @@ unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); static const char *cpu_name; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6d6acf153bff..c23751b06120 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, force_sig_info(sig, &si, tsk); } -void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 75a36ad11ff5..ca8f8340d75f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -288,9 +288,6 @@ endif vmlinux.32: vmlinux $(OBJCOPY) -O $(32bit-bfd) $(OBJCOPYFLAGS) $< $@ - -#obj-$(CONFIG_KPROBES) += kprobes.o - # # The 64-bit ELF tools are pretty broken so at this time we generate 64-bit # ELF files from 32-bit files by conversion. diff --git a/arch/mips/alchemy/common/usb.c b/arch/mips/alchemy/common/usb.c index fcc695626117..2adc7edda49c 100644 --- a/arch/mips/alchemy/common/usb.c +++ b/arch/mips/alchemy/common/usb.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/syscore_ops.h> +#include <asm/cpu.h> #include <asm/mach-au1x00/au1000.h> /* control register offsets */ @@ -358,7 +359,7 @@ static inline int au1200_coherency_bug(void) { #if defined(CONFIG_DMA_COHERENT) /* Au1200 AB USB does not support coherent memory */ - if (!(read_c0_prid() & 0xff)) { + if (!(read_c0_prid() & PRID_REV_MASK)) { printk(KERN_INFO "Au1200 USB: this is chip revision AB !!\n"); printk(KERN_INFO "Au1200 USB: update your board or re-configure" " the kernel\n"); diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c index 7e17374a9ae8..b713cd64b087 100644 --- a/arch/mips/bcm63xx/cpu.c +++ b/arch/mips/bcm63xx/cpu.c @@ -306,14 +306,14 @@ void __init bcm63xx_cpu_init(void) switch (c->cputype) { case CPU_BMIPS3300: - if ((read_c0_prid() & 0xff00) != PRID_IMP_BMIPS3300_ALT) + if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT) __cpu_name[cpu] = "Broadcom BCM6338"; /* fall-through */ case CPU_BMIPS32: chipid_reg = BCM_6345_PERF_BASE; break; case CPU_BMIPS4350: - switch ((read_c0_prid() & 0xff)) { + switch ((read_c0_prid() & PRID_REV_MASK)) { case 0x04: chipid_reg = BCM_3368_PERF_BASE; break; diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings index 68ae3887b3e5..08c00e4972fa 120000 --- a/arch/mips/boot/dts/include/dt-bindings +++ b/arch/mips/boot/dts/include/dt-bindings @@ -1 +1 @@ -../../../../../include/dt-bindings +../../../../../include/dt-bindings
\ No newline at end of file diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c index 02193953eb9e..b752c4ed0b79 100644 --- a/arch/mips/cavium-octeon/csrc-octeon.c +++ b/arch/mips/cavium-octeon/csrc-octeon.c @@ -12,6 +12,7 @@ #include <linux/smp.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include <asm/time.h> #include <asm/octeon/octeon.h> diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c index ab169046e442..468f665de7bb 100644 --- a/arch/mips/dec/prom/init.c +++ b/arch/mips/dec/prom/init.c @@ -13,6 +13,7 @@ #include <asm/bootinfo.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/processor.h> #include <asm/dec/prom.h> diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index fa44f3ec5302..d445d060e346 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -13,12 +13,6 @@ #include <asm/cpu-info.h> #include <cpu-feature-overrides.h> -#ifndef current_cpu_type -#define current_cpu_type() current_cpu_data.cputype -#endif - -#define boot_cpu_type() cpu_data[0].cputype - /* * SMP assumption: Options of CPU 0 are a superset of all processors. * This is true for all known MIPS systems. @@ -193,7 +187,7 @@ /* * MIPS32, MIPS64, VR5500, IDT32332, IDT32334 and maybe a few other - * pre-MIPS32/MIPS53 processors have CLO, CLZ. The IDT RC64574 is 64-bit and + * pre-MIPS32/MIPS64 processors have CLO, CLZ. The IDT RC64574 is 64-bit and * has CLO and CLZ but not DCLO nor DCLZ. For 64-bit kernels * cpu_has_clo_clz also indicates the availability of DCLO and DCLZ. */ diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index 41401d8eb7d1..21c8e29c8f91 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -84,6 +84,7 @@ struct cpuinfo_mips { extern struct cpuinfo_mips cpu_data[]; #define current_cpu_data cpu_data[smp_processor_id()] #define raw_current_cpu_data cpu_data[raw_smp_processor_id()] +#define boot_cpu_data cpu_data[0] extern void cpu_probe(void); extern void cpu_report(void); diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h new file mode 100644 index 000000000000..4a402cc60c03 --- /dev/null +++ b/arch/mips/include/asm/cpu-type.h @@ -0,0 +1,203 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 2004 Ralf Baechle + * Copyright (C) 2004 Maciej W. Rozycki + */ +#ifndef __ASM_CPU_TYPE_H +#define __ASM_CPU_TYPE_H + +#include <linux/smp.h> +#include <linux/compiler.h> + +static inline int __pure __get_cpu_type(const int cpu_type) +{ + switch (cpu_type) { +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ + defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) + case CPU_LOONGSON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_LOONGSON1B + case CPU_LOONGSON1: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1 + case CPU_4KC: + case CPU_ALCHEMY: + case CPU_BMIPS3300: + case CPU_BMIPS4350: + case CPU_PR4450: + case CPU_BMIPS32: + case CPU_JZRISC: +#endif + +#if defined(CONFIG_SYS_HAS_CPU_MIPS32_R1) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) + case CPU_4KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R2 + case CPU_4KSC: + case CPU_24K: + case CPU_34K: + case CPU_1004K: + case CPU_74K: + case CPU_M14KC: + case CPU_M14KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R1 + case CPU_5KC: + case CPU_5KE: + case CPU_20KC: + case CPU_25KF: + case CPU_SB1: + case CPU_SB1A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R2 + /* + * All MIPS64 R2 processors have their own special symbols. That is, + * there currently is no pure R2 core + */ +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R3000 + case CPU_R2000: + case CPU_R3000: + case CPU_R3000A: + case CPU_R3041: + case CPU_R3051: + case CPU_R3052: + case CPU_R3081: + case CPU_R3081E: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX39XX + case CPU_TX3912: + case CPU_TX3922: + case CPU_TX3927: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_VR41XX + case CPU_VR41XX: + case CPU_VR4111: + case CPU_VR4121: + case CPU_VR4122: + case CPU_VR4131: + case CPU_VR4133: + case CPU_VR4181: + case CPU_VR4181A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4300 + case CPU_R4300: + case CPU_R4310: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4X00 + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4200: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R4600: + case CPU_R4700: + case CPU_R4640: + case CPU_R4650: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX49XX + case CPU_TX49XX: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5000 + case CPU_R5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5432 + case CPU_R5432: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5500 + case CPU_R5500: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R6000 + case CPU_R6000: + case CPU_R6000A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_NEVADA + case CPU_NEVADA: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R8000 + case CPU_R8000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R10000 + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM7000 + case CPU_RM7000: + case CPU_SR71000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM9000 + case CPU_RM9000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_SB1 + case CPU_SB1: + case CPU_SB1A: +#endif +#ifdef CONFIG_SYS_HAS_CPU_CAVIUM_OCTEON + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS4380 + case CPU_BMIPS4380: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS5000 + case CPU_BMIPS5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_XLP + case CPU_XLP: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_XLR + case CPU_XLR: +#endif + break; + default: + unreachable(); + } + + return cpu_type; +} + +static inline int __pure current_cpu_type(void) +{ + const int cpu_type = current_cpu_data.cputype; + + return __get_cpu_type(cpu_type); +} + +static inline int __pure boot_cpu_type(void) +{ + const int cpu_type = cpu_data[0].cputype; + + return __get_cpu_type(cpu_type); +} + +#endif /* __ASM_CPU_TYPE_H */ diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 71b9f1998be7..d2035e16502a 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -3,15 +3,14 @@ * various MIPS cpu types. * * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 2004 Maciej W. Rozycki + * Copyright (C) 2004, 2013 Maciej W. Rozycki */ #ifndef _ASM_CPU_H #define _ASM_CPU_H -/* Assigned Company values for bits 23:16 of the PRId Register - (CP0 register 15, select 0). As of the MIPS32 and MIPS64 specs from - MTI, the PRId register is defined in this (backwards compatible) - way: +/* + As of the MIPS32 and MIPS64 specs from MTI, the PRId register (CP0 + register 15, select 0) is defined in this (backwards compatible) way: +----------------+----------------+----------------+----------------+ | Company Options| Company ID | Processor ID | Revision | @@ -23,6 +22,14 @@ spec. */ +#define PRID_OPT_MASK 0xff000000 + +/* + * Assigned Company values for bits 23:16 of the PRId register. + */ + +#define PRID_COMP_MASK 0xff0000 + #define PRID_COMP_LEGACY 0x000000 #define PRID_COMP_MIPS 0x010000 #define PRID_COMP_BROADCOM 0x020000 @@ -38,10 +45,17 @@ #define PRID_COMP_INGENIC 0xd00000 /* - * Assigned values for the product ID register. In order to detect a - * certain CPU type exactly eventually additional registers may need to - * be examined. These are valid when 23:16 == PRID_COMP_LEGACY + * Assigned Processor ID (implementation) values for bits 15:8 of the PRId + * register. In order to detect a certain CPU type exactly eventually + * additional registers may need to be examined. */ + +#define PRID_IMP_MASK 0xff00 + +/* + * These are valid when 23:16 == PRID_COMP_LEGACY + */ + #define PRID_IMP_R2000 0x0100 #define PRID_IMP_AU1_REV1 0x0100 #define PRID_IMP_AU1_REV2 0x0200 @@ -182,11 +196,15 @@ #define PRID_IMP_NETLOGIC_XLP2XX 0x1200 /* - * Definitions for 7:0 on legacy processors + * Particular Revision values for bits 7:0 of the PRId register. */ #define PRID_REV_MASK 0x00ff +/* + * Definitions for 7:0 on legacy processors + */ + #define PRID_REV_TX4927 0x0022 #define PRID_REV_TX4937 0x0030 #define PRID_REV_R4400 0x0040 @@ -227,6 +245,8 @@ * 31 16 15 8 7 0 */ +#define FPIR_IMP_MASK 0xff00 + #define FPIR_IMP_NONE 0x0000 enum cpu_type_enum { diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h index 3e11a468cdf8..54f9e84db8ac 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000.h +++ b/arch/mips/include/asm/mach-au1x00/au1000.h @@ -43,6 +43,8 @@ #include <linux/io.h> #include <linux/irq.h> +#include <asm/cpu.h> + /* cpu pipeline flush */ void static inline au_sync(void) { @@ -140,7 +142,7 @@ static inline int au1xxx_cpu_needs_config_od(void) static inline int alchemy_get_cputype(void) { - switch (read_c0_prid() & 0xffff0000) { + switch (read_c0_prid() & (PRID_OPT_MASK | PRID_COMP_MASK)) { case 0x00030000: return ALCHEMY_CPU_AU1000; break; diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h index f4caacd25552..1bcb6421205e 100644 --- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP22 with a variety of processors so we can't use defaults for everything. */ diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 1d2b6ff60d33..d6111aa2e886 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP27 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h index 65e9c856390d..4cec06d133db 100644 --- a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h @@ -9,6 +9,8 @@ #ifndef __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP28 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index fed1c3e9b486..e0331414c7d6 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -603,6 +603,13 @@ #define MIPS_CONF4_MMUEXTDEF (_ULCAST_(3) << 14) #define MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT (_ULCAST_(1) << 14) +#define MIPS_CONF5_NF (_ULCAST_(1) << 0) +#define MIPS_CONF5_UFR (_ULCAST_(1) << 2) +#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) +#define MIPS_CONF5_EVA (_ULCAST_(1) << 28) +#define MIPS_CONF5_CV (_ULCAST_(1) << 29) +#define MIPS_CONF5_K (_ULCAST_(1) << 30) + #define MIPS_CONF6_SYND (_ULCAST_(1) << 13) #define MIPS_CONF7_WII (_ULCAST_(1) << 31) diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index f194c08bd057..12d6842962be 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -83,6 +83,18 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); +#define HAVE_ARCH_PCI_RESOURCE_TO_USER + +static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, resource_size_t *start, + resource_size_t *end) +{ + phys_t size = resource_size(rsrc); + + *start = fixup_bigphys_addr(rsrc->start, size); + *end = rsrc->start + size; +} + /* * Dynamic DMA mapping stuff. * MIPS has everything mapped statically. diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 6529704aa73a..c5424757da65 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -10,7 +10,9 @@ #ifdef __KERNEL__ +#include <asm/cpu-features.h> #include <asm/mipsregs.h> +#include <asm/cpu-type.h> /* * This is the clock rate of the i8253 PIT. A MIPS system may not have @@ -33,9 +35,38 @@ typedef unsigned int cycles_t; +/* + * On R4000/R4400 before version 5.0 an erratum exists such that if the + * cycle counter is read in the exact moment that it is matching the + * compare register, no interrupt will be generated. + * + * There is a suggested workaround and also the erratum can't strike if + * the compare interrupt isn't being used as the clock source device. + * However for now the implementaton of this function doesn't get these + * fine details right. + */ static inline cycles_t get_cycles(void) { - return 0; + switch (boot_cpu_type()) { + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + if ((read_c0_prid() & 0xff) >= 0x0050) + return read_c0_count(); + break; + + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + break; + + default: + if (cpu_has_counter) + return read_c0_count(); + break; + } + + return 0; /* no usable counter */ } #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h index f4cff7e4fa8a..f82c83749a08 100644 --- a/arch/mips/include/asm/vga.h +++ b/arch/mips/include/asm/vga.h @@ -6,6 +6,7 @@ #ifndef _ASM_VGA_H #define _ASM_VGA_H +#include <asm/addrspace.h> #include <asm/byteorder.h> /* @@ -13,7 +14,7 @@ * access the videoram directly without any black magic. */ -#define VGA_MAP_MEM(x, s) (0xb0000000L + (unsigned long)(x)) +#define VGA_MAP_MEM(x, s) CKSEG1ADDR(0x10000000L + (unsigned long)(x)) #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 37663c7862a5..5465dc183e5a 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -20,6 +20,7 @@ #include <asm/bugs.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/fpu.h> #include <asm/mipsregs.h> #include <asm/watch.h> @@ -55,7 +56,7 @@ static inline void check_errata(void) { struct cpuinfo_mips *c = ¤t_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: /* * Erratum "RPS May Cause Incorrect Instruction Execution" @@ -122,7 +123,7 @@ static inline unsigned long cpu_get_fpu_id(void) */ static inline int __cpu_has_fpu(void) { - return ((cpu_get_fpu_id() & 0xff00) != FPIR_IMP_NONE); + return ((cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE); } static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) @@ -290,6 +291,17 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) return config4 & MIPS_CONF_M; } +static inline unsigned int decode_config5(struct cpuinfo_mips *c) +{ + unsigned int config5; + + config5 = read_c0_config5(); + config5 &= ~MIPS_CONF5_UFR; + write_c0_config5(config5); + + return config5 & MIPS_CONF_M; +} + static void decode_configs(struct cpuinfo_mips *c) { int ok; @@ -310,6 +322,8 @@ static void decode_configs(struct cpuinfo_mips *c) ok = decode_config3(c); if (ok) ok = decode_config4(c); + if (ok) + ok = decode_config5(c); mips_probe_watch_registers(c); @@ -322,7 +336,7 @@ static void decode_configs(struct cpuinfo_mips *c) static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) { - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_R2000: c->cputype = CPU_R2000; __cpu_name[cpu] = "R2000"; @@ -333,7 +347,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->tlbsize = 64; break; case PRID_IMP_R3000: - if ((c->processor_id & 0xff) == PRID_REV_R3000A) { + if ((c->processor_id & PRID_REV_MASK) == PRID_REV_R3000A) { if (cpu_has_confreg()) { c->cputype = CPU_R3081E; __cpu_name[cpu] = "R3081"; @@ -353,7 +367,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_IMP_R4000: if (read_c0_config() & CONF_SC) { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400PC; __cpu_name[cpu] = "R4400PC"; } else { @@ -361,7 +376,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "R4000PC"; } } else { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400SC; __cpu_name[cpu] = "R4400SC"; } else { @@ -454,7 +470,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "TX3927"; c->tlbsize = 64; } else { - switch (c->processor_id & 0xff) { + switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_TX3912: c->cputype = CPU_TX3912; __cpu_name[cpu] = "TX3912"; @@ -640,7 +656,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_4KC: c->cputype = CPU_4KC; __cpu_name[cpu] = "MIPS 4Kc"; @@ -711,7 +727,7 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_AU1_REV1: case PRID_IMP_AU1_REV2: c->cputype = CPU_ALCHEMY; @@ -730,7 +746,7 @@ static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) break; case 4: __cpu_name[cpu] = "Au1200"; - if ((c->processor_id & 0xff) == 2) + if ((c->processor_id & PRID_REV_MASK) == 2) __cpu_name[cpu] = "Au1250"; break; case 5: @@ -748,12 +764,12 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SB1: c->cputype = CPU_SB1; __cpu_name[cpu] = "SiByte SB1"; /* FPU in pass1 is known to have issues. */ - if ((c->processor_id & 0xff) < 0x02) + if ((c->processor_id & PRID_REV_MASK) < 0x02) c->options &= ~(MIPS_CPU_FPU | MIPS_CPU_32FPR); break; case PRID_IMP_SB1A: @@ -766,7 +782,7 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SR71000: c->cputype = CPU_SR71000; __cpu_name[cpu] = "Sandcraft SR71000"; @@ -779,7 +795,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_PR4450: c->cputype = CPU_PR4450; __cpu_name[cpu] = "Philips PR4450"; @@ -791,7 +807,7 @@ static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_BMIPS32_REV4: case PRID_IMP_BMIPS32_REV8: c->cputype = CPU_BMIPS32; @@ -806,7 +822,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) set_elf_platform(cpu, "bmips3300"); break; case PRID_IMP_BMIPS43XX: { - int rev = c->processor_id & 0xff; + int rev = c->processor_id & PRID_REV_MASK; if (rev >= PRID_REV_BMIPS4380_LO && rev <= PRID_REV_BMIPS4380_HI) { @@ -832,7 +848,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_CAVIUM_CN38XX: case PRID_IMP_CAVIUM_CN31XX: case PRID_IMP_CAVIUM_CN30XX: @@ -875,7 +891,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) decode_configs(c); /* JZRISC does not implement the CP0 counter. */ c->options &= ~MIPS_CPU_COUNTER; - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_JZRISC: c->cputype = CPU_JZRISC; __cpu_name[cpu] = "Ingenic JZRISC"; @@ -890,7 +906,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) { decode_configs(c); - if ((c->processor_id & 0xff00) == PRID_IMP_NETLOGIC_AU13XX) { + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_NETLOGIC_AU13XX) { c->cputype = CPU_ALCHEMY; __cpu_name[cpu] = "Au1300"; /* following stuff is not for Alchemy */ @@ -905,7 +921,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) MIPS_CPU_EJTAG | MIPS_CPU_LLSC); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_NETLOGIC_XLP2XX: c->cputype = CPU_XLP; __cpu_name[cpu] = "Broadcom XLPII"; @@ -984,7 +1000,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_c0_prid(); - switch (c->processor_id & 0xff0000) { + switch (c->processor_id & PRID_COMP_MASK) { case PRID_COMP_LEGACY: cpu_probe_legacy(c, cpu); break; diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 42f8875d2444..f7991d95bff9 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <asm/cpu.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include <asm/idle.h> #include <asm/mipsregs.h> @@ -136,7 +137,7 @@ void __init check_wait(void) return; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R3081: case CPU_R3081E: cpu_wait = r3081_wait; diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 364d26ae4215..dcb8e5d3bb8a 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/div64.h> #include <asm/smtc_ipi.h> #include <asm/time.h> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index aec3408edd4b..524841f02803 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -39,6 +39,7 @@ #include <asm/break.h> #include <asm/cop2.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/fpu_emulator.h> @@ -622,7 +623,7 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt) regs->regs[rt] = read_c0_count(); return 0; case 3: /* Count register resolution */ - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: regs->regs[rt] = 1; diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 729e7702b1de..c8efdb5b6ee0 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -19,6 +19,7 @@ #include <asm/bootinfo.h> #include <asm/cacheops.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/r4kcache.h> @@ -186,9 +187,10 @@ static void probe_octeon(void) unsigned long dcache_size; unsigned int config1; struct cpuinfo_mips *c = ¤t_cpu_data; + int cputype = current_cpu_type(); config1 = read_c0_config1(); - switch (c->cputype) { + switch (cputype) { case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: c->icache.linesz = 2 << ((config1 >> 19) & 7); @@ -199,7 +201,7 @@ static void probe_octeon(void) c->icache.sets * c->icache.ways * c->icache.linesz; c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; c->dcache.linesz = 128; - if (c->cputype == CPU_CAVIUM_OCTEON_PLUS) + if (cputype == CPU_CAVIUM_OCTEON_PLUS) c->dcache.sets = 2; /* CN5XXX has two Dcache sets */ else c->dcache.sets = 1; /* CN3XXX has one Dcache set */ diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index f749f687ee87..627883bc6d5f 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -12,6 +12,7 @@ #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/linkage.h> +#include <linux/preempt.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> @@ -24,6 +25,7 @@ #include <asm/cacheops.h> #include <asm/cpu.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -601,6 +603,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -621,6 +624,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_dcache_range(addr, addr + size); } + preempt_enable(); bc_wback_inv(addr, size); __sync(); @@ -631,6 +635,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -655,6 +660,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_inv_dcache_range(addr, addr + size); } + preempt_enable(); bc_inv(addr, size); __sync(); @@ -780,20 +786,30 @@ static inline void rm7k_erratum31(void) static inline void alias_74k_erratum(struct cpuinfo_mips *c) { + unsigned int imp = c->processor_id & PRID_IMP_MASK; + unsigned int rev = c->processor_id & PRID_REV_MASK; + /* * Early versions of the 74K do not update the cache tags on a * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG * aliases. In this case it is better to treat the cache as always * having aliases. */ - if ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(2, 4, 0)) - c->dcache.flags |= MIPS_CACHE_VTAG; - if ((c->processor_id & 0xff) == PRID_REV_ENCODE_332(2, 4, 0)) - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); - if (((c->processor_id & 0xff00) == PRID_IMP_1074K) && - ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(1, 1, 0))) { - c->dcache.flags |= MIPS_CACHE_VTAG; - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + switch (imp) { + case PRID_IMP_74K: + if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) + c->dcache.flags |= MIPS_CACHE_VTAG; + if (rev == PRID_REV_ENCODE_332(2, 4, 0)) + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + break; + case PRID_IMP_1074K: + if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { + c->dcache.flags |= MIPS_CACHE_VTAG; + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + } + break; + default: + BUG(); } } @@ -809,7 +825,7 @@ static void probe_pcache(void) unsigned long config1; unsigned int lsize; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4600: /* QED style two way caches? */ case CPU_R4700: case CPU_R5000: @@ -1025,7 +1041,8 @@ static void probe_pcache(void) * presumably no vendor is shipping his hardware in the "bad" * configuration. */ - if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && + if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && + (prid & PRID_REV_MASK) < PRID_REV_R4400 && !(config & CONF_SC) && c->icache.linesz != 16 && PAGE_SIZE <= 0x8000) panic("Improper R4000SC processor configuration detected"); @@ -1045,7 +1062,7 @@ static void probe_pcache(void) * normally they'd suffer from aliases but magic in the hardware deals * with that for us so we don't need to take care ourselves. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: case CPU_SB1: @@ -1065,7 +1082,7 @@ static void probe_pcache(void) case CPU_34K: case CPU_74K: case CPU_1004K: - if (c->cputype == CPU_74K) + if (current_cpu_type() == CPU_74K) alias_74k_erratum(c); if ((read_c0_config7() & (1 << 16))) { /* effectively physically indexed dcache, @@ -1078,7 +1095,7 @@ static void probe_pcache(void) c->dcache.flags |= MIPS_CACHE_ALIASES; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: /* * Some older 20Kc chips doesn't have the 'VI' bit in @@ -1207,7 +1224,7 @@ static void setup_scache(void) * processors don't have a S-cache that would be relevant to the * Linux memory management. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -1384,9 +1401,8 @@ static void r4k_cache_error_setup(void) { extern char __weak except_vec2_generic; extern char __weak except_vec2_sb1; - struct cpuinfo_mips *c = ¤t_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_SB1: case CPU_SB1A: set_uncached_handler(0x100, &except_vec2_sb1, 0x80); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 664e523653d0..5f8b95512580 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <asm/cache.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <dma-coherence.h> @@ -307,12 +308,10 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (cpu_needs_post_dma_flush(dev)) + if (cpu_needs_post_dma_flush(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } static void mips_dma_sync_sg_for_device(struct device *dev, @@ -320,12 +319,10 @@ static void mips_dma_sync_sg_for_device(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (!plat_device_is_coherent(dev)) + if (!plat_device_is_coherent(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 218c2109a55d..cbd81d17793a 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -18,6 +18,7 @@ #include <asm/bugs.h> #include <asm/cacheops.h> +#include <asm/cpu-type.h> #include <asm/inst.h> #include <asm/io.h> #include <asm/page.h> diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 5d01392e3518..08d05aee8788 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/mm.h> +#include <asm/cpu-type.h> #include <asm/mipsregs.h> #include <asm/bcache.h> #include <asm/cacheops.h> @@ -71,7 +72,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) unsigned int tmp; /* Check the bypass bit (L2B) */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: case CPU_74K: case CPU_1004K: diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 00b26a67a06d..bb3a5f643e97 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/bootinfo.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 821b45175dc1..9bb3a9363b06 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -30,6 +30,7 @@ #include <linux/cache.h> #include <asm/cacheflush.h> +#include <asm/cpu-type.h> #include <asm/pgtable.h> #include <asm/war.h> #include <asm/uasm.h> diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 53aad4a35375..a18af5fce67e 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -27,6 +27,7 @@ #include <linux/timex.h> #include <linux/mc146818rtc.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/mipsmtregs.h> #include <asm/hardirq.h> @@ -76,7 +77,7 @@ static void __init estimate_frequencies(void) #endif #if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ) - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); /* * XXXKYMA: hardwire the CPU frequency to Host Freq/4 @@ -169,7 +170,7 @@ unsigned int get_c0_compare_int(void) void __init plat_time_init(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int freq; estimate_frequencies(); diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index a43ea3cc0a3b..552d26c34386 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -7,6 +7,7 @@ */ #include <linux/init.h> +#include <asm/cpu.h> #include <asm/setup.h> #include <asm/time.h> #include <asm/irq.h> @@ -34,7 +35,7 @@ static void __iomem *status_reg = (void __iomem *)0xbf000410; */ static unsigned int __init estimate_cpu_frequency(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int tick = 0; unsigned int freq; unsigned int orig; diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c index ed3bf0e3f309..c7622c6e5f67 100644 --- a/arch/mips/netlogic/xlr/fmn-config.c +++ b/arch/mips/netlogic/xlr/fmn-config.c @@ -36,6 +36,7 @@ #include <linux/irq.h> #include <linux/interrupt.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/netlogic/xlr/fmn.h> #include <asm/netlogic/xlr/xlr.h> @@ -187,7 +188,7 @@ void xlr_board_info_setup(void) int processor_id, num_core; num_core = hweight32(nlm_current_node()->coremask); - processor_id = read_c0_prid() & 0xff00; + processor_id = read_c0_prid() & PRID_IMP_MASK; setup_cpu_fmninfo(cpu, num_core); switch (processor_id) { diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 5e5424753b56..4d1736fc1955 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -12,6 +12,7 @@ #include <linux/oprofile.h> #include <linux/smp.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include "op_impl.h" diff --git a/arch/mips/pci/pci-bcm1480.c b/arch/mips/pci/pci-bcm1480.c index 44dd5aa2e36f..5ec2a7bae02c 100644 --- a/arch/mips/pci/pci-bcm1480.c +++ b/arch/mips/pci/pci-bcm1480.c @@ -39,6 +39,7 @@ #include <linux/mm.h> #include <linux/console.h> #include <linux/tty.h> +#include <linux/vt.h> #include <asm/sibyte/bcm1480_regs.h> #include <asm/sibyte/bcm1480_scd.h> diff --git a/arch/mips/sibyte/bcm1480/setup.c b/arch/mips/sibyte/bcm1480/setup.c index 05ed92c92b69..8e2e04f77870 100644 --- a/arch/mips/sibyte/bcm1480/setup.c +++ b/arch/mips/sibyte/bcm1480/setup.c @@ -22,6 +22,7 @@ #include <linux/string.h> #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/io.h> #include <asm/sibyte/sb1250.h> @@ -119,7 +120,7 @@ void __init bcm1480_setup(void) uint64_t sys_rev; int plldiv; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); part_type = G_SYS_PART(sys_rev); diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c index a14bd4cb0bc0..3c02b2a77ae9 100644 --- a/arch/mips/sibyte/sb1250/setup.c +++ b/arch/mips/sibyte/sb1250/setup.c @@ -22,6 +22,7 @@ #include <linux/string.h> #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/io.h> #include <asm/sibyte/sb1250.h> @@ -182,7 +183,7 @@ void __init sb1250_setup(void) int plldiv; int bad_config = 0; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); soc_pass = G_SYS_REVISION(sys_rev); diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index 5b09b3544edd..efad85c8c823 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -25,6 +25,7 @@ #endif #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/io.h> #include <asm/reboot.h> #include <asm/sni.h> @@ -173,7 +174,7 @@ void __init plat_mem_setup(void) system_type = "RM300-Cxx"; break; case SNI_BRD_PCI_DESKTOP: - switch (read_c0_prid() & 0xff00) { + switch (read_c0_prid() & PRID_IMP_MASK) { case PRID_IMP_R4600: case PRID_IMP_R4700: system_type = "RM200-C20"; diff --git a/arch/openrisc/include/asm/prom.h b/arch/openrisc/include/asm/prom.h index eb59bfe23e85..93c9980e1b6b 100644 --- a/arch/openrisc/include/asm/prom.h +++ b/arch/openrisc/include/asm/prom.h @@ -14,53 +14,9 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ - -#include <linux/of.h> /* linux/of.h gets to determine #include ordering */ - #ifndef _ASM_OPENRISC_PROM_H #define _ASM_OPENRISC_PROM_H -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include <linux/types.h> -#include <asm/irq.h> -#include <linux/irqdomain.h> -#include <linux/atomic.h> -#include <linux/of_irq.h> -#include <linux/of_fdt.h> -#include <linux/of_address.h> -#include <linux/proc_fs.h> -#include <linux/platform_device.h> #define HAVE_ARCH_DEVTREE_FIXUPS -/* Other Prototypes */ -extern int early_uartlite_console(void); - -/* Parse the ibm,dma-window property of an OF node into the busno, phys and - * size parameters. - */ -void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, - unsigned long *busno, unsigned long *phys, unsigned long *size); - -extern void kdump_move_device_tree(void); - -/* Get the MAC address */ -extern const void *of_get_mac_address(struct device_node *np); - -/** - * of_irq_map_pci - Resolve the interrupt for a PCI device - * @pdev: the device whose interrupt is to be resolved - * @out_irq: structure of_irq filled by this function - * - * This function resolves the PCI interrupt for a given PCI device. If a - * device-node exists for a given pci_dev, it will use normal OF tree - * walking. If not, it will implement standard swizzling and walk up the - * PCI tree until an device-node is found, at which point it will finish - * resolving using the OF tree walking. - */ -struct pci_dev; -extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq); - -#endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _ASM_OPENRISC_PROM_H */ diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6a15c968d214..15ca2255f438 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c -src-plat-y := of.c +src-plat-y := of.c epapr.c src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ treeboot-walnut.c cuboot-acadia.c \ cuboot-kilauea.c simpleboot.c \ @@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \ prpmc2800.c src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c -src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c +src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c new file mode 100644 index 000000000000..c10191006673 --- /dev/null +++ b/arch/powerpc/boot/epapr-wrapper.c @@ -0,0 +1,9 @@ +extern void epapr_platform_init(unsigned long r3, unsigned long r4, + unsigned long r5, unsigned long r6, + unsigned long r7); + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + epapr_platform_init(r3, r4, r5, r6, r7); +} diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c index 06c1961bd124..02e91aa2194a 100644 --- a/arch/powerpc/boot/epapr.c +++ b/arch/powerpc/boot/epapr.c @@ -48,8 +48,8 @@ static void platform_fixups(void) fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); } -void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) { epapr_magic = r6; ima_size = r7; diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c index 61d9899aa0d0..62e2f43ec1df 100644 --- a/arch/powerpc/boot/of.c +++ b/arch/powerpc/boot/of.c @@ -26,6 +26,9 @@ static unsigned long claim_base; +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7); + static void *of_try_claim(unsigned long size) { unsigned long addr = 0; @@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr) } } -void platform_init(unsigned long a1, unsigned long a2, void *promptr) +static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr) { platform_ops.image_hdr = of_image_hdr; platform_ops.malloc = of_try_claim; @@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr) loader_info.initrd_size = a2; } } + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + /* Detect OF vs. ePAPR boot */ + if (r5) + of_platform_init(r3, r4, (void *)r5); + else + epapr_platform_init(r3, r4, r5, r6, r7); +} + diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 6761c746048d..cd7af841ba05 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -148,18 +148,18 @@ make_space=y case "$platform" in pseries) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x4000000' ;; maple) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x400000' ;; pmac|chrp) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" ;; coff) - platformo="$object/crt0.o $object/of.o" + platformo="$object/crt0.o $object/of.o $object/epapr.o" lds=$object/zImage.coff.lds link_address='0x500000' pie= @@ -253,6 +253,7 @@ treeboot-iss4xx-mpic) platformo="$object/treeboot-iss4xx.o" ;; epapr) + platformo="$object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie ;; diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 0e40843a1c6e..41f13cec8a8f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS]; extern void irq_ctx_init(void); extern void call_do_softirq(struct thread_info *tp); -extern int call_handle_irq(int irq, void *p1, - struct thread_info *tp, void *func); +extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); extern void do_IRQ(struct pt_regs *regs); +extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index e378cccfca55..ce4de5aed7b5 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -149,8 +149,6 @@ typedef struct { struct thread_struct { unsigned long ksp; /* Kernel stack pointer */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ - #ifdef CONFIG_PPC64 unsigned long ksp_vsid; #endif @@ -162,6 +160,7 @@ struct thread_struct { #endif #ifdef CONFIG_PPC32 void *pgdir; /* root of page-table tree */ + unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ #endif #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* @@ -321,7 +320,6 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .ksp_limit = INIT_SP_LIMIT, \ .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .fs = KERNEL_DS, \ .fpr = {{0}}, \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8958be5f31a..502c7a4e73f7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -80,10 +80,11 @@ int main(void) DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c69440cef7af..57d286a78f86 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -441,50 +441,6 @@ void migrate_irqs(void) } #endif -static inline void handle_one_irq(unsigned int irq) -{ - struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit; - struct irq_desc *desc; - - desc = irq_to_desc(irq); - if (!desc) - return; - - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - - if (curtp == irqtp) { - /* We're already on the irq stack, just handle it */ - desc->handle_irq(irq, desc); - return; - } - - saved_sp_limit = current->thread.ksp_limit; - - irqtp->task = curtp->task; - irqtp->flags = 0; - - /* Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. */ - irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | - (curtp->preempt_count & SOFTIRQ_MASK); - - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); - - call_handle_irq(irq, desc, irqtp, desc->handle_irq); - current->thread.ksp_limit = saved_sp_limit; - irqtp->task = NULL; - - /* Set any flag that may have been set on the - * alternate stack - */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); -} - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -501,9 +457,9 @@ static inline void check_stack_overflow(void) #endif } -void do_IRQ(struct pt_regs *regs) +void __do_irq(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; unsigned int irq; irq_enter(); @@ -519,18 +475,56 @@ void do_IRQ(struct pt_regs *regs) */ irq = ppc_md.get_irq(); - /* We can hard enable interrupts now */ + /* We can hard enable interrupts now to allow perf interrupts */ may_hard_irq_enable(); /* And finally process it */ - if (irq != NO_IRQ) - handle_one_irq(irq); - else + if (unlikely(irq == NO_IRQ)) __get_cpu_var(irq_stat).spurious_irqs++; + else { + desc = irq_to_desc(irq); + if (likely(desc)) + desc->handle_irq(irq, desc); + } trace_irq_exit(regs); irq_exit(); +} + +void do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct thread_info *curtp, *irqtp; + + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[raw_smp_processor_id()]; + + /* Already there ? */ + if (unlikely(curtp == irqtp)) { + __do_irq(regs); + set_irq_regs(old_regs); + return; + } + + /* Prepare the thread_info in the irq stack */ + irqtp->task = curtp->task; + irqtp->flags = 0; + + /* Copy the preempt_count so that the [soft]irq checks work. */ + irqtp->preempt_count = curtp->preempt_count; + + /* Switch stack and call */ + call_do_irq(regs, irqtp); + + /* Restore stack limit */ + irqtp->task = NULL; + + /* Copy back updates to the thread_info */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + set_irq_regs(old_regs); } @@ -592,28 +586,22 @@ void irq_ctx_init(void) memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; - tp->preempt_count = 0; memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); tp = hardirq_ctx[i]; tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; } } static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit = current->thread.ksp_limit; curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; irqtp->task = curtp->task; irqtp->flags = 0; - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); call_do_softirq(irqtp); - current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; /* Set any flag that may have been set on the diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 777d999f563b..2b0ad9845363 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -36,26 +36,41 @@ .text +/* + * We store the saved ksp_limit in the unused part + * of the STACK_FRAME_OVERHEAD + */ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) bl __do_softirq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr -_GLOBAL(call_handle_irq) +_GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) - mtctr r6 - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) + bl __do_irq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 971d7e78aff2..e59caf874d05 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_irq) - ld r8,0(r6) +_GLOBAL(call_do_irq) mflr r0 std r0,16(r1) - mtctr r8 - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + bl .__do_irq ld r1,0(r1) ld r0,16(r1) mtlr r0 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6f428da53e20..96d2fdf3aa9e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1000,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; +#ifdef CONFIG_PPC32 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + _ALIGN_UP(sizeof(struct thread_info), 16); - +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT p->thread.ptrace_bps[0] = NULL; #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 12e656ffe60e..5fe2842e8bab 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt; static cell_t __initdata regbuf[1024]; +static bool rtas_has_query_cpu_stopped; + /* * Error results ... some OF calls will return "-1" on error, some @@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void) prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", &val, sizeof(val)); + /* Check if it supports "query-cpu-stopped-state" */ + if (prom_getprop(rtas_node, "query-cpu-stopped-state", + &val, sizeof(val)) != PROM_ERROR) + rtas_has_query_cpu_stopped = true; + #if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) /* PowerVN takeover hack */ prom_rtas_data = base; @@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void) = (void *) LOW_ADDR(__secondary_hold_acknowledge); unsigned long secondary_hold = LOW_ADDR(__secondary_hold); + /* + * On pseries, if RTAS supports "query-cpu-stopped-state", + * we skip this stage, the CPUs will be started by the + * kernel using RTAS. + */ + if ((of_platform == PLATFORM_PSERIES || + of_platform == PLATFORM_PSERIES_LPAR) && + rtas_has_query_cpu_stopped) { + prom_printf("prom_hold_cpus: skipped\n"); + return; + } + prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, put all CPUs in spin-loops. * * PowerMacs use a different mechanism to spin CPUs + * + * (This must be done after instanciating RTAS) */ if (of_platform != PLATFORM_POWERMAC && of_platform != PLATFORM_OPAL) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a7ee978fb860..b1faa1593c90 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) */ if ((ra == 1) && !(regs->msr & MSR_PR) \ && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { +#ifdef CONFIG_PPC32 /* * Check if we will touch kernel sack overflow */ @@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = -EINVAL; break; } - +#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 1c1771a40250..24f58cb0a543 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -233,18 +233,24 @@ static void __init smp_init_pseries(void) alloc_bootmem_cpumask_var(&of_spin_mask); - /* Mark threads which are still spinning in hold loops. */ - if (cpu_has_feature(CPU_FTR_SMT)) { - for_each_present_cpu(i) { - if (cpu_thread_in_core(i) == 0) - cpumask_set_cpu(i, of_spin_mask); - } - } else { - cpumask_copy(of_spin_mask, cpu_present_mask); + /* + * Mark threads which are still spinning in hold loops + * + * We know prom_init will not have started them if RTAS supports + * query-cpu-stopped-state. + */ + if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) { + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, of_spin_mask); + } + } else + cpumask_copy(of_spin_mask, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, of_spin_mask); } - cpumask_clear_cpu(boot_cpuid, of_spin_mask); - /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { smp_ops->give_timebase = rtas_give_timebase; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index dcc6ac2d8026..7143793859fa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -93,6 +93,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 @@ -102,7 +103,6 @@ config S390 select GENERIC_TIME_VSYSCALL_OLD select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 - select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 688271f5f2e4..458c1f7fbc18 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,5 +7,3 @@ */ #include <asm-generic/mutex-dec.h> - -#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0eb37505cab1..ca7821f07260 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -198,6 +198,8 @@ static inline void cpu_relax(void) barrier(); } +#define arch_mutex_cpu_relax() barrier() + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 701fe8c59e1f..83e5d216105e 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); extern int arch_spin_trylock_retry(arch_spinlock_t *); extern void arch_spin_relax(arch_spinlock_t *lock); +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner_cpu == 0; +} + static inline void arch_spin_lock(arch_spinlock_t *lp) { int old; diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 8a7cc663b3f8..d45a2c48f185 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -361,7 +361,7 @@ config CMDLINE_OVERRIDE config VMALLOC_RESERVE hex - default 0x1000000 + default 0x2000000 config HARDWALL bool "Hardwall support to allow access to user dynamic network" diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c index 4f8f3d619c4a..e19325c4c431 100644 --- a/arch/tile/gxio/iorpc_mpipe.c +++ b/arch/tile/gxio/iorpc_mpipe.c @@ -21,7 +21,7 @@ struct alloc_buffer_stacks_param { unsigned int flags; }; -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -45,7 +45,7 @@ struct init_buffer_stack_aux_param { unsigned int buffer_size_enum; }; -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum) @@ -80,7 +80,7 @@ struct alloc_notif_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -102,7 +102,7 @@ struct init_notif_ring_aux_param { unsigned int ring; }; -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring) { @@ -133,7 +133,7 @@ struct request_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring) @@ -158,7 +158,7 @@ struct enable_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring) { struct enable_notif_ring_interrupt_param temp; @@ -179,7 +179,7 @@ struct alloc_notif_groups_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -201,7 +201,7 @@ struct init_notif_group_param { gxio_mpipe_notif_group_bits_t bits; }; -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned int group, gxio_mpipe_notif_group_bits_t bits) { @@ -223,7 +223,7 @@ struct alloc_buckets_param { unsigned int flags; }; -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { struct alloc_buckets_param temp; @@ -244,7 +244,7 @@ struct init_bucket_param { MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info; }; -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info) { struct init_bucket_param temp; @@ -265,7 +265,7 @@ struct alloc_edma_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -288,7 +288,7 @@ struct init_edma_ring_aux_param { unsigned int channel; }; -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel) { @@ -315,7 +315,7 @@ int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size) { const void *params = blob; @@ -332,7 +332,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags) { @@ -355,7 +355,7 @@ struct link_open_aux_param { unsigned int flags; }; -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags) { struct link_open_aux_param temp; @@ -374,7 +374,7 @@ struct link_close_aux_param { int mac; }; -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac) +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac) { struct link_close_aux_param temp; struct link_close_aux_param *params = &temp; @@ -393,7 +393,7 @@ struct link_set_attr_aux_param { int64_t val; }; -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val) { struct link_set_attr_aux_param temp; @@ -415,8 +415,8 @@ struct get_timestamp_aux_param { uint64_t cycles; }; -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles) +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles) { int __result; struct get_timestamp_aux_param temp; @@ -440,7 +440,7 @@ struct set_timestamp_aux_param { uint64_t cycles; }; -int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles) { struct set_timestamp_aux_param temp; @@ -460,8 +460,7 @@ struct adjust_timestamp_aux_param { int64_t nsec; }; -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, - int64_t nsec) +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec) { struct adjust_timestamp_aux_param temp; struct adjust_timestamp_aux_param *params = &temp; @@ -475,25 +474,6 @@ int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux); -struct adjust_timestamp_freq_param { - int32_t ppb; -}; - -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, - int32_t ppb) -{ - struct adjust_timestamp_freq_param temp; - struct adjust_timestamp_freq_param *params = &temp; - - params->ppb = ppb; - - return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, - sizeof(*params), - GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); -} - -EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); - struct config_edma_ring_blks_param { unsigned int ering; unsigned int max_blks; @@ -501,7 +481,7 @@ struct config_edma_ring_blks_param { unsigned int db; }; -int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, +int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context, unsigned int ering, unsigned int max_blks, unsigned int min_snf_blks, unsigned int db) { @@ -520,11 +500,29 @@ int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks); +struct adjust_timestamp_freq_param { + int32_t ppb; +}; + +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb) +{ + struct adjust_timestamp_freq_param temp; + struct adjust_timestamp_freq_param *params = &temp; + + params->ppb = ppb; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); +} + +EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); + struct arm_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct arm_pollfd_param temp; struct arm_pollfd_param *params = &temp; @@ -541,7 +539,7 @@ struct close_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct close_pollfd_param temp; struct close_pollfd_param *params = &temp; @@ -558,7 +556,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base) +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -579,7 +577,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_mpipe_info.c b/arch/tile/gxio/iorpc_mpipe_info.c index 64883aabeb9c..77019c6e9b4a 100644 --- a/arch/tile/gxio/iorpc_mpipe_info.c +++ b/arch/tile/gxio/iorpc_mpipe_info.c @@ -15,12 +15,11 @@ /* This file is machine-generated; DO NOT EDIT! */ #include "gxio/iorpc_mpipe_info.h" - struct instance_aux_param { _gxio_mpipe_link_name_t name; }; -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name) { struct instance_aux_param temp; @@ -39,10 +38,10 @@ struct enumerate_aux_param { _gxio_mpipe_link_mac_t mac; }; -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac) + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac) { int __result; struct enumerate_aux_param temp; @@ -50,7 +49,7 @@ int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, __result = hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), - (((uint64_t) idx << 32) | + (((uint64_t)idx << 32) | GXIO_MPIPE_INFO_OP_ENUMERATE_AUX)); *name = params->name; *mac = params->mac; @@ -64,7 +63,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base) { int __result; @@ -86,7 +85,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c index da6e18e049c3..1d3cedb9aeb4 100644 --- a/arch/tile/gxio/iorpc_trio.c +++ b/arch/tile/gxio/iorpc_trio.c @@ -21,7 +21,7 @@ struct alloc_asids_param { unsigned int flags; }; -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { struct alloc_asids_param temp; @@ -44,7 +44,7 @@ struct alloc_memory_maps_param { unsigned int flags; }; -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -67,7 +67,7 @@ struct alloc_scatter_queues_param { unsigned int flags; }; -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -91,7 +91,7 @@ struct alloc_pio_regions_param { unsigned int flags; }; -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -115,7 +115,7 @@ struct init_pio_region_aux_param { unsigned int flags; }; -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags) { @@ -145,7 +145,7 @@ struct init_memory_map_mmu_aux_param { unsigned int order_mode; }; -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int mac, uint64_t bus_address, @@ -175,7 +175,7 @@ struct get_port_property_param { struct pcie_trio_ports_property trio_ports; }; -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports) { int __result; @@ -198,7 +198,7 @@ struct config_legacy_intr_param { unsigned int intx; }; -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx) { @@ -227,7 +227,7 @@ struct config_msi_intr_param { unsigned int asid; }; -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, @@ -259,7 +259,7 @@ struct set_mps_mrs_param { unsigned int mac; }; -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac) { struct set_mps_mrs_param temp; @@ -279,7 +279,7 @@ struct force_rc_link_up_param { unsigned int mac; }; -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_rc_link_up_param temp; struct force_rc_link_up_param *params = &temp; @@ -296,7 +296,7 @@ struct force_ep_link_up_param { unsigned int mac; }; -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_ep_link_up_param temp; struct force_ep_link_up_param *params = &temp; @@ -313,7 +313,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base) +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -334,7 +334,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_usb_host.c b/arch/tile/gxio/iorpc_usb_host.c index cf3c3cc12204..9c820073bfc0 100644 --- a/arch/tile/gxio/iorpc_usb_host.c +++ b/arch/tile/gxio/iorpc_usb_host.c @@ -19,7 +19,7 @@ struct cfg_interrupt_param { union iorpc_interrupt interrupt; }; -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event) { struct cfg_interrupt_param temp; @@ -41,7 +41,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags) { struct register_client_memory_param temp; @@ -61,7 +61,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, HV_PTE *base) +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -82,7 +82,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/usb_host.c b/arch/tile/gxio/usb_host.c index 66b002f54ecc..785afad7922e 100644 --- a/arch/tile/gxio/usb_host.c +++ b/arch/tile/gxio/usb_host.c @@ -26,7 +26,7 @@ #include <gxio/kiorpc.h> #include <gxio/usb_host.h> -int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci) { char file[32]; @@ -63,7 +63,7 @@ int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, EXPORT_SYMBOL_GPL(gxio_usb_host_init); -int gxio_usb_host_destroy(gxio_usb_host_context_t * context) +int gxio_usb_host_destroy(gxio_usb_host_context_t *context) { iounmap((void __force __iomem *)(context->mmio_base)); hv_dev_close(context->fd); @@ -76,14 +76,14 @@ int gxio_usb_host_destroy(gxio_usb_host_context_t * context) EXPORT_SYMBOL_GPL(gxio_usb_host_destroy); -void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context) +void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context) { return context->mmio_base; } EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start); -size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context) +size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context) { return HV_USB_HOST_MMIO_SIZE; } diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h index 8a33912fd6cc..904538e754d8 100644 --- a/arch/tile/include/arch/mpipe.h +++ b/arch/tile/include/arch/mpipe.h @@ -176,7 +176,18 @@ typedef union */ uint_reg_t stack_idx : 5; /* Reserved. */ - uint_reg_t __reserved_2 : 5; + uint_reg_t __reserved_2 : 3; + /* + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. This field is ignored on writes. + */ + uint_reg_t inst : 2; /* * Reads as one to indicate that this is a hardware managed buffer. * Ignored on writes since all buffers on a given stack are the same size. @@ -205,7 +216,8 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 5; + uint_reg_t inst : 2; + uint_reg_t __reserved_2 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_1 : 6; int_reg_t va : 35; @@ -231,9 +243,9 @@ typedef union /* Reserved. */ uint_reg_t __reserved_0 : 3; /* eDMA ring being accessed */ - uint_reg_t ring : 5; + uint_reg_t ring : 6; /* Reserved. */ - uint_reg_t __reserved_1 : 18; + uint_reg_t __reserved_1 : 17; /* * This field of the address selects the region (address space) to be * accessed. For the egress DMA post region, this field must be 5. @@ -250,8 +262,8 @@ typedef union uint_reg_t svc_dom : 5; uint_reg_t __reserved_2 : 6; uint_reg_t region : 3; - uint_reg_t __reserved_1 : 18; - uint_reg_t ring : 5; + uint_reg_t __reserved_1 : 17; + uint_reg_t ring : 6; uint_reg_t __reserved_0 : 3; #endif }; diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h index 410a0400e055..84022ac5fe82 100644 --- a/arch/tile/include/arch/mpipe_constants.h +++ b/arch/tile/include/arch/mpipe_constants.h @@ -16,13 +16,13 @@ #ifndef __ARCH_MPIPE_CONSTANTS_H__ #define __ARCH_MPIPE_CONSTANTS_H__ -#define MPIPE_NUM_CLASSIFIERS 10 +#define MPIPE_NUM_CLASSIFIERS 16 #define MPIPE_CLS_MHZ 1200 -#define MPIPE_NUM_EDMA_RINGS 32 +#define MPIPE_NUM_EDMA_RINGS 64 #define MPIPE_NUM_SGMII_MACS 16 -#define MPIPE_NUM_XAUI_MACS 4 +#define MPIPE_NUM_XAUI_MACS 16 #define MPIPE_NUM_LOOPBACK_CHANNELS 4 #define MPIPE_NUM_NON_LB_CHANNELS 28 diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h index f2e9e122818d..13b3c4300e50 100644 --- a/arch/tile/include/arch/mpipe_shm.h +++ b/arch/tile/include/arch/mpipe_shm.h @@ -44,8 +44,14 @@ typedef union * descriptors toggles each time the ring tail pointer wraps. */ uint_reg_t gen : 1; + /** + * For devices with EDMA reorder support, this field allows the + * descriptor to select the egress FIFO. The associated DMA ring must + * have ALLOW_EFIFO_SEL enabled. + */ + uint_reg_t efifo_sel : 6; /** Reserved. Must be zero. */ - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; /** Checksum generation enabled for this transfer. */ uint_reg_t csum : 1; /** @@ -110,7 +116,8 @@ typedef union uint_reg_t notif : 1; uint_reg_t ns : 1; uint_reg_t csum : 1; - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; + uint_reg_t efifo_sel : 6; uint_reg_t gen : 1; #endif @@ -126,14 +133,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_1 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_2 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -166,8 +175,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_1 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_0 : 6; @@ -408,7 +416,10 @@ typedef union /** * Sequence number applied when packet is distributed. Classifier * selects which sequence number is to be applied by writing the 13-bit - * SQN-selector into this field. + * SQN-selector into this field. For devices that support EXT_SQN (as + * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to + * 32-bits via the IDMA_CTL.EXT_SQN register. In this case the + * PACKET_SQN will be reduced to 32 bits. */ uint_reg_t gp_sqn : 16; /** @@ -451,14 +462,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_5 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_6 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -491,8 +504,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_6 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_5 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_4 : 6; diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h index 628b045436b8..85647e91a458 100644 --- a/arch/tile/include/arch/trio_constants.h +++ b/arch/tile/include/arch/trio_constants.h @@ -16,21 +16,21 @@ #ifndef __ARCH_TRIO_CONSTANTS_H__ #define __ARCH_TRIO_CONSTANTS_H__ -#define TRIO_NUM_ASIDS 16 +#define TRIO_NUM_ASIDS 32 #define TRIO_NUM_TLBS_PER_ASID 16 #define TRIO_NUM_TPIO_REGIONS 8 #define TRIO_LOG2_NUM_TPIO_REGIONS 3 -#define TRIO_NUM_MAP_MEM_REGIONS 16 -#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 4 +#define TRIO_NUM_MAP_MEM_REGIONS 32 +#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5 #define TRIO_NUM_MAP_SQ_REGIONS 8 #define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3 #define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6 -#define TRIO_NUM_PUSH_DMA_RINGS 32 +#define TRIO_NUM_PUSH_DMA_RINGS 64 -#define TRIO_NUM_PULL_DMA_RINGS 32 +#define TRIO_NUM_PULL_DMA_RINGS 64 #endif /* __ARCH_TRIO_CONSTANTS_H__ */ diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 6346888f7bdc..672768008618 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -182,10 +182,9 @@ static inline __attribute_const__ int get_order(unsigned long size) #define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) #define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ -#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */ -#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ #define _VMALLOC_START FIXADDR_TOP -#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ #define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ #define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ #define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index 63142ab3b3dd..d26a42279036 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -55,17 +55,9 @@ #define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK) #ifdef CONFIG_HIGHMEM -# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) +# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) #else -# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1)) -#endif - -#ifdef CONFIG_HUGEVMAP -#define HUGE_VMAP_END __VMAPPING_END -#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE) -#define _VMALLOC_END HUGE_VMAP_BASE -#else -#define _VMALLOC_END __VMAPPING_END +# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1)) #endif /* diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 3421177f7370..2c8a9cd102d3 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -52,12 +52,10 @@ * memory allocation code). The vmalloc code puts in an internal * guard page between each allocation. */ -#define _VMALLOC_END HUGE_VMAP_BASE +#define _VMALLOC_END MEM_SV_START #define VMALLOC_END _VMALLOC_END #define VMALLOC_START _VMALLOC_START -#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE) - #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h index fdd07f88cfd7..4cda03de734f 100644 --- a/arch/tile/include/gxio/iorpc_mpipe.h +++ b/arch/tile/include/gxio/iorpc_mpipe.h @@ -56,89 +56,89 @@ #define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum); -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring); -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring); -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring); -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned int group, gxio_mpipe_notif_group_bits_t bits); -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info); -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size); -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags); -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags); -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac); +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac); -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val); -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles); +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles); -int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles); -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec); -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb); -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base); +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base); -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h index 476c5e5ca22c..f0b04284468b 100644 --- a/arch/tile/include/gxio/iorpc_mpipe_info.h +++ b/arch/tile/include/gxio/iorpc_mpipe_info.h @@ -33,18 +33,18 @@ #define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name); -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac); + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac); -int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base); -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index d95b96fd6c93..376a4f771167 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -46,59 +46,59 @@ #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags); -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int mac, uint64_t bus_address, unsigned int node, unsigned int order_mode); -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports); -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx); -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, unsigned int asid); -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac); -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base); +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base); -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_TRIO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h index 8622e7d126ad..79962a97de8e 100644 --- a/arch/tile/include/gxio/iorpc_usb_host.h +++ b/arch/tile/include/gxio/iorpc_usb_host.h @@ -31,16 +31,16 @@ #define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event); -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags); -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base); -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h index 5eedec0e988e..93c9636d2dd7 100644 --- a/arch/tile/include/gxio/usb_host.h +++ b/arch/tile/include/gxio/usb_host.h @@ -53,7 +53,7 @@ typedef struct { * @return Zero if the context was successfully initialized, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci); /* Destroy a USB context. @@ -68,20 +68,20 @@ extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, * @return Zero if the context was successfully destroyed, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_destroy(gxio_usb_host_context_t * context); +extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context); /* Retrieve the address of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The address of the shim's MMIO registers. */ -extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context); +extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context); /* Retrieve the length of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The length of the shim's MMIO registers. */ -extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context); +extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context); #endif /* _GXIO_USB_H_ */ diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index ed378416b86a..49120843ff96 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -84,7 +84,7 @@ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, { return sys_llseek(fd, offset_high, offset_low, result, origin); } - + /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S deleted file mode 100644 index f465d1eda20f..000000000000 --- a/arch/tile/kernel/futex_64.S +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * Atomically access user memory, but use MMU to avoid propagating - * kernel exceptions. - */ - -#include <linux/linkage.h> -#include <asm/errno.h> -#include <asm/futex.h> -#include <asm/page.h> -#include <asm/processor.h> - -/* - * Provide a set of atomic memory operations supporting <asm/futex.h>. - * - * r0: user address to manipulate - * r1: new value to write, or for cmpxchg, old value to compare against - * r2: (cmpxchg only) new value to write - * - * Return __get_user struct, r0 with value, r1 with error. - */ -#define FUTEX_OP(name, ...) \ -STD_ENTRY(futex_##name) \ - __VA_ARGS__; \ - { \ - move r1, zero; \ - jrp lr \ - }; \ - STD_ENDPROC(futex_##name); \ - .pushsection __ex_table,"a"; \ - .quad 1b, get_user_fault; \ - .popsection - - .pushsection .fixup,"ax" -get_user_fault: - { movei r1, -EFAULT; jrp lr } - ENDPROC(get_user_fault) - .popsection - -FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2) -FUTEX_OP(set, 1: exch4 r0, r0, r1) -FUTEX_OP(add, 1: fetchadd4 r0, r0, r1) -FUTEX_OP(or, 1: fetchor4 r0, r0, r1) -FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 4c34caea9dd3..74c91729a62a 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1268,8 +1268,7 @@ static void __init validate_va(void) if ((long)VMALLOC_START >= 0) early_panic( "Linux VMALLOC region below the 2GB line (%#lx)!\n" - "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n" - "or smaller VMALLOC_RESERVE.\n", + "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n", VMALLOC_START); #endif } diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index b425fb6a480d..b030b4e78845 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -551,8 +551,8 @@ static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff) /* * This function generates unalign fixup JIT. * - * We fist find unalign load/store instruction's destination, source - * reguisters: ra, rb and rd. and 3 scratch registers by calling + * We first find unalign load/store instruction's destination, source + * registers: ra, rb and rd. and 3 scratch registers by calling * find_regs(...). 3 scratch clobbers should not alias with any register * used in the fault bundle. Then analyze the fault bundle to determine * if it's a load or store, operand width, branch or address increment etc. diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 4c288f199453..6c0571216a9d 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -149,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) pmd_k = vmalloc_sync_one(pgd, address); if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) - return 0; /* support TILE huge_vmap() API */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e316deb92fd..0fa1acfac79a 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -828,10 +828,6 @@ void __init mem_init(void) printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); #endif -#ifdef CONFIG_HUGEVMAP - printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", - HUGE_VMAP_BASE, HUGE_VMAP_END - 1); -#endif printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", _VMALLOC_START, _VMALLOC_END - 1); #ifdef __tilegx__ diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 2deaddf3e01f..4fd9ec0b58ed 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -127,8 +127,7 @@ void shatter_huge_page(unsigned long addr) } /* Shatter the huge page into the preallocated L2 page table. */ - pmd_populate_kernel(&init_mm, pmd, - get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd))); #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e241a1930c98..ee2fb9d37745 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -481,11 +481,12 @@ config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" depends on ACPI select COMMON_CLK + select PINCTRL ---help--- Select to build support for Intel Low Power Subsystem such as found on Intel Lynxpoint PCH. Selecting this option enables - things like clock tree (common clock framework) which are needed - by the LPSS peripheral drivers. + things like clock tree (common clock framework) and pincontrol + which are needed by the LPSS peripheral drivers. config X86_RDC321X bool "RDC R-321x SoC" diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 6aef9fbc09b7..b913915e8e63 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; } -static inline unsigned long mfn_to_pfn(unsigned long mfn) +static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) { unsigned long pfn; - int ret = 0; + int ret; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - if (unlikely(mfn >= machine_to_phys_nr)) { - pfn = ~0; - goto try_override; - } - pfn = 0; + if (unlikely(mfn >= machine_to_phys_nr)) + return ~0; + /* * The array access can fail (e.g., device space beyond end of RAM). * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); -try_override: - /* ret might be < 0 if there are no entries in the m2p for mfn */ if (ret < 0) - pfn = ~0; - else if (get_phys_to_machine(pfn) != mfn) + return ~0; + + return pfn; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) != mfn) { /* * If this appears to be a foreign mfn (because the pfn * doesn't map back to the mfn), then check the local override @@ -111,6 +119,7 @@ try_override: * m2p_find_override_pfn returns ~0 if it doesn't find anything. */ pfn = m2p_find_override_pfn(mfn, ~0); + } /* * pfn is ~0 if there are no entries in the m2p for mfn or if the diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8355c84b9729..897783b3302a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return 0; + err = -ENOTSUPP; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); @@ -1883,9 +1883,9 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { - userpg->cap_usr_time = 0; - userpg->cap_usr_time_zero = 0; - userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) @@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) return; - userpg->cap_usr_time = 1; + userpg->cap_user_time = 1; userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; if (sched_clock_stable && !check_tsc_disabled()) { - userpg->cap_usr_time_zero = 1; + userpg->cap_user_time_zero = 1; userpg->time_zero = this_cpu_read(cyc2ns_offset); } } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c62d88396ad5..f31a1655d1ff 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -899,8 +899,8 @@ static __initconst const u64 atom_hw_cache_event_ids static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1), EVENT_EXTRA_END }; @@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void) break; case 55: /* Atom 22nm "Silvermont" */ + case 77: /* Avoton "Silvermont" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 63438aad177f..ab3ba1c1b7dd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -584,6 +584,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 8ed44589b0e4..4118f9f68315 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) { struct intel_uncore_box *box; int i, size; size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); - box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; @@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, struct intel_uncore_box *fake_box; int ret = -EINVAL, n; - fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); + fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); if (!fake_box) return -ENOMEM; @@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, 0); + box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM; @@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id) if (pmu->func_id < 0) pmu->func_id = j; - box = uncore_alloc_box(type, cpu); + box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1b69951a81e2..b077f4cc225a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -ENTRY(save_rest) - PARTIAL_FRAME 1 (REST_SKIP+8) - movq 5*8+16(%rsp), %r11 /* save return address */ - movq_cfi rbx, RBX+16 - movq_cfi rbp, RBP+16 - movq_cfi r12, R12+16 - movq_cfi r13, R13+16 - movq_cfi r14, R14+16 - movq_cfi r15, R15+16 - movq %r11, 8(%rsp) /* return address */ - FIXUP_TOP_OF_STACK %r11, 16 - ret - CFI_ENDPROC -END(save_rest) - /* save complete stack frame */ .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 7123b5df479d..af99f71aeb7f 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu) /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; + uci->cpu_sig.rev = rev; return 0; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 563ed91e6faa..e643e744e4d8 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -352,12 +352,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, { /* Handle problems with rebooting on the Precision M6600. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell Precision M6600", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), }, }, + { /* Handle problems with rebooting on the Dell PowerEdge C6100. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, + { /* Some C6100 machines were shipped with vendor being 'Dell'. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, { } }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index aecc98a93d1b..6cacab671f9b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -653,6 +653,7 @@ static void announce_cpu(int cpu, int apicid) { static int current_node = -1; int node = early_cpu_to_node(cpu); + int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS); if (system_state == SYSTEM_BOOTING) { if (node != current_node) { @@ -661,7 +662,7 @@ static void announce_cpu(int cpu, int apicid) current_node = node; pr_info("Booting Node %3d, Processors ", node); } - pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); + pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : ""); return; } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2bc1e81045b0..ddc3f3d2afdb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) return rc; } +static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt) +{ + int rc; + + rc = em_ret_far(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + rsp_increment(ctxt, ctxt->src.val); + return X86EMUL_CONTINUE; +} + static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ @@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), - N, I(ImplicitOps | Stack, em_ret_far), + I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm), + I(ImplicitOps | Stack, em_ret_far), D(ImplicitOps), DI(SrcImmByte, intn), D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 043330159179..ad75d77999d0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -99,6 +99,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -309,7 +326,8 @@ retry_walk: goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1f1da43ff2a2..a1216de9ffda 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5339,6 +5339,15 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) return 0; } + /* + * EPT violation happened while executing iret from NMI, + * "blocked by NMI" bit has to be set before next VM entry. + * There are errata that may cause this bit to not be set: + * AAK134, BY25. + */ + if (exit_qualification & INTR_INFO_UNBLOCK_NMI) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); @@ -7766,6 +7775,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); + __clear_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_dirty); } kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 90f6ed127096..c7e22ab29a5a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) { +#ifdef CONFIG_X86_64 + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) +#endif + continue; + } size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8b901e8d782d..a61c7d5811be 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -879,7 +879,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -926,8 +925,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, * frontend pages while they are being shared with the backend, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. */ - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == mfn) + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; @@ -942,7 +941,6 @@ int m2p_remove_override(struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -1029,8 +1027,8 @@ int m2p_remove_override(struct page *page, * the original pfn causes mfn_to_pfn(mfn) to return the frontend * pfn again. */ mfn &= ~FOREIGN_FRAME_BIT; - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && m2p_find_override(mfn) == NULL) set_phys_to_machine(pfn, mfn); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 253f63fceea1..be6b86078957 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -259,6 +259,14 @@ void xen_uninit_lock_cpu(int cpu) } +/* + * Our init of PV spinlocks is split in two init functions due to us + * using paravirt patching and jump labels patching and having to do + * all of this before SMP code is invoked. + * + * The paravirt patching needs to be done _before_ the alternative asm code + * is started, otherwise we would not patch the core kernel code. + */ void __init xen_init_spinlocks(void) { @@ -267,12 +275,26 @@ void __init xen_init_spinlocks(void) return; } - static_key_slow_inc(¶virt_ticketlocks_enabled); - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; } +/* + * While the jump_label init code needs to happend _after_ the jump labels are + * enabled and before SMP is started. Hence we use pre-SMP initcall level + * init. We cannot do it in xen_init_spinlocks as that is done before + * jump labels are activated. + */ +static __init int xen_init_spinlocks_jump(void) +{ + if (!xen_pvspin) + return 0; + + static_key_slow_inc(¶virt_ticketlocks_enabled); + return 0; +} +early_initcall(xen_init_spinlocks_jump); + static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; |