diff options
Diffstat (limited to 'arch/arm64')
102 files changed, 2595 insertions, 1363 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414e..3dcd7ec69bca 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2,6 +2,7 @@ config ARM64 def_bool y select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI + select ACPI_GTDT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI @@ -60,7 +61,6 @@ config ARM64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE - select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) @@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009 If unsure, say Y. +config QCOM_QDF2400_ERRATUM_0065 + bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size" + default y + help + On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports + ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have + been indicated as 16Bytes (0xf), not 8Bytes (0x7). + + If unsure, say Y. + endmenu @@ -726,6 +736,17 @@ config KEXEC but it is independent of the system firmware. And like a reboot you can start any kernel with it, not just Linux. +config CRASH_DUMP + bool "Build kdump crash kernel" + help + Generate crash dump after being started by kexec. This should + be normally only set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec. 
+ + For more details see Documentation/kdump/kdump.txt + config XEN_DOM0 def_bool y depends on XEN @@ -1063,6 +1084,10 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC +config KEYS_COMPAT + def_bool y + depends on COMPAT && KEYS + endmenu menu "Power management options" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index fca2f02cde68..cc6bd559af85 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -92,6 +92,10 @@ config DEBUG_EFI the kernel that are only useful when using a debug build of the UEFI firmware +config ARM64_RELOC_TEST + depends on m + tristate "Relocation testing module" + source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b9a4a934ca05..7dedf2d8494e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,10 +37,12 @@ $(warning LSE atomics not supported by binutils) endif endif +ifeq ($(CONFIG_ARM64), y) brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . 
- 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1) -ifneq ($(brokengasinst),) + ifneq ($(brokengasinst),) $(warning Detected assembler with broken .inst; disassembly will be unreliable) + endif endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 1c64ea2d23f9..0565779e66fa 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -179,8 +179,10 @@ usbphy: phy@01c19400 { compatible = "allwinner,sun50i-a64-usb-phy"; reg = <0x01c19400 0x14>, + <0x01c1a800 0x4>, <0x01c1b800 0x4>; reg-names = "phy_ctrl", + "pmu0", "pmu1"; clocks = <&ccu CLK_USB_PHY0>, <&ccu CLK_USB_PHY1>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 5d995f7724af..620495a43363 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -380,7 +380,7 @@ #size-cells = <2>; ranges = <0x0 0x0 0x0 0xc8834000 0x0 0x2000>; - rng { + hwrng: rng { compatible = "amlogic,meson-rng"; reg = <0x0 0x0 0x0 0x4>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 04b3324bc132..a375cb21cc8b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -524,3 +524,8 @@ &vpu { compatible = "amlogic,meson-gxbb-vpu", "amlogic,meson-gx-vpu"; }; + +&hwrng { + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; +}; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index 9f9e203c09c5..bcb03fc32665 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -114,6 +114,7 @@ pcie0: pcie@20020000 { compatible = "brcm,iproc-pcie"; reg = <0 0x20020000 0 0x1000>; + dma-coherent; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; @@ -144,6 +145,7 @@ pcie4: 
pcie@50020000 { compatible = "brcm,iproc-pcie"; reg = <0 0x50020000 0 0x1000>; + dma-coherent; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; @@ -174,6 +176,7 @@ pcie8: pcie@60c00000 { compatible = "brcm,iproc-pcie-paxc"; reg = <0 0x60c00000 0 0x1000>; + dma-coherent; linux,pci-domain = <8>; bus-range = <0x0 0x1>; @@ -203,6 +206,7 @@ <0x61030000 0x100>; reg-names = "amac_base", "idm_base", "nicpm_base"; interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>; + dma-coherent; phy-handle = <&gphy0>; phy-mode = "rgmii"; status = "disabled"; @@ -213,6 +217,7 @@ reg = <0x612c0000 0x445>; /* PDC FS0 regs */ interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -222,6 +227,7 @@ reg = <0x612e0000 0x445>; /* PDC FS1 regs */ interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -231,6 +237,7 @@ reg = <0x61300000 0x445>; /* PDC FS2 regs */ interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -240,6 +247,7 @@ reg = <0x61320000 0x445>; /* PDC FS3 regs */ interrupts = <GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -644,6 +652,7 @@ sata: ahci@663f2000 { compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; + dma-coherent; reg-names = "ahci"; interrupts = <GIC_SPI 438 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; @@ -667,6 +676,7 @@ compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x66420000 0x100>; interrupts = <GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>; + dma-coherent; bus-width = <8>; clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>; status = "disabled"; @@ -676,6 +686,7 @@ compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x66430000 0x100>; interrupts = <GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>; + dma-coherent; bus-width = <8>; clocks = <&genpll_sw 
BCM_NS2_GENPLL_SW_SDIO_CLK>; status = "disabled"; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index dba3c131c62c..9b4ba7169210 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -98,6 +98,11 @@ assigned-clocks = <&sys_ctrl HI6220_UART1_SRC>; assigned-clock-rates = <150000000>; status = "ok"; + + bluetooth { + compatible = "ti,wl1835-st"; + enable-gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>; + }; }; uart2: uart@f7112000 { diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts index 86602c907a61..a07a0c1cd4e6 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts @@ -116,6 +116,12 @@ status = "okay"; }; +/* CON27 */ +&usb2 { + status = "okay"; +}; + + &mdio { status = "okay"; phy0: ethernet-phy@0 { diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index b48d668a6ab6..42747b7db683 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -200,6 +200,13 @@ status = "disabled"; }; + usb2: usb@5e000 { + compatible = "marvell,armada-3700-ehci"; + reg = <0x5e000 0x2000>; + interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + xor@60900 { compatible = "marvell,armada-3700-xor"; reg = <0x60900 0x100 diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index a749ba2edec4..5019c8f4acd0 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -235,7 +235,8 @@ #clock-cells = <1>; clock-output-names = "ap-cpu-cluster-0", "ap-cpu-cluster-1", - "ap-fixed", "ap-mss"; + "ap-fixed", "ap-mss", + "ap-emmc"; reg = <0x6f4000 0x1000>; }; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 
7c48028ec64a..927ee18bbdf2 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -82,6 +82,7 @@ CONFIG_CMA=y CONFIG_SECCOMP=y CONFIG_XEN=y CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y CONFIG_CPU_IDLE=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index a12f1afc95a3..a7a97a608033 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -29,6 +29,7 @@ generic-y += rwsem.h generic-y += segment.h generic-y += sembuf.h generic-y += serial.h +generic-y += set_memory.h generic-y += shmbuf.h generic-y += simd.h generic-y += sizes.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index c1976c0adca7..0e99978da3f0 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -85,6 +85,8 @@ static inline bool acpi_has_cpu_in_madt(void) return true; } +struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu); + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index f37e3a21f6e7..1a98bc8602a2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -20,69 +20,14 @@ #include <asm/sysreg.h> -#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) -#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) -#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) -#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) -#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) -#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) -#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) -#define ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) - -#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) - -/* - * System register definitions - */ -#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) -#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) -#define ICH_VTR_EL2 sys_reg(3, 
4, 12, 11, 1) -#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) -#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) -#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) -#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) - -#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) -#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) - -#define ICH_LR0_EL2 __LR0_EL2(0) -#define ICH_LR1_EL2 __LR0_EL2(1) -#define ICH_LR2_EL2 __LR0_EL2(2) -#define ICH_LR3_EL2 __LR0_EL2(3) -#define ICH_LR4_EL2 __LR0_EL2(4) -#define ICH_LR5_EL2 __LR0_EL2(5) -#define ICH_LR6_EL2 __LR0_EL2(6) -#define ICH_LR7_EL2 __LR0_EL2(7) -#define ICH_LR8_EL2 __LR8_EL2(0) -#define ICH_LR9_EL2 __LR8_EL2(1) -#define ICH_LR10_EL2 __LR8_EL2(2) -#define ICH_LR11_EL2 __LR8_EL2(3) -#define ICH_LR12_EL2 __LR8_EL2(4) -#define ICH_LR13_EL2 __LR8_EL2(5) -#define ICH_LR14_EL2 __LR8_EL2(6) -#define ICH_LR15_EL2 __LR8_EL2(7) - -#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) -#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) -#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) -#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) -#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) - -#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) -#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) -#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) -#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) -#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) - #ifndef __ASSEMBLY__ #include <linux/stringify.h> #include <asm/barrier.h> #include <asm/cacheflush.h> -#define read_gicreg read_sysreg_s -#define write_gicreg write_sysreg_s +#define read_gicreg(r) read_sysreg_s(SYS_ ## r) +#define write_gicreg(v, r) write_sysreg_s(v, SYS_ ## r) /* * Low-level accessors @@ -93,13 +38,13 @@ static inline void gic_write_eoir(u32 irq) { - write_sysreg_s(irq, ICC_EOIR1_EL1); + write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); isb(); } static inline void gic_write_dir(u32 irq) { - write_sysreg_s(irq, ICC_DIR_EL1); + write_sysreg_s(irq, SYS_ICC_DIR_EL1); isb(); } @@ -107,7 +52,7 @@ static inline u64 gic_read_iar_common(void) { u64 irqstat; - irqstat = read_sysreg_s(ICC_IAR1_EL1); + irqstat = 
read_sysreg_s(SYS_ICC_IAR1_EL1); dsb(sy); return irqstat; } @@ -124,7 +69,7 @@ static inline u64 gic_read_iar_cavium_thunderx(void) u64 irqstat; nops(8); - irqstat = read_sysreg_s(ICC_IAR1_EL1); + irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); nops(4); mb(); @@ -133,40 +78,40 @@ static inline u64 gic_read_iar_cavium_thunderx(void) static inline void gic_write_pmr(u32 val) { - write_sysreg_s(val, ICC_PMR_EL1); + write_sysreg_s(val, SYS_ICC_PMR_EL1); } static inline void gic_write_ctlr(u32 val) { - write_sysreg_s(val, ICC_CTLR_EL1); + write_sysreg_s(val, SYS_ICC_CTLR_EL1); isb(); } static inline void gic_write_grpen1(u32 val) { - write_sysreg_s(val, ICC_GRPEN1_EL1); + write_sysreg_s(val, SYS_ICC_GRPEN1_EL1); isb(); } static inline void gic_write_sgi1r(u64 val) { - write_sysreg_s(val, ICC_SGI1R_EL1); + write_sysreg_s(val, SYS_ICC_SGI1R_EL1); } static inline u32 gic_read_sre(void) { - return read_sysreg_s(ICC_SRE_EL1); + return read_sysreg_s(SYS_ICC_SRE_EL1); } static inline void gic_write_sre(u32 val) { - write_sysreg_s(val, ICC_SRE_EL1); + write_sysreg_s(val, SYS_ICC_SRE_EL1); isb(); } static inline void gic_write_bpr1(u32 val) { - asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val)); + write_sysreg_s(val, SYS_ICC_BPR1_EL1); } #define gic_read_typer(c) readq_relaxed(c) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index b4b34004a21e..74d08e44a651 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -25,6 +25,7 @@ #include <linux/bug.h> #include <linux/init.h> #include <linux/jump_label.h> +#include <linux/smp.h> #include <linux/types.h> #include <clocksource/arm_arch_timer.h> @@ -37,24 +38,44 @@ extern struct static_key_false arch_timer_read_ool_enabled; #define needs_unstable_timer_counter_workaround() false #endif +enum arch_timer_erratum_match_type { + ate_match_dt, + ate_match_local_cap_id, + ate_match_acpi_oem_info, +}; + +struct clock_event_device; struct 
arch_timer_erratum_workaround { - const char *id; /* Indicate the Erratum ID */ + enum arch_timer_erratum_match_type match_type; + const void *id; + const char *desc; u32 (*read_cntp_tval_el0)(void); u32 (*read_cntv_tval_el0)(void); u64 (*read_cntvct_el0)(void); + int (*set_next_event_phys)(unsigned long, struct clock_event_device *); + int (*set_next_event_virt)(unsigned long, struct clock_event_device *); }; -extern const struct arch_timer_erratum_workaround *timer_unstable_counter_workaround; - -#define arch_timer_reg_read_stable(reg) \ -({ \ - u64 _val; \ - if (needs_unstable_timer_counter_workaround()) \ - _val = timer_unstable_counter_workaround->read_##reg();\ - else \ - _val = read_sysreg(reg); \ - _val; \ +DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, + timer_unstable_counter_workaround); + +#define arch_timer_reg_read_stable(reg) \ +({ \ + u64 _val; \ + if (needs_unstable_timer_counter_workaround()) { \ + const struct arch_timer_erratum_workaround *wa; \ + preempt_disable(); \ + wa = __this_cpu_read(timer_unstable_counter_workaround); \ + if (wa && wa->read_##reg) \ + _val = wa->read_##reg(); \ + else \ + _val = read_sysreg(reg); \ + preempt_enable(); \ + } else { \ + _val = read_sysreg(reg); \ + } \ + _val; \ }) /* diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 561190d15881..366448eb0fb7 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -20,9 +20,6 @@ #include <asm/brk-imm.h> -#ifdef CONFIG_GENERIC_BUG -#define HAVE_ARCH_BUG - #ifdef CONFIG_DEBUG_BUGVERBOSE #define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) #define __BUGVERBOSE_LOCATION(file, line) \ @@ -36,28 +33,35 @@ #define _BUGVERBOSE_LOCATION(file, line) #endif -#define _BUG_FLAGS(flags) __BUG_FLAGS(flags) +#ifdef CONFIG_GENERIC_BUG -#define __BUG_FLAGS(flags) asm volatile ( \ +#define __BUG_ENTRY(flags) \ ".pushsection __bug_table,\"a\"\n\t" \ ".align 2\n\t" \ "0: .long 1f - 0b\n\t" \ 
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ ".short " #flags "\n\t" \ ".popsection\n" \ - \ - "1: brk %[imm]" \ - :: [imm] "i" (BUG_BRK_IMM) \ -) + "1: " +#else +#define __BUG_ENTRY(flags) "" +#endif + +#define __BUG_FLAGS(flags) \ + asm volatile ( \ + __BUG_ENTRY(flags) \ + "brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \ + ); -#define BUG() do { \ - _BUG_FLAGS(0); \ - unreachable(); \ + +#define BUG() do { \ + __BUG_FLAGS(0); \ + unreachable(); \ } while (0) -#define __WARN_TAINT(taint) _BUG_FLAGS(BUGFLAG_TAINT(taint)) +#define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags)) -#endif /* ! CONFIG_GENERIC_BUG */ +#define HAVE_ARCH_BUG #include <asm-generic/bug.h> diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5082b30bc2c0..ea9bb4e0e9bb 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -16,7 +16,18 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H -#include <asm/cachetype.h> +#include <asm/cputype.h> + +#define CTR_L1IP_SHIFT 14 +#define CTR_L1IP_MASK 3 +#define CTR_CWG_SHIFT 24 +#define CTR_CWG_MASK 15 + +#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) + +#define ICACHE_POLICY_VPIPT 0 +#define ICACHE_POLICY_VIPT 2 +#define ICACHE_POLICY_PIPT 3 #define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) @@ -32,6 +43,31 @@ #ifndef __ASSEMBLY__ +#include <linux/bitops.h> + +#define ICACHEF_ALIASING 0 +#define ICACHEF_VPIPT 1 +extern unsigned long __icache_flags; + +/* + * Whilst the D-side always behaves as PIPT on AArch64, aliasing is + * permitted in the I-cache. 
+ */ +static inline int icache_is_aliasing(void) +{ + return test_bit(ICACHEF_ALIASING, &__icache_flags); +} + +static inline int icache_is_vpipt(void) +{ + return test_bit(ICACHEF_VPIPT, &__icache_flags); +} + +static inline u32 cache_type_cwg(void) +{ + return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; +} + #define __read_mostly __attribute__((__section__(".data..read_mostly"))) static inline int cache_line_size(void) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 5a2a6ee65f65..d74a284abdc2 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -150,9 +150,6 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) { } -int set_memory_ro(unsigned long addr, int numpages); -int set_memory_rw(unsigned long addr, int numpages); -int set_memory_x(unsigned long addr, int numpages); -int set_memory_nx(unsigned long addr, int numpages); +int set_memory_valid(unsigned long addr, unsigned long size, int enable); #endif diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h deleted file mode 100644 index f5588692f1d4..000000000000 --- a/arch/arm64/include/asm/cachetype.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ -#ifndef __ASM_CACHETYPE_H -#define __ASM_CACHETYPE_H - -#include <asm/cputype.h> - -#define CTR_L1IP_SHIFT 14 -#define CTR_L1IP_MASK 3 -#define CTR_CWG_SHIFT 24 -#define CTR_CWG_MASK 15 - -#define ICACHE_POLICY_RESERVED 0 -#define ICACHE_POLICY_AIVIVT 1 -#define ICACHE_POLICY_VIPT 2 -#define ICACHE_POLICY_PIPT 3 - -#ifndef __ASSEMBLY__ - -#include <linux/bitops.h> - -#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) - -#define ICACHEF_ALIASING 0 -#define ICACHEF_AIVIVT 1 - -extern unsigned long __icache_flags; - -/* - * NumSets, bits[27:13] - (Number of sets in cache) - 1 - * Associativity, bits[12:3] - (Associativity of cache) - 1 - * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2 - */ -#define CCSIDR_EL1_WRITE_THROUGH BIT(31) -#define CCSIDR_EL1_WRITE_BACK BIT(30) -#define CCSIDR_EL1_READ_ALLOCATE BIT(29) -#define CCSIDR_EL1_WRITE_ALLOCATE BIT(28) -#define CCSIDR_EL1_LINESIZE_MASK 0x7 -#define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK) -#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT 3 -#define CCSIDR_EL1_ASSOCIATIVITY_MASK 0x3ff -#define CCSIDR_EL1_ASSOCIATIVITY(x) \ - (((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK) -#define CCSIDR_EL1_NUMSETS_SHIFT 13 -#define CCSIDR_EL1_NUMSETS_MASK 0x7fff -#define CCSIDR_EL1_NUMSETS(x) \ - (((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK) - -#define CACHE_LINESIZE(x) (16 << CCSIDR_EL1_LINESIZE(x)) -#define CACHE_NUMSETS(x) (CCSIDR_EL1_NUMSETS(x) + 1) -#define CACHE_ASSOCIATIVITY(x) (CCSIDR_EL1_ASSOCIATIVITY(x) + 1) - -extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr); - -/* Helpers for Level 1 Instruction cache csselr = 1L */ -static inline int icache_get_linesize(void) -{ - return CACHE_LINESIZE(cache_get_ccsidr(1L)); -} - -static inline int icache_get_numsets(void) -{ - return CACHE_NUMSETS(cache_get_ccsidr(1L)); -} - -/* - * Whilst the D-side always behaves as PIPT on AArch64, aliasing is - * permitted in the I-cache. 
- */ -static inline int icache_is_aliasing(void) -{ - return test_bit(ICACHEF_ALIASING, &__icache_flags); -} - -static inline int icache_is_aivivt(void) -{ - return test_bit(ICACHEF_AIVIVT, &__icache_flags); -} - -static inline u32 cache_type_cwg(void) -{ - return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; -} - -#endif /* __ASSEMBLY__ */ - -#endif /* __ASM_CACHETYPE_H */ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index fb78a5d3b60b..b3aab8a17868 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -37,7 +37,8 @@ #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 +#define ARM64_WORKAROUND_858921 19 -#define ARM64_NCAPS 19 +#define ARM64_NCAPS 20 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 05310ad8c5ab..e7f84a7b4465 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -226,7 +226,7 @@ void update_cpu_errata_workarounds(void); void __init enable_errata_workarounds(void); void verify_local_cpu_errata_workarounds(void); -u64 read_system_reg(u32 id); +u64 read_sanitised_ftr_reg(u32 id); static inline bool cpu_supports_mixed_endian_el0(void) { @@ -240,7 +240,7 @@ static inline bool system_supports_32bit_el0(void) static inline bool system_supports_mixed_endian_el0(void) { - return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); + return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1)); } static inline bool system_supports_fpsimd(void) @@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void) static inline bool system_uses_ttbr0_pan(void) { return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_cap(ARM64_HAS_PAN); + !cpus_have_const_cap(ARM64_HAS_PAN); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cputype.h 
b/arch/arm64/include/asm/cputype.h index fc502713ab37..0984d1b3a8f2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,6 +80,7 @@ #define ARM_CPU_PART_FOUNDATION 0xD00 #define ARM_CPU_PART_CORTEX_A57 0xD07 #define ARM_CPU_PART_CORTEX_A53 0xD03 +#define ARM_CPU_PART_CORTEX_A73 0xD09 #define APM_CPU_PART_POTENZA 0x000 @@ -92,6 +93,7 @@ #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h index 86c404171305..f6580d4afb0e 100644 --- a/arch/arm64/include/asm/current.h +++ b/arch/arm64/include/asm/current.h @@ -3,8 +3,6 @@ #include <linux/compiler.h> -#include <asm/sysreg.h> - #ifndef __ASSEMBLY__ struct task_struct; diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 73d5bab015eb..5a5fa47a6b18 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -20,6 +20,9 @@ struct dev_archdata { #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif +#ifdef CONFIG_XEN + const struct dma_map_ops *dev_dma_ops; +#endif bool dma_coherent; }; diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 505756cdc67a..5392dbeffa45 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -27,11 +27,8 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0) extern const struct dma_map_ops dummy_dma_ops; -static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) +static 
inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (dev && dev->dma_ops) - return dev->dma_ops; - /* * We expect no ISA devices, and all other DMA masters are expected to * have someone call arch_setup_dma_ops at device creation time. @@ -39,14 +36,6 @@ static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) return &dummy_dma_ops; } -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - if (xen_initial_domain()) - return xen_dma_ops; - else - return __generic_dma_ops(NULL); -} - void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); #define arch_setup_dma_ops arch_setup_dma_ops diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index e7445281e534..8f3043aba873 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -1,6 +1,7 @@ #ifndef _ASM_EFI_H #define _ASM_EFI_H +#include <asm/boot.h> #include <asm/cpufeature.h> #include <asm/io.h> #include <asm/mmu_context.h> @@ -46,7 +47,28 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); * 2MiB so we know it won't cross a 2MiB boundary. */ #define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ -#define MAX_FDT_OFFSET SZ_512M + +/* on arm64, the FDT may be located anywhere in system RAM */ +static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) +{ + return ULONG_MAX; +} + +/* + * On arm64, we have to ensure that the initrd ends up in the linear region, + * which is a 1 GB aligned region of size '1UL << (VA_BITS - 1)' that is + * guaranteed to cover the kernel Image. + * + * Since the EFI stub is part of the kernel Image, we can relax the + * usual requirements in Documentation/arm64/booting.txt, which still + * apply to other bootloaders, and are required for some kernel + * configurations. 
+ */ +static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, + unsigned long image_addr) +{ + return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS - 1)); +} #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) #define __efi_call_early(f, ...) f(__VA_ARGS__) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d14c478976d0..85997c0e5443 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -175,6 +175,12 @@ #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h new file mode 100644 index 000000000000..42f50f15a44c --- /dev/null +++ b/arch/arm64/include/asm/extable.h @@ -0,0 +1,25 @@ +#ifndef __ASM_EXTABLE_H +#define __ASM_EXTABLE_H + +/* + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. 
+ */ + +struct exception_table_entry +{ + int insn, fixup; +}; + +#define ARCH_HAS_RELATIVE_EXTABLE + +extern int fixup_exception(struct pt_regs *regs); +#endif diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 8740297dac77..1473fc2f7ab7 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 6 +#define NR_IPI 7 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index b6b167ac082b..41770766d964 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -149,7 +149,7 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_BRPS_SHIFT); @@ -158,7 +158,7 @@ static inline int get_num_brps(void) /* Determine number of WRP registers available. 
*/ static inline int get_num_wrps(void) { - u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_WRPS_SHIFT); diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index aecc07e09a18..29cb2ca756f6 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -80,6 +80,7 @@ enum aarch64_insn_register_type { AARCH64_INSN_REGTYPE_RM, AARCH64_INSN_REGTYPE_RD, AARCH64_INSN_REGTYPE_RA, + AARCH64_INSN_REGTYPE_RS, }; enum aarch64_insn_register { @@ -188,6 +189,8 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, + AARCH64_INSN_LDST_LOAD_EX, + AARCH64_INSN_LDST_STORE_EX, }; enum aarch64_insn_adsb_type { @@ -240,6 +243,23 @@ enum aarch64_insn_logic_type { AARCH64_INSN_LOGIC_BIC_SETFLAGS }; +enum aarch64_insn_prfm_type { + AARCH64_INSN_PRFM_TYPE_PLD, + AARCH64_INSN_PRFM_TYPE_PLI, + AARCH64_INSN_PRFM_TYPE_PST, +}; + +enum aarch64_insn_prfm_target { + AARCH64_INSN_PRFM_TARGET_L1, + AARCH64_INSN_PRFM_TARGET_L2, + AARCH64_INSN_PRFM_TARGET_L3, +}; + +enum aarch64_insn_prfm_policy { + AARCH64_INSN_PRFM_POLICY_KEEP, + AARCH64_INSN_PRFM_POLICY_STRM, +}; + #define __AARCH64_INSN_FUNCS(abbr, mask, val) \ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ { return (code & (mask)) == (val); } \ @@ -248,6 +268,7 @@ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ __AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000) __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) +__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) @@ -357,6 +378,11 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, int offset, enum aarch64_insn_variant 
variant, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + enum aarch64_insn_register state, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type); u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, @@ -397,6 +423,10 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, + enum aarch64_insn_prfm_type type, + enum aarch64_insn_prfm_target target, + enum aarch64_insn_prfm_policy policy); s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 04744dc5fb61..e17f0529a882 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -40,9 +40,59 @@ static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { - /* Empty routine needed to avoid build errors. 
*/ + if (oldregs) { + memcpy(newregs, oldregs, sizeof(*newregs)); + } else { + u64 tmp1, tmp2; + + __asm__ __volatile__ ( + "stp x0, x1, [%2, #16 * 0]\n" + "stp x2, x3, [%2, #16 * 1]\n" + "stp x4, x5, [%2, #16 * 2]\n" + "stp x6, x7, [%2, #16 * 3]\n" + "stp x8, x9, [%2, #16 * 4]\n" + "stp x10, x11, [%2, #16 * 5]\n" + "stp x12, x13, [%2, #16 * 6]\n" + "stp x14, x15, [%2, #16 * 7]\n" + "stp x16, x17, [%2, #16 * 8]\n" + "stp x18, x19, [%2, #16 * 9]\n" + "stp x20, x21, [%2, #16 * 10]\n" + "stp x22, x23, [%2, #16 * 11]\n" + "stp x24, x25, [%2, #16 * 12]\n" + "stp x26, x27, [%2, #16 * 13]\n" + "stp x28, x29, [%2, #16 * 14]\n" + "mov %0, sp\n" + "stp x30, %0, [%2, #16 * 15]\n" + + "/* faked current PSTATE */\n" + "mrs %0, CurrentEL\n" + "mrs %1, SPSEL\n" + "orr %0, %0, %1\n" + "mrs %1, DAIF\n" + "orr %0, %0, %1\n" + "mrs %1, NZCV\n" + "orr %0, %0, %1\n" + /* pc */ + "adr %1, 1f\n" + "1:\n" + "stp %1, %0, [%2, #16 * 16]\n" + : "=&r" (tmp1), "=&r" (tmp2) + : "r" (newregs) + : "memory" + ); + } } +#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION) +extern bool crash_is_nosave(unsigned long pfn); +extern void crash_prepare_suspend(void); +extern void crash_post_resume(void); +#else +static inline bool crash_is_nosave(unsigned long pfn) {return false; } +static inline void crash_prepare_suspend(void) {} +static inline void crash_post_resume(void) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ec3553eb9349..26a64d0f9ab9 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -28,7 +28,7 @@ #define ARM_EXCEPTION_EL1_SERROR 1 #define ARM_EXCEPTION_TRAP 2 /* The hyp-stub will return this for any kvm_call_hyp() call */ -#define ARM_EXCEPTION_HYP_GONE 3 +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) @@ -47,7 +47,6 @@ struct kvm_vcpu; extern char 
__kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; -extern char __kvm_hyp_reset[]; extern char __kvm_hyp_vector[]; @@ -59,6 +58,8 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern u64 __vgic_v3_read_vmcr(void); +extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f21fd3894370..5e19165c5fa8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,9 +30,7 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_USER_MEM_SLOTS 512 #define KVM_HALT_POLL_NS_DEFAULT 500000 #include <kvm/arm_vgic.h> @@ -43,7 +41,7 @@ #define KVM_VCPU_MAX_FEATURES 4 -#define KVM_REQ_VCPU_EXIT 8 +#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -363,13 +361,6 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); } -void __kvm_hyp_teardown(void); -static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr, - phys_addr_t phys_idmap_start) -{ - kvm_call_hyp(__kvm_hyp_teardown, phys_idmap_start); -} - static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ed1246014901..a89cc22abadc 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -108,7 +108,7 @@ alternative_else_nop_endif #else #include <asm/pgalloc.h> -#include <asm/cachetype.h> +#include 
<asm/cache.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> @@ -155,7 +155,6 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); @@ -242,12 +241,13 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_flush_dcache_to_poc(va, size); - if (!icache_is_aliasing()) { /* PIPT */ - flush_icache_range((unsigned long)va, - (unsigned long)va + size); - } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ + if (icache_is_aliasing()) { /* any kind of VIPT cache */ __flush_icache_all(); + } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { + /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + flush_icache_range((unsigned long)va, + (unsigned long)va + size); } } @@ -307,7 +307,7 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, static inline unsigned int kvm_get_vmid_bits(void) { - int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); + int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 
16 : 8; } diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 47619411f0ff..5468c834b072 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -37,5 +37,6 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); +extern void mark_linear_text_alias_ro(void); #endif diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 06ff7fd9e81f..d57693f5d4ec 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -17,26 +17,26 @@ #define __ASM_MODULE_H #include <asm-generic/module.h> -#include <asm/memory.h> #define MODULE_ARCH_VERMAGIC "aarch64" #ifdef CONFIG_ARM64_MODULE_PLTS -struct mod_arch_specific { +struct mod_plt_sec { struct elf64_shdr *plt; int plt_num_entries; int plt_max_entries; }; + +struct mod_arch_specific { + struct mod_plt_sec core; + struct mod_plt_sec init; +}; #endif -u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, +u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym); #ifdef CONFIG_RANDOMIZE_BASE -#ifdef CONFIG_MODVERSIONS -#define ARCH_RELOCATES_KCRCTAB -#define reloc_start (kimage_vaddr - KIMAGE_VADDR) -#endif extern u64 module_alloc_base; #else #define module_alloc_base ((u64)_etext - MODULES_VSIZE) diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503c..345a072b5856 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> +#elif 
CONFIG_PGTABLE_LEVELS == 4 +#include <asm-generic/5level-fixup.h> #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0eef6064bf3b..c213fdbd056c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -74,6 +74,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_cont_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + +#define pmd_cont_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK; \ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index c97b8bd2acba..9428b93fefb2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -50,6 +50,7 @@ extern phys_addr_t arm64_dma_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { +#ifdef CONFIG_HAVE_HW_BREAKPOINT /* Have we suspended stepping by a debugger? */ int suspended_step; /* Allow breakpoints and watchpoints to be disabled for this thread. */ @@ -58,6 +59,7 @@ struct debug_info { /* Hardware breakpoints pinned to this task. 
*/ struct perf_event *hbp_break[ARM_MAX_BRP]; struct perf_event *hbp_watch[ARM_MAX_WRP]; +#endif }; struct cpu_context { diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 4e7e7067afdb..941267caa39c 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -24,6 +24,8 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; extern char __idmap_text_start[], __idmap_text_end[]; +extern char __initdata_begin[], __initdata_end[]; +extern char __inittext_begin[], __inittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d050d720a1b4..55f08c5acfad 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -148,6 +148,9 @@ static inline void cpu_panic_kernel(void) */ bool cpus_are_stuck_in_kernel(void); +extern void smp_send_crash_stop(void); +extern bool smp_crash_stop_failed(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ac24b6e798b1..15c142ce991c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -48,6 +48,8 @@ ((crn) << CRn_shift) | ((crm) << CRm_shift) | \ ((op2) << Op2_shift)) +#define sys_insn sys_reg + #define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask) #define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask) #define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask) @@ -81,6 +83,41 @@ #endif /* CONFIG_BROKEN_GAS_INST */ +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ + (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) 
__emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ + (!!x)<<8 | 0x1f) + +#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) +#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) +#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) + +#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) +#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) +#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) +#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) +#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) +#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) +#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) +#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) +#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) +#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) +#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) +#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) +#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) +#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) +#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6) +#define SYS_DBGCLAIMCLR_EL1 sys_reg(2, 0, 7, 9, 6) +#define SYS_DBGAUTHSTATUS_EL1 sys_reg(2, 0, 7, 14, 6) +#define SYS_MDCCSR_EL0 sys_reg(2, 3, 0, 1, 0) +#define SYS_DBGDTR_EL0 sys_reg(2, 3, 0, 4, 0) +#define SYS_DBGDTRRX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -88,6 +125,7 @@ #define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) #define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) @@ -118,17 +156,127 @@ #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) -#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) +#define 
SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) +#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) + +#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) +#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) +#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) + +#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) + +#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) +#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1) +#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0) +#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) +#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) + +#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) +#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) + +#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) +#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) + +#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) + +#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) +#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) + +#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) +#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) + +#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) + +#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) +#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) + +#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) + #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +#define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) +#define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) +#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) +#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) +#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) +#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) +#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) +#define 
SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) +#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0) +#define SYS_PMXEVTYPER_EL0 sys_reg(3, 3, 9, 13, 1) +#define SYS_PMXEVCNTR_EL0 sys_reg(3, 3, 9, 13, 2) +#define SYS_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0) +#define SYS_PMOVSSET_EL0 sys_reg(3, 3, 9, 14, 3) + +#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) +#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) + +#define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) +#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) +#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) + +#define __PMEV_op2(n) ((n) & 0x7) +#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) +#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) +#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) +#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) + +#define SYS_PMCCFILTR_EL0 sys_reg (3, 3, 14, 15, 7) + +#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) + +#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) +#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) +#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) +#define SYS_ICH_AP0R2_EL2 __SYS__AP0Rx_EL2(2) +#define SYS_ICH_AP0R3_EL2 __SYS__AP0Rx_EL2(3) + +#define __SYS__AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) +#define SYS_ICH_AP1R0_EL2 __SYS__AP1Rx_EL2(0) +#define SYS_ICH_AP1R1_EL2 __SYS__AP1Rx_EL2(1) +#define SYS_ICH_AP1R2_EL2 __SYS__AP1Rx_EL2(2) +#define SYS_ICH_AP1R3_EL2 __SYS__AP1Rx_EL2(3) + +#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) +#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define SYS_ICH_MISR_EL2 
sys_reg(3, 4, 12, 11, 2) +#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define SYS_ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) + +#define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define SYS_ICH_LR0_EL2 __SYS__LR0_EL2(0) +#define SYS_ICH_LR1_EL2 __SYS__LR0_EL2(1) +#define SYS_ICH_LR2_EL2 __SYS__LR0_EL2(2) +#define SYS_ICH_LR3_EL2 __SYS__LR0_EL2(3) +#define SYS_ICH_LR4_EL2 __SYS__LR0_EL2(4) +#define SYS_ICH_LR5_EL2 __SYS__LR0_EL2(5) +#define SYS_ICH_LR6_EL2 __SYS__LR0_EL2(6) +#define SYS_ICH_LR7_EL2 __SYS__LR0_EL2(7) + +#define __SYS__LR8_EL2(x) sys_reg(3, 4, 12, 13, x) +#define SYS_ICH_LR8_EL2 __SYS__LR8_EL2(0) +#define SYS_ICH_LR9_EL2 __SYS__LR8_EL2(1) +#define SYS_ICH_LR10_EL2 __SYS__LR8_EL2(2) +#define SYS_ICH_LR11_EL2 __SYS__LR8_EL2(3) +#define SYS_ICH_LR12_EL2 __SYS__LR8_EL2(4) +#define SYS_ICH_LR13_EL2 __SYS__LR8_EL2(5) +#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6) +#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. 
*/ #define SCTLR_ELx_EE (1 << 25) @@ -156,6 +304,11 @@ #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_AES_SHIFT 4 +/* id_aa64isar1 */ +#define ID_AA64ISAR1_LRCPC_SHIFT 20 +#define ID_AA64ISAR1_FCMA_SHIFT 16 +#define ID_AA64ISAR1_JSCVT_SHIFT 12 + /* id_aa64pfr0 */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5308d696311b..ba497172610d 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -28,38 +28,12 @@ #include <linux/bitops.h> #include <linux/kasan-checks.h> #include <linux/string.h> -#include <linux/thread_info.h> #include <asm/cpufeature.h> #include <asm/ptrace.h> -#include <asm/errno.h> #include <asm/memory.h> #include <asm/compiler.h> - -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 - -/* - * The exception table consists of pairs of relative offsets: the first - * is the relative offset to an instruction that is allowed to fault, - * and the second is the relative offset at which the program should - * continue. No registers are modified, so it is entirely up to the - * continuation code to figure out what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. 
- */ - -struct exception_table_entry -{ - int insn, fixup; -}; - -#define ARCH_HAS_RELATIVE_EXTABLE - -extern int fixup_exception(struct pt_regs *regs); +#include <asm/extable.h> #define KERNEL_DS (-1UL) #define get_ds() (KERNEL_DS) @@ -357,58 +331,13 @@ do { \ }) extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +#define raw_copy_from_user __arch_copy_from_user extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); -extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); +#define raw_copy_to_user __arch_copy_to_user +extern unsigned long __must_check raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); - -static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) -{ - kasan_check_write(to, n); - check_object_size(to, n, false); - return __arch_copy_from_user(to, from, n); -} - -static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) -{ - kasan_check_read(from, n); - check_object_size(from, n, true); - return __arch_copy_to_user(to, from, n); -} - -static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) -{ - unsigned long res = n; - kasan_check_write(to, n); - check_object_size(to, n, false); - - if (access_ok(VERIFY_READ, from, n)) { - res = __arch_copy_from_user(to, from, n); - } - if (unlikely(res)) - memset(to + (n - res), 0, res); - return res; -} - -static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) -{ - kasan_check_read(from, n); - check_object_size(from, n, true); - - if (access_ok(VERIFY_WRITE, to, n)) { - n = __arch_copy_to_user(to, from, n); - } - return n; -} - -static inline 
unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n)) - n = __copy_in_user(to, from, n); - return n; -} - -#define __copy_to_user_inatomic __copy_to_user -#define __copy_from_user_inatomic __copy_from_user +#define INLINE_COPY_TO_USER +#define INLINE_COPY_FROM_USER static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index e78ac26324bd..a0baa9af5487 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -14,7 +14,6 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifdef CONFIG_COMPAT -#define __ARCH_WANT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_COMPAT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE @@ -44,7 +43,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 394 +#define __NR_compat_syscalls 398 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index b7e8ef16ff0d..ef292160748c 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -456,7 +456,7 @@ __SYSCALL(__NR_setfsuid32, sys_setfsuid) #define __NR_setfsgid32 216 __SYSCALL(__NR_setfsgid32, sys_setfsgid) #define __NR_getdents64 217 -__SYSCALL(__NR_getdents64, compat_sys_getdents64) +__SYSCALL(__NR_getdents64, sys_getdents64) #define __NR_pivot_root 218 __SYSCALL(__NR_pivot_root, sys_pivot_root) #define __NR_mincore 219 @@ -809,6 +809,14 @@ __SYSCALL(__NR_copy_file_range, sys_copy_file_range) __SYSCALL(__NR_preadv2, compat_sys_preadv2) #define __NR_pwritev2 393 __SYSCALL(__NR_pwritev2, compat_sys_pwritev2) +#define __NR_pkey_mprotect 394 +__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) +#define 
__NR_pkey_alloc 395 +__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) +#define __NR_pkey_free 396 +__SYSCALL(__NR_pkey_free, sys_pkey_free) +#define __NR_statx 397 +__SYSCALL(__NR_statx, sys_statx) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 439f6b5d31f6..c5f89442785c 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -19,25 +19,38 @@ #define __ASM__VIRT_H /* - * The arm64 hcall implementation uses x0 to specify the hcall type. A value - * less than 0xfff indicates a special hcall, such as get/set vector. - * Any other value is used as a pointer to the function to call. + * The arm64 hcall implementation uses x0 to specify the hcall + * number. A value less than HVC_STUB_HCALL_NR indicates a special + * hcall, such as set vector. Any other value is handled in a + * hypervisor specific way. + * + * The hypercall is allowed to clobber any of the caller-saved + * registers (x0-x18), so it is advisable to use it through the + * indirection of a function call (as implemented in hyp-stub.S). */ -/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */ -#define HVC_GET_VECTORS 0 - /* * HVC_SET_VECTORS - Set the value of the vbar_el2 register. * * @x1: Physical address of the new vector table. */ -#define HVC_SET_VECTORS 1 +#define HVC_SET_VECTORS 0 /* * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine. 
*/ -#define HVC_SOFT_RESTART 2 +#define HVC_SOFT_RESTART 1 + +/* + * HVC_RESET_VECTORS - Restore the vectors to the original HYP stubs + */ +#define HVC_RESET_VECTORS 2 + +/* Max number of HYP stub hypercalls */ +#define HVC_STUB_HCALL_NR 3 + +/* Error returned when an invalid stub number is passed into x0 */ +#define HVC_STUB_ERR 0xbadca11 #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) @@ -61,7 +74,7 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); -phys_addr_t __hyp_get_vectors(void); +void __hyp_reset_vectors(void); /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 61c263cba272..4e187ce2a811 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -32,5 +32,8 @@ #define HWCAP_ASIMDHP (1 << 10) #define HWCAP_CPUID (1 << 11) #define HWCAP_ASIMDRDM (1 << 12) +#define HWCAP_JSCVT (1 << 13) +#define HWCAP_FCMA (1 << 14) +#define HWCAP_LRCPC (1 << 15) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index c2860358ae3e..869ee480deed 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -143,6 +145,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1606c6b2a280..1dcb69d3d0e5 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -50,6 +50,9 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ cpu-reset.o +arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o +arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o +arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 64d9cbd61678..e25c11e727fe 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -18,6 +18,7 @@ #include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/cpumask.h> +#include <linux/efi-bgrt.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/irqdomain.h> @@ -233,6 +234,8 @@ done: early_init_dt_scan_chosen_stdout(); } else { parse_spcr(earlycon_init_is_deferred); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) + acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); } } diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 06d650f61da7..8840c109c5d6 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -105,11 +105,11 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) return insn; } -static void __apply_alternatives(void *alt_region) +static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - u32 *origptr, *replptr; + u32 *origptr, *replptr, *updptr; for (alt = region->begin; alt < region->end; alt++) { u32 insn; @@ -124,11 +124,12 @@ static void __apply_alternatives(void *alt_region) origptr = ALT_ORIG_PTR(alt); replptr = ALT_REPL_PTR(alt); + updptr = use_linear_alias ? 
(u32 *)lm_alias(origptr) : origptr; nr_inst = alt->alt_len / sizeof(insn); for (i = 0; i < nr_inst; i++) { insn = get_alt_insn(alt, origptr + i, replptr + i); - *(origptr + i) = cpu_to_le32(insn); + updptr[i] = cpu_to_le32(insn); } flush_icache_range((uintptr_t)origptr, @@ -155,7 +156,7 @@ static int __apply_alternatives_multi_stop(void *unused) isb(); } else { BUG_ON(patched); - __apply_alternatives(&region); + __apply_alternatives(&region, true); /* Barriers provided by the cache flushing */ WRITE_ONCE(patched, 1); } @@ -176,5 +177,5 @@ void apply_alternatives(void *start, size_t length) .end = start + length, }; - __apply_alternatives(&region); + __apply_alternatives(&region, false); } diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index e9c4dc9e0ada..67368c7329c0 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(__arch_copy_from_user); EXPORT_SYMBOL(__arch_copy_to_user); EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__copy_in_user); +EXPORT_SYMBOL(raw_copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 3f2250fc391b..380f2e2fbed5 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,15 +17,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <linux/bitops.h> #include <linux/cacheinfo.h> -#include <linux/cpu.h> -#include <linux/compiler.h> #include <linux/of.h> -#include <asm/cachetype.h> -#include <asm/processor.h> - #define MAX_CACHE_LEVEL 7 /* Max 7 level supported */ /* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ #define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) @@ -43,43 +37,11 @@ static inline enum cache_type get_cache_type(int level) return CLIDR_CTYPE(clidr, level); } -/* - * Cache Size Selection Register(CSSELR) selects which Cache Size ID - * Register(CCSIDR) is accessible by specifying the required cache - * level and the cache type. We need to ensure that no one else changes - * CSSELR by calling this in non-preemtible context - */ -u64 __attribute_const__ cache_get_ccsidr(u64 csselr) -{ - u64 ccsidr; - - WARN_ON(preemptible()); - - write_sysreg(csselr, csselr_el1); - isb(); - ccsidr = read_sysreg(ccsidr_el1); - - return ccsidr; -} - static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, unsigned int level) { - bool is_icache = type & CACHE_TYPE_INST; - u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache); - this_leaf->level = level; this_leaf->type = type; - this_leaf->coherency_line_size = CACHE_LINESIZE(tmp); - this_leaf->number_of_sets = CACHE_NUMSETS(tmp); - this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp); - this_leaf->size = this_leaf->number_of_sets * - this_leaf->coherency_line_size * this_leaf->ways_of_associativity; - this_leaf->attributes = - ((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) | - ((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) | - ((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) | - ((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? 
CACHE_WRITE_ALLOCATE : 0); } static int __init_cache_level(unsigned int cpu) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f6cc67e7626e..2ed2a7657711 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -53,6 +53,13 @@ static int cpu_enable_trap_ctr_access(void *__unused) .midr_range_min = min, \ .midr_range_max = max +#define MIDR_ALL_VERSIONS(model) \ + .def_scope = SCOPE_LOCAL_CPU, \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = 0, \ + .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -151,6 +158,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_CPU_VAR_REV(0, 0)), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_858921 + { + /* Cortex-A73 all versions */ + .desc = "ARM erratum 858921", + .capability = ARM64_WORKAROUND_858921, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index abda8e861865..94b8f7fc3310 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -97,6 +97,13 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), @@ -153,9 +160,9 @@ static const struct arm64_ftr_bits 
ftr_ctr[] = { /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. - * If we have differing I-cache policies, report it as the weakest - AIVIVT. + * If we have differing I-cache policies, report it as the weakest - VIPT. */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; @@ -314,7 +321,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -585,7 +592,7 @@ void update_cpu_features(int cpu, * If we have AArch32, we care about 32-bit features for compat. * If the system doesn't support AArch32, don't update them. */ - if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, @@ -636,7 +643,7 @@ void update_cpu_features(int cpu, "Unsupported CPU feature variation.\n"); } -u64 read_system_reg(u32 id) +u64 read_sanitised_ftr_reg(u32 id) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); @@ -649,10 +656,10 @@ u64 read_system_reg(u32 id) case r: return read_sysreg_s(r) /* - * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated. 
* Read the system register on the current CPU */ -static u64 __raw_read_system_reg(u32 sys_id) +static u64 __read_sysreg_by_encoding(u32 sys_id) { switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); @@ -709,9 +716,9 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); if (scope == SCOPE_SYSTEM) - val = read_system_reg(entry->sys_reg); + val = read_sanitised_ftr_reg(entry->sys_reg); else - val = __raw_read_system_reg(entry->sys_reg); + val = __read_sysreg_by_encoding(entry->sys_reg); return feature_matches(val, entry); } @@ -761,7 +768,7 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) { - u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return cpuid_feature_extract_signed_field(pfr0, ID_AA64PFR0_FP_SHIFT) < 0; @@ -888,6 +895,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), {}, }; @@ -1090,20 +1100,29 @@ static void __init setup_feature_capabilities(void) * Check if the current CPU has a given feature capability. * Should be called from non-preemptible context. 
*/ -bool this_cpu_has_cap(unsigned int cap) +static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, + unsigned int cap) { const struct arm64_cpu_capabilities *caps; if (WARN_ON(preemptible())) return false; - for (caps = arm64_features; caps->desc; caps++) + for (caps = cap_array; caps->desc; caps++) if (caps->capability == cap && caps->matches) return caps->matches(caps, SCOPE_LOCAL_CPU); return false; } +extern const struct arm64_cpu_capabilities arm64_errata[]; + +bool this_cpu_has_cap(unsigned int cap) +{ + return (__this_cpu_has_cap(arm64_features, cap) || + __this_cpu_has_cap(arm64_errata, cap)); +} + void __init setup_cpu_features(void) { u32 cwg; diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 75a0f8acef66..fd691087dc9a 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu) } /** - * cpu_suspend() - function to enter a low-power idle state + * arm_cpuidle_suspend() - function to enter a low-power idle state * @arg: argument to pass to CPU suspend operations * * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 5b22c687f02a..68b1f364c515 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -15,7 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <asm/arch_timer.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpufeature.h> @@ -43,10 +43,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; static char *icache_policy_str[] = { - [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", - [ICACHE_POLICY_AIVIVT] = "AIVIVT", - [ICACHE_POLICY_VIPT] = "VIPT", - [ICACHE_POLICY_PIPT] = "PIPT", + [0 ... 
ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_VIPT] = "VIPT", + [ICACHE_POLICY_PIPT] = "PIPT", + [ICACHE_POLICY_VPIPT] = "VPIPT", }; unsigned long __icache_flags; @@ -65,6 +65,9 @@ static const char *const hwcap_str[] = { "asimdhp", "cpuid", "asimdrdm", + "jscvt", + "fcma", + "lrcpc", NULL }; @@ -289,20 +292,18 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) unsigned int cpu = smp_processor_id(); u32 l1ip = CTR_L1IP(info->reg_ctr); - if (l1ip != ICACHE_POLICY_PIPT) { - /* - * VIPT caches are non-aliasing if the VA always equals the PA - * in all bit positions that are covered by the index. This is - * the case if the size of a way (# of sets * line size) does - * not exceed PAGE_SIZE. - */ - u32 waysize = icache_get_numsets() * icache_get_linesize(); - - if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) - set_bit(ICACHEF_ALIASING, &__icache_flags); + switch (l1ip) { + case ICACHE_POLICY_PIPT: + break; + case ICACHE_POLICY_VPIPT: + set_bit(ICACHEF_VPIPT, &__icache_flags); + break; + default: + /* Fallthrough */ + case ICACHE_POLICY_VIPT: + /* Assume aliasing */ + set_bit(ICACHEF_ALIASING, &__icache_flags); } - if (l1ip == ICACHE_POLICY_AIVIVT) - set_bit(ICACHEF_AIVIVT, &__icache_flags); pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c new file mode 100644 index 000000000000..f46d57c31443 --- /dev/null +++ b/arch/arm64/kernel/crash_dump.c @@ -0,0 +1,71 @@ +/* + * Routines for doing kexec-based kdump + * + * Copyright (C) 2017 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/crash_dump.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/memblock.h> +#include <linux/uaccess.h> +#include <asm/memory.h> + +/** + * copy_oldmem_page() - copy one page from old kernel memory + * @pfn: page frame number to be copied + * @buf: buffer where the copied page is placed + * @csize: number of bytes to copy + * @offset: offset in bytes into the page + * @userbuf: if set, @buf is in a user address space + * + * This function copies one page from old kernel memory into buffer pointed by + * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes + * copied or negative error in case of failure. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, + int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + if (userbuf) { + if (copy_to_user((char __user *)buf, vaddr + offset, csize)) { + memunmap(vaddr); + return -EFAULT; + } + } else { + memcpy(buf, vaddr + offset, csize); + } + + memunmap(vaddr); + + return csize; +} + +/** + * elfcorehdr_read - read from ELF core header + * @buf: buffer where the data is placed + * @count: number of bytes to read + * @ppos: address in the memory + * + * This function reads @count bytes from elf core header which exists + * on crash dump kernel's memory. + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count); + return count; +} diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 32913567da08..d618e25c3de1 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -36,7 +36,7 @@ /* Determine debug architecture. 
*/ u8 debug_monitors_arch(void) { - return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1), ID_AA64DFR0_DEBUGVER_SHIFT); } diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S new file mode 100644 index 000000000000..613fc3000677 --- /dev/null +++ b/arch/arm64/kernel/efi-header.S @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2013 - 2017 Linaro, Ltd. + * Copyright (C) 2013, 2014 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/pe.h> +#include <linux/sizes.h> + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: + .short IMAGE_FILE_MACHINE_ARM64 // Machine + .short section_count // NumberOfSections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 0 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics + +optional_header: + .short PE_OPT_MAGIC_PE32PLUS // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long __initdata_begin - efi_header_end // SizeOfCode + .long __pecoff_data_size // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long __efistub_entry - _head // AddressOfEntryPoint + .long efi_header_end - _head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long SZ_4K // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _end - _head // 
SizeOfImage + + // Everything before the kernel image is considered part of the header + .long efi_header_end - _head // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long (section_table - .) / 8 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + +#ifdef CONFIG_DEBUG_EFI + .long efi_debug_table - _head // DebugTable + .long efi_debug_table_size +#endif + + // Section table +section_table: + .ascii ".text\0\0\0" + .long __initdata_begin - efi_header_end // VirtualSize + .long efi_header_end - _head // VirtualAddress + .long __initdata_begin - efi_header_end // SizeOfRawData + .long efi_header_end - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE // Characteristics + + .ascii ".data\0\0\0" + .long __pecoff_data_size // VirtualSize + .long __initdata_begin - _head // VirtualAddress + .long __pecoff_data_rawsize // SizeOfRawData + .long __initdata_begin - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE // Characteristics + + .set section_count, (. - section_table) / 40 + +#ifdef CONFIG_DEBUG_EFI + /* + * The debug table is referenced via its Relative Virtual Address (RVA), + * which is only defined for those parts of the image that are covered + * by a section declaration. 
Since this header is not covered by any + * section, the debug table must be emitted elsewhere. So stick it in + * the .init.rodata section instead. + * + * Note that the EFI debug entry itself may legally have a zero RVA, + * which means we can simply put it right after the section headers. + */ + __INITRODATA + + .align 2 +efi_debug_table: + // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY + .long 0 // Characteristics + .long 0 // TimeDateStamp + .short 0 // MajorVersion + .short 0 // MinorVersion + .long IMAGE_DEBUG_TYPE_CODEVIEW // Type + .long efi_debug_entry_size // SizeOfData + .long 0 // RVA + .long efi_debug_entry - _head // FileOffset + + .set efi_debug_table_size, . - efi_debug_table + .previous + +efi_debug_entry: + // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY + .ascii "NB10" // Signature + .long 0 // Unknown + .long 0 // Unknown2 + .long 0 // Unknown3 + + .asciz VMLINUX_PATH + + .set efi_debug_entry_size, . - efi_debug_entry +#endif + + /* + * EFI will load .text onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that .text is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 +efi_header_end: + .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4fb6ccd886d1..973df7de7bf8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -42,6 +42,8 @@ #include <asm/thread_info.h> #include <asm/virt.h> +#include "efi-header.S" + #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 @@ -89,166 +91,14 @@ _head: .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved - .byte 0x41 // Magic number, "ARM\x64" - .byte 0x52 - .byte 0x4d - .byte 0x64 + .ascii "ARM\x64" // Magic number #ifdef CONFIG_EFI .long pe_header - _head // Offset to the PE header. 
-#else - .word 0 // reserved -#endif -#ifdef CONFIG_EFI - .align 3 pe_header: - .ascii "PE" - .short 0 -coff_header: - .short 0xaa64 // AArch64 - .short 2 // nr_sections - .long 0 // TimeDateStamp - .long 0 // PointerToSymbolTable - .long 1 // NumberOfSymbols - .short section_table - optional_header // SizeOfOptionalHeader - .short 0x206 // Characteristics. - // IMAGE_FILE_DEBUG_STRIPPED | - // IMAGE_FILE_EXECUTABLE_IMAGE | - // IMAGE_FILE_LINE_NUMS_STRIPPED -optional_header: - .short 0x20b // PE32+ format - .byte 0x02 // MajorLinkerVersion - .byte 0x14 // MinorLinkerVersion - .long _end - efi_header_end // SizeOfCode - .long 0 // SizeOfInitializedData - .long 0 // SizeOfUninitializedData - .long __efistub_entry - _head // AddressOfEntryPoint - .long efi_header_end - _head // BaseOfCode - -extra_header_fields: - .quad 0 // ImageBase - .long 0x1000 // SectionAlignment - .long PECOFF_FILE_ALIGNMENT // FileAlignment - .short 0 // MajorOperatingSystemVersion - .short 0 // MinorOperatingSystemVersion - .short 0 // MajorImageVersion - .short 0 // MinorImageVersion - .short 0 // MajorSubsystemVersion - .short 0 // MinorSubsystemVersion - .long 0 // Win32VersionValue - - .long _end - _head // SizeOfImage - - // Everything before the kernel image is considered part of the header - .long efi_header_end - _head // SizeOfHeaders - .long 0 // CheckSum - .short 0xa // Subsystem (EFI application) - .short 0 // DllCharacteristics - .quad 0 // SizeOfStackReserve - .quad 0 // SizeOfStackCommit - .quad 0 // SizeOfHeapReserve - .quad 0 // SizeOfHeapCommit - .long 0 // LoaderFlags - .long (section_table - .) 
/ 8 // NumberOfRvaAndSizes - - .quad 0 // ExportTable - .quad 0 // ImportTable - .quad 0 // ResourceTable - .quad 0 // ExceptionTable - .quad 0 // CertificationTable - .quad 0 // BaseRelocationTable - -#ifdef CONFIG_DEBUG_EFI - .long efi_debug_table - _head // DebugTable - .long efi_debug_table_size -#endif - - // Section table -section_table: - - /* - * The EFI application loader requires a relocation section - * because EFI applications must be relocatable. This is a - * dummy section as far as we are concerned. - */ - .ascii ".reloc" - .byte 0 - .byte 0 // end of 0 padding of section name - .long 0 - .long 0 - .long 0 // SizeOfRawData - .long 0 // PointerToRawData - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long 0x42100040 // Characteristics (section flags) - - - .ascii ".text" - .byte 0 - .byte 0 - .byte 0 // end of 0 padding of section name - .long _end - efi_header_end // VirtualSize - .long efi_header_end - _head // VirtualAddress - .long _edata - efi_header_end // SizeOfRawData - .long efi_header_end - _head // PointerToRawData - - .long 0 // PointerToRelocations (0 for executables) - .long 0 // PointerToLineNumbers (0 for executables) - .short 0 // NumberOfRelocations (0 for executables) - .short 0 // NumberOfLineNumbers (0 for executables) - .long 0xe0500020 // Characteristics (section flags) - -#ifdef CONFIG_DEBUG_EFI - /* - * The debug table is referenced via its Relative Virtual Address (RVA), - * which is only defined for those parts of the image that are covered - * by a section declaration. Since this header is not covered by any - * section, the debug table must be emitted elsewhere. So stick it in - * the .init.rodata section instead. - * - * Note that the EFI debug entry itself may legally have a zero RVA, - * which means we can simply put it right after the section headers. 
- */ - __INITRODATA - - .align 2 -efi_debug_table: - // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY - .long 0 // Characteristics - .long 0 // TimeDateStamp - .short 0 // MajorVersion - .short 0 // MinorVersion - .long 2 // Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW - .long efi_debug_entry_size // SizeOfData - .long 0 // RVA - .long efi_debug_entry - _head // FileOffset - - .set efi_debug_table_size, . - efi_debug_table - .previous - -efi_debug_entry: - // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY - .ascii "NB10" // Signature - .long 0 // Unknown - .long 0 // Unknown2 - .long 0 // Unknown3 - - .asciz VMLINUX_PATH - - .set efi_debug_entry_size, . - efi_debug_entry -#endif - - /* - * EFI will load .text onwards at the 4k section alignment - * described in the PE/COFF header. To ensure that instruction - * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that .text is - * placed at a 4k boundary in the Image to begin with. - */ - .align 12 -efi_header_end: + __EFI_PE_HEADER +#else + .long 0 // reserved #endif __INIT @@ -534,13 +384,8 @@ ENTRY(kimage_vaddr) ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 -CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 -CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 - msr sctlr_el2, x0 - b 2f -1: mrs x0, sctlr_el1 + b.eq 1f + mrs x0, sctlr_el1 CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr sctlr_el1, x0 @@ -548,7 +393,11 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 isb ret -2: +1: mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + #ifdef CONFIG_ARM64_VHE /* * Check for VHE being present. 
For the rest of the EL2 setup, @@ -594,14 +443,14 @@ set_hcr: cmp x0, #1 b.ne 3f - mrs_s x0, ICC_SRE_EL2 + mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s ICC_SRE_EL2, x0 + msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set - mrs_s x0, ICC_SRE_EL2 // Read SRE back, + mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, 3f // and check that it sticks - msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults 3: #endif @@ -612,26 +461,6 @@ set_hcr: msr vpidr_el2, x0 msr vmpidr_el2, x1 - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - cbnz x2, 1f - - /* sctlr_el1 */ - mov x0, #0x0800 // Set/clear RES{1,0} bits -CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems -CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems - msr sctlr_el1, x0 - - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 -1: - #ifdef CONFIG_COMPAT msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif @@ -668,6 +497,23 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems ret install_el2_stub: + /* + * When VHE is not in use, early init of EL2 and EL1 needs to be + * done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. 
+ */ + /* sctlr_el1 */ + mov x0, #0x0800 // Set/clear RES{1,0} bits +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + msr sctlr_el1, x0 + + /* Coprocessor traps. */ + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 + /* Hypervisor stub */ adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 97a7384100f3..a44e13942d30 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -28,6 +28,7 @@ #include <asm/cacheflush.h> #include <asm/cputype.h> #include <asm/irqflags.h> +#include <asm/kexec.h> #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> @@ -102,7 +103,8 @@ int pfn_is_nosave(unsigned long pfn) unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin); unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1); - return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); + return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) || + crash_is_nosave(pfn); } void notrace save_processor_state(void) @@ -286,6 +288,9 @@ int swsusp_arch_suspend(void) local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { + /* make the crash dump kernel image visible/saveable */ + crash_prepare_suspend(); + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { @@ -297,6 +302,9 @@ int swsusp_arch_suspend(void) if (el2_reset_needed()) dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); + /* make the crash dump kernel image protected again */ + crash_post_resume(); + /* * Tell the hibernation core that we've just restored * the memory diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index d3b5f75e652e..e1261fbaa374 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -55,18 +55,7 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x30, esr_el2 - lsr x30, x30, 
#ESR_ELx_EC_SHIFT - - cmp x30, #ESR_ELx_EC_HVC64 - b.ne 9f // Not an HVC trap - - cmp x0, #HVC_GET_VECTORS - b.ne 1f - mrs x0, vbar_el2 - b 9f - -1: cmp x0, #HVC_SET_VECTORS + cmp x0, #HVC_SET_VECTORS b.ne 2f msr vbar_el2, x1 b 9f @@ -79,10 +68,15 @@ el1_sync: mov x1, x3 br x4 // no return +3: cmp x0, #HVC_RESET_VECTORS + beq 9f // Nothing to reset! + /* Someone called kvm_call_hyp() against the hyp-stub... */ -3: mov x0, #ARM_EXCEPTION_HYP_GONE + ldr x0, =HVC_STUB_ERR + eret -9: eret +9: mov x0, xzr + eret ENDPROC(el1_sync) .macro invalid_vector label @@ -121,19 +115,15 @@ ENDPROC(\label) * initialisation entry point. */ -ENTRY(__hyp_get_vectors) - str lr, [sp, #-16]! - mov x0, #HVC_GET_VECTORS - hvc #0 - ldr lr, [sp], #16 - ret -ENDPROC(__hyp_get_vectors) - ENTRY(__hyp_set_vectors) - str lr, [sp, #-16]! mov x1, x0 mov x0, #HVC_SET_VECTORS hvc #0 - ldr lr, [sp], #16 ret ENDPROC(__hyp_set_vectors) + +ENTRY(__hyp_reset_vectors) + mov x0, #HVC_RESET_VECTORS + hvc #0 + ret +ENDPROC(__hyp_reset_vectors) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 3a63954a8b14..b884a926a632 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -474,6 +474,7 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, shift = 10; break; case AARCH64_INSN_REGTYPE_RM: + case AARCH64_INSN_REGTYPE_RS: shift = 16; break; default: @@ -757,6 +758,111 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, offset >> shift); } +u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + enum aarch64_insn_register state, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type) +{ + u32 insn; + + switch (type) { + case AARCH64_INSN_LDST_LOAD_EX: + insn = aarch64_insn_get_load_ex_value(); + break; + case AARCH64_INSN_LDST_STORE_EX: + insn = aarch64_insn_get_store_ex_value(); + break; + default: + pr_err("%s: unknown load/store exclusive encoding %d\n", 
__func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + reg); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + base); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn, + AARCH64_INSN_REG_ZR); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, + state); +} + +static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, + enum aarch64_insn_prfm_target target, + enum aarch64_insn_prfm_policy policy, + u32 insn) +{ + u32 imm_type = 0, imm_target = 0, imm_policy = 0; + + switch (type) { + case AARCH64_INSN_PRFM_TYPE_PLD: + break; + case AARCH64_INSN_PRFM_TYPE_PLI: + imm_type = BIT(0); + break; + case AARCH64_INSN_PRFM_TYPE_PST: + imm_type = BIT(1); + break; + default: + pr_err("%s: unknown prfm type encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + switch (target) { + case AARCH64_INSN_PRFM_TARGET_L1: + break; + case AARCH64_INSN_PRFM_TARGET_L2: + imm_target = BIT(0); + break; + case AARCH64_INSN_PRFM_TARGET_L3: + imm_target = BIT(1); + break; + default: + pr_err("%s: unknown prfm target encoding %d\n", __func__, target); + return AARCH64_BREAK_FAULT; + } + + switch (policy) { + case AARCH64_INSN_PRFM_POLICY_KEEP: + break; + case AARCH64_INSN_PRFM_POLICY_STRM: + imm_policy = BIT(0); + break; + default: + pr_err("%s: unknown prfm policy encoding %d\n", __func__, policy); + return AARCH64_BREAK_FAULT; + } + + /* In this case, imm5 is encoded into Rt field. 
*/ + insn &= ~GENMASK(4, 0); + insn |= imm_policy | (imm_target << 1) | (imm_type << 3); + + return insn; +} + +u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, + enum aarch64_insn_prfm_type type, + enum aarch64_insn_prfm_target target, + enum aarch64_insn_prfm_policy policy) +{ + u32 insn = aarch64_insn_get_prfm_value(); + + insn = aarch64_insn_encode_ldst_size(AARCH64_INSN_SIZE_64, insn); + + insn = aarch64_insn_encode_prfm_imm(type, target, policy, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + base); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, 0); +} + u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 769f24ef628c..d7e90d97f5c4 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -131,11 +131,15 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) /* * The kernel Image should not extend across a 1GB/32MB/512MB alignment * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this - * happens, increase the KASLR offset by the size of the kernel image. + * happens, increase the KASLR offset by the size of the kernel image + * rounded up by SWAPPER_BLOCK_SIZE. 
*/ if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) - offset = (offset + (u64)(_end - _text)) & mask; + (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) { + u64 kimg_sz = _end - _text; + offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE)) + & mask; + } if (IS_ENABLED(CONFIG_KASAN)) /* diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index bc96c8a7fc79..481f54a866c5 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -9,12 +9,19 @@ * published by the Free Software Foundation. */ +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> #include <linux/kexec.h> +#include <linux/page-flags.h> #include <linux/smp.h> #include <asm/cacheflush.h> #include <asm/cpu_ops.h> +#include <asm/memory.h> +#include <asm/mmu.h> #include <asm/mmu_context.h> +#include <asm/page.h> #include "cpu-reset.h" @@ -22,8 +29,6 @@ extern const unsigned char arm64_relocate_new_kernel[]; extern const unsigned long arm64_relocate_new_kernel_size; -static unsigned long kimage_start; - /** * kexec_image_info - For debugging output. */ @@ -64,8 +69,6 @@ void machine_kexec_cleanup(struct kimage *kimage) */ int machine_kexec_prepare(struct kimage *kimage) { - kimage_start = kimage->start; - kexec_image_info(kimage); if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { @@ -144,11 +147,15 @@ void machine_kexec(struct kimage *kimage) { phys_addr_t reboot_code_buffer_phys; void *reboot_code_buffer; + bool in_kexec_crash = (kimage == kexec_crash_image); + bool stuck_cpus = cpus_are_stuck_in_kernel(); /* * New cpus may have become stuck_in_kernel after we loaded the image. 
*/ - BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)); + BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1))); + WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), + "Some CPUs may be stale, kdump will be unreliable.\n"); reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); @@ -183,7 +190,7 @@ void machine_kexec(struct kimage *kimage) kexec_list_flush(kimage); /* Flush the new image if already in place. */ - if (kimage->head & IND_DONE) + if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE)) kexec_segment_flush(kimage); pr_info("Bye!\n"); @@ -200,13 +207,158 @@ void machine_kexec(struct kimage *kimage) * relocation is complete. */ - cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, - kimage_start, 0); + cpu_soft_restart(kimage != kexec_crash_image, + reboot_code_buffer_phys, kimage->head, kimage->start, 0); BUG(); /* Should never get here. */ } +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + +/** + * machine_crash_shutdown - shutdown non-crashing cpus and save registers + */ void machine_crash_shutdown(struct pt_regs *regs) { - /* Empty routine needed to avoid build errors. 
*/ + local_irq_disable(); + + /* shutdown non-crashing cpus */ + smp_send_crash_stop(); + + /* for crashing cpu */ + crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + + pr_info("Starting crashdump kernel...\n"); +} + +void arch_kexec_protect_crashkres(void) +{ + int i; + + kexec_segment_flush(kexec_crash_image); + + for (i = 0; i < kexec_crash_image->nr_segments; i++) + set_memory_valid( + __phys_to_virt(kexec_crash_image->segment[i].mem), + kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0); +} + +void arch_kexec_unprotect_crashkres(void) +{ + int i; + + for (i = 0; i < kexec_crash_image->nr_segments; i++) + set_memory_valid( + __phys_to_virt(kexec_crash_image->segment[i].mem), + kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1); +} + +#ifdef CONFIG_HIBERNATION +/* + * To preserve the crash dump kernel image, the relevant memory segments + * should be mapped again around the hibernation. + */ +void crash_prepare_suspend(void) +{ + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); +} + +void crash_post_resume(void) +{ + if (kexec_crash_image) + arch_kexec_protect_crashkres(); +} + +/* + * crash_is_nosave + * + * Return true only if a page is part of reserved memory for crash dump kernel, + * but does not hold any data of loaded kernel image. + * + * Note that all the pages in crash dump kernel memory have been initially + * marked as Reserved in kexec_reserve_crashkres_pages(). + * + * In hibernation, the pages which are Reserved and yet "nosave" are excluded + * from the hibernation image. crash_is_nosave() does this check for crash + * dump kernel and will reduce the total size of hibernation image. + */ + +bool crash_is_nosave(unsigned long pfn) +{ + int i; + phys_addr_t addr; + + if (!crashk_res.end) + return false; + + /* in reserved memory? 
*/ + addr = __pfn_to_phys(pfn); + if ((addr < crashk_res.start) || (crashk_res.end < addr)) + return false; + + if (!kexec_crash_image) + return true; + + /* not part of loaded kernel image? */ + for (i = 0; i < kexec_crash_image->nr_segments; i++) + if (addr >= kexec_crash_image->segment[i].mem && + addr < (kexec_crash_image->segment[i].mem + + kexec_crash_image->segment[i].memsz)) + return false; + + return true; +} + +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + struct page *page; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + page = phys_to_page(addr); + ClearPageReserved(page); + free_reserved_page(page); + } +} +#endif /* CONFIG_HIBERNATION */ + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", + kimage_voffset); + vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", + PHYS_OFFSET); } diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 1ce90d8450ae..d05dbe658409 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2014-2017 Linaro Ltd. 
<ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -26,35 +26,21 @@ struct plt_entry { __le32 br; /* br x16 */ }; -u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, +static bool in_init(const struct module *mod, void *loc) +{ + return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; +} + +u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym) { - struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr; - int i = mod->arch.plt_num_entries; + struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : + &mod->arch.init; + struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr; + int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; /* - * We only emit PLT entries against undefined (SHN_UNDEF) symbols, - * which are listed in the ELF symtab section, but without a type - * or a size. - * So, similar to how the module loader uses the Elf64_Sym::st_value - * field to store the resolved addresses of undefined symbols, let's - * borrow the Elf64_Sym::st_size field (whose value is never used by - * the module loader, even for symbols that are defined) to record - * the address of a symbol's associated PLT entry as we emit it for a - * zero addend relocation (which is the only kind we have to deal with - * in practice). This allows us to find duplicates without having to - * go through the table every time. 
- */ - if (rela->r_addend == 0 && sym->st_size != 0) { - BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]); - return sym->st_size; - } - - mod->arch.plt_num_entries++; - BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries); - - /* * MOVK/MOVN/MOVZ opcode: * +--------+------------+--------+-----------+-------------+---------+ * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | @@ -72,8 +58,19 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, cpu_to_le32(0xd61f0200) }; - if (rela->r_addend == 0) - sym->st_size = (u64)&plt[i]; + /* + * Check if the entry we just created is a duplicate. Given that the + * relocations are sorted, this will be the last entry we allocated. + * (if one exists). + */ + if (i > 0 && + plt[i].mov0 == plt[i - 1].mov0 && + plt[i].mov1 == plt[i - 1].mov1 && + plt[i].mov2 == plt[i - 1].mov2) + return (u64)&plt[i - 1]; + + pltsec->plt_num_entries++; + BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries); return (u64)&plt[i]; } @@ -104,7 +101,8 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num) return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; } -static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) +static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, + Elf64_Word dstidx) { unsigned int ret = 0; Elf64_Sym *s; @@ -116,13 +114,17 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) case R_AARCH64_CALL26: /* * We only have to consider branch targets that resolve - * to undefined symbols. This is not simply a heuristic, - * it is a fundamental limitation, since the PLT itself - * is part of the module, and needs to be within 128 MB - * as well, so modules can never grow beyond that limit. + * to symbols that are defined in a different section. 
+ * This is not simply a heuristic, it is a fundamental + * limitation, since there is no guaranteed way to emit + * PLT entries sufficiently close to the branch if the + * section size exceeds the range of a branch + * instruction. So ignore relocations against defined + * symbols if they live in the same section as the + * relocation target. */ s = syms + ELF64_R_SYM(rela[i].r_info); - if (s->st_shndx != SHN_UNDEF) + if (s->st_shndx == dstidx) break; /* @@ -149,7 +151,8 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long plt_max_entries = 0; + unsigned long core_plts = 0; + unsigned long init_plts = 0; Elf64_Sym *syms = NULL; int i; @@ -158,14 +161,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, * entries. Record the symtab address as well. */ for (i = 0; i < ehdr->e_shnum; i++) { - if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0) - mod->arch.plt = sechdrs + i; + if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + mod->arch.core.plt = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) + mod->arch.init.plt = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } - if (!mod->arch.plt) { - pr_err("%s: module PLT section missing\n", mod->name); + if (!mod->arch.core.plt || !mod->arch.init.plt) { + pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; } if (!syms) { @@ -188,14 +193,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* sort by type, symbol index and addend */ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); - plt_max_entries += count_plts(syms, rels, numrels); + if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) + core_plts += count_plts(syms, rels, numrels, + sechdrs[i].sh_info); + else + init_plts += count_plts(syms, rels, numrels, + 
sechdrs[i].sh_info); } - mod->arch.plt->sh_type = SHT_NOBITS; - mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry); - mod->arch.plt_num_entries = 0; - mod->arch.plt_max_entries = plt_max_entries; + mod->arch.core.plt->sh_type = SHT_NOBITS; + mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core.plt->sh_size = (core_plts + 1) * sizeof(struct plt_entry); + mod->arch.core.plt_num_entries = 0; + mod->arch.core.plt_max_entries = core_plts; + + mod->arch.init.plt->sh_type = SHT_NOBITS; + mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry); + mod->arch.init.plt_num_entries = 0; + mod->arch.init.plt_max_entries = init_plts; + return 0; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 7f316982ce00..c9a2ab446dc6 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -380,7 +380,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && ovf == -ERANGE) { - val = module_emit_plt_entry(me, &rel[i], sym); + val = module_emit_plt_entry(me, loc, &rel[i], sym); ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index 8949f6c6f729..f7c9781a9d48 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,3 +1,4 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } + .init.plt (NOLOAD) : { BYTE(0) } } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 57ae9d9ed9bb..bcc79471b38e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -290,6 +290,12 @@ static const unsigned 
armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, @@ -957,10 +963,26 @@ static int armv8_vulcan_map_event(struct perf_event *event) ARMV8_PMU_EVTYPE_EVENT); } +struct armv8pmu_probe_info { + struct arm_pmu *pmu; + bool present; +}; + static void __armv8pmu_probe_pmu(void *info) { - struct arm_pmu *cpu_pmu = info; + struct armv8pmu_probe_info *probe = info; + struct arm_pmu *cpu_pmu = probe->pmu; + u64 dfr0; u32 pmceid[2]; + int pmuver; + + dfr0 = read_sysreg(id_aa64dfr0_el1); + pmuver = cpuid_feature_extract_signed_field(dfr0, + ID_AA64DFR0_PMUVER_SHIFT); + if (pmuver < 1) + return; + + probe->present = true; /* Read the nb of CNTx counters supported from PMNC */ cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) @@ -979,13 +1001,27 @@ static void __armv8pmu_probe_pmu(void *info) static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) { - return smp_call_function_any(&cpu_pmu->supported_cpus, + struct armv8pmu_probe_info probe = { + .pmu = cpu_pmu, + .present = false, + }; + int ret; + + ret = smp_call_function_any(&cpu_pmu->supported_cpus, __armv8pmu_probe_pmu, - cpu_pmu, 1); + &probe, 1); + if (ret) + return ret; + + return probe.present ? 
0 : -ENODEV; } -static void armv8_pmu_init(struct arm_pmu *cpu_pmu) +static int armv8_pmu_init(struct arm_pmu *cpu_pmu) { + int ret = armv8pmu_probe_pmu(cpu_pmu); + if (ret) + return ret; + cpu_pmu->handle_irq = armv8pmu_handle_irq, cpu_pmu->enable = armv8pmu_enable_event, cpu_pmu->disable = armv8pmu_disable_event, @@ -997,78 +1033,104 @@ static void armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->reset = armv8pmu_reset, cpu_pmu->max_period = (1LLU << 32) - 1, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + + return 0; } static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + 
return ret; + cpu_pmu->name = "armv8_cortex_a72"; cpu_pmu->map_event = armv8_a57_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_cavium_thunder"; cpu_pmu->map_event = armv8_thunder_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - armv8_pmu_init(cpu_pmu); + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + cpu_pmu->name = "armv8_brcm_vulcan"; cpu_pmu->map_event = armv8_vulcan_map_event; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; - return armv8pmu_probe_pmu(cpu_pmu); + + return 0; } static const struct of_device_id armv8_pmu_of_device_ids[] = { @@ -1081,24 +1143,9 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {}, }; -/* - * Non DT systems have their micro/arch events probed at run-time. - * A fairly complete list of generic events are provided and ones that - * aren't supported by the current PMU are disabled. 
- */ -static const struct pmu_probe_info armv8_pmu_probe_table[] = { - PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */ - { /* sentinel value */ } -}; - static int armv8_pmu_device_probe(struct platform_device *pdev) { - if (acpi_disabled) - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, - NULL); - - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, - armv8_pmu_probe_table); + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); } static struct platform_driver armv8_pmu_driver = { @@ -1109,4 +1156,11 @@ static struct platform_driver armv8_pmu_driver = { .probe = armv8_pmu_device_probe, }; -builtin_platform_driver(armv8_pmu_driver); +static int __init armv8_pmu_driver_init(void) +{ + if (acpi_disabled) + return platform_driver_register(&armv8_pmu_driver); + else + return arm_pmu_acpi_probe(armv8_pmuv3_init); +} +device_initcall(armv8_pmu_driver_init) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a07aae5b8a2..c5c45942fb6e 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return NOTIFY_DONE; -} - static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur_kprobe; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 043d373b8369..ae2a835898d7 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -205,12 +205,10 @@ void __show_regs(struct pt_regs *regs) pr_cont("\n"); } - printk("\n"); } void show_regs(struct pt_regs * regs) { - printk("\n"); __show_regs(regs); } diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c new file mode 100644 index 000000000000..c124752a8bd3 --- /dev/null +++ 
b/arch/arm64/kernel/reloc_test_core.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> + +int sym64_rel; + +#define SYM64_ABS_VAL 0xffff880000cccccc +#define SYM32_ABS_VAL 0xf800cccc +#define SYM16_ABS_VAL 0xf8cc + +#define __SET_ABS(name, val) asm(".globl " #name "; .set "#name ", " #val) +#define SET_ABS(name, val) __SET_ABS(name, val) + +SET_ABS(sym64_abs, SYM64_ABS_VAL); +SET_ABS(sym32_abs, SYM32_ABS_VAL); +SET_ABS(sym16_abs, SYM16_ABS_VAL); + +asmlinkage u64 absolute_data64(void); +asmlinkage u64 absolute_data32(void); +asmlinkage u64 absolute_data16(void); +asmlinkage u64 signed_movw(void); +asmlinkage u64 unsigned_movw(void); +asmlinkage u64 relative_adrp(void); +asmlinkage u64 relative_adr(void); +asmlinkage u64 relative_data64(void); +asmlinkage u64 relative_data32(void); +asmlinkage u64 relative_data16(void); + +static struct { + char name[32]; + u64 (*f)(void); + u64 expect; +} const funcs[] = { + { "R_AARCH64_ABS64", absolute_data64, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_ABS32", absolute_data32, UL(SYM32_ABS_VAL) }, + { "R_AARCH64_ABS16", absolute_data16, UL(SYM16_ABS_VAL) }, + { "R_AARCH64_MOVW_SABS_Gn", signed_movw, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_MOVW_UABS_Gn", unsigned_movw, UL(SYM64_ABS_VAL) }, +#ifndef CONFIG_ARM64_ERRATUM_843419 + { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp, (u64)&sym64_rel }, +#endif + { "R_AARCH64_ADR_PREL_LO21", relative_adr, (u64)&sym64_rel }, + { "R_AARCH64_PREL64", relative_data64, (u64)&sym64_rel }, + { "R_AARCH64_PREL32", relative_data32, (u64)&sym64_rel }, + { "R_AARCH64_PREL16", relative_data16, (u64)&sym64_rel }, +}; + +static int reloc_test_init(void) +{ + int i; + + pr_info("Relocation test:\n"); + 
pr_info("-------------------------------------------------------\n"); + + for (i = 0; i < ARRAY_SIZE(funcs); i++) { + u64 ret = funcs[i].f(); + + pr_info("%-31s 0x%016llx %s\n", funcs[i].name, ret, + ret == funcs[i].expect ? "pass" : "fail"); + if (ret != funcs[i].expect) + pr_err("Relocation failed, expected 0x%016llx, not 0x%016llx\n", + funcs[i].expect, ret); + } + return 0; +} + +static void reloc_test_exit(void) +{ +} + +module_init(reloc_test_init); +module_exit(reloc_test_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S new file mode 100644 index 000000000000..e1edcefeb02d --- /dev/null +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/linkage.h> + +ENTRY(absolute_data64) + ldr x0, 0f + ret +0: .quad sym64_abs +ENDPROC(absolute_data64) + +ENTRY(absolute_data32) + ldr w0, 0f + ret +0: .long sym32_abs +ENDPROC(absolute_data32) + +ENTRY(absolute_data16) + adr x0, 0f + ldrh w0, [x0] + ret +0: .short sym16_abs, 0 +ENDPROC(absolute_data16) + +ENTRY(signed_movw) + movz x0, #:abs_g2_s:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(signed_movw) + +ENTRY(unsigned_movw) + movz x0, #:abs_g3:sym64_abs + movk x0, #:abs_g2_nc:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(unsigned_movw) + +#ifndef CONFIG_ARM64_ERRATUM_843419 + +ENTRY(relative_adrp) + adrp x0, sym64_rel + add x0, x0, #:lo12:sym64_rel + ret +ENDPROC(relative_adrp) + +#endif + +ENTRY(relative_adr) + adr x0, sym64_rel + ret +ENDPROC(relative_adr) + +ENTRY(relative_data64) + adr x1, 0f + ldr x0, [x1] + add x0, x0, x1 + ret +0: .quad sym64_rel - . 
+ENDPROC(relative_data64) + +ENTRY(relative_data32) + adr x1, 0f + ldr w0, [x1] + add x0, x0, x1 + ret +0: .long sym64_rel - . +ENDPROC(relative_data32) + +ENTRY(relative_data16) + adr x1, 0f + ldrsh w0, [x1] + add x0, x0, x1 + ret +0: .short sym64_rel - ., 0 +ENDPROC(relative_data16) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 42274bda0ccb..2c822ef94f34 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -31,7 +31,6 @@ #include <linux/screen_info.h> #include <linux/init.h> #include <linux/kexec.h> -#include <linux/crash_dump.h> #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/interrupt.h> @@ -181,6 +180,7 @@ static void __init smp_build_mpidr_hash(void) static void __init setup_machine_fdt(phys_addr_t dt_phys) { void *dt_virt = fixmap_remap_fdt(dt_phys); + const char *name; if (!dt_virt || !early_init_dt_scan(dt_virt)) { pr_crit("\n" @@ -193,7 +193,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } - dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); + name = of_flat_dt_get_machine_name(); + pr_info("Machine model: %s\n", name); + dump_stack_set_arch_desc("%s (DT)", name); } static void __init request_standard_resources(void) @@ -226,6 +228,12 @@ static void __init request_standard_resources(void) if (kernel_data.start >= res->start && kernel_data.end <= res->end) request_resource(res, &kernel_data); +#ifdef CONFIG_KEXEC_CORE + /* Userspace will find "Crash kernel" region in /proc/iomem. 
*/ + if (crashk_res.end && crashk_res.start >= res->start && + crashk_res.end <= res->end) + request_resource(res, &crashk_res); +#endif } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ef1caae02110..6e0e16a3a7d4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include <linux/completion.h> #include <linux/of.h> #include <linux/irq_work.h> +#include <linux/kexec.h> #include <asm/alternative.h> #include <asm/atomic.h> @@ -76,6 +77,7 @@ enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_TIMER, IPI_IRQ_WORK, IPI_WAKEUP @@ -434,6 +436,7 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_cpu_features(); hyp_mode_check(); apply_alternatives_all(); + mark_linear_text_alias_ro(); } void __init smp_prepare_boot_cpu(void) @@ -518,6 +521,13 @@ static bool bootcpu_valid __initdata; static unsigned int cpu_count = 1; #ifdef CONFIG_ACPI +static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS]; + +struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu) +{ + return &cpu_madt_gicc[cpu]; +} + /* * acpi_map_gic_cpu_interface - parse processor MADT entry * @@ -552,6 +562,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) return; } bootcpu_valid = true; + cpu_madt_gicc[0] = *processor; early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); return; } @@ -562,6 +573,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) /* map the logical cpu id to cpu MPIDR */ cpu_logical_map(cpu_count) = hwid; + cpu_madt_gicc[cpu_count] = *processor; + /* * Set-up the ACPI parking protocol cpu entries * while initializing the cpu_logical_map to @@ -755,6 +768,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"), 
S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), S(IPI_WAKEUP, "CPU wake-up interrupts"), @@ -829,6 +843,29 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); +#endif + +static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ +#ifdef CONFIG_KEXEC_CORE + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_ops[cpu]->cpu_die) + cpu_ops[cpu]->cpu_die(cpu); +#endif + + /* just in case */ + cpu_park_loop(); +#endif +} + /* * Main handler for inter-processor interrupts */ @@ -859,6 +896,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_CPU_CRASH_STOP: + if (IS_ENABLED(CONFIG_KEXEC_CORE)) { + irq_enter(); + ipi_cpu_crash_stop(cpu, regs); + + unreachable(); + } + break; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: irq_enter(); @@ -931,6 +977,39 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +void smp_send_crash_stop(void) +{ + cpumask_t mask; + unsigned long timeout; + + if (num_online_cpus() == 1) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + + pr_crit("SMP: stopping secondary CPUs\n"); + smp_cross_call(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + /* * not supported here */ @@ -944,7 +1023,7 @@ static bool have_cpu_die(void) #ifdef CONFIG_HOTPLUG_CPU int any_cpu = 
raw_smp_processor_id(); - if (cpu_ops[any_cpu]->cpu_die) + if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die) return true; #endif return false; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e52be6aa44ee..d4d6ae02cd55 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -505,6 +505,22 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); + regs->pc += 4; +} + +static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, rt, read_sysreg(cntfrq_el0)); + regs->pc += 4; +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -523,6 +539,18 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ, .handler = ctr_read_handler, }, + { + /* Trap read access to CNTVCT_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT, + .handler = cntvct_read_handler, + }, + { + /* Trap read access to CNTFRQ_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, + .handler = cntfrq_read_handler, + }, {}, }; diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore index b8cc94e9698b..f8b69d84238e 100644 --- a/arch/arm64/kernel/vdso/.gitignore +++ b/arch/arm64/kernel/vdso/.gitignore @@ -1,2 +1 @@ vdso.lds -vdso-offsets.h diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b8deffa9e1bf..987a00ee446c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -143,12 +143,27 @@ SECTIONS . 
= ALIGN(SEGMENT_ALIGN); __init_begin = .; + __inittext_begin = .; INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { + *(.altinstr_replacement) + } + + . = ALIGN(PAGE_SIZE); + __inittext_end = .; + __initdata_begin = .; + .init.data : { INIT_DATA INIT_SETUP(16) @@ -164,15 +179,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(4); - .altinstructions : { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : { - *(.altinstr_replacement) - } .rela : ALIGN(8) { *(.rela .rela*) } @@ -181,6 +187,7 @@ SECTIONS __rela_size = SIZEOF(.rela); . = ALIGN(SEGMENT_ALIGN); + __initdata_end = .; __init_end = .; _data = .; @@ -206,6 +213,7 @@ SECTIONS } PECOFF_EDATA_PADDING + __pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin); _edata = .; BSS_SECTION(0, 0, 0) @@ -221,6 +229,7 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif + __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; STABS_DEBUG diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 1bfe30dfbfe7..fa1b18e364fc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... 
ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 6b29d3d9e1f2..839425c24b1c 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -22,6 +22,7 @@ #include <asm/kvm_mmu.h> #include <asm/pgtable-hwdef.h> #include <asm/sysreg.h> +#include <asm/virt.h> .text .pushsection .hyp.idmap.text, "ax" @@ -58,6 +59,9 @@ __invalid: * x2: HYP vectors */ __do_hyp_init: + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.lo __kvm_handle_stub_hvc msr ttbr0_el2, x0 @@ -119,23 +123,45 @@ __do_hyp_init: eret ENDPROC(__kvm_hyp_init) +ENTRY(__kvm_handle_stub_hvc) + cmp x0, #HVC_SOFT_RESTART + b.ne 1f + + /* This is where we're about to jump, staying at EL2 */ + msr elr_el2, x1 + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) + msr spsr_el2, x0 + + /* Shuffle the arguments, and don't come back */ + mov x0, x2 + mov x1, x3 + mov x2, x4 + b reset + +1: cmp x0, #HVC_RESET_VECTORS + b.ne 1f +reset: /* - * Reset kvm back to the hyp stub. + * Reset kvm back to the hyp stub. Do not clobber x0-x4 in + * case we coming via HVC_SOFT_RESTART. 
*/ -ENTRY(__kvm_hyp_reset) - /* We're now in idmap, disable MMU */ - mrs x0, sctlr_el2 - ldr x1, =SCTLR_ELx_FLAGS - bic x0, x0, x1 // Clear SCTL_M and etc - msr sctlr_el2, x0 + mrs x5, sctlr_el2 + ldr x6, =SCTLR_ELx_FLAGS + bic x5, x5, x6 // Clear SCTL_M and etc + msr sctlr_el2, x5 isb /* Install stub vectors */ - adr_l x0, __hyp_stub_vectors - msr vbar_el2, x0 + adr_l x5, __hyp_stub_vectors + msr vbar_el2, x5 + mov x0, xzr + eret +1: /* Bad stub call */ + ldr x0, =HVC_STUB_ERR eret -ENDPROC(__kvm_hyp_reset) + +ENDPROC(__kvm_handle_stub_hvc) .ltorg diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 2726635dceba..952f6cb9cf72 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -36,15 +36,12 @@ * passed in x0. * * A function pointer with a value less than 0xfff has a special meaning, - * and is used to implement __hyp_get_vectors in the same way as in + * and is used to implement hyp stubs in the same way as in * arch/arm64/kernel/hyp_stub.S. - * HVC behaves as a 'bl' call and will clobber lr. */ ENTRY(__kvm_call_hyp) alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - str lr, [sp, #-16]! hvc #0 - ldr lr, [sp], #16 ret alternative_else_nop_endif b __vhe_hyp_call diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5e9052f087f2..5170ce1021da 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -32,17 +32,17 @@ * Shuffle the parameters before calling the function * pointed to in x0. Assumes parameters in x[1,2,3]. */ + str lr, [sp, #-16]! mov lr, x0 mov x0, x1 mov x1, x2 mov x2, x3 blr lr + ldr lr, [sp], #16 .endm ENTRY(__vhe_hyp_call) - str lr, [sp, #-16]! do_el2_call - ldr lr, [sp], #16 /* * We used to rely on having an exception return to get * an implicit isb. In the E2H case, we don't have it anymore. 
@@ -53,21 +53,6 @@ ENTRY(__vhe_hyp_call) ret ENDPROC(__vhe_hyp_call) -/* - * Compute the idmap address of __kvm_hyp_reset based on the idmap - * start passed as a parameter, and jump there. - * - * x0: HYP phys_idmap_start - */ -ENTRY(__kvm_hyp_teardown) - mov x4, x0 - adr_l x3, __kvm_hyp_reset - - /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ - bfi x4, x3, #0, #PAGE_SHIFT - br x4 -ENDPROC(__kvm_hyp_teardown) - el1_sync: // Guest trapped into EL2 stp x0, x1, [sp, #-16]! @@ -87,10 +72,24 @@ alternative_endif /* Here, we're pretty sure the host called HVC. */ ldp x0, x1, [sp], #16 - cmp x0, #HVC_GET_VECTORS - b.ne 1f - mrs x0, vbar_el2 - b 2f + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.hs 1f + + /* + * Compute the idmap address of __kvm_handle_stub_hvc and + * jump there. Since we use kimage_voffset, do not use the + * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead + * (by loading it from the constant pool). + * + * Preserve x0-x4, which may contain stub parameters. + */ + ldr x5, =__kvm_handle_stub_hvc + ldr_l x6, kimage_voffset + + /* x5 = __pa(x5) */ + sub x5, x5, x6 + br x5 1: /* @@ -99,7 +98,7 @@ alternative_endif kern_hyp_va x0 do_el2_call -2: eret + eret el1_trap: /* diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index e8e7ba2bc11f..73464a96c365 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -18,14 +18,62 @@ #include <asm/kvm_hyp.h> #include <asm/tlbflush.h> +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +{ + u64 val; + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. 
+ */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); +} + +static hyp_alternate_select(__tlb_switch_to_guest, + __tlb_switch_to_guest_nvhe, + __tlb_switch_to_guest_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); +} + +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__tlb_switch_to_host, + __tlb_switch_to_host_nvhe, + __tlb_switch_to_host_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); /* * We could do so much better if we had the VA as well. @@ -46,7 +94,29 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. 
To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. + */ + if (!has_vhe() && icache_is_vpipt()) + __flush_icache_all(); + + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -55,14 +125,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalls12e1is); dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -70,14 +139,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalle1); dsb(nsh); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_flush_vm_context(void) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d9e9697de1b2..561badf93de8 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -60,7 +60,7 @@ static bool cpu_has_32bit_el1(void) { u64 pfr0; - pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return !!(pfr0 & 0x20); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0e26f8c2b56f..efbe9e8e7a78 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -55,6 +55,15 @@ * 64bit interface. 
*/ +static bool read_from_write_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n"); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); + return false; +} + /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ static u32 cache_levels; @@ -460,35 +469,35 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_sys_reg(vcpu, PMCR_EL0) = val; } -static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu) +static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); - return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu)); + if (!enabled) + kvm_inject_undefined(vcpu); + + return !enabled; } -static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu) +static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_EN); +} - return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN)) - || vcpu_mode_priv(vcpu)); +static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu) +{ + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN); } static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); - - return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN)) - || vcpu_mode_priv(vcpu)); + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN); } static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); - - return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN)) - || vcpu_mode_priv(vcpu)); + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN); } static bool access_pmcr(struct 
kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -567,8 +576,10 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) pmcr = vcpu_sys_reg(vcpu, PMCR_EL0); val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; - if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) + if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) { + kvm_inject_undefined(vcpu); return false; + } return true; } @@ -707,8 +718,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); - if (!vcpu_mode_priv(vcpu)) + if (!vcpu_mode_priv(vcpu)) { + kvm_inject_undefined(vcpu); return false; + } if (p->is_write) { u64 val = p->regval & mask; @@ -759,16 +772,15 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); + if (!p->is_write) + return read_from_write_only(vcpu, p); + if (pmu_write_swinc_el0_disabled(vcpu)) return false; - if (p->is_write) { - mask = kvm_pmu_valid_counter_mask(vcpu); - kvm_pmu_software_increment(vcpu, p->regval & mask); - return true; - } - - return false; + mask = kvm_pmu_valid_counter_mask(vcpu); + kvm_pmu_software_increment(vcpu, p->regval & mask); + return true; } static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -778,8 +790,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return trap_raz_wi(vcpu, p, r); if (p->is_write) { - if (!vcpu_mode_priv(vcpu)) + if (!vcpu_mode_priv(vcpu)) { + kvm_inject_undefined(vcpu); return false; + } vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval & ARMV8_PMU_USERENR_MASK; @@ -793,31 +807,23 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ - /* DBGBVRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ + { SYS_DESC(SYS_DBGBVRn_EL1(n)), 
\ trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ - /* DBGBCRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ + { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ - /* DBGWVRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ + { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ - /* DBGWCRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ + { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ - /* PMEVCNTRn_EL0 */ \ - { Op0(0b11), Op1(0b011), CRn(0b1110), \ - CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + { SYS_DESC(SYS_PMEVCNTRn_EL0(n)), \ access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ #define PMU_PMEVTYPER_EL0(n) \ - /* PMEVTYPERn_EL0 */ \ - { Op0(0b11), Op1(0b011), CRn(0b1110), \ - CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } static bool access_cntp_tval(struct kvm_vcpu *vcpu, @@ -887,24 +893,14 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu, * more demanding guest... 
*/ static const struct sys_reg_desc sys_reg_descs[] = { - /* DC ISW */ - { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010), - access_dcsw }, - /* DC CSW */ - { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010), - access_dcsw }, - /* DC CISW */ - { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), - access_dcsw }, + { SYS_DESC(SYS_DC_ISW), access_dcsw }, + { SYS_DESC(SYS_DC_CSW), access_dcsw }, + { SYS_DESC(SYS_DC_CISW), access_dcsw }, DBG_BCR_BVR_WCR_WVR_EL1(0), DBG_BCR_BVR_WCR_WVR_EL1(1), - /* MDCCINT_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), - trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, - /* MDSCR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), - trap_debug_regs, reset_val, MDSCR_EL1, 0 }, + { SYS_DESC(SYS_MDCCINT_EL1), trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, + { SYS_DESC(SYS_MDSCR_EL1), trap_debug_regs, reset_val, MDSCR_EL1, 0 }, DBG_BCR_BVR_WCR_WVR_EL1(2), DBG_BCR_BVR_WCR_WVR_EL1(3), DBG_BCR_BVR_WCR_WVR_EL1(4), @@ -920,179 +916,77 @@ static const struct sys_reg_desc sys_reg_descs[] = { DBG_BCR_BVR_WCR_WVR_EL1(14), DBG_BCR_BVR_WCR_WVR_EL1(15), - /* MDRAR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - trap_raz_wi }, - /* OSLAR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), - trap_raz_wi }, - /* OSLSR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), - trap_oslsr_el1 }, - /* OSDLR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), - trap_raz_wi }, - /* DBGPRCR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), - trap_raz_wi }, - /* DBGCLAIMSET_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), - trap_raz_wi }, - /* DBGCLAIMCLR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), - trap_raz_wi }, - /* DBGAUTHSTATUS_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), - trap_dbgauthstatus_el1 }, - - 
/* MDCCSR_EL1 */ - { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), - trap_raz_wi }, - /* DBGDTR_EL0 */ - { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), - trap_raz_wi }, - /* DBGDTR[TR]X_EL0 */ - { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), - trap_raz_wi }, - - /* DBGVCR32_EL2 */ - { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), - NULL, reset_val, DBGVCR32_EL2, 0 }, - - /* MPIDR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), - NULL, reset_mpidr, MPIDR_EL1 }, - /* SCTLR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, - /* CPACR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), - NULL, reset_val, CPACR_EL1, 0 }, - /* TTBR0_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), - access_vm_reg, reset_unknown, TTBR0_EL1 }, - /* TTBR1_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), - access_vm_reg, reset_unknown, TTBR1_EL1 }, - /* TCR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), - access_vm_reg, reset_val, TCR_EL1, 0 }, - - /* AFSR0_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), - access_vm_reg, reset_unknown, AFSR0_EL1 }, - /* AFSR1_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), - access_vm_reg, reset_unknown, AFSR1_EL1 }, - /* ESR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), - access_vm_reg, reset_unknown, ESR_EL1 }, - /* FAR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), - access_vm_reg, reset_unknown, FAR_EL1 }, - /* PAR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000), - NULL, reset_unknown, PAR_EL1 }, - - /* PMINTENSET_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), - access_pminten, reset_unknown, PMINTENSET_EL1 }, - /* PMINTENCLR_EL1 */ - { Op0(0b11), Op1(0b000), 
CRn(0b1001), CRm(0b1110), Op2(0b010), - access_pminten, NULL, PMINTENSET_EL1 }, - - /* MAIR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), - access_vm_reg, reset_unknown, MAIR_EL1 }, - /* AMAIR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), - access_vm_reg, reset_amair_el1, AMAIR_EL1 }, - - /* VBAR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), - NULL, reset_val, VBAR_EL1, 0 }, - - /* ICC_SGI1R_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101), - access_gic_sgi }, - /* ICC_SRE_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101), - access_gic_sre }, - - /* CONTEXTIDR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), - access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, - /* TPIDR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), - NULL, reset_unknown, TPIDR_EL1 }, - - /* CNTKCTL_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000), - NULL, reset_val, CNTKCTL_EL1, 0}, - - /* CSSELR_EL1 */ - { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, CSSELR_EL1 }, - - /* PMCR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), - access_pmcr, reset_pmcr, }, - /* PMCNTENSET_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), - access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - /* PMCNTENCLR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), - access_pmcnten, NULL, PMCNTENSET_EL0 }, - /* PMOVSCLR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), - access_pmovs, NULL, PMOVSSET_EL0 }, - /* PMSWINC_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), - access_pmswinc, reset_unknown, PMSWINC_EL0 }, - /* PMSELR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), - access_pmselr, reset_unknown, PMSELR_EL0 }, - /* PMCEID0_EL0 */ - { Op0(0b11), Op1(0b011), 
CRn(0b1001), CRm(0b1100), Op2(0b110), - access_pmceid }, - /* PMCEID1_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), - access_pmceid }, - /* PMCCNTR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), - access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, - /* PMXEVTYPER_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), - access_pmu_evtyper }, - /* PMXEVCNTR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), - access_pmu_evcntr }, - /* PMUSERENR_EL0 - * This register resets as unknown in 64bit mode while it resets as zero + { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 }, + { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGCLAIMCLR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGAUTHSTATUS_EL1), trap_dbgauthstatus_el1 }, + + { SYS_DESC(SYS_MDCCSR_EL0), trap_raz_wi }, + { SYS_DESC(SYS_DBGDTR_EL0), trap_raz_wi }, + // DBGDTR[TR]X_EL0 share the same encoding + { SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi }, + + { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 }, + + { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 }, + { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, + { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, + { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, + { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, + { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, + + { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 }, + { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, + { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, + { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, + { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, 
PAR_EL1 }, + + { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, + { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 }, + + { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, + { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, + + { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, + + { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, + + { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, + { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 }, + + { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, + + { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 }, + + { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, + { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, + { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, + { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, + { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, + { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, + { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, + { SYS_DESC(SYS_PMCEID1_EL0), access_pmceid }, + { SYS_DESC(SYS_PMCCNTR_EL0), access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, + { SYS_DESC(SYS_PMXEVTYPER_EL0), access_pmu_evtyper }, + { SYS_DESC(SYS_PMXEVCNTR_EL0), access_pmu_evcntr }, + /* + * PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. 
*/ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), - access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, - /* PMOVSSET_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), - access_pmovs, reset_unknown, PMOVSSET_EL0 }, - - /* TPIDR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), - NULL, reset_unknown, TPIDR_EL0 }, - /* TPIDRRO_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), - NULL, reset_unknown, TPIDRRO_EL0 }, - - /* CNTP_TVAL_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b000), - access_cntp_tval }, - /* CNTP_CTL_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b001), - access_cntp_ctl }, - /* CNTP_CVAL_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b010), - access_cntp_cval }, + { SYS_DESC(SYS_PMUSERENR_EL0), access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, + { SYS_DESC(SYS_PMOVSSET_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, + + { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, + { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, + + { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval }, + { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl }, + { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval }, /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -1158,22 +1052,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { PMU_PMEVTYPER_EL0(28), PMU_PMEVTYPER_EL0(29), PMU_PMEVTYPER_EL0(30), - /* PMCCFILTR_EL0 - * This register resets as unknown in 64bit mode while it resets as zero + /* + * PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. 
*/ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111), - access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, - - /* DACR32_EL2 */ - { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, DACR32_EL2 }, - /* IFSR32_EL2 */ - { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001), - NULL, reset_unknown, IFSR32_EL2 }, - /* FPEXC32_EL2 */ - { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000), - NULL, reset_val, FPEXC32_EL2, 0x70 }, + { SYS_DESC(SYS_PMCCFILTR_EL0), access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, + + { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, + { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, + { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 }, }; static bool trap_dbgidr(struct kvm_vcpu *vcpu, @@ -1183,8 +1070,8 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); - u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); + u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); + u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT); p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | @@ -1557,6 +1444,22 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void perform_access(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, + const struct sys_reg_desc *r) +{ + /* + * Not having an accessor means that we have configured a trap + * that we don't know how to handle. This certainly qualifies + * as a gross bug that should be fixed right away. 
+ */ + BUG_ON(!r->access); + + /* Skip instruction if instructed so */ + if (likely(r->access(vcpu, params, r))) + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); +} + /* * emulate_cp -- tries to match a sys_reg access in a handling table, and * call the corresponding trap handler. @@ -1580,20 +1483,8 @@ static int emulate_cp(struct kvm_vcpu *vcpu, r = find_reg(params, table, num); if (r) { - /* - * Not having an accessor means that we have - * configured a trap that we don't know how to - * handle. This certainly qualifies as a gross bug - * that should be fixed right away. - */ - BUG_ON(!r->access); - - if (likely(r->access(vcpu, params, r))) { - /* Skip instruction, since it was emulated */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - /* Handled */ - return 0; - } + perform_access(vcpu, params, r); + return 0; } /* Not handled */ @@ -1660,20 +1551,25 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, params.regval |= vcpu_get_reg(vcpu, Rt2) << 32; } - if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) - goto out; - if (!emulate_cp(vcpu, ¶ms, global, nr_global)) - goto out; - - unhandled_cp_access(vcpu, ¶ms); + /* + * Try to emulate the coprocessor access using the target + * specific table first, and using the global table afterwards. + * If either of the tables contains a handler, handle the + * potential register operation in the case of a read and return + * with success. 
+ */ + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific) || + !emulate_cp(vcpu, ¶ms, global, nr_global)) { + /* Split up the value between registers for the read side */ + if (!params.is_write) { + vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); + vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval)); + } -out: - /* Split up the value between registers for the read side */ - if (!params.is_write) { - vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); - vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval)); + return 1; } + unhandled_cp_access(vcpu, ¶ms); return 1; } @@ -1763,26 +1659,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); if (likely(r)) { - /* - * Not having an accessor means that we have - * configured a trap that we don't know how to - * handle. This certainly qualifies as a gross bug - * that should be fixed right away. - */ - BUG_ON(!r->access); - - if (likely(r->access(vcpu, params, r))) { - /* Skip instruction, since it was emulated */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return 1; - } - /* If access function fails, it should complain. 
*/ + perform_access(vcpu, params, r); } else { kvm_err("Unsupported guest sys_reg access at: %lx\n", *vcpu_pc(vcpu)); print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); } - kvm_inject_undefined(vcpu); return 1; } @@ -1932,44 +1815,25 @@ FUNCTION_INVARIANT(aidr_el1) /* ->val is filled in by kvm_sys_reg_table_init() */ static struct sys_reg_desc invariant_sys_regs[] = { - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000), - NULL, get_midr_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110), - NULL, get_revidr_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000), - NULL, get_id_pfr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001), - NULL, get_id_pfr1_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010), - NULL, get_id_dfr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011), - NULL, get_id_afr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100), - NULL, get_id_mmfr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101), - NULL, get_id_mmfr1_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110), - NULL, get_id_mmfr2_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111), - NULL, get_id_mmfr3_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), - NULL, get_id_isar0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001), - NULL, get_id_isar1_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), - NULL, get_id_isar2_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011), - NULL, get_id_isar3_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100), - NULL, get_id_isar4_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101), - NULL, get_id_isar5_el1 }, - { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001), - NULL, get_clidr_el1 }, - { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), 
Op2(0b111), - NULL, get_aidr_el1 }, - { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001), - NULL, get_ctr_el0 }, + { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, + { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 }, + { SYS_DESC(SYS_ID_PFR0_EL1), NULL, get_id_pfr0_el1 }, + { SYS_DESC(SYS_ID_PFR1_EL1), NULL, get_id_pfr1_el1 }, + { SYS_DESC(SYS_ID_DFR0_EL1), NULL, get_id_dfr0_el1 }, + { SYS_DESC(SYS_ID_AFR0_EL1), NULL, get_id_afr0_el1 }, + { SYS_DESC(SYS_ID_MMFR0_EL1), NULL, get_id_mmfr0_el1 }, + { SYS_DESC(SYS_ID_MMFR1_EL1), NULL, get_id_mmfr1_el1 }, + { SYS_DESC(SYS_ID_MMFR2_EL1), NULL, get_id_mmfr2_el1 }, + { SYS_DESC(SYS_ID_MMFR3_EL1), NULL, get_id_mmfr3_el1 }, + { SYS_DESC(SYS_ID_ISAR0_EL1), NULL, get_id_isar0_el1 }, + { SYS_DESC(SYS_ID_ISAR1_EL1), NULL, get_id_isar1_el1 }, + { SYS_DESC(SYS_ID_ISAR2_EL1), NULL, get_id_isar2_el1 }, + { SYS_DESC(SYS_ID_ISAR3_EL1), NULL, get_id_isar3_el1 }, + { SYS_DESC(SYS_ID_ISAR4_EL1), NULL, get_id_isar4_el1 }, + { SYS_DESC(SYS_ID_ISAR5_EL1), NULL, get_id_isar5_el1 }, + { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 }, + { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 }, + { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 }, }; static int reg_from_user(u64 *val, const void __user *uaddr, u64 id) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 9c6ffd0f0196..060f5348ef25 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -83,24 +83,6 @@ static inline bool read_zero(struct kvm_vcpu *vcpu, return true; } -static inline bool write_to_read_only(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) -{ - kvm_debug("sys_reg write to read-only register at: %lx\n", - *vcpu_pc(vcpu)); - print_sys_reg_instr(params); - return false; -} - -static inline bool read_from_write_only(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) -{ - kvm_debug("sys_reg read to write-only register at: %lx\n", - *vcpu_pc(vcpu)); - print_sys_reg_instr(params); - return false; -} - /* Reset 
functions */ static inline void reset_unknown(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -147,4 +129,9 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, #define CRm(_x) .CRm = _x #define Op2(_x) .Op2 = _x +#define SYS_DESC(reg) \ + Op0(sys_reg_Op0(reg)), Op1(sys_reg_Op1(reg)), \ + CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \ + Op2(sys_reg_Op2(reg)) + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 46af7186bca6..969ade1d333d 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -52,9 +52,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 */ static const struct sys_reg_desc genericv8_sys_regs[] = { - /* ACTLR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), - access_actlr, reset_actlr, ACTLR_EL1 }, + { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, }; static const struct sys_reg_desc genericv8_cp15_regs[] = { diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 47184c3a97da..b24a830419ad 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -64,14 +64,14 @@ .endm end .req x5 -ENTRY(__copy_in_user) +ENTRY(raw_copy_in_user) uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 mov x0, #0 ret -ENDPROC(__copy_in_user) +ENDPROC(raw_copy_in_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 68634c630cdd..ab9f5f0fb2c7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -119,9 +119,6 @@ static void flush_context(unsigned int cpu) /* Queue a TLB invalidate and flush the I-cache if necessary. 
*/ cpumask_setall(&tlb_flush_pending); - - if (icache_is_aivivt()) - __flush_icache_all(); } static bool check_update_reserved_asid(u64 asid, u64 newasid) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 81cdb2e844ed..4dac4afc95a5 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -308,24 +308,15 @@ static void __swiotlb_sync_sg_for_device(struct device *dev, sg->length, dir); } -static int __swiotlb_mmap(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, + unsigned long pfn, size_t size) { int ret = -ENXIO; unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); - - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { ret = remap_pfn_range(vma, vma->vm_start, pfn + off, @@ -336,19 +327,43 @@ static int __swiotlb_mmap(struct device *dev, return ret; } -static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) +static int __swiotlb_mmap(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + int ret; + unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + is_device_dma_coherent(dev)); + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + return __swiotlb_mmap_pfn(vma, pfn, size); +} + +static int __swiotlb_get_sgtable_page(struct sg_table *sgt, + struct page *page, size_t 
size) { int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); if (!ret) - sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)), - PAGE_ALIGN(size), 0); + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); return ret; } +static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs) +{ + struct page *page = phys_to_page(dma_to_phys(dev, handle)); + + return __swiotlb_get_sgtable_page(sgt, page, size); +} + static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) { if (swiotlb) @@ -584,20 +599,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, */ gfp |= __GFP_ZERO; - if (gfpflags_allow_blocking(gfp)) { - struct page **pages; - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); - - pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, - handle, flush_page); - if (!pages) - return NULL; - - addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, - __builtin_return_address(0)); - if (!addr) - iommu_dma_free(dev, pages, iosize, handle); - } else { + if (!gfpflags_allow_blocking(gfp)) { struct page *page; /* * In atomic context we can't remap anything, so we'll only @@ -621,6 +623,45 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, __free_from_pool(addr, size); addr = NULL; } + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page *page; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size), gfp); + if (!page) + return NULL; + + *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); + if (iommu_dma_mapping_error(dev, *handle)) { + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + return NULL; + } + if (!coherent) + __dma_flush_area(page_to_virt(page), iosize); + + addr = dma_common_contiguous_remap(page, size, VM_USERMAP, + prot, + __builtin_return_address(0)); + if (!addr) { + iommu_dma_unmap_page(dev, 
*handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + } + } else { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page **pages; + + pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, + handle, flush_page); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + __builtin_return_address(0)); + if (!addr) + iommu_dma_free(dev, pages, iosize, handle); } return addr; } @@ -632,7 +673,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); /* - * @cpu_addr will be one of 3 things depending on how it was allocated: + * @cpu_addr will be one of 4 things depending on how it was allocated: + * - A remapped array of pages for contiguous allocations. * - A remapped array of pages from iommu_dma_alloc(), for all * non-atomic allocations. * - A non-cacheable alias from the atomic pool, for atomic @@ -644,6 +686,12 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, if (__in_atomic_pool(cpu_addr, size)) { iommu_dma_unmap_page(dev, handle, iosize, 0, 0); __free_from_pool(cpu_addr, size); + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + struct page *page = vmalloc_to_page(cpu_addr); + + iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); } else if (is_vmalloc_addr(cpu_addr)){ struct vm_struct *area = find_vm_area(cpu_addr); @@ -670,6 +718,15 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + /* + * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, + * hence in the vmalloc space. 
+ */ + unsigned long pfn = vmalloc_to_pfn(cpu_addr); + return __swiotlb_mmap_pfn(vma, pfn, size); + } + area = find_vm_area(cpu_addr); if (WARN_ON(!area || !area->pages)) return -ENXIO; @@ -684,6 +741,15 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct vm_struct *area = find_vm_area(cpu_addr); + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + /* + * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, + * hence in the vmalloc space. + */ + struct page *page = vmalloc_to_page(cpu_addr); + return __swiotlb_get_sgtable_page(sgt, page, size); + } + if (WARN_ON(!area || !area->pages)) return -ENXIO; @@ -977,4 +1043,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->archdata.dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); + +#ifdef CONFIG_XEN + if (xen_initial_domain()) { + dev->archdata.dev_dma_ops = dev->dma_ops; + dev->dma_ops = xen_dma_ops; + } +#endif } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4bf899fb451b..37b95dff0b07 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -42,7 +42,20 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> -static const char *fault_name(unsigned int esr); +struct fault_info { + int (*fn)(unsigned long addr, unsigned int esr, + struct pt_regs *regs); + int sig; + int code; + const char *name; +}; + +static const struct fault_info fault_info[]; + +static inline const struct fault_info *esr_to_fault_info(unsigned int esr) +{ + return fault_info + (esr & 63); +} #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) @@ -161,12 +174,33 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } +static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, + unsigned long addr) +{ + unsigned int ec = ESR_ELx_EC(esr); + unsigned int fsc_type = esr & 
ESR_ELx_FSC_TYPE; + + if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) + return false; + + if (fsc_type == ESR_ELx_FSC_PERM) + return true; + + if (addr < USER_DS && system_uses_ttbr0_pan()) + return fsc_type == ESR_ELx_FSC_FAULT && + (regs->pstate & PSR_PAN_BIT); + + return false; +} + /* * The kernel tried to access some page that wasn't present. */ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int esr, struct pt_regs *regs) { + const char *msg; + /* * Are we prepared to handle this kernel fault? * We are almost certainly not prepared to handle instruction faults. @@ -178,9 +212,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, * No handler, we'll have to terminate things with extreme prejudice. */ bust_spinlocks(1); - pr_alert("Unable to handle kernel %s at virtual address %08lx\n", - (addr < PAGE_SIZE) ? "NULL pointer dereference" : - "paging request", addr); + + if (is_permission_fault(esr, regs, addr)) { + if (esr & ESR_ELx_WNR) + msg = "write to read-only memory"; + else + msg = "read from unreadable memory"; + } else if (addr < PAGE_SIZE) { + msg = "NULL pointer dereference"; + } else { + msg = "paging request"; + } + + pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, + addr); show_pte(mm, addr); die("Oops", regs, esr); @@ -197,10 +242,12 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, struct pt_regs *regs) { struct siginfo si; + const struct fault_info *inf; if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { + inf = esr_to_fault_info(esr); pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", - tsk->comm, task_pid_nr(tsk), fault_name(esr), sig, + tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); show_pte(tsk->mm, addr); show_regs(regs); @@ -219,14 +266,16 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re { struct task_struct *tsk = current; struct mm_struct *mm = 
tsk->active_mm; + const struct fault_info *inf; /* * If we are in kernel mode at this point, we have no context to * handle this fault with. */ - if (user_mode(regs)) - __do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs); - else + if (user_mode(regs)) { + inf = esr_to_fault_info(esr); + __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); + } else __do_kernel_fault(mm, addr, esr, regs); } @@ -270,21 +319,6 @@ out: return fault; } -static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs) -{ - unsigned int ec = ESR_ELx_EC(esr); - unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; - - if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) - return false; - - if (system_uses_ttbr0_pan()) - return fsc_type == ESR_ELx_FSC_FAULT && - (regs->pstate & PSR_PAN_BIT); - else - return fsc_type == ESR_ELx_FSC_PERM; -} - static bool is_el0_instruction_abort(unsigned int esr) { return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; @@ -322,7 +356,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < USER_DS && is_permission_fault(esr, regs)) { + if (addr < USER_DS && is_permission_fault(esr, regs, addr)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); @@ -488,12 +522,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static const struct fault_info { - int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); - int sig; - int code; - const char *name; -} fault_info[] = { +static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "ttbr address size fault" }, { do_bad, SIGBUS, 0, "level 1 address size fault" }, { do_bad, SIGBUS, 0, "level 2 address size fault" }, @@ -560,19 +589,13 @@ static const struct fault_info { { do_bad, SIGBUS, 0, "unknown 63" }, }; -static const char *fault_name(unsigned int 
esr) -{ - const struct fault_info *inf = fault_info + (esr & 63); - return inf->name; -} - /* * Dispatch a data abort to the relevant handler. */ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = fault_info + (esr & 63); + const struct fault_info *inf = esr_to_fault_info(esr); struct siginfo info; if (!inf->fn(addr, esr, regs)) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 554a2558c12e..21a8d828cbf4 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -22,7 +22,7 @@ #include <linux/pagemap.h> #include <asm/cacheflush.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/tlbflush.h> void sync_icache_aliases(void *kaddr, unsigned long len) @@ -65,8 +65,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); - else if (icache_is_aivivt()) - __flush_icache_all(); } /* diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index e25584d72396..7514a000e361 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -294,10 +294,6 @@ static __init int setup_hugepagesz(char *opt) hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else if (ps == (PAGE_SIZE * CONT_PTES)) { - hugetlb_add_hstate(CONT_PTE_SHIFT); - } else if (ps == (PMD_SIZE * CONT_PMDS)) { - hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { hugetlb_bad_size(); pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); @@ -306,13 +302,3 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); - -#ifdef CONFIG_ARM64_64K_PAGES -static __init int add_default_hugepagesz(void) -{ - if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) - hugetlb_add_hstate(CONT_PTE_SHIFT); - return 
0; -} -arch_initcall(add_default_hugepagesz); -#endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e19e06593e37..5960bef0170d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include <linux/gfp.h> #include <linux/memblock.h> #include <linux/sort.h> +#include <linux/of.h> #include <linux/of_fdt.h> #include <linux/dma-mapping.h> #include <linux/dma-contiguous.h> @@ -37,6 +38,8 @@ #include <linux/swiotlb.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> #include <asm/boot.h> #include <asm/fixmap.h> @@ -77,6 +80,142 @@ static int __init early_initrd(char *p) early_param("initrd", early_initrd); #endif +#ifdef CONFIG_KEXEC_CORE +/* + * reserve_crashkernel() - reserves memory for crash kernel + * + * This function reserves memory area given in "crashkernel=" kernel command + * line parameter. The memory reserved is used by dump capture kernel when + * primary kernel is crashing. + */ +static void __init reserve_crashkernel(void) +{ + unsigned long long crash_base, crash_size; + int ret; + + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + &crash_size, &crash_base); + /* no crashkernel= or invalid value specified */ + if (ret || !crash_size) + return; + + crash_size = PAGE_ALIGN(crash_size); + + if (crash_base == 0) { + /* Current arm64 boot protocol requires 2MB alignment */ + crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, + crash_size, SZ_2M); + if (crash_base == 0) { + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", + crash_size); + return; + } + } else { + /* User specifies base address explicitly. 
*/ + if (!memblock_is_region_memory(crash_base, crash_size)) { + pr_warn("cannot reserve crashkernel: region is not memory\n"); + return; + } + + if (memblock_is_region_reserved(crash_base, crash_size)) { + pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n"); + return; + } + + if (!IS_ALIGNED(crash_base, SZ_2M)) { + pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); + return; + } + } + memblock_reserve(crash_base, crash_size); + + pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", + crash_base, crash_base + crash_size, crash_size >> 20); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} + +static void __init kexec_reserve_crashkres_pages(void) +{ +#ifdef CONFIG_HIBERNATION + phys_addr_t addr; + struct page *page; + + if (!crashk_res.end) + return; + + /* + * To reduce the size of hibernation image, all the pages are + * marked as Reserved initially. + */ + for (addr = crashk_res.start; addr < (crashk_res.end + 1); + addr += PAGE_SIZE) { + page = phys_to_page(addr); + SetPageReserved(page); + } +#endif +} +#else +static void __init reserve_crashkernel(void) +{ +} + +static void __init kexec_reserve_crashkres_pages(void) +{ +} +#endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_CRASH_DUMP +static int __init early_init_dt_scan_elfcorehdr(unsigned long node, + const char *uname, int depth, void *data) +{ + const __be32 *reg; + int len; + + if (depth != 1 || strcmp(uname, "chosen") != 0) + return 0; + + reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len); + if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) + return 1; + + elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, &reg); + elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, &reg); + + return 1; +} + +/* + * reserve_elfcorehdr() - reserves memory for elf core header + * + * This function reserves the memory occupied by an elf core header + * described in the device tree.
This region contains all the + * information about primary kernel's core image and is used by a dump + * capture kernel to access the system memory on primary kernel. + */ +static void __init reserve_elfcorehdr(void) +{ + of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL); + + if (!elfcorehdr_size) + return; + + if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) { + pr_warn("elfcorehdr is overlapped\n"); + return; + } + + memblock_reserve(elfcorehdr_addr, elfcorehdr_size); + + pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n", + elfcorehdr_size >> 10, elfcorehdr_addr); +} +#else +static void __init reserve_elfcorehdr(void) +{ +} +#endif /* CONFIG_CRASH_DUMP */ /* * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It * currently assumes that for memory starting above 4G, 32-bit devices will @@ -188,10 +327,45 @@ static int __init early_mem(char *p) } early_param("mem", early_mem); +static int __init early_init_dt_scan_usablemem(unsigned long node, + const char *uname, int depth, void *data) +{ + struct memblock_region *usablemem = data; + const __be32 *reg; + int len; + + if (depth != 1 || strcmp(uname, "chosen") != 0) + return 0; + + reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len); + if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells))) + return 1; + + usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg); + usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg); + + return 1; +} + +static void __init fdt_enforce_memory_region(void) +{ + struct memblock_region reg = { + .size = 0, + }; + + of_scan_flat_dt(early_init_dt_scan_usablemem, &reg); + + if (reg.size) + memblock_cap_memory_range(reg.base, reg.size); +} + void __init arm64_memblock_init(void) { const s64 linear_region_size = -(s64)PAGE_OFFSET; + /* Handle linux,usable-memory-range property */ + fdt_enforce_memory_region(); + /* * Ensure that the linear region takes up exactly half of the kernel * virtual address space.
This way, we can distinguish a linear address @@ -297,6 +471,11 @@ void __init arm64_memblock_init(void) arm64_dma_phys_limit = max_zone_dma_phys(); else arm64_dma_phys_limit = PHYS_MASK + 1; + + reserve_crashkernel(); + + reserve_elfcorehdr(); + dma_contiguous_reserve(arm64_dma_phys_limit); memblock_allow_resize(); @@ -416,6 +595,8 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_all_bootmem(); + kexec_reserve_crashkres_pages(); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 55d1e9205543..687a358a3733 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -162,7 +162,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); vmemmap_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(_text))); + pfn_to_nid(virt_to_pfn(lm_alias(_text)))); /* * vmemmap_populate() has populated the shadow region that covers the diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d28dbcf596b6..0c429ec6fde8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -22,6 +22,8 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> +#include <linux/ioport.h> +#include <linux/kexec.h> #include <linux/libfdt.h> #include <linux/mman.h> #include <linux/nodemask.h> @@ -43,6 +45,9 @@ #include <asm/mmu_context.h> #include <asm/ptdump.h> +#define NO_BLOCK_MAPPINGS BIT(0) +#define NO_CONT_MAPPINGS BIT(1) + u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 kimage_voffset __ro_after_init; @@ -103,33 +108,27 @@ static bool pgattr_change_is_safe(u64 old, u64 new) */ static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE; - return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0; + /* creating or taking down mappings is always safe */ + if (old == 0 || new == 0) + return true; + + /* live contiguous mappings may not be manipulated at all */ + if ((old | new) & 
PTE_CONT) + return false; + + return ((old ^ new) & ~mask) == 0; } -static void alloc_init_pte(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long pfn, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) +static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot) { pte_t *pte; - BUG_ON(pmd_sect(*pmd)); - if (pmd_none(*pmd)) { - phys_addr_t pte_phys; - BUG_ON(!pgtable_alloc); - pte_phys = pgtable_alloc(); - pte = pte_set_fixmap(pte_phys); - __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); - pte_clear_fixmap(); - } - BUG_ON(pmd_bad(*pmd)); - pte = pte_set_fixmap_offset(pmd, addr); do { pte_t old_pte = *pte; - set_pte(pte, pfn_pte(pfn, prot)); - pfn++; + set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot)); /* * After the PTE entry has been populated once, we @@ -137,32 +136,51 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, */ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte))); + phys += PAGE_SIZE; } while (pte++, addr += PAGE_SIZE, addr != end); pte_clear_fixmap(); } -static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) +static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + int flags) { - pmd_t *pmd; unsigned long next; - /* - * Check for initial section mappings in the pgd/pud and remove them. 
- */ - BUG_ON(pud_sect(*pud)); - if (pud_none(*pud)) { - phys_addr_t pmd_phys; + BUG_ON(pmd_sect(*pmd)); + if (pmd_none(*pmd)) { + phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); - pmd_phys = pgtable_alloc(); - pmd = pmd_set_fixmap(pmd_phys); - __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); - pmd_clear_fixmap(); + pte_phys = pgtable_alloc(); + __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); } - BUG_ON(pud_bad(*pud)); + BUG_ON(pmd_bad(*pmd)); + + do { + pgprot_t __prot = prot; + + next = pte_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + init_pte(pmd, addr, next, phys, __prot); + + phys += next - addr; + } while (addr = next, addr != end); +} + +static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), int flags) +{ + unsigned long next; + pmd_t *pmd; pmd = pmd_set_fixmap_offset(pud, addr); do { @@ -172,7 +190,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - !page_mappings_only) { + (flags & NO_BLOCK_MAPPINGS) == 0) { pmd_set_huge(pmd, phys, prot); /* @@ -182,8 +200,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), pmd_val(*pmd))); } else { - alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, pgtable_alloc); + alloc_init_cont_pte(pmd, addr, next, phys, prot, + pgtable_alloc, flags); BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != pmd_val(*pmd)); @@ -194,6 +212,41 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, pmd_clear_fixmap(); } +static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + 
phys_addr_t (*pgtable_alloc)(void), int flags) +{ + unsigned long next; + + /* + * Check for initial section mappings in the pgd/pud. + */ + BUG_ON(pud_sect(*pud)); + if (pud_none(*pud)) { + phys_addr_t pmd_phys; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(); + __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); + } + BUG_ON(pud_bad(*pud)); + + do { + pgprot_t __prot = prot; + + next = pmd_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags); + + phys += next - addr; + } while (addr = next, addr != end); +} + static inline bool use_1G_block(unsigned long addr, unsigned long next, unsigned long phys) { @@ -209,7 +262,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) + int flags) { pud_t *pud; unsigned long next; @@ -231,7 +284,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && !page_mappings_only) { + if (use_1G_block(addr, next, phys) && + (flags & NO_BLOCK_MAPPINGS) == 0) { pud_set_huge(pud, phys, prot); /* @@ -241,8 +295,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), pud_val(*pud))); } else { - alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc, page_mappings_only); + alloc_init_cont_pmd(pud, addr, next, phys, prot, + pgtable_alloc, flags); BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != pud_val(*pud)); @@ -257,7 +311,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, 
unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) + int flags) { unsigned long addr, length, end, next; pgd_t *pgd = pgd_offset_raw(pgdir, virt); @@ -277,7 +331,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, do { next = pgd_addr_end(addr, end); alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, - page_mappings_only); + flags); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -306,82 +360,80 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only) { + int flags = 0; + BUG_ON(mm == &init_mm); + if (page_mappings_only) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, page_mappings_only); + pgd_pgtable_alloc, flags); } -static void create_mapping_late(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) +static void update_mapping_prot(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { - pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL, debug_pagealloc_enabled()); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + + /* flush the TLBs after updating live kernel mappings */ + flush_tlb_kernel_range(virt, virt + size); } -static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) +static void 
__init __map_memblock(pgd_t *pgd, phys_addr_t start, + phys_addr_t end, pgprot_t prot, int flags) { - phys_addr_t kernel_start = __pa_symbol(_text); - phys_addr_t kernel_end = __pa_symbol(__init_begin); - - /* - * Take care not to create a writable alias for the - * read-only text and rodata sections of the kernel image. - */ - - /* No overlap with the kernel text/rodata */ - if (end < kernel_start || start >= kernel_end) { - __create_pgd_mapping(pgd, start, __phys_to_virt(start), - end - start, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); - return; - } - - /* - * This block overlaps the kernel text/rodata mappings. - * Map the portion(s) which don't overlap. - */ - if (start < kernel_start) - __create_pgd_mapping(pgd, start, - __phys_to_virt(start), - kernel_start - start, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); - if (kernel_end < end) - __create_pgd_mapping(pgd, kernel_end, - __phys_to_virt(kernel_end), - end - kernel_end, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); + __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); +} +void __init mark_linear_text_alias_ro(void) +{ /* - * Map the linear alias of the [_text, __init_begin) interval as - * read-only/non-executable. This makes the contents of the - * region accessible to subsystems such as hibernate, but - * protects it from inadvertent modification or execution. 
+ * Remove the write permissions from the linear alias of .text/.rodata */ - __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), - kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc, debug_pagealloc_enabled()); + update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text), + (unsigned long)__init_begin - (unsigned long)_text, + PAGE_KERNEL_RO); } static void __init map_mem(pgd_t *pgd) { + phys_addr_t kernel_start = __pa_symbol(_text); + phys_addr_t kernel_end = __pa_symbol(__init_begin); struct memblock_region *reg; + int flags = 0; + + if (debug_pagealloc_enabled()) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + /* + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. + * So temporarily mark them as NOMAP to skip mappings in + * the following for-loop + */ + memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); +#endif /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -393,33 +445,57 @@ static void __init map_mem(pgd_t *pgd) if (memblock_is_nomap(reg)) continue; - __map_memblock(pgd, start, end); + __map_memblock(pgd, start, end, PAGE_KERNEL, flags); + } + + /* + * Map the linear alias of the [_text, __init_begin) interval + * as non-executable now, and remove the write permission in + * mark_linear_text_alias_ro() below (which will be called after + * alternative patching has completed). This makes the contents + * of the region accessible to subsystems such as hibernate, + * but protects it from inadvertent modification or execution. + * Note that contiguous mappings cannot be remapped in this way, + * so we should avoid them here. 
+ */ + __map_memblock(pgd, kernel_start, kernel_end, + PAGE_KERNEL, NO_CONT_MAPPINGS); + memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + +#ifdef CONFIG_KEXEC_CORE + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. + */ + if (crashk_res.end) { + __map_memblock(pgd, crashk_res.start, crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); } +#endif } void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)_etext - (unsigned long)_text; - create_mapping_late(__pa_symbol(_text), (unsigned long)_text, - section_size, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata * to cover NOTES and EXCEPTION_TABLE. */ section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; - create_mapping_late(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, + update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); - /* flush the TLBs after updating live kernel mappings */ - flush_tlb_all(); - debug_checkwx(); } static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, - pgprot_t prot, struct vm_struct *vma) + pgprot_t prot, struct vm_struct *vma, + int flags) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; @@ -428,7 +504,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc, debug_pagealloc_enabled()); + early_pgtable_alloc, flags); vma->addr = va_start; vma->phys_addr = pa_start; @@ -439,18 +515,39 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, 
vm_area_add_early(vma); } +static int __init parse_rodata(char *arg) +{ + return strtobool(arg, &rodata_enabled); +} +early_param("rodata", parse_rodata); + /* * Create fine-grained mappings for the kernel. */ static void __init map_kernel(pgd_t *pgd) { - static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; + static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, + vmlinux_initdata, vmlinux_data; - map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); - map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, - &vmlinux_init); - map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); + /* + * External debuggers may need to write directly to the text + * mapping to install SW breakpoints. Allow this (only) when + * explicitly requested with rodata=off. + */ + pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + + /* + * Only rodata will be remapped with different permissions later on, + * all other segments are allowed to use contiguous mappings. 
+ */ + map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0); + map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, + &vmlinux_rodata, NO_CONT_MAPPINGS); + map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, + &vmlinux_inittext, 0); + map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, + &vmlinux_initdata, 0); + map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0); if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { /* diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 8def55e7249b..a682a0a2a0fa 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -17,6 +17,7 @@ #include <linux/vmalloc.h> #include <asm/pgtable.h> +#include <asm/set_memory.h> #include <asm/tlbflush.h> struct page_change_data { @@ -125,20 +126,23 @@ int set_memory_x(unsigned long addr, int numpages) } EXPORT_SYMBOL_GPL(set_memory_x); -#ifdef CONFIG_DEBUG_PAGEALLOC -void __kernel_map_pages(struct page *page, int numpages, int enable) +int set_memory_valid(unsigned long addr, int numpages, int enable) { - unsigned long addr = (unsigned long) page_address(page); - if (enable) - __change_memory_common(addr, PAGE_SIZE * numpages, + return __change_memory_common(addr, PAGE_SIZE * numpages, __pgprot(PTE_VALID), __pgprot(0)); else - __change_memory_common(addr, PAGE_SIZE * numpages, + return __change_memory_common(addr, PAGE_SIZE * numpages, __pgprot(0), __pgprot(PTE_VALID)); } + +#ifdef CONFIG_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + set_memory_valid((unsigned long)page_address(page), numpages, enable); +} #ifdef CONFIG_HIBERNATION /* * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 7c16e547ccb2..b02a9268dfbf 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -83,6 +83,25 @@ /* Rt = Rn[0]; Rt2 = Rn[8]; Rn += 16; */ 
#define A64_POP(Rt, Rt2, Rn) A64_LS_PAIR(Rt, Rt2, Rn, 16, LOAD, POST_INDEX) +/* Load/store exclusive */ +#define A64_SIZE(sf) \ + ((sf) ? AARCH64_INSN_SIZE_64 : AARCH64_INSN_SIZE_32) +#define A64_LSX(sf, Rt, Rn, Rs, type) \ + aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \ + AARCH64_INSN_LDST_##type) +/* Rt = [Rn]; (atomic) */ +#define A64_LDXR(sf, Rt, Rn) \ + A64_LSX(sf, Rt, Rn, A64_ZR, LOAD_EX) +/* [Rn] = Rt; (atomic) Rs = [state] */ +#define A64_STXR(sf, Rt, Rn, Rs) \ + A64_LSX(sf, Rt, Rn, Rs, STORE_EX) + +/* Prefetch */ +#define A64_PRFM(Rn, type, target, policy) \ + aarch64_insn_gen_prefetch(Rn, AARCH64_INSN_PRFM_TYPE_##type, \ + AARCH64_INSN_PRFM_TARGET_##target, \ + AARCH64_INSN_PRFM_POLICY_##policy) + /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a785554916c0..c6e53580aefe 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -27,6 +27,7 @@ #include <asm/byteorder.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> +#include <asm/set_memory.h> #include "bpf_jit.h" @@ -321,6 +322,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; + const bool isdw = BPF_SIZE(code) == BPF_DW; u8 jmp_cond; s32 jmp_offset; @@ -604,15 +606,6 @@ emit_cond_jmp: const struct bpf_insn insn1 = insn[1]; u64 imm64; - if (insn1.code != 0 || insn1.src_reg != 0 || - insn1.dst_reg != 0 || insn1.off != 0) { - /* Note: verifier in BPF core must catch invalid - * instructions. 
- */ - pr_err_once("Invalid BPF_LD_IMM64 instruction\n"); - return -EINVAL; - } - imm64 = (u64)insn1.imm << 32 | (u32)imm; emit_a64_mov_i64(dst, imm64, ctx); @@ -690,7 +683,16 @@ emit_cond_jmp: case BPF_STX | BPF_XADD | BPF_W: /* STX XADD: lock *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - goto notyet; + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_ADD(1, tmp, tmp, dst), ctx); + emit(A64_PRFM(tmp, PST, L1, STRM), ctx); + emit(A64_LDXR(isdw, tmp2, tmp), ctx); + emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); + emit(A64_STXR(isdw, tmp2, tmp, tmp2), ctx); + jmp_offset = -3; + check_imm19(jmp_offset); + emit(A64_CBNZ(0, tmp2, jmp_offset), ctx); + break; /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ case BPF_LD | BPF_ABS | BPF_W: @@ -757,10 +759,6 @@ emit_cond_jmp: } break; } -notyet: - pr_info_once("*** NOT YET: opcode %02x ***\n", code); - return -EFAULT; - default: pr_err_once("unknown opcode %02x\n", code); return -EINVAL; @@ -779,14 +777,14 @@ static int build_body(struct jit_ctx *ctx) int ret; ret = build_insn(insn, ctx); - - if (ctx->image == NULL) - ctx->offset[i] = ctx->idx; - if (ret > 0) { i++; + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; continue; } + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; if (ret) return ret; } |