diff options
Diffstat (limited to 'arch/arm64')
30 files changed, 268 insertions, 175 deletions
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b35788c909f1..b481b4a7c011 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -83,9 +83,6 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds -ifeq ($(CONFIG_DYNAMIC_FTRACE),y) -KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o -endif endif # Default value diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index d7c22d51bc50..4aa50b9b26bc 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -12,6 +12,7 @@ subdir-y += cavium subdir-y += exynos subdir-y += freescale subdir-y += hisilicon +subdir-y += lg subdir-y += marvell subdir-y += mediatek subdir-y += nvidia @@ -22,5 +23,4 @@ subdir-y += rockchip subdir-y += socionext subdir-y += sprd subdir-y += xilinx -subdir-y += lg subdir-y += zte diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index ead895a4e9a5..1fb8b9d6cb4e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -753,12 +753,12 @@ &uart_B { clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &vpu { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 8ed981f59e5a..6524b89e7115 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -688,7 +688,7 @@ &uart_A { clocks = <&xtal>, <&clkc CLKID_UART0>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &uart_AO { @@ -703,12 +703,12 @@ &uart_B { clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &vpu { diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts index dd7193acc7df..6bdefb26b329 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts @@ -40,7 +40,6 @@ }; ðsc { - interrupt-parent = <&gpio>; interrupts = <0 8>; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts index d99e3731358c..254d6795c67e 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts @@ -40,7 +40,6 @@ }; ðsc { - interrupt-parent = <&gpio>; interrupts = <0 8>; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts index 864feeb35180..f9f06fcfb94a 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts @@ -38,8 +38,7 @@ }; ðsc { - interrupt-parent = <&gpio>; - interrupts = <0 8>; + interrupts = <4 8>; }; &serial0 { diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 76d1cc85d5b1..955130762a3c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -38,7 +38,7 @@ * * See Documentation/cachetlb.txt for more information. Please note that * the implementation assumes non-aliasing VIPT D-cache and (aliasing) - * VIPT or ASID-tagged VIVT I-cache. + * VIPT I-cache. * * flush_cache_mm(mm) * diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 650344d01124..c4cd5081d78b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -132,11 +132,9 @@ static inline void efi_set_pgd(struct mm_struct *mm) * Defer the switch to the current thread's TTBR0_EL1 * until uaccess_enable(). Restore the current * thread's saved ttbr0 corresponding to its active_mm - * (if different from init_mm). */ cpu_set_reserved_ttbr0(); - if (current->active_mm != &init_mm) - update_saved_ttbr0(current, current->active_mm); + update_saved_ttbr0(current, current->active_mm); } } } diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7f069ff37f06..715d395ef45b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -170,8 +170,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 674912d7a571..ea6cb5b24258 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -370,6 +370,7 @@ void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3257895a9b5e..9d155fa9a507 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -156,29 +156,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); #define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) -/* - * This is called when "tsk" is about to enter lazy TLB mode. - * - * mm: describes the currently active mm context - * tsk: task which is entering lazy tlb - * cpu: cpu number which is entering lazy tlb - * - * tsk->mm will be NULL - */ -static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -} - #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) { - if (system_uses_ttbr0_pan()) { - BUG_ON(mm->pgd == swapper_pg_dir); - task_thread_info(tsk)->ttbr0 = - virt_to_phys(mm->pgd) | ASID(mm) << 48; - } + u64 ttbr; + + if (!system_uses_ttbr0_pan()) + return; + + if (mm == &init_mm) + ttbr = __pa_symbol(empty_zero_page); + else + ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; + + task_thread_info(tsk)->ttbr0 = ttbr; } #else static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -187,6 +179,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, } #endif +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + /* + * We don't actually care about the ttbr0 mapping, so point it at the + * zero page. + */ + update_saved_ttbr0(tsk, &init_mm); +} + static inline void __switch_mm(struct mm_struct *next) { unsigned int cpu = smp_processor_id(); @@ -214,11 +216,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, * Update the saved TTBR0_EL1 of the scheduled-in task as the previous * value may have not been initialised yet (activate_mm caller) or the * ASID has changed since the last run (following the context switch - * of another thread of the same process). Avoid setting the reserved - * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit). + * of another thread of the same process). */ - if (next != &init_mm) - update_saved_ttbr0(tsk, next); + update_saved_ttbr0(tsk, next); } #define deactivate_mm(tsk,mm) do { } while (0) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 19bd97671bb8..4f766178fa6f 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -32,7 +32,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - void *ftrace_trampoline; + struct plt_entry *ftrace_trampoline; }; #endif @@ -45,4 +45,48 @@ extern u64 module_alloc_base; #define module_alloc_base ((u64)_etext - MODULES_VSIZE) #endif +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... */ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +static inline struct plt_entry get_plt_entry(u64 val) +{ + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + return (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; +} + +static inline bool plt_entries_equal(const struct plt_entry *a, + const struct plt_entry *b) +{ + return a->mov0 == b->mov0 && + a->mov1 == b->mov1 && + a->mov2 == b->mov2; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 8d5cbec17d80..f9ccc36d3dc3 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -18,6 +18,7 @@ #define __ASM_PERF_EVENT_H #include <asm/stack_pointer.h> +#include <asm/ptrace.h> #define ARMV8_PMU_MAX_COUNTERS 32 #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) @@ -79,6 +80,7 @@ struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs #endif #define perf_arch_fetch_caller_regs(regs, __ip) { \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9530b5b5ca8..149d05fb9421 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include <asm/ptrace.h> + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8265dd790895..067baace74a0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif - -# will be included by each individual module but not by the core kernel itself -extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d16978213c5b..ea001241bdd4 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; -static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = { #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL &acpi_parking_protocol_ops, #endif @@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops; + const struct cpu_operations *const *ops; ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 143b3e72c25e..540a1e010eb5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -114,7 +114,12 @@ * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so * whatever is in the FPSIMD registers is not saved to memory, but discarded. */ -static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); +struct fpsimd_last_state_struct { + struct fpsimd_state *st; + bool sve_in_use; +}; + +static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); /* Default VL for tasks that don't set it explicitly: */ static int sve_default_vl = -1; @@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next) */ struct fpsimd_state *st = &next->thread.fpsimd_state; - if (__this_cpu_read(fpsimd_last_state) == st + if (__this_cpu_read(fpsimd_last_state.st) == st && st->cpu == smp_processor_id()) clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); else @@ -992,6 +997,21 @@ void fpsimd_signal_preserve_current_state(void) } /* + * Associate current's FPSIMD context with this cpu + * Preemption must be disabled when calling this function. + */ +static void fpsimd_bind_to_cpu(void) +{ + struct fpsimd_last_state_struct *last = + this_cpu_ptr(&fpsimd_last_state); + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + last->st = st; + last->sve_in_use = test_thread_flag(TIF_SVE); + st->cpu = smp_processor_id(); +} + +/* * Load the userland FPSIMD state of 'current' from memory, but only if the * FPSIMD state already held in the registers is /not/ the most recent FPSIMD * state of 'current' @@ -1004,11 +1024,8 @@ void fpsimd_restore_current_state(void) local_bh_disable(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { - struct fpsimd_state *st = ¤t->thread.fpsimd_state; - task_fpsimd_load(); - __this_cpu_write(fpsimd_last_state, st); - st->cpu = smp_processor_id(); + fpsimd_bind_to_cpu(); } local_bh_enable(); @@ -1026,18 +1043,14 @@ void fpsimd_update_current_state(struct fpsimd_state *state) local_bh_disable(); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { - current->thread.fpsimd_state = *state; + current->thread.fpsimd_state = *state; + if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); - } - task_fpsimd_load(); - if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { - struct fpsimd_state *st = ¤t->thread.fpsimd_state; + task_fpsimd_load(); - __this_cpu_write(fpsimd_last_state, st); - st->cpu = smp_processor_id(); - } + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_bind_to_cpu(); local_bh_enable(); } @@ -1052,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t) static inline void fpsimd_flush_cpu_state(void) { - __this_cpu_write(fpsimd_last_state, NULL); + __this_cpu_write(fpsimd_last_state.st, NULL); } /* @@ -1065,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void) #ifdef CONFIG_ARM64_SVE void sve_flush_cpu_state(void) { - struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state); - struct task_struct *tsk; - - if (!fpstate) - return; + struct fpsimd_last_state_struct const *last = + this_cpu_ptr(&fpsimd_last_state); - tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state); - if (test_tsk_thread_flag(tsk, TIF_SVE)) + if (last->st && last->sve_in_use) fpsimd_flush_cpu_state(); } #endif /* CONFIG_ARM64_SVE */ @@ -1267,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { } #ifdef CONFIG_HOTPLUG_CPU static int fpsimd_cpu_dead(unsigned int cpu) { - per_cpu(fpsimd_last_state, cpu) = NULL; + per_cpu(fpsimd_last_state.st, cpu) = NULL; return 0; } diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S deleted file mode 100644 index 00c4025be4ff..000000000000 --- a/arch/arm64/kernel/ftrace-mod.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .section ".text.ftrace_trampoline", "ax" - .align 3 -0: .quad 0 -__ftrace_trampoline: - ldr x16, 0b - br x16 -ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5b..50986e388d2b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - unsigned long *trampoline; + struct plt_entry trampoline; struct module *mod; /* @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. */ - trampoline = (unsigned long *)mod->arch.ftrace_trampoline; - if (trampoline[0] != addr) { - if (trampoline[0] != 0) { + trampoline = get_plt_entry(addr); + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &trampoline)) { + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &(struct plt_entry){})) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } /* point the trampoline to our ftrace entry point */ module_disable_ro(mod); - trampoline[0] = addr; + *mod->arch.ftrace_trampoline = trampoline; module_enable_ro(mod, true); /* update trampoline before patching in the branch */ smp_wmb(); } - addr = (unsigned long)&trampoline[1]; + addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index d05dbe658409..ea640f92fe5a 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -11,21 +11,6 @@ #include <linux/module.h> #include <linux/sort.h> -struct plt_entry { - /* - * A program that conforms to the AArch64 Procedure Call Standard - * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or - * IP1 (x17) may be inserted at any branch instruction that is - * exposed to a relocation that supports long branches. Since that - * is exactly what we are dealing with here, we are free to use x16 - * as a scratch register in the PLT veneers. - */ - __le32 mov0; /* movn x16, #0x.... */ - __le32 mov1; /* movk x16, #0x...., lsl #16 */ - __le32 mov2; /* movk x16, #0x...., lsl #32 */ - __le32 br; /* br x16 */ -}; - static bool in_init(const struct module *mod, void *loc) { return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; @@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; - /* - * MOVK/MOVN/MOVZ opcode: - * +--------+------------+--------+-----------+-------------+---------+ - * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | - * +--------+------------+--------+-----------+-------------+---------+ - * - * Rd := 0x10 (x16) - * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) - * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) - * sf := 1 (64-bit variant) - */ - plt[i] = (struct plt_entry){ - cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), - cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), - cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), - cpu_to_le32(0xd61f0200) - }; + plt[i] = get_plt_entry(val); /* * Check if the entry we just created is a duplicate. Given that the * relocations are sorted, this will be the last entry we allocated. * (if one exists). */ - if (i > 0 && - plt[i].mov0 == plt[i - 1].mov0 && - plt[i].mov1 == plt[i - 1].mov1 && - plt[i].mov2 == plt[i - 1].mov2) + if (i > 0 && plt_entries_equal(plt + i, plt + i - 1)) return (u64)&plt[i - 1]; pltsec->plt_num_entries++; @@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; + Elf_Shdr *tramp = NULL; int i; /* @@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt = sechdrs + i; + else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(secstrings + sechdrs[i].sh_name, + ".text.ftrace_trampoline")) + tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } @@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt_num_entries = 0; mod->arch.init.plt_max_entries = init_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48..22e36a21c113 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,4 +1,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .init.plt (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff..3affca3dd96a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b2adcce7bc18..6b7dcf4310ac 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, clear_tsk_thread_flag(p, TIF_SVE); p->thread.sve_state = NULL; + /* + * In case p was allocated the same task_struct pointer as some + * other recently-exited task, make sure p is disassociated from + * any cpu that may have run that now-exited task recently. + * Otherwise we could erroneously skip reloading the FPSIMD + * registers for p. + */ + fpsimd_flush_task_state(p); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->regs[0] = 0; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index dbadfaf850a7..fa63b28c65e0 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) } } } + + +/* + * After successfully emulating an instruction, we might want to + * return to user space with a KVM_EXIT_DEBUG. We can only do this + * once the emulation is complete, though, so for userspace emulations + * we have to wait until we have re-entered KVM before calling this + * helper. + * + * Return true (and set exit_reason) to return to userspace or false + * if no further action is required. + */ +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT; + return true; + } + return false; +} diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index b71247995469..304203fa9e33 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -28,6 +28,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/kvm_psci.h> +#include <asm/debug-monitors.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -187,14 +188,46 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) } /* + * We may be single-stepping an emulated instruction. If the emulation + * has been completed in the kernel, we can return to userspace with a + * KVM_EXIT_DEBUG, otherwise userspace needs to complete its + * emulation first. + */ +static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int handled; + + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + handled = 1; + } else { + exit_handle_fn exit_handler; + + exit_handler = kvm_get_exit_handler(vcpu); + handled = exit_handler(vcpu, run); + } + + /* + * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run + * structure if we need to return to userspace. + */ + if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run)) + handled = 0; + + return handled; +} + +/* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. */ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) { - exit_handle_fn exit_handler; - if (ARM_SERROR_PENDING(exception_index)) { u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); @@ -220,20 +253,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, return 1; case ARM_EXCEPTION_EL1_SERROR: kvm_inject_vabt(vcpu); - return 1; - case ARM_EXCEPTION_TRAP: - /* - * See ARM ARM B1.14.1: "Hyp traps on instructions - * that fail their condition code check" - */ - if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + /* We may still need to return for single-step */ + if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS) + && kvm_arm_handle_step_debug(vcpu, run)) + return 0; + else return 1; - } - - exit_handler = kvm_get_exit_handler(vcpu); - - return exit_handler(vcpu, run); + case ARM_EXCEPTION_TRAP: + return handle_trap_exceptions(vcpu, run); case ARM_EXCEPTION_HYP_GONE: /* * EL2 has been reset to the hyp-stub. This happens when a guest diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 525c01f48867..f7c651f3a8c0 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -22,6 +22,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> #include <asm/fpsimd.h> +#include <asm/debug-monitors.h> static bool __hyp_text __fpsimd_enabled_nvhe(void) { @@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) return true; } -static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) +/* Skip an instruction which has been emulated. Returns true if + * execution can continue or false if we need to exit hyp mode because + * single-step was in effect. + */ +static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(elr); @@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) } write_sysreg_el2(*vcpu_pc(vcpu), elr); + + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + vcpu->arch.fault.esr_el2 = + (ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22; + return false; + } else { + return true; + } } int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) @@ -342,13 +355,21 @@ again: int ret = __vgic_v2_perform_cpuif_access(vcpu); if (ret == 1) { - __skip_instr(vcpu); - goto again; + if (__skip_instr(vcpu)) + goto again; + else + exit_code = ARM_EXCEPTION_TRAP; } if (ret == -1) { - /* Promote an illegal access to an SError */ - __skip_instr(vcpu); + /* Promote an illegal access to an + * SError. If we would be returning + * due to single-step clear the SS + * bit so handle_exit knows what to + * do after dealing with the error. + */ + if (!__skip_instr(vcpu)) + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; exit_code = ARM_EXCEPTION_EL1_SERROR; } @@ -363,8 +384,10 @@ again: int ret = __vgic_v3_perform_cpuif_access(vcpu); if (ret == 1) { - __skip_instr(vcpu); - goto again; + if (__skip_instr(vcpu)) + goto again; + else + exit_code = ARM_EXCEPTION_TRAP; } /* 0 falls through to be handled out of EL2 */ diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7..6f4017046323 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu) set_reserved_asid_bits(); - /* - * Ensure the generation bump is observed before we xchg the - * active_asids. - */ - smp_wmb(); - for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* @@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu) per_cpu(reserved_asids, i) = asid; } - /* Queue a TLB invalidate and flush the I-cache if necessary. */ + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ cpumask_setall(&tlb_flush_pending); } @@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) asid = atomic64_read(&mm->context.id); /* - * The memory ordering here is subtle. We rely on the control - * dependency between the generation read and the update of - * active_asids to ensure that we are synchronised with a - * parallel rollover (i.e. this pairs with the smp_wmb() in - * flush_context). + * The memory ordering here is subtle. + * If our ASID matches the current generation, then we update + * our active_asids entry with a relaxed xchg. Racing with a + * concurrent rollover means that either: + * + * - We get a zero back from the xchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated generation. + * + * - We get a valid ASID back from the xchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. */ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 371c5f03a170..051e71ec3335 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,7 +26,7 @@ #include <asm/page.h> #include <asm/tlbflush.h> -static struct kmem_cache *pgd_cache; +static struct kmem_cache *pgd_cache __ro_after_init; pgd_t *pgd_alloc(struct mm_struct *mm) { |