diff options
Diffstat (limited to 'arch/arm64/kernel')
32 files changed, 958 insertions, 746 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 0025f8691046..95ac7374d723 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,7 +18,8 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ - smp.o smp_spin_table.o topology.o smccc-call.o + smp.o smp_spin_table.o topology.o smccc-call.o \ + syscall.o extra-$(CONFIG_EFI) := efi-entry.o @@ -27,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o + sys_compat.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7b09487ff8fb..ed46dc188b22 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -18,6 +18,7 @@ #include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/cpumask.h> +#include <linux/efi.h> #include <linux/efi-bgrt.h> #include <linux/init.h> #include <linux/irq.h> @@ -29,13 +30,9 @@ #include <asm/cputype.h> #include <asm/cpu_ops.h> +#include <asm/pgtable.h> #include <asm/smp_plat.h> -#ifdef CONFIG_ACPI_APEI -# include <linux/efi.h> -# include <asm/pgtable.h> -#endif - int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; EXPORT_SYMBOL(acpi_disabled); @@ -239,8 +236,7 @@ done: } } -#ifdef CONFIG_ACPI_APEI -pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) +pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) { /* * According to "Table 8 Map: EFI memory types to AArch64 memory @@ -261,4 +257,3 @@ pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_NORMAL_NC); return __pgprot(PROT_DEVICE_nGnRnE); } -#endif diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index d190a7b231bf..4f4f1815e047 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -26,36 +26,73 @@ #include <linux/module.h> #include <linux/topology.h> -#include <acpi/processor.h> #include <asm/numa.h> -static int cpus_in_srat; +static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; -struct __node_cpu_hwid { - u32 node_id; /* logical node containing this CPU */ - u64 cpu_hwid; /* MPIDR for this CPU */ -}; +int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; -static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = { -[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} }; + return -EINVAL; +} -int acpi_numa_get_nid(unsigned int cpu, u64 hwid) +static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header, + const unsigned long end) { - int i; + struct acpi_srat_gicc_affinity *pa; + int cpu, pxm, node; - for (i = 0; i < cpus_in_srat; i++) { - if (hwid == early_node_cpu_hwid[i].cpu_hwid) - return early_node_cpu_hwid[i].node_id; - } + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_gicc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) + return 0; - return NUMA_NO_NODE; + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> MPIDR 0x%llx -> Node %d\n", pxm, + cpu_logical_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GICC_AFFINITY, + acpi_parse_gicc_pxm, 0); } /* Callback for Proximity Domain -> ACPI processor UID mapping */ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { int pxm, node; - phys_cpuid_t mpidr; if (srat_disabled()) return; @@ -70,12 +107,6 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) return; - if (cpus_in_srat >= NR_CPUS) { - pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n", - NR_CPUS); - return; - } - pxm = pa->proximity_domain; node = acpi_map_pxm_to_node(pxm); @@ -85,20 +116,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) return; } - mpidr = acpi_map_madt_entry(pa->acpi_processor_uid); - if (mpidr == PHYS_CPUID_INVALID) { - pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", - pxm, pa->acpi_processor_uid); - bad_srat(); - return; - } - - early_node_cpu_hwid[cpus_in_srat].node_id = node; - early_node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr; node_set(node, numa_nodes_parsed); - cpus_in_srat++; - pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d\n", - pxm, mpidr, node); } int __init arm64_acpi_numa_init(void) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 36fb069fd049..b5d603992d40 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -47,11 +47,11 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) unsigned long replptr; if (kernel_text_address(pc)) - return 1; + return true; replptr = (unsigned long)ALT_REPL_PTR(alt); if (pc >= replptr && pc <= (replptr + alt->alt_len)) - return 0; + return false; /* * Branching into *another* alternate sequence is doomed, and diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index d4707abb2f16..92be1d12d590 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -441,8 +441,8 @@ static struct undef_hook swp_hooks[] = { { .instr_mask = 0x0fb00ff0, .instr_val = 0x01000090, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = swp_handler }, { } @@ -511,9 +511,9 @@ ret: static int cp15_barrier_set_hw_mode(bool enable) { if (enable) - config_sctlr_el1(0, SCTLR_EL1_CP15BEN); + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_CP15BEN); else - config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_CP15BEN, 0); return 0; } @@ -521,15 +521,15 @@ static struct undef_hook cp15_barrier_hooks[] = { { .instr_mask = 0x0fff0fdf, .instr_val = 0x0e070f9a, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = cp15barrier_handler, }, { .instr_mask = 0x0fff0fff, .instr_val = 0x0e070f95, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = cp15barrier_handler, }, { } @@ -548,9 +548,9 @@ static int setend_set_hw_mode(bool enable) return -EINVAL; if (enable) - config_sctlr_el1(SCTLR_EL1_SED, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SED, 0); else - config_sctlr_el1(0, SCTLR_EL1_SED); + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SED); return 0; } @@ -562,10 +562,10 @@ static int compat_setend_handler(struct pt_regs *regs, u32 big_endian) if (big_endian) { insn = "setend be"; - regs->pstate |= COMPAT_PSR_E_BIT; + regs->pstate |= PSR_AA32_E_BIT; } else { insn = "setend le"; - regs->pstate &= ~COMPAT_PSR_E_BIT; + regs->pstate &= ~PSR_AA32_E_BIT; } trace_instruction_emulation(insn, regs->pc); @@ -593,16 +593,16 @@ static struct undef_hook setend_hooks[] = { { .instr_mask = 0xfffffdff, .instr_val = 0xf1010000, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = a32_setend_handler, }, { /* Thumb mode */ .instr_mask = 0x0000fff7, .instr_val = 0x0000b650, - .pstate_mask = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK), - .pstate_val = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR), + .pstate_mask = (PSR_AA32_T_BIT | PSR_AA32_MODE_MASK), + .pstate_val = (PSR_AA32_T_BIT | PSR_AA32_MODE_USR), .fn = t16_setend_handler, }, {} diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index 6c2b1b4f57c9..fad90e4935fb 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -16,13 +16,14 @@ void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, unsigned long arg0, unsigned long arg1, unsigned long arg2); -static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, - unsigned long entry, unsigned long arg0, unsigned long arg1, - unsigned long arg2) +static inline void __noreturn cpu_soft_restart(unsigned long entry, + unsigned long arg0, + unsigned long arg1, + unsigned long arg2) { typeof(__cpu_soft_restart) *restart; - el2_switch = el2_switch && !is_kernel_in_hyp_mode() && + unsigned long el2_switch = !is_kernel_in_hyp_mode() && is_hyp_mode_available(); restart = (void *)__pa_symbol(__cpu_soft_restart); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1d2b6d768efe..dec10898d688 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -65,19 +65,24 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) } static bool -has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, - int scope) +has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, + int scope) { + u64 mask = CTR_CACHE_MINLINE_MASK; + + /* Skip matching the min line sizes for cache type check */ + if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) + mask ^= arm64_ftr_reg_ctrel0.strict_mask; + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != - (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); + return (read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask); } static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - /* Clear SCTLR_EL1.UCT */ - config_sctlr_el1(SCTLR_EL1_UCT, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); @@ -101,7 +106,7 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, for (i = 0; i < SZ_2K; i += 0x80) memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); + __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } static void __install_bp_hardening_cb(bp_hardening_cb_t fn, @@ -613,7 +618,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_line_size, + .matches = has_mismatched_cache_type, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .cpu_enable = cpu_enable_trap_ctr_access, + }, + { + .desc = "Mismatched cache type", + .capability = ARM64_MISMATCHED_CACHE_TYPE, + .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .cpu_enable = cpu_enable_trap_ctr_access, }, @@ -649,7 +661,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .cpu_enable = enable_smccc_arch_workaround_1, ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), }, @@ -658,7 +669,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c6d80743f4ed..e238b7932096 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), @@ -214,7 +215,7 @@ static const struct arm64_ftr_bits ftr_ctr[] = { * If we have differing I-cache policies, report it as the weakest - VIPT. */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) } #endif +static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) +{ + u64 val = read_sysreg_s(SYS_CLIDR_EL1); + + /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ + WARN_ON(val & (7 << 27 | 7 << 21)); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_dic, }, + { + .desc = "Stage-2 Force Write-Back", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_STAGE2_FWB, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .min_field_value = 1, + .matches = has_cpuid_feature, + .cpu_enable = cpu_has_fwb, + }, #ifdef CONFIG_ARM64_HW_AFDBM { /* @@ -1723,7 +1743,7 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn) static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, - .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_mask = PSR_AA32_MODE_MASK, .pstate_val = PSR_MODE_EL0t, .fn = emulate_mrs, }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 28ad8799406f..09dbea221a27 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -41,19 +41,9 @@ * Context tracking subsystem. Used to instrument transitions * between user and kernel mode. */ - .macro ct_user_exit, syscall = 0 + .macro ct_user_exit #ifdef CONFIG_CONTEXT_TRACKING bl context_tracking_user_exit - .if \syscall == 1 - /* - * Save/restore needed during syscalls. Restore syscall arguments from - * the values already saved on stack during kernel_entry. - */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - .endif #endif .endm @@ -63,6 +53,12 @@ #endif .endm + .macro clear_gp_regs + .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 + mov x\n, xzr + .endr + .endm + /* * Bad Abort numbers *----------------- @@ -140,20 +136,21 @@ alternative_else_nop_endif // This macro corrupts x0-x3. It is the caller's duty // to save/restore them if required. - .macro apply_ssbd, state, targ, tmp1, tmp2 + .macro apply_ssbd, state, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD alternative_cb arm64_enable_wa2_handling - b \targ + b .L__asm_ssbd_skip\@ alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 - cbz \tmp2, \targ + cbz \tmp2, .L__asm_ssbd_skip\@ ldr \tmp2, [tsk, #TSK_TI_FLAGS] - tbnz \tmp2, #TIF_SSBD, \targ + tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state alternative_cb arm64_update_smccc_conduit nop // Patched to SMC/HVC #0 alternative_cb_end +.L__asm_ssbd_skip\@: #endif .endm @@ -178,20 +175,14 @@ alternative_cb_end stp x28, x29, [sp, #16 * 14] .if \el == 0 + clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. - apply_ssbd 1, 1f, x22, x23 - -#ifdef CONFIG_ARM64_SSBD - ldp x0, x1, [sp, #16 * 0] - ldp x2, x3, [sp, #16 * 1] -#endif -1: + apply_ssbd 1, x22, x23 - mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE get_thread_info tsk @@ -331,8 +322,7 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: - apply_ssbd 0, 5f, x0, x1 -5: + apply_ssbd 0, x0, x1 .endif msr elr_el1, x21 // set up the return data @@ -720,14 +710,9 @@ el0_sync_compat: b.ge el0_dbg b el0_inv el0_svc_compat: - /* - * AArch32 syscall handling - */ - ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags - adrp stbl, compat_sys_call_table // load compat syscall table pointer - mov wscno, w7 // syscall number in w7 (r7) - mov wsc_nr, #__NR_compat_syscalls - b el0_svc_naked + mov x0, sp + bl el0_svc_compat_handler + b ret_to_user .align 6 el0_irq_compat: @@ -896,25 +881,6 @@ el0_error_naked: b ret_to_user ENDPROC(el0_error) - -/* - * This is the fast syscall return path. We do as little as possible here, - * and this includes saving x0 back into the kernel stack. - */ -ret_fast_syscall: - disable_daif - str x0, [sp, #S_X0] // returned x0 - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing - and x2, x1, #_TIF_SYSCALL_WORK - cbnz x2, ret_fast_syscall_trace - and x2, x1, #_TIF_WORK_MASK - cbnz x2, work_pending - enable_step_tsk x1, x2 - kernel_exit 0 -ret_fast_syscall_trace: - enable_daif - b __sys_trace_return_skipped // we already saved x0 - /* * Ok, we need to do extra processing, enter the slow path. */ @@ -936,6 +902,9 @@ ret_to_user: cbnz x2, work_pending finish_ret_to_user: enable_step_tsk x1, x2 +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase +#endif kernel_exit 0 ENDPROC(ret_to_user) @@ -944,85 +913,10 @@ ENDPROC(ret_to_user) */ .align 6 el0_svc: - ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags - adrp stbl, sys_call_table // load syscall table pointer - mov wscno, w8 // syscall number in w8 - mov wsc_nr, #__NR_syscalls - -#ifdef CONFIG_ARM64_SVE -alternative_if_not ARM64_SVE - b el0_svc_naked -alternative_else_nop_endif - tbz x16, #TIF_SVE, el0_svc_naked // Skip unless TIF_SVE set: - bic x16, x16, #_TIF_SVE // discard SVE state - str x16, [tsk, #TSK_TI_FLAGS] - - /* - * task_fpsimd_load() won't be called to update CPACR_EL1 in - * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only - * happens if a context switch or kernel_neon_begin() or context - * modification (sigreturn, ptrace) intervenes. - * So, ensure that CPACR_EL1 is already correct for the fast-path case: - */ - mrs x9, cpacr_el1 - bic x9, x9, #CPACR_EL1_ZEN_EL0EN // disable SVE for el0 - msr cpacr_el1, x9 // synchronised by eret to el0 -#endif - -el0_svc_naked: // compat entry point - stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number - enable_daif - ct_user_exit 1 - - tst x16, #_TIF_SYSCALL_WORK // check for syscall hooks - b.ne __sys_trace - cmp wscno, wsc_nr // check upper syscall limit - b.hs ni_sys - mask_nospec64 xscno, xsc_nr, x19 // enforce bounds for syscall number - ldr x16, [stbl, xscno, lsl #3] // address in the syscall table - blr x16 // call sys_* routine - b ret_fast_syscall -ni_sys: - mov x0, sp - bl do_ni_syscall - b ret_fast_syscall -ENDPROC(el0_svc) - - /* - * This is the really slow path. We're going to be doing context - * switches, and waiting for our parent to respond. - */ -__sys_trace: - cmp wscno, #NO_SYSCALL // user-issued syscall(-1)? - b.ne 1f - mov x0, #-ENOSYS // set default errno if so - str x0, [sp, #S_X0] -1: mov x0, sp - bl syscall_trace_enter - cmp w0, #NO_SYSCALL // skip the syscall? - b.eq __sys_trace_return_skipped - mov wscno, w0 // syscall number (possibly new) - mov x1, sp // pointer to regs - cmp wscno, wsc_nr // check upper syscall limit - b.hs __ni_sys_trace - ldp x0, x1, [sp] // restore the syscall args - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldr x16, [stbl, xscno, lsl #3] // address in the syscall table - blr x16 // call sys_* routine - -__sys_trace_return: - str x0, [sp, #S_X0] // save returned x0 -__sys_trace_return_skipped: mov x0, sp - bl syscall_trace_exit + bl el0_svc_handler b ret_to_user - -__ni_sys_trace: - mov x0, sp - bl do_ni_syscall - b __sys_trace_return +ENDPROC(el0_svc) .popsection // .entry.text @@ -1138,14 +1032,6 @@ __entry_tramp_data_start: #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* - * Special system call wrappers. - */ -ENTRY(sys_rt_sigreturn_wrapper) - mov x0, sp - b sys_rt_sigreturn -ENDPROC(sys_rt_sigreturn_wrapper) - -/* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: * x0 = previous task_struct (must be preserved across the switch) diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S deleted file mode 100644 index f332d5d1f6b4..000000000000 --- a/arch/arm64/kernel/entry32.S +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Compat system call wrappers - * - * Copyright (C) 2012 ARM Ltd. - * Authors: Will Deacon <will.deacon@arm.com> - * Catalin Marinas <catalin.marinas@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/const.h> - -#include <asm/assembler.h> -#include <asm/asm-offsets.h> -#include <asm/errno.h> -#include <asm/page.h> - -/* - * System call wrappers for the AArch32 compatibility layer. - */ - -ENTRY(compat_sys_sigreturn_wrapper) - mov x0, sp - b compat_sys_sigreturn -ENDPROC(compat_sys_sigreturn_wrapper) - -ENTRY(compat_sys_rt_sigreturn_wrapper) - mov x0, sp - b compat_sys_rt_sigreturn -ENDPROC(compat_sys_rt_sigreturn_wrapper) - -ENTRY(compat_sys_statfs64_wrapper) - mov w3, #84 - cmp w1, #88 - csel w1, w3, w1, eq - b compat_sys_statfs64 -ENDPROC(compat_sys_statfs64_wrapper) - -ENTRY(compat_sys_fstatfs64_wrapper) - mov w3, #84 - cmp w1, #88 - csel w1, w3, w1, eq - b compat_sys_fstatfs64 -ENDPROC(compat_sys_fstatfs64_wrapper) - -/* - * Note: off_4k (w5) is always in units of 4K. If we can't do the - * requested offset because it is not page-aligned, we return -EINVAL. - */ -ENTRY(compat_sys_mmap2_wrapper) -#if PAGE_SHIFT > 12 - tst w5, #~PAGE_MASK >> 12 - b.ne 1f - lsr w5, w5, #PAGE_SHIFT - 12 -#endif - b sys_mmap_pgoff -1: mov x0, #-EINVAL - ret -ENDPROC(compat_sys_mmap2_wrapper) - -/* - * Wrappers for AArch32 syscalls that either take 64-bit parameters - * in registers or that take 32-bit parameters which require sign - * extension. - */ -ENTRY(compat_sys_pread64_wrapper) - regs_to_64 x3, x4, x5 - b sys_pread64 -ENDPROC(compat_sys_pread64_wrapper) - -ENTRY(compat_sys_pwrite64_wrapper) - regs_to_64 x3, x4, x5 - b sys_pwrite64 -ENDPROC(compat_sys_pwrite64_wrapper) - -ENTRY(compat_sys_truncate64_wrapper) - regs_to_64 x1, x2, x3 - b sys_truncate -ENDPROC(compat_sys_truncate64_wrapper) - -ENTRY(compat_sys_ftruncate64_wrapper) - regs_to_64 x1, x2, x3 - b sys_ftruncate -ENDPROC(compat_sys_ftruncate64_wrapper) - -ENTRY(compat_sys_readahead_wrapper) - regs_to_64 x1, x2, x3 - mov w2, w4 - b sys_readahead -ENDPROC(compat_sys_readahead_wrapper) - -ENTRY(compat_sys_fadvise64_64_wrapper) - mov w6, w1 - regs_to_64 x1, x2, x3 - regs_to_64 x2, x4, x5 - mov w3, w6 - b sys_fadvise64_64 -ENDPROC(compat_sys_fadvise64_64_wrapper) - -ENTRY(compat_sys_sync_file_range2_wrapper) - regs_to_64 x2, x2, x3 - regs_to_64 x3, x4, x5 - b sys_sync_file_range2 -ENDPROC(compat_sys_sync_file_range2_wrapper) - -ENTRY(compat_sys_fallocate_wrapper) - regs_to_64 x2, x2, x3 - regs_to_64 x3, x4, x5 - b sys_fallocate -ENDPROC(compat_sys_fallocate_wrapper) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 84c68b14f1b2..58c53bc96928 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -159,25 +159,6 @@ static void sve_free(struct task_struct *task) __sve_free(task); } -static void change_cpacr(u64 val, u64 mask) -{ - u64 cpacr = read_sysreg(CPACR_EL1); - u64 new = (cpacr & ~mask) | val; - - if (new != cpacr) - write_sysreg(new, CPACR_EL1); -} - -static void sve_user_disable(void) -{ - change_cpacr(0, CPACR_EL1_ZEN_EL0EN); -} - -static void sve_user_enable(void) -{ - change_cpacr(CPACR_EL1_ZEN_EL0EN, CPACR_EL1_ZEN_EL0EN); -} - /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 413dbe530da8..8c9644376326 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -343,14 +343,13 @@ static int get_hbp_len(u8 hbp_len) /* * Check whether bp virtual address is in kernel space. */ -int arch_check_bp_in_kernelspace(struct perf_event *bp) +int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) { unsigned int len; unsigned long va; - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - va = info->address; - len = get_hbp_len(info->ctrl.len); + va = hw->address; + len = get_hbp_len(hw->ctrl.len); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -421,53 +420,53 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, /* * Construct an arch_hw_breakpoint from a perf_event. */ -static int arch_build_bp_info(struct perf_event *bp) +static int arch_build_bp_info(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - /* Type */ - switch (bp->attr.bp_type) { + switch (attr->bp_type) { case HW_BREAKPOINT_X: - info->ctrl.type = ARM_BREAKPOINT_EXECUTE; + hw->ctrl.type = ARM_BREAKPOINT_EXECUTE; break; case HW_BREAKPOINT_R: - info->ctrl.type = ARM_BREAKPOINT_LOAD; + hw->ctrl.type = ARM_BREAKPOINT_LOAD; break; case HW_BREAKPOINT_W: - info->ctrl.type = ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_STORE; break; case HW_BREAKPOINT_RW: - info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; break; default: return -EINVAL; } /* Len */ - switch (bp->attr.bp_len) { + switch (attr->bp_len) { case HW_BREAKPOINT_LEN_1: - info->ctrl.len = ARM_BREAKPOINT_LEN_1; + hw->ctrl.len = ARM_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - info->ctrl.len = ARM_BREAKPOINT_LEN_2; + hw->ctrl.len = ARM_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_3: - info->ctrl.len = ARM_BREAKPOINT_LEN_3; + hw->ctrl.len = ARM_BREAKPOINT_LEN_3; break; case HW_BREAKPOINT_LEN_4: - info->ctrl.len = ARM_BREAKPOINT_LEN_4; + hw->ctrl.len = ARM_BREAKPOINT_LEN_4; break; case HW_BREAKPOINT_LEN_5: - info->ctrl.len = ARM_BREAKPOINT_LEN_5; + hw->ctrl.len = ARM_BREAKPOINT_LEN_5; break; case HW_BREAKPOINT_LEN_6: - info->ctrl.len = ARM_BREAKPOINT_LEN_6; + hw->ctrl.len = ARM_BREAKPOINT_LEN_6; break; case HW_BREAKPOINT_LEN_7: - info->ctrl.len = ARM_BREAKPOINT_LEN_7; + hw->ctrl.len = ARM_BREAKPOINT_LEN_7; break; case HW_BREAKPOINT_LEN_8: - info->ctrl.len = ARM_BREAKPOINT_LEN_8; + hw->ctrl.len = ARM_BREAKPOINT_LEN_8; break; default: return -EINVAL; @@ -478,37 +477,37 @@ static int arch_build_bp_info(struct perf_event *bp) * AArch32 also requires breakpoints of length 2 for Thumb. * Watchpoints can be of length 1, 2, 4 or 8 bytes. */ - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE) { if (is_compat_bp(bp)) { - if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && - info->ctrl.len != ARM_BREAKPOINT_LEN_4) + if (hw->ctrl.len != ARM_BREAKPOINT_LEN_2 && + hw->ctrl.len != ARM_BREAKPOINT_LEN_4) return -EINVAL; - } else if (info->ctrl.len != ARM_BREAKPOINT_LEN_4) { + } else if (hw->ctrl.len != ARM_BREAKPOINT_LEN_4) { /* * FIXME: Some tools (I'm looking at you perf) assume * that breakpoints should be sizeof(long). This * is nonsense. For now, we fix up the parameter * but we should probably return -EINVAL instead. */ - info->ctrl.len = ARM_BREAKPOINT_LEN_4; + hw->ctrl.len = ARM_BREAKPOINT_LEN_4; } } /* Address */ - info->address = bp->attr.bp_addr; + hw->address = attr->bp_addr; /* * Privilege * Note that we disallow combined EL0/EL1 breakpoints because * that would complicate the stepping code. */ - if (arch_check_bp_in_kernelspace(bp)) - info->ctrl.privilege = AARCH64_BREAKPOINT_EL1; + if (arch_check_bp_in_kernelspace(hw)) + hw->ctrl.privilege = AARCH64_BREAKPOINT_EL1; else - info->ctrl.privilege = AARCH64_BREAKPOINT_EL0; + hw->ctrl.privilege = AARCH64_BREAKPOINT_EL0; /* Enabled? */ - info->ctrl.enabled = !bp->attr.disabled; + hw->ctrl.enabled = !attr->disabled; return 0; } @@ -516,14 +515,15 @@ static int arch_build_bp_info(struct perf_event *bp) /* * Validate the arch-specific HW Breakpoint register settings. */ -int arch_validate_hwbkpt_settings(struct perf_event *bp) +int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret; u64 alignment_mask, offset; /* Build the arch_hw_breakpoint. */ - ret = arch_build_bp_info(bp); + ret = arch_build_bp_info(bp, attr, hw); if (ret) return ret; @@ -537,42 +537,42 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * that here. */ if (is_compat_bp(bp)) { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; else alignment_mask = 0x3; - offset = info->address & alignment_mask; + offset = hw->address & alignment_mask; switch (offset) { case 0: /* Aligned */ break; case 1: /* Allow single byte watchpoint. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; case 2: /* Allow halfword watchpoints and breakpoints. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; default: return -EINVAL; } } else { - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) + if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE) alignment_mask = 0x3; else alignment_mask = 0x7; - offset = info->address & alignment_mask; + offset = hw->address & alignment_mask; } - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; + hw->address &= ~alignment_mask; + hw->ctrl.len <<= offset; /* * Disallow per-task kernel breakpoints since these would * complicate the stepping code. */ - if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) + if (hw->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) return -EINVAL; return 0; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 816d03c4c913..2b3413549734 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -149,20 +149,6 @@ int __kprobes aarch64_insn_write(void *addr, u32 insn) return __aarch64_insn_write(addr, cpu_to_le32(insn)); } -static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) -{ - if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) - return false; - - return aarch64_insn_is_b(insn) || - aarch64_insn_is_bl(insn) || - aarch64_insn_is_svc(insn) || - aarch64_insn_is_hvc(insn) || - aarch64_insn_is_smc(insn) || - aarch64_insn_is_brk(insn) || - aarch64_insn_is_nop(insn); -} - bool __kprobes aarch64_insn_uses_literal(u32 insn) { /* ldr/ldrsw (literal), prfm */ @@ -189,22 +175,6 @@ bool __kprobes aarch64_insn_is_branch(u32 insn) aarch64_insn_is_bcond(insn); } -/* - * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a - * Section B2.6.5 "Concurrent modification and execution of instructions": - * Concurrent modification and execution of instructions can lead to the - * resulting instruction performing any behavior that can be achieved by - * executing any sequence of instructions that can be executed from the - * same Exception level, except where the instruction before modification - * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, - * or SMC instruction. - */ -bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) -{ - return __aarch64_insn_hotpatch_safe(old_insn) && - __aarch64_insn_hotpatch_safe(new_insn); -} - int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) { u32 *tp = addr; @@ -216,8 +186,8 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) ret = aarch64_insn_write(tp, insn); if (ret == 0) - flush_icache_range((uintptr_t)tp, - (uintptr_t)tp + AARCH64_INSN_SIZE); + __flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); return ret; } @@ -239,11 +209,6 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); - /* - * aarch64_insn_patch_text_nosync() calls flush_icache_range(), - * which ends with "dsb; isb" pair guaranteeing global - * visibility. - */ /* Notify other processors with an additional increment. */ atomic_inc(&pp->cpu_count); } else { @@ -255,8 +220,7 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) return ret; } -static -int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) { struct aarch64_insn_patch patch = { .text_addrs = addrs, @@ -272,34 +236,6 @@ int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) cpu_online_mask); } -int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) -{ - int ret; - u32 insn; - - /* Unsafe to patch multiple instructions without synchronizaiton */ - if (cnt == 1) { - ret = aarch64_insn_read(addrs[0], &insn); - if (ret) - return ret; - - if (aarch64_insn_hotpatch_safe(insn, insns[0])) { - /* - * ARMv8 architecture doesn't guarantee all CPUs see - * the new instruction after returning from function - * aarch64_insn_patch_text_nosync(). So send IPIs to - * all other CPUs to achieve instruction - * synchronization. - */ - ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); - kick_all_cpus_sync(); - return ret; - } - } - - return aarch64_insn_patch_text_sync(addrs, insns, cnt); -} - static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, u32 *maskp, int *shiftp) { diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 60e5fc661f74..780a12f59a8f 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -42,16 +42,6 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } -void (*handle_arch_irq)(struct pt_regs *) = NULL; - -void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) -{ - if (handle_arch_irq) - return; - - handle_arch_irq = handle_irq; -} - #ifdef CONFIG_VMAP_STACK static void init_irq_stacks(void) { diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index c2dd1ad3e648..e0756416e567 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -36,7 +36,7 @@ void arch_jump_label_transform(struct jump_entry *entry, insn = aarch64_insn_gen_nop(); } - aarch64_insn_patch_text(&addr, &insn, 1); + aarch64_insn_patch_text_nosync(addr, insn); } void arch_jump_label_transform_static(struct jump_entry *entry, diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index f76ea92dff91..f6a5c6bc1434 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -184,8 +184,15 @@ void machine_kexec(struct kimage *kimage) /* Flush the reboot_code_buffer in preparation for its execution. */ __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); - flush_icache_range((uintptr_t)reboot_code_buffer, - arm64_relocate_new_kernel_size); + + /* + * Although we've killed off the secondary CPUs, we don't update + * the online mask if we're handling a crash kernel and consequently + * need to avoid flush_icache_range(), which will attempt to IPI + * the offline CPUs. Therefore, we must use the __* variant here. + */ + __flush_icache_range((uintptr_t)reboot_code_buffer, + arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. */ kexec_list_flush(kimage); @@ -207,8 +214,7 @@ void machine_kexec(struct kimage *kimage) * relocation is complete. */ - cpu_soft_restart(kimage != kexec_crash_image, - reboot_code_buffer_phys, kimage->head, kimage->start, 0); + cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, 0); BUG(); /* Should never get here. */ } @@ -361,4 +367,5 @@ void arch_crash_save_vmcoreinfo(void) kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", PHYS_OFFSET); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 33147aacdafd..8e38d5267f22 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -25,6 +25,7 @@ #include <asm/virt.h> #include <linux/acpi.h> +#include <linux/clocksource.h> #include <linux/of.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> @@ -446,9 +447,16 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { }; PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(long, "config1:0"); + +static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +{ + return event->attr.config1 & 0x1; +} static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, + &format_attr_long.attr, NULL, }; @@ -466,6 +474,21 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* + * We must chain two programmable counters for 64 bit events, + * except when we have allocated the 64bit cycle counter (for CPU + * cycles event). This must be called only when the event has + * a counter allocated. + */ +static inline bool armv8pmu_event_is_chained(struct perf_event *event) +{ + int idx = event->hw.idx; + + return !WARN_ON(idx < 0) && + armv8pmu_event_is_64bit(event) && + (idx != ARMV8_IDX_CYCLE_COUNTER); +} + +/* * ARMv8 low level PMU access */ @@ -503,34 +526,68 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline int armv8pmu_select_counter(int idx) +static inline void armv8pmu_select_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); write_sysreg(counter, pmselr_el0); isb(); +} - return idx; +static inline u32 armv8pmu_read_evcntr(int idx) +{ + armv8pmu_select_counter(idx); + return read_sysreg(pmxevcntr_el0); +} + +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = 0; + + val = armv8pmu_read_evcntr(idx); + if (armv8pmu_event_is_chained(event)) + val = (val << 32) | armv8pmu_read_evcntr(idx - 1); + return val; } -static inline u32 armv8pmu_read_counter(struct perf_event *event) +static inline u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u32 value = 0; + u64 value = 0; if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); - else if (armv8pmu_select_counter(idx) == idx) - value = read_sysreg(pmxevcntr_el0); + else + value = armv8pmu_read_hw_counter(event); return value; } -static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv8pmu_write_evcntr(int idx, u32 value) +{ + armv8pmu_select_counter(idx); + write_sysreg(value, pmxevcntr_el0); +} + +static inline void armv8pmu_write_hw_counter(struct perf_event *event, + u64 value) +{ + int idx = event->hw.idx; + + if (armv8pmu_event_is_chained(event)) { + armv8pmu_write_evcntr(idx, upper_32_bits(value)); + armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); + } else { + armv8pmu_write_evcntr(idx, value); + } +} + +static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -541,22 +598,43 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) { /* - * Set the upper 32bits as this is a 64bit counter but we only - * count using the lower 32bits and we want an interrupt when - * it overflows. + * The cycles counter is really a 64-bit counter. + * When treating it as a 32-bit counter, we only count + * the lower 32 bits, and set the upper 32-bits so that + * we get an interrupt upon 32-bit overflow. */ - u64 value64 = 0xffffffff00000000ULL | value; - - write_sysreg(value64, pmccntr_el0); - } else if (armv8pmu_select_counter(idx) == idx) - write_sysreg(value, pmxevcntr_el0); + if (!armv8pmu_event_is_64bit(event)) + value |= 0xffffffff00000000ULL; + write_sysreg(value, pmccntr_el0); + } else + armv8pmu_write_hw_counter(event, value); } static inline void armv8pmu_write_evtype(int idx, u32 val) { - if (armv8pmu_select_counter(idx) == idx) { - val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + armv8pmu_select_counter(idx); + val &= ARMV8_PMU_EVTYPE_MASK; + write_sysreg(val, pmxevtyper_el0); +} + +static inline void armv8pmu_write_event_type(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * For chained events, the low counter is programmed to count + * the event of interest and the high counter is programmed + * with CHAIN event code with filters set to count at all ELs. + */ + if (armv8pmu_event_is_chained(event)) { + u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | + ARMV8_PMU_INCLUDE_EL2; + + armv8pmu_write_evtype(idx - 1, hwc->config_base); + armv8pmu_write_evtype(idx, chain_evt); + } else { + armv8pmu_write_evtype(idx, hwc->config_base); } } @@ -567,6 +645,16 @@ static inline int armv8pmu_enable_counter(int idx) return idx; } +static inline void armv8pmu_enable_event_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + + armv8pmu_enable_counter(idx); + if (armv8pmu_event_is_chained(event)) + armv8pmu_enable_counter(idx - 1); + isb(); +} + static inline int armv8pmu_disable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -574,6 +662,16 @@ static inline int armv8pmu_disable_counter(int idx) return idx; } +static inline void armv8pmu_disable_event_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_chained(event)) + armv8pmu_disable_counter(idx - 1); + armv8pmu_disable_counter(idx); +} + static inline int armv8pmu_enable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -581,6 +679,11 @@ static inline int armv8pmu_enable_intens(int idx) return idx; } +static inline int armv8pmu_enable_event_irq(struct perf_event *event) +{ + return armv8pmu_enable_intens(event->hw.idx); +} + static inline int armv8pmu_disable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -593,6 +696,11 @@ static inline int armv8pmu_disable_intens(int idx) return idx; } +static inline int armv8pmu_disable_event_irq(struct perf_event *event) +{ + return armv8pmu_disable_intens(event->hw.idx); +} + static inline u32 armv8pmu_getreset_flags(void) { u32 value; @@ -610,10 +718,8 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -624,22 +730,22 @@ static void armv8pmu_enable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Set event (if destined for PMNx counters). */ - armv8pmu_write_evtype(idx, hwc->config_base); + armv8pmu_write_event_type(event); /* * Enable interrupt for this counter */ - armv8pmu_enable_intens(idx); + armv8pmu_enable_event_irq(event); /* * Enable counter */ - armv8pmu_enable_counter(idx); + armv8pmu_enable_event_counter(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -647,10 +753,8 @@ static void armv8pmu_enable_event(struct perf_event *event) static void armv8pmu_disable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Disable counter and interrupt @@ -660,16 +764,38 @@ static void armv8pmu_disable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Disable interrupt for this counter */ - armv8pmu_disable_intens(idx); + armv8pmu_disable_event_irq(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } +static void armv8pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmovsr; @@ -694,6 +820,11 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) */ regs = get_irq_regs(); + /* + * Stop the PMU while processing the counter overflows + * to prevent skews in group events. + */ + armv8pmu_stop(cpu_pmu); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -718,6 +849,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } + armv8pmu_start(cpu_pmu); /* * Handle the pending perf events. @@ -731,32 +863,42 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) return IRQ_HANDLED; } -static void armv8pmu_start(struct arm_pmu *cpu_pmu) +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx; - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; } -static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx; - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + /* + * Chaining requires two consecutive event counters, where + * the lower idx must be even. + */ + for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + if (!test_and_set_bit(idx, cpuc->used_mask)) { + /* Check if the preceding even counter is available */ + if (!test_and_set_bit(idx - 1, cpuc->used_mask)) + return idx; + /* Release the Odd counter */ + clear_bit(idx, cpuc->used_mask); + } + } + return -EAGAIN; } static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { - int idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; @@ -770,13 +912,20 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } + if (armv8pmu_event_is_64bit(event)) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + else + return armv8pmu_get_single_idx(cpuc, cpu_pmu); +} - /* The counters are all in use. */ - return -EAGAIN; +static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx = event->hw.idx; + + clear_bit(idx, cpuc->used_mask); + if (armv8pmu_event_is_chained(event)) + clear_bit(idx - 1, cpuc->used_mask); } /* @@ -851,6 +1000,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); + if (armv8pmu_event_is_64bit(event)) + event->hw.flags |= ARMPMU_EVT_64BIT; + /* Onl expose micro/arch events supported by this PMU */ if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { @@ -957,10 +1109,10 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv8pmu_read_counter, cpu_pmu->write_counter = armv8pmu_write_counter, cpu_pmu->get_event_idx = armv8pmu_get_event_idx, + cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx, cpu_pmu->start = armv8pmu_start, cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, - cpu_pmu->max_period = (1LLU << 32) - 1, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; return 0; @@ -1127,3 +1279,32 @@ static int __init armv8_pmu_driver_init(void) return arm_pmu_acpi_probe(armv8_pmuv3_init); } device_initcall(armv8_pmu_driver_init) + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + u32 freq; + u32 shift; + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + freq = arch_timer_get_rate(); + userpg->cap_user_time = 1; + + clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, + NSEC_PER_SEC, 0); + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. + */ + if (shift == 32) { + shift = 31; + userpg->time_mult >>= 1; + } + userpg->time_shift = (u16)shift; + userpg->time_offset = -now; +} diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d849d9804011..e78c3ef04d95 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -275,7 +275,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + pr_warn("Unrecoverable kprobe detected.\n"); dump_kprobe(p); BUG(); break; @@ -395,9 +395,9 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) /* * If we have no pre-handler or it returned 0, we * continue with normal processing. If we have a - * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry, - * so get out doing nothing more here. + * pre-handler and it returned non-zero, it will + * modify the execution path and no need to single + * stepping. Let's just reset current kprobe and exit. * * pre_handler can hit a breakpoint and can step thru * before return, keep PSTATE D-flag enabled until @@ -405,16 +405,8 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); - return; - } - } - } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == - BRK64_OPCODE_KPROBES) && cur_kprobe) { - /* We probably hit a jprobe. Call its break handler. */ - if (cur_kprobe->break_handler && - cur_kprobe->break_handler(cur_kprobe, regs)) { - setup_singlestep(cur_kprobe, regs, kcb, 0); - return; + } else + reset_current_kprobe(); } } /* @@ -465,74 +457,6 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - /* - * Since we can't be sure where in the stack frame "stacked" - * pass-by-value arguments are stored we just don't try to - * duplicate any of the stack. Do not use jprobes on functions that - * use more than 64 bytes (after padding each to an 8 byte boundary) - * of arguments, or pass individual arguments larger than 16 bytes. - */ - - instruction_pointer_set(regs, (unsigned long) jp->entry); - preempt_disable(); - pause_graph_tracing(); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - /* - * Jprobe handler return by entering break exception, - * encoded same as kprobe, but with following conditions - * -a special PC to identify it from the other kprobes. - * -restore stack addr to original saved pt_regs - */ - asm volatile(" mov sp, %0 \n" - "jprobe_return_break: brk %1 \n" - : - : "r" (kcb->jprobe_saved_regs.sp), - "I" (BRK64_ESR_KPROBES) - : "memory"); - - unreachable(); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - long stack_addr = kcb->jprobe_saved_regs.sp; - long orig_sp = kernel_stack_pointer(regs); - struct jprobe *jp = container_of(p, struct jprobe, kp); - extern const char jprobe_return_break[]; - - if (instruction_pointer(regs) != (u64) jprobe_return_break) - return 0; - - if (orig_sp != stack_addr) { - struct pt_regs *saved_regs = - (struct pt_regs *)kcb->jprobe_saved_regs.sp; - pr_err("current sp %lx does not match saved sp %lx\n", - orig_sp, stack_addr); - pr_err("Saved registers for jprobe %p\n", jp); - __show_regs(saved_regs); - pr_err("Current registers\n"); - __show_regs(regs); - BUG(); - } - unpause_graph_tracing(); - *regs = kcb->jprobe_saved_regs; - preempt_enable_no_resched(); - return 1; -} - bool arch_within_kprobe_blacklist(unsigned long addr) { if ((addr >= (unsigned long)__kprobes_text_start && diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index e10bc363f533..7f1628effe6d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -177,16 +177,16 @@ static void print_pstate(struct pt_regs *regs) if (compat_user_mode(regs)) { printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n", pstate, - pstate & COMPAT_PSR_N_BIT ? 'N' : 'n', - pstate & COMPAT_PSR_Z_BIT ? 'Z' : 'z', - pstate & COMPAT_PSR_C_BIT ? 'C' : 'c', - pstate & COMPAT_PSR_V_BIT ? 'V' : 'v', - pstate & COMPAT_PSR_Q_BIT ? 'Q' : 'q', - pstate & COMPAT_PSR_T_BIT ? "T32" : "A32", - pstate & COMPAT_PSR_E_BIT ? "BE" : "LE", - pstate & COMPAT_PSR_A_BIT ? 'A' : 'a', - pstate & COMPAT_PSR_I_BIT ? 'I' : 'i', - pstate & COMPAT_PSR_F_BIT ? 'F' : 'f'); + pstate & PSR_AA32_N_BIT ? 'N' : 'n', + pstate & PSR_AA32_Z_BIT ? 'Z' : 'z', + pstate & PSR_AA32_C_BIT ? 'C' : 'c', + pstate & PSR_AA32_V_BIT ? 'V' : 'v', + pstate & PSR_AA32_Q_BIT ? 'Q' : 'q', + pstate & PSR_AA32_T_BIT ? "T32" : "A32", + pstate & PSR_AA32_E_BIT ? "BE" : "LE", + pstate & PSR_AA32_A_BIT ? 'A' : 'a', + pstate & PSR_AA32_I_BIT ? 'I' : 'i', + pstate & PSR_AA32_F_BIT ? 'F' : 'f'); } else { printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n", pstate, @@ -493,3 +493,25 @@ void arch_setup_new_exec(void) { current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; } + +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +void __used stackleak_check_alloca(unsigned long size) +{ + unsigned long stack_left; + unsigned long current_sp = current_stack_pointer; + struct stack_info info; + + BUG_ON(!on_accessible_stack(current, current_sp, &info)); + + stack_left = current_sp - info.low; + + /* + * There's a good chance we're almost out of stack space if this + * is true. Using panic() over BUG() is more likely to give + * reliable debugging output. + */ + if (size >= stack_left) + panic("alloca() over the kernel stack boundary\n"); +} +EXPORT_SYMBOL(stackleak_check_alloca); +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5c338ce5a7fa..6219486fa25f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -132,7 +132,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr); + on_irq_stack(addr, NULL); } /** @@ -277,19 +277,22 @@ static int ptrace_hbp_set_event(unsigned int note_type, switch (note_type) { case NT_ARM_HW_BREAK: - if (idx < ARM_MAX_BRP) { - tsk->thread.debug.hbp_break[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_BRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_BRP); + tsk->thread.debug.hbp_break[idx] = bp; + err = 0; break; case NT_ARM_HW_WATCH: - if (idx < ARM_MAX_WRP) { - tsk->thread.debug.hbp_watch[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_WRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_WRP); + tsk->thread.debug.hbp_watch[idx] = bp; + err = 0; break; } +out: return err; } @@ -1076,6 +1079,7 @@ static int compat_gpr_get(struct task_struct *target, break; case 16: reg = task_pt_regs(target)->pstate; + reg = pstate_to_compat_psr(reg); break; case 17: reg = task_pt_regs(target)->orig_x0; @@ -1143,6 +1147,7 @@ static int compat_gpr_set(struct task_struct *target, newregs.pc = reg; break; case 16: + reg = compat_psr_to_pstate(reg); newregs.pstate = reg; break; case 17: @@ -1629,7 +1634,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, regs->regs[regno] = saved_reg; } -asmlinkage int syscall_trace_enter(struct pt_regs *regs) +int syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); @@ -1647,7 +1652,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) return regs->syscallno; } -asmlinkage void syscall_trace_exit(struct pt_regs *regs) +void syscall_trace_exit(struct pt_regs *regs) { audit_syscall_exit(regs); @@ -1656,18 +1661,24 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); + + rseq_syscall(regs); } /* - * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a + * We also take into account DIT (bit 24), which is not yet documented, and + * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be + * allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. + * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \ - GENMASK_ULL(5, 5)) + (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20)) static int valid_compat_regs(struct user_pt_regs *regs) { @@ -1675,15 +1686,15 @@ static int valid_compat_regs(struct user_pt_regs *regs) if (!system_supports_mixed_endian_el0()) { if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - regs->pstate |= COMPAT_PSR_E_BIT; + regs->pstate |= PSR_AA32_E_BIT; else - regs->pstate &= ~COMPAT_PSR_E_BIT; + regs->pstate &= ~PSR_AA32_E_BIT; } if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) && - (regs->pstate & COMPAT_PSR_A_BIT) == 0 && - (regs->pstate & COMPAT_PSR_I_BIT) == 0 && - (regs->pstate & COMPAT_PSR_F_BIT) == 0) { + (regs->pstate & PSR_AA32_A_BIT) == 0 && + (regs->pstate & PSR_AA32_I_BIT) == 0 && + (regs->pstate & PSR_AA32_F_BIT) == 0) { return 1; } @@ -1691,11 +1702,11 @@ static int valid_compat_regs(struct user_pt_regs *regs) * Force PSR to a valid 32-bit EL0t, preserving the same bits as * arch/arm. */ - regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT | - COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT | - COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK | - COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT | - COMPAT_PSR_T_BIT; + regs->pstate &= PSR_AA32_N_BIT | PSR_AA32_Z_BIT | + PSR_AA32_C_BIT | PSR_AA32_V_BIT | + PSR_AA32_Q_BIT | PSR_AA32_IT_MASK | + PSR_AA32_GE_MASK | PSR_AA32_E_BIT | + PSR_AA32_T_BIT; regs->pstate |= PSR_MODE32_BIT; return 0; diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 6b8d90d5ceae..5ba4465e44f0 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -13,6 +13,7 @@ #include <asm/mmu.h> #include <asm/ptrace.h> #include <asm/sections.h> +#include <asm/stacktrace.h> #include <asm/sysreg.h> #include <asm/vmap_stack.h> @@ -88,23 +89,52 @@ static int init_sdei_stacks(void) return err; } -bool _on_sdei_stack(unsigned long sp) +static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) { - unsigned long low, high; + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); + unsigned long high = low + SDEI_STACK_SIZE; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) + if (sp < low || sp >= high) return false; - low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); - high = low + SDEI_STACK_SIZE; + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_SDEI_NORMAL; + } - if (low <= sp && sp < high) + return true; +} + +static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_SDEI_CRITICAL; + } + + return true; +} + +bool _on_sdei_stack(unsigned long sp, struct stack_info *info) +{ + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return false; + + if (on_sdei_critical_stack(sp, info)) return true; - low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); - high = low + SDEI_STACK_SIZE; + if (on_sdei_normal_stack(sp, info)) + return true; - return (low <= sp && sp < high); + return false; } unsigned long sdei_arch_get_entry_point(int conduit) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 30ad2f085d1f..5b4fac434c84 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -241,6 +241,44 @@ static void __init request_standard_resources(void) } } +static int __init reserve_memblock_reserved_regions(void) +{ + phys_addr_t start, end, roundup_end = 0; + struct resource *mem, *res; + u64 i; + + for_each_reserved_mem_region(i, &start, &end) { + if (end <= roundup_end) + continue; /* done already */ + + start = __pfn_to_phys(PFN_DOWN(start)); + end = __pfn_to_phys(PFN_UP(end)) - 1; + roundup_end = end; + + res = kzalloc(sizeof(*res), GFP_ATOMIC); + if (WARN_ON(!res)) + return -ENOMEM; + res->start = start; + res->end = end; + res->name = "reserved"; + res->flags = IORESOURCE_MEM; + + mem = request_resource_conflict(&iomem_resource, res); + /* + * We expected memblock_reserve() regions to conflict with + * memory created by request_standard_resources(). + */ + if (WARN_ON_ONCE(!mem)) + continue; + kfree(res); + + reserve_region_with_split(mem, start, end, "reserved"); + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); + u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 511af13e8d8f..5dcc942906db 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -539,8 +539,9 @@ static int restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ @@ -802,6 +803,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) int usig = ksig->sig; int ret; + rseq_signal_deliver(ksig, regs); + /* * Set up the stack frame */ @@ -910,7 +913,7 @@ static void do_signal(struct pt_regs *regs) } asmlinkage void do_notify_resume(struct pt_regs *regs, - unsigned int thread_flags) + unsigned long thread_flags) { /* * The assembly code enters us with IRQs off, but it hasn't @@ -940,6 +943,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } if (thread_flags & _TIF_FOREIGN_FPSTATE) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 77b91f478995..24b09003f821 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -243,6 +243,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, int err; sigset_t set; struct compat_aux_sigframe __user *aux; + unsigned long psr; err = get_sigset_t(&set, &sf->uc.uc_sigmask); if (err == 0) { @@ -266,7 +267,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, __get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __get_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); + + regs->pstate = compat_psr_to_pstate(psr); /* * Avoid compat_sys_sigreturn() restarting. @@ -282,8 +285,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) +COMPAT_SYSCALL_DEFINE0(sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct compat_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ @@ -312,8 +316,9 @@ badframe: return 0; } -asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct compat_rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ @@ -372,22 +377,22 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, { compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); compat_ulong_t retcode; - compat_ulong_t spsr = regs->pstate & ~(PSR_f | COMPAT_PSR_E_BIT); + compat_ulong_t spsr = regs->pstate & ~(PSR_f | PSR_AA32_E_BIT); int thumb; /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; if (thumb) - spsr |= COMPAT_PSR_T_BIT; + spsr |= PSR_AA32_T_BIT; else - spsr &= ~COMPAT_PSR_T_BIT; + spsr &= ~PSR_AA32_T_BIT; /* The IT state must be cleared for both ARM and Thumb-2 */ - spsr &= ~COMPAT_PSR_IT_MASK; + spsr &= ~PSR_AA32_IT_MASK; /* Restore the original endianness */ - spsr |= COMPAT_PSR_ENDSTATE; + spsr |= PSR_AA32_ENDSTATE; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); @@ -414,6 +419,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { struct compat_aux_sigframe __user *aux; + unsigned long psr = pstate_to_compat_psr(regs->pstate); int err = 0; __put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); @@ -432,7 +438,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __put_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2faa9863d2e5..25fcd22a4bb2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -225,6 +225,7 @@ asmlinkage notrace void secondary_start_kernel(void) notify_cpu_starting(cpu); store_cpu_topology(cpu); + numa_add_cpu(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -278,6 +279,9 @@ int __cpu_disable(void) if (ret) return ret; + remove_cpu_topology(cpu); + numa_remove_cpu(cpu); + /* * Take this CPU offline. Once we clear this, we can't return, * and we must not schedule until we're ready to give up the cpu. @@ -518,7 +522,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) } bootcpu_valid = true; cpu_madt_gicc[0] = *processor; - early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); return; } @@ -541,8 +544,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) */ acpi_set_mailbox_entry(cpu_count, processor); - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid)); - cpu_count++; } @@ -562,8 +563,34 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, return 0; } + +static void __init acpi_parse_and_init_cpus(void) +{ + int i; + + /* + * do a walk of MADT to determine how many CPUs + * we have including disabled CPUs, and get information + * we need for SMP init. + */ + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, + acpi_parse_gic_cpu_interface, 0); + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_map_cpus_to_nodes(); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} #else -#define acpi_table_parse_madt(...) do { } while (0) +#define acpi_parse_and_init_cpus(...) do { } while (0) #endif /* @@ -636,13 +663,7 @@ void __init smp_init_cpus(void) if (acpi_disabled) of_parse_and_init_cpus(); else - /* - * do a walk of MADT to determine how many CPUs - * we have including disabled CPUs, and get information - * we need for SMP init - */ - acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, - acpi_parse_gic_cpu_interface, 0); + acpi_parse_and_init_cpus(); if (cpu_count > nr_cpu_ids) pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n", @@ -679,6 +700,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) this_cpu = smp_processor_id(); store_cpu_topology(this_cpu); numa_store_cpu_info(this_cpu); + numa_add_cpu(this_cpu); /* * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d5718a060672..4989f7ea1e59 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -50,7 +50,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (!tsk) tsk = current; - if (!on_accessible_stack(tsk, fp)) + if (!on_accessible_stack(tsk, fp, NULL)) return -EINVAL; frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 72981bae10eb..b44065fb1616 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -25,11 +25,13 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/syscalls.h> + #include <asm/cpufeature.h> +#include <asm/syscall.h> -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t off) +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, off) { if (offset_in_page(off) != 0) return -EINVAL; @@ -42,24 +44,25 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) if (personality(personality) == PER_LINUX32 && !system_supports_32bit_el0()) return -EINVAL; - return sys_personality(personality); + return ksys_personality(personality); } /* * Wrappers to pass the pt_regs argument. */ -asmlinkage long sys_rt_sigreturn_wrapper(void); -#define sys_rt_sigreturn sys_rt_sigreturn_wrapper #define sys_personality sys_arm64_personality +asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __arm64_sys_ni_syscall sys_ni_syscall + #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = sym, +#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); +#include <asm/unistd.h> -/* - * The sys_call_table array must be 4K aligned to be accessible from - * kernel/entry.S. - */ -void * const sys_call_table[__NR_syscalls] __aligned(4096) = { - [0 ... __NR_syscalls - 1] = sys_ni_syscall, +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, + +const syscall_fn_t sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include <asm/unistd.h> }; diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index a40b1343b819..0f8bcb7de700 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -22,31 +22,128 @@ */ #define __COMPAT_SYSCALL_NR +#include <linux/compat.h> #include <linux/compiler.h> #include <linux/syscalls.h> -asmlinkage long compat_sys_sigreturn_wrapper(void); -asmlinkage long compat_sys_rt_sigreturn_wrapper(void); -asmlinkage long compat_sys_statfs64_wrapper(void); -asmlinkage long compat_sys_fstatfs64_wrapper(void); -asmlinkage long compat_sys_pread64_wrapper(void); -asmlinkage long compat_sys_pwrite64_wrapper(void); -asmlinkage long compat_sys_truncate64_wrapper(void); -asmlinkage long compat_sys_ftruncate64_wrapper(void); -asmlinkage long compat_sys_readahead_wrapper(void); -asmlinkage long compat_sys_fadvise64_64_wrapper(void); -asmlinkage long compat_sys_sync_file_range2_wrapper(void); -asmlinkage long compat_sys_fallocate_wrapper(void); -asmlinkage long compat_sys_mmap2_wrapper(void); +#include <asm/syscall.h> -#undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = sym, +asmlinkage long compat_sys_sigreturn(void); +asmlinkage long compat_sys_rt_sigreturn(void); + +COMPAT_SYSCALL_DEFINE3(aarch32_statfs64, const char __user *, pathname, + compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + /* + * 32-bit ARM applies an OABI compatibility fixup to statfs64 and + * fstatfs64 regardless of whether OABI is in use, and therefore + * arbitrary binaries may rely upon it, so we must do the same. + * For more details, see commit: + * + * 713c481519f19df9 ("[ARM] 3108/2: old ABI compat: statfs64 and + * fstatfs64") + */ + if (sz == 88) + sz = 84; + + return kcompat_sys_statfs64(pathname, sz, buf); +} + +COMPAT_SYSCALL_DEFINE3(aarch32_fstatfs64, unsigned int, fd, compat_size_t, sz, + struct compat_statfs64 __user *, buf) +{ + /* see aarch32_statfs64 */ + if (sz == 88) + sz = 84; + + return kcompat_sys_fstatfs64(fd, sz, buf); +} /* - * The sys_call_table array must be 4K aligned to be accessible from - * kernel/entry.S. + * Note: off_4k is always in units of 4K. If we can't do the + * requested offset because it is not page-aligned, we return -EINVAL. */ -void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = { - [0 ... __NR_compat_syscalls - 1] = sys_ni_syscall, +COMPAT_SYSCALL_DEFINE6(aarch32_mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off_4k) +{ + if (off_4k & (~PAGE_MASK >> 12)) + return -EINVAL; + + off_4k >>= (PAGE_SHIFT - 12); + + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off_4k); +} + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define arg_u32p(name) u32, name##_hi, u32, name##_lo +#else +#define arg_u32p(name) u32, name##_lo, u32, name##_hi +#endif + +#define arg_u64(name) (((u64)name##_hi << 32) | name##_lo) + +COMPAT_SYSCALL_DEFINE6(aarch32_pread64, unsigned int, fd, char __user *, buf, + size_t, count, u32, __pad, arg_u32p(pos)) +{ + return ksys_pread64(fd, buf, count, arg_u64(pos)); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_pwrite64, unsigned int, fd, + const char __user *, buf, size_t, count, u32, __pad, + arg_u32p(pos)) +{ + return ksys_pwrite64(fd, buf, count, arg_u64(pos)); +} + +COMPAT_SYSCALL_DEFINE4(aarch32_truncate64, const char __user *, pathname, + u32, __pad, arg_u32p(length)) +{ + return ksys_truncate(pathname, arg_u64(length)); +} + +COMPAT_SYSCALL_DEFINE4(aarch32_ftruncate64, unsigned int, fd, u32, __pad, + arg_u32p(length)) +{ + return ksys_ftruncate(fd, arg_u64(length)); +} + +COMPAT_SYSCALL_DEFINE5(aarch32_readahead, int, fd, u32, __pad, + arg_u32p(offset), size_t, count) +{ + return ksys_readahead(fd, arg_u64(offset), count); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_fadvise64_64, int, fd, int, advice, + arg_u32p(offset), arg_u32p(len)) +{ + return ksys_fadvise64_64(fd, arg_u64(offset), arg_u64(len), advice); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_sync_file_range2, int, fd, unsigned int, flags, + arg_u32p(offset), arg_u32p(nbytes)) +{ + return ksys_sync_file_range(fd, arg_u64(offset), arg_u64(nbytes), + flags); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode, + arg_u32p(offset), arg_u32p(len)) +{ + return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len)); +} + +asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __arm64_sys_ni_syscall sys_ni_syscall + +#undef __SYSCALL +#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); +#include <asm/unistd32.h> + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, + +const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = { + [0 ... __NR_compat_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include <asm/unistd32.h> }; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c new file mode 100644 index 000000000000..032d22312881 --- /dev/null +++ b/arch/arm64/kernel/syscall.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/compiler.h> +#include <linux/context_tracking.h> +#include <linux/errno.h> +#include <linux/nospec.h> +#include <linux/ptrace.h> +#include <linux/syscalls.h> + +#include <asm/daifflags.h> +#include <asm/fpsimd.h> +#include <asm/syscall.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +long compat_arm_syscall(struct pt_regs *regs); + +long sys_ni_syscall(void); + +asmlinkage long do_ni_syscall(struct pt_regs *regs) +{ +#ifdef CONFIG_COMPAT + long ret; + if (is_compat_task()) { + ret = compat_arm_syscall(regs); + if (ret != -ENOSYS) + return ret; + } +#endif + + return sys_ni_syscall(); +} + +static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn) +{ + return syscall_fn(regs); +} + +static void invoke_syscall(struct pt_regs *regs, unsigned int scno, + unsigned int sc_nr, + const syscall_fn_t syscall_table[]) +{ + long ret; + + if (scno < sc_nr) { + syscall_fn_t syscall_fn; + syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)]; + ret = __invoke_syscall(regs, syscall_fn); + } else { + ret = do_ni_syscall(regs); + } + + regs->regs[0] = ret; +} + +static inline bool has_syscall_work(unsigned long flags) +{ + return unlikely(flags & _TIF_SYSCALL_WORK); +} + +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_exit(struct pt_regs *regs); + +static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, + const syscall_fn_t syscall_table[]) +{ + unsigned long flags = current_thread_info()->flags; + + regs->orig_x0 = regs->regs[0]; + regs->syscallno = scno; + + local_daif_restore(DAIF_PROCCTX); + user_exit(); + + if (has_syscall_work(flags)) { + /* set default errno for user-issued syscall(-1) */ + if (scno == NO_SYSCALL) + regs->regs[0] = -ENOSYS; + scno = syscall_trace_enter(regs); + if (scno == NO_SYSCALL) + goto trace_exit; + } + + invoke_syscall(regs, scno, sc_nr, syscall_table); + + /* + * The tracing status may have changed under our feet, so we have to + * check again. However, if we were tracing entry, then we always trace + * exit regardless, as the old entry assembly did. + */ + if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { + local_daif_mask(); + flags = current_thread_info()->flags; + if (!has_syscall_work(flags)) { + /* + * We're off to userspace, where interrupts are + * always enabled after we restore the flags from + * the SPSR. + */ + trace_hardirqs_on(); + return; + } + local_daif_restore(DAIF_PROCCTX); + } + +trace_exit: + syscall_trace_exit(regs); +} + +static inline void sve_user_discard(void) +{ + if (!system_supports_sve()) + return; + + clear_thread_flag(TIF_SVE); + + /* + * task_fpsimd_load() won't be called to update CPACR_EL1 in + * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only + * happens if a context switch or kernel_neon_begin() or context + * modification (sigreturn, ptrace) intervenes. + * So, ensure that CPACR_EL1 is already correct for the fast-path case. + */ + sve_user_disable(); +} + +asmlinkage void el0_svc_handler(struct pt_regs *regs) +{ + sve_user_discard(); + el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); +} + +#ifdef CONFIG_COMPAT +asmlinkage void el0_svc_compat_handler(struct pt_regs *regs) +{ + el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, + compat_sys_call_table); +} +#endif diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index f845a8617812..0825c4a856e3 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -215,11 +215,16 @@ EXPORT_SYMBOL_GPL(cpu_topology); const struct cpumask *cpu_coregroup_mask(int cpu) { - const cpumask_t *core_mask = &cpu_topology[cpu].core_sibling; + const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); + /* Find the smaller of NUMA, core or LLC siblings */ + if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { + /* not numa in package, lets use the package siblings */ + core_mask = &cpu_topology[cpu].core_sibling; + } if (cpu_topology[cpu].llc_id != -1) { - if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask)) - core_mask = &cpu_topology[cpu].llc_siblings; + if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) + core_mask = &cpu_topology[cpu].llc_sibling; } return core_mask; @@ -231,27 +236,25 @@ static void update_siblings_masks(unsigned int cpuid) int cpu; /* update core and thread sibling masks */ - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; if (cpuid_topo->llc_id == cpu_topo->llc_id) { - cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings); - cpumask_set_cpu(cpuid, &cpu_topo->llc_siblings); + cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); + cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } if (cpuid_topo->package_id != cpu_topo->package_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); if (cpuid_topo->core_id != cpu_topo->core_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); } } @@ -293,6 +296,19 @@ topology_populated: update_siblings_masks(cpuid); } +static void clear_cpu_topology(int cpu) +{ + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpumask_clear(&cpu_topo->llc_sibling); + cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); +} + static void __init reset_cpu_topology(void) { unsigned int cpu; @@ -303,18 +319,26 @@ static void __init reset_cpu_topology(void) cpu_topo->thread_id = -1; cpu_topo->core_id = 0; cpu_topo->package_id = -1; - cpu_topo->llc_id = -1; - cpumask_clear(&cpu_topo->llc_siblings); - cpumask_set_cpu(cpu, &cpu_topo->llc_siblings); - cpumask_clear(&cpu_topo->core_sibling); - cpumask_set_cpu(cpu, &cpu_topo->core_sibling); - cpumask_clear(&cpu_topo->thread_sibling); - cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + clear_cpu_topology(cpu); } } +void remove_cpu_topology(unsigned int cpu) +{ + int sibling; + + for_each_cpu(sibling, topology_core_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + for_each_cpu(sibling, topology_llc_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); + + clear_cpu_topology(cpu); +} + #ifdef CONFIG_ACPI /* * Propagate the topology information of the processor_topology_node tree to the diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d399d459397b..039e9ff379cc 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -411,7 +411,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { - config_sctlr_el1(SCTLR_EL1_UCI, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); } #define __user_cache_maint(insn, address, res) \ @@ -547,22 +547,6 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) do_undefinstr(regs); } -long compat_arm_syscall(struct pt_regs *regs); - -asmlinkage long do_ni_syscall(struct pt_regs *regs) -{ -#ifdef CONFIG_COMPAT - long ret; - if (is_compat_task()) { - ret = compat_arm_syscall(regs); - if (ret != -ENOSYS) - return ret; - } -#endif - - return sys_ni_syscall(); -} - static const char *esr_class_str[] = { [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", [ESR_ELx_EC_UNKNOWN] = "Unknown/Uncategorized", diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S index b82c85e5d972..e20483b104d9 100644 --- a/arch/arm64/kernel/vdso/note.S +++ b/arch/arm64/kernel/vdso/note.S @@ -22,7 +22,10 @@ #include <linux/uts.h> #include <linux/version.h> #include <linux/elfnote.h> +#include <linux/build-salt.h> ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END + +BUILD_SALT |