summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 06:55:07 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 06:55:07 +0200
commit114143a595895c03fbefccfd8346fc51fb4908ed (patch)
tree161f753ad69a2aabe7cd7ac7cc493c3678dbd760 /arch/arm64/kernel
parentMerge tag 'mips_6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/l... (diff)
parentMerge branch 'for-next/timers' into for-next/core (diff)
downloadlinux-114143a595895c03fbefccfd8346fc51fb4908ed.tar.xz
linux-114143a595895c03fbefccfd8346fc51fb4908ed.zip
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Will Deacon: "The highlights are support for Arm's "Permission Overlay Extension" using memory protection keys, support for running as a protected guest on Android as well as perf support for a bunch of new interconnect PMUs. Summary: ACPI: - Enable PMCG erratum workaround for HiSilicon HIP10 and 11 platforms. - Ensure arm64-specific IORT header is covered by MAINTAINERS. CPU Errata: - Enable workaround for hardware access/dirty issue on Ampere-1A cores. Memory management: - Define PHYSMEM_END to fix a crash in the amdgpu driver. - Avoid tripping over invalid kernel mappings on the kexec() path. - Userspace support for the Permission Overlay Extension (POE) using protection keys. Perf and PMUs: - Add support for the "fixed instruction counter" extension in the CPU PMU architecture. - Extend and fix the event encodings for Apple's M1 CPU PMU. - Allow LSM hooks to decide on SPE permissions for physical profiling. - Add support for the CMN S3 and NI-700 PMUs. Confidential Computing: - Add support for booting an arm64 kernel as a protected guest under Android's "Protected KVM" (pKVM) hypervisor. Selftests: - Fix vector length issues in the SVE/SME sigreturn tests - Fix build warning in the ptrace tests. Timers: - Add support for PR_{G,S}ET_TSC so that 'rr' can deal with non-determinism arising from the architected counter. Miscellaneous: - Rework our IPI-based CPU stopping code to try NMIs if regular IPIs don't succeed. - Minor fixes and cleanups" * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (94 commits) perf: arm-ni: Fix an NULL vs IS_ERR() bug arm64: hibernate: Fix warning for cast from restricted gfp_t arm64: esr: Define ESR_ELx_EC_* constants as UL arm64: pkeys: remove redundant WARN perf: arm_pmuv3: Use BR_RETIRED for HW branch event if enabled MAINTAINERS: List Arm interconnect PMUs as supported perf: Add driver for Arm NI-700 interconnect PMU dt-bindings/perf: Add Arm NI-700 PMU perf/arm-cmn: Improve format attr printing perf/arm-cmn: Clean up unnecessary NUMA_NO_NODE check arm64/mm: use lm_alias() with addresses passed to memblock_free() mm: arm64: document why pte is not advanced in contpte_ptep_set_access_flags() arm64: Expose the end of the linear map in PHYSMEM_END arm64: trans_pgd: mark PTEs entries as valid to avoid dead kexec() arm64/mm: Delete __init region from memblock.reserved perf/arm-cmn: Support CMN S3 dt-bindings: perf: arm-cmn: Add CMN S3 perf/arm-cmn: Refactor DTC PMU register access perf/arm-cmn: Make cycle counts less surprising perf/arm-cmn: Improve build-time assertion ...
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/cpu_errata.c10
-rw-r--r--arch/arm64/kernel/cpufeature.c23
-rw-r--r--arch/arm64/kernel/cpuinfo.c3
-rw-r--r--arch/arm64/kernel/hibernate.c2
-rw-r--r--arch/arm64/kernel/process.c97
-rw-r--r--arch/arm64/kernel/ptrace.c46
-rw-r--r--arch/arm64/kernel/signal.c62
-rw-r--r--arch/arm64/kernel/smp.c160
-rw-r--r--arch/arm64/kernel/traps.c26
9 files changed, 345 insertions, 84 deletions
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index f6b6b4507357..dfefbdf4073a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -456,6 +456,14 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
};
#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
+static const struct midr_range erratum_ac03_cpu_38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -772,7 +780,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "AmpereOne erratum AC03_CPU_38",
.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
},
#endif
{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646ecd3069fd..718728a85430 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -466,6 +466,8 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
+ FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
@@ -2348,6 +2350,14 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
}
+#ifdef CONFIG_ARM64_POE
+static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1x_E0POE);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_E0POE);
+}
+#endif
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2870,6 +2880,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_nv1,
ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
},
+#ifdef CONFIG_ARM64_POE
+ {
+ .desc = "Stage-1 Permission Overlay Extension (S1POE)",
+ .capability = ARM64_HAS_S1POE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_poe,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
+ },
+#endif
{},
};
@@ -3034,6 +3054,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
+#ifdef CONFIG_ARM64_POE
+ HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE),
+#endif
{},
};
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 09eeaa24d456..44718d0482b3 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -143,6 +143,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
[KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
[KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
+ [KERNEL_HWCAP_POE] = "poe",
};
#ifdef CONFIG_COMPAT
@@ -280,7 +281,7 @@ const struct seq_operations cpuinfo_op = {
};
-static struct kobj_type cpuregs_kobj_type = {
+static const struct kobj_type cpuregs_kobj_type = {
.sysfs_ops = &kobj_sysfs_ops,
};
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..7b11d84f533c 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -407,7 +407,7 @@ int swsusp_arch_resume(void)
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
- .trans_alloc_arg = (void *)GFP_ATOMIC,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
};
/*
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4ae31b7af6c3..0540653fbf38 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -43,6 +43,7 @@
#include <linux/stacktrace.h>
#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
@@ -271,12 +272,21 @@ static void flush_tagged_addr_state(void)
clear_thread_flag(TIF_TAGGED_ADDR);
}
+static void flush_poe(void)
+{
+ if (!system_supports_poe())
+ return;
+
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
+ flush_poe();
}
void arch_release_task_struct(struct task_struct *tsk)
@@ -371,6 +381,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (system_supports_tpidr2())
p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ if (system_supports_poe())
+ p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
@@ -472,27 +485,63 @@ static void entry_task_switch(struct task_struct *next)
}
/*
- * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Ensure access is disabled when switching to a 32bit task, ensure
- * access is enabled when switching to a 64bit task.
+ * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of
+ * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0}
+ * accesses and prctl(PR_SET_TSC). Ensure access is disabled iff a workaround is
+ * required or PR_TSC_SIGSEGV is set.
*/
-static void erratum_1418040_thread_switch(struct task_struct *next)
+static void update_cntkctl_el1(struct task_struct *next)
{
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
- !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
- return;
+ struct thread_info *ti = task_thread_info(next);
- if (is_compat_thread(task_thread_info(next)))
+ if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) ||
+ has_erratum_handler(read_cntvct_el0) ||
+ (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+ this_cpu_has_cap(ARM64_WORKAROUND_1418040) &&
+ is_compat_thread(ti)))
sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
else
sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
}
-static void erratum_1418040_new_exec(void)
+static void cntkctl_thread_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if ((read_ti_thread_flags(task_thread_info(prev)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)) !=
+ (read_ti_thread_flags(task_thread_info(next)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)))
+ update_cntkctl_el1(next);
+}
+
+static int do_set_tsc_mode(unsigned int val)
{
+ bool tsc_sigsegv;
+
+ if (val == PR_TSC_SIGSEGV)
+ tsc_sigsegv = true;
+ else if (val == PR_TSC_ENABLE)
+ tsc_sigsegv = false;
+ else
+ return -EINVAL;
+
preempt_disable();
- erratum_1418040_thread_switch(current);
+ update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv);
+ update_cntkctl_el1(current);
preempt_enable();
+
+ return 0;
+}
+
+static void permission_overlay_switch(struct task_struct *next)
+{
+ if (!system_supports_poe())
+ return;
+
+ current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+ if (current->thread.por_el0 != next->thread.por_el0) {
+ write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ }
}
/*
@@ -528,8 +577,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
contextidr_thread_switch(next);
entry_task_switch(next);
ssbs_thread_switch(next);
- erratum_1418040_thread_switch(next);
+ cntkctl_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
+ permission_overlay_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -645,7 +695,7 @@ void arch_setup_new_exec(void)
current->mm->context.flags = mmflags;
ptrauth_thread_init_user();
mte_thread_init_user();
- erratum_1418040_new_exec();
+ do_set_tsc_mode(PR_TSC_ENABLE);
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
@@ -754,3 +804,26 @@ int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
return prot;
}
#endif
+
+int get_tsc_mode(unsigned long adr)
+{
+ unsigned int val;
+
+ if (is_compat_task())
+ return -EINVAL;
+
+ if (test_thread_flag(TIF_TSC_SIGSEGV))
+ val = PR_TSC_SIGSEGV;
+ else
+ val = PR_TSC_ENABLE;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+ if (is_compat_task())
+ return -EINVAL;
+
+ return do_set_tsc_mode(val);
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 0d022599eb61..b756578aeaee 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1440,6 +1440,39 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
}
#endif
+#ifdef CONFIG_ARM64_POE
+static int poe_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ return membuf_write(&to, &target->thread.por_el0,
+ sizeof(target->thread.por_el0));
+}
+
+static int poe_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ target->thread.por_el0 = ctrl;
+
+ return 0;
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1469,6 +1502,9 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
REGSET_TAGGED_ADDR_CTRL,
#endif
+#ifdef CONFIG_ARM64_POE
+ REGSET_POE
+#endif
};
static const struct user_regset aarch64_regsets[] = {
@@ -1628,6 +1664,16 @@ static const struct user_regset aarch64_regsets[] = {
.set = tagged_addr_ctrl_set,
},
#endif
+#ifdef CONFIG_ARM64_POE
+ [REGSET_POE] = {
+ .core_note_type = NT_ARM_POE,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = poe_get,
+ .set = poe_set,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 4a77f4976e11..561986947530 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -61,6 +61,7 @@ struct rt_sigframe_user_layout {
unsigned long za_offset;
unsigned long zt_offset;
unsigned long fpmr_offset;
+ unsigned long poe_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -185,6 +186,8 @@ struct user_ctxs {
u32 zt_size;
struct fpmr_context __user *fpmr;
u32 fpmr_size;
+ struct poe_context __user *poe;
+ u32 poe_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@@ -258,6 +261,32 @@ static int restore_fpmr_context(struct user_ctxs *user)
return err;
}
+static int preserve_poe_context(struct poe_context __user *ctx)
+{
+ int err = 0;
+
+ __put_user_error(POE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err);
+
+ return err;
+}
+
+static int restore_poe_context(struct user_ctxs *user)
+{
+ u64 por_el0;
+ int err = 0;
+
+ if (user->poe_size != sizeof(*user->poe))
+ return -EINVAL;
+
+ __get_user_error(por_el0, &(user->poe->por_el0), err);
+ if (!err)
+ write_sysreg_s(por_el0, SYS_POR_EL0);
+
+ return err;
+}
+
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
@@ -621,6 +650,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->za = NULL;
user->zt = NULL;
user->fpmr = NULL;
+ user->poe = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -671,6 +701,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case POE_MAGIC:
+ if (!system_supports_poe())
+ goto invalid;
+
+ if (user->poe)
+ goto invalid;
+
+ user->poe = (struct poe_context __user *)head;
+ user->poe_size = size;
+ break;
+
case SVE_MAGIC:
if (!system_supports_sve() && !system_supports_sme())
goto invalid;
@@ -857,6 +898,9 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_sme2() && user.zt)
err = restore_zt_context(&user);
+ if (err == 0 && system_supports_poe() && user.poe)
+ err = restore_poe_context(&user);
+
return err;
}
@@ -980,6 +1024,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_poe()) {
+ err = sigframe_alloc(user, &user->poe_offset,
+ sizeof(struct poe_context));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
@@ -1042,6 +1093,14 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_fpmr_context(fpmr_ctx);
}
+ if (system_supports_poe() && err == 0 && user->poe_offset) {
+ struct poe_context __user *poe_ctx =
+ apply_user_offset(user, user->poe_offset);
+
+ err |= preserve_poe_context(poe_ctx);
+ }
+
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -1178,6 +1237,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
sme_smstop();
}
+ if (system_supports_poe())
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f01f0fd7b7fe..3b3f6b56e733 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -68,7 +68,7 @@ enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
- IPI_CPU_CRASH_STOP,
+ IPI_CPU_STOP_NMI,
IPI_TIMER,
IPI_IRQ_WORK,
NR_IPI,
@@ -85,6 +85,8 @@ static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+static bool crash_stop;
+
static void ipi_setup(int cpu);
#ifdef CONFIG_HOTPLUG_CPU
@@ -823,7 +825,7 @@ static const char *ipi_types[MAX_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
- [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
+ [IPI_CPU_STOP_NMI] = "CPU stop NMIs",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
@@ -867,9 +869,9 @@ void arch_irq_work_raise(void)
}
#endif
-static void __noreturn local_cpu_stop(void)
+static void __noreturn local_cpu_stop(unsigned int cpu)
{
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
local_daif_mask();
sdei_mask_local_cpu();
@@ -883,21 +885,26 @@ static void __noreturn local_cpu_stop(void)
*/
void __noreturn panic_smp_self_stop(void)
{
- local_cpu_stop();
+ local_cpu_stop(smp_processor_id());
}
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-#endif
-
static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use local_daif_mask() instead of local_irq_disable() to make sure
+ * that pseudo-NMIs are disabled. The "crash stop" code starts with
+ * an IRQ and falls back to NMI (which might be pseudo). If the IRQ
+ * finally goes through right as we're timing out then the NMI could
+ * interrupt us. It's better to prevent the NMI and let the IRQ
+ * finish since the pt_regs will be better.
+ */
+ local_daif_mask();
+
crash_save_cpu(regs, cpu);
- atomic_dec(&waiting_for_crash_ipi);
+ set_cpu_online(cpu, false);
- local_irq_disable();
sdei_mask_local_cpu();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
@@ -962,14 +969,12 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_CPU_STOP:
- local_cpu_stop();
- break;
-
- case IPI_CPU_CRASH_STOP:
- if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ case IPI_CPU_STOP_NMI:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
ipi_cpu_crash_stop(cpu, get_irq_regs());
-
unreachable();
+ } else {
+ local_cpu_stop(cpu);
}
break;
@@ -1024,8 +1029,7 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
return false;
switch (ipi) {
- case IPI_CPU_STOP:
- case IPI_CPU_CRASH_STOP:
+ case IPI_CPU_STOP_NMI:
case IPI_CPU_BACKTRACE:
case IPI_KGDB_ROUNDUP:
return true;
@@ -1138,79 +1142,109 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
+ static unsigned long stop_in_progress;
+ cpumask_t mask;
unsigned long timeout;
- if (num_other_online_cpus()) {
- cpumask_t mask;
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
+ /* Only proceed if this is the first CPU to reach this code */
+ if (test_and_set_bit(0, &stop_in_progress))
+ return;
- if (system_state <= SYSTEM_RUNNING)
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_STOP);
- }
+ /*
+ * Send an IPI to all currently online CPUs except the CPU running
+ * this code.
+ *
+ * NOTE: we don't do anything here to prevent other CPUs from coming
+ * online after we snapshot `cpu_online_mask`. Ideally, the calling code
+ * should do something to prevent other CPUs from coming up. This code
+ * can be called in the panic path and thus it doesn't seem wise to
+ * grab the CPU hotplug mutex ourselves. Worst case:
+ * - If a CPU comes online as we're running, we'll likely notice it
+ * during the 1 second wait below and then we'll catch it when we try
+ * with an NMI (assuming NMIs are enabled) since we re-snapshot the
+ * mask before sending an NMI.
+ * - If we leave the function and see that CPUs are still online we'll
+ * at least print a warning. Especially without NMIs this function
+ * isn't foolproof anyway so calling code will just have to accept
+ * the fact that there could be cases where a CPU can't be stopped.
+ */
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
- /* Wait up to one second for other CPUs to stop */
+ if (system_state <= SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
+
+ /*
+ * Start with a normal IPI and wait up to one second for other CPUs to
+ * stop. We do this first because it gives other processors a chance
+ * to exit critical sections / drop locks and makes the rest of the
+ * stop process (especially console flush) more robust.
+ */
+ smp_cross_call(&mask, IPI_CPU_STOP);
timeout = USEC_PER_SEC;
while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_other_online_cpus())
+ /*
+ * If CPUs are still online, try an NMI. There's no excuse for this to
+ * be slow, so we only give them an extra 10 ms to respond.
+ */
+ if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+
+ smp_cross_call(&mask, IPI_CPU_STOP_NMI);
+ timeout = USEC_PER_MSEC * 10;
+ while (num_other_online_cpus() && timeout--)
+ udelay(1);
+ }
+
+ if (num_other_online_cpus()) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ cpumask_pr_args(&mask));
+ }
+skip_ipi:
sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
- static int cpus_stopped;
- cpumask_t mask;
- unsigned long timeout;
-
/*
* This function can be called twice in panic path, but obviously
* we execute this only once.
+ *
+ * We use this same boolean to tell whether the IPI we send was a
+ * stop or a "crash stop".
*/
- if (cpus_stopped)
+ if (crash_stop)
return;
+ crash_stop = 1;
- cpus_stopped = 1;
+ smp_send_stop();
- /*
- * If this cpu is the only one alive at this point in time, online or
- * not, there are no stop messages to be sent around, so just back out.
- */
- if (num_other_online_cpus() == 0)
- goto skip_ipi;
-
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
-
- atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
-
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
-
- /* Wait up to one second for other CPUs to stop */
- timeout = USEC_PER_SEC;
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
- udelay(1);
-
- if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
-
-skip_ipi:
- sdei_mask_local_cpu();
sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
{
- return (atomic_read(&waiting_for_crash_ipi) > 0);
+ return num_other_online_cpus() != 0;
}
#endif
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9e22683aa921..563cbce11126 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -273,6 +273,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
force_sig_fault(signo, code, (void __user *)far);
}
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
+{
+ arm64_show_signal(SIGSEGV, str);
+ force_sig_pkuerr((void __user *)far, pkey);
+}
+
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
@@ -601,18 +607,26 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_read_counter());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_read_counter());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_get_rate());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_get_rate());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void mrs_handler(unsigned long esr, struct pt_regs *regs)