diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-08 18:54:55 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-08 18:54:55 +0200 |
commit | dfd437a257924484b144ee750e60affc95562c6d (patch) | |
tree | d692781730ed72743b8abbc42a04a176cb2f1d6d /arch | |
parent | Linux 5.2 (diff) | |
parent | Merge branch 'for-next/perf' of git://git.kernel.org/pub/scm/linux/kernel/git... (diff) | |
download | linux-dfd437a257924484b144ee750e60affc95562c6d.tar.xz linux-dfd437a257924484b144ee750e60affc95562c6d.zip |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
- arm64 support for syscall emulation via PTRACE_SYSEMU{,_SINGLESTEP}
- Wire up VM_FLUSH_RESET_PERMS for arm64, allowing the core code to
manage the permissions of executable vmalloc regions more strictly
- Slight performance improvement by keeping softirqs enabled while
touching the FPSIMD/SVE state (kernel_neon_begin/end)
- Expose a couple of ARMv8.5 features to user (HWCAP): CondM (new
XAFLAG and AXFLAG instructions for floating point comparison flags
manipulation) and FRINT (rounding floating point numbers to integers)
- Re-instate ARM64_PSEUDO_NMI support which was previously marked as
BROKEN due to some bugs (now fixed)
- Improve parking of stopped CPUs and implement an arm64-specific
panic_smp_self_stop() to avoid warning on not being able to stop
secondary CPUs during panic
- perf: enable the ARM Statistical Profiling Extensions (SPE) on ACPI
platforms
- perf: DDR performance monitor support for iMX8QXP
- cache_line_size() can now be set from DT or ACPI/PPTT if provided to
cope with a system cache info not exposed via the CPUID registers
- Avoid warning on hardware cache line size greater than
ARCH_DMA_MINALIGN if the system is fully coherent
- arm64 do_page_fault() and hugetlb cleanups
- Refactor set_pte_at() to avoid redundant READ_ONCE(*ptep)
- Ignore ACPI 5.1 FADTs reported as 5.0 (infer from the
'arm_boot_flags' introduced in 5.1)
- CONFIG_RANDOMIZE_BASE now enabled in defconfig
- Allow the selection of ARM64_MODULE_PLTS, currently only done via
RANDOMIZE_BASE (and an erratum workaround), allowing modules to spill
over into the vmalloc area
- Make ZONE_DMA32 configurable
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (54 commits)
perf: arm_spe: Enable ACPI/Platform automatic module loading
arm_pmu: acpi: spe: Add initial MADT/SPE probing
ACPI/PPTT: Add function to return ACPI 6.3 Identical tokens
ACPI/PPTT: Modify node flag detection to find last IDENTICAL
x86/entry: Simplify _TIF_SYSCALL_EMU handling
arm64: rename dump_instr as dump_kernel_instr
arm64/mm: Drop [PTE|PMD]_TYPE_FAULT
arm64: Implement panic_smp_self_stop()
arm64: Improve parking of stopped CPUs
arm64: Expose FRINT capabilities to userspace
arm64: Expose ARMv8.5 CondM capability to userspace
arm64: defconfig: enable CONFIG_RANDOMIZE_BASE
arm64: ARM64_MODULES_PLTS must depend on MODULES
arm64: bpf: do not allocate executable memory
arm64/kprobes: set VM_FLUSH_RESET_PERMS on kprobe instruction pages
arm64/mm: wire up CONFIG_ARCH_HAS_SET_DIRECT_MAP
arm64: module: create module allocations without exec permissions
arm64: Allow user selection of ARM64_MODULE_PLTS
acpi/arm64: ignore 5.1 FADTs that are reported as 5.0
arm64: Allow selecting Pseudo-NMI again
...
Diffstat (limited to 'arch')
47 files changed, 552 insertions, 291 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 697ea0510729..089a834b0ed0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -260,7 +261,8 @@ config GENERIC_CALIBRATE_DELAY def_bool y config ZONE_DMA32 - def_bool y + bool "Support DMA32 zone" if EXPERT + default y config HAVE_GENERIC_GUP def_bool y @@ -933,7 +935,6 @@ config PARAVIRT config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" select PARAVIRT - default n help Select this option to enable fine granularity task steal time accounting. Time spent executing other tasks in parallel with @@ -1418,12 +1419,27 @@ config ARM64_SVE KVM in the same kernel image. config ARM64_MODULE_PLTS - bool + bool "Use PLTs to allow module memory to spill over into vmalloc area" + depends on MODULES select HAVE_MOD_ARCH_SPECIFIC + help + Allocate PLTs when loading modules so that jumps and calls whose + targets are too far away for their relative offsets to be encoded + in the instructions themselves can be bounced via veneers in the + module's PLT. This allows modules to be allocated in the generic + vmalloc area after the dedicated module memory area has been + exhausted. + + When running with address space randomization (KASLR), the module + region itself may be too far away for ordinary relative jumps and + calls, and so in that case, module PLTs are required and cannot be + disabled. + + Specific errata workaround(s) might also force module PLTs to be + enabled (ARM64_ERRATUM_843419). config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" - depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com select CONFIG_ARM_GIC_V3 help Adds support for mimicking Non-Maskable Interrupts through the use of @@ -1436,6 +1452,17 @@ config ARM64_PSEUDO_NMI If unsure, say N +if ARM64_PSEUDO_NMI +config ARM64_DEBUG_PRIORITY_MASKING + bool "Debug interrupt priority masking" + help + This adds runtime checks to functions enabling/disabling + interrupts when using priority masking. The additional checks verify + the validity of ICC_PMR_EL1 when calling concerned functions. + + If unsure, say N +endif + config RELOCATABLE bool help diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6bca5b082ea4..dd827e64e5fe 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -68,6 +68,7 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_XEN=y CONFIG_COMPAT=y +CONFIG_RANDOMIZE_BASE=y CONFIG_HIBERNATION=y CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y CONFIG_ARM_CPUIDLE=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index ada0bc480a1b..b263e239cb59 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -38,6 +38,9 @@ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) +#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ + spe_interrupt) + sizeof(u16)) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 2247908e55d6..79155a8cfe7c 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -152,7 +152,9 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); + BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET)); + BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a05db636981a..64eeaa41e7ca 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -80,12 +80,15 @@ static inline u32 cache_type_cwg(void) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) -static inline int cache_line_size(void) +static inline int cache_line_size_of_cpu(void) { u32 cwg = cache_type_cwg(); + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +int cache_line_size(void); + /* * Read the effective value of CTR_EL0. * diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 1fe4467442aa..665c78e0665a 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -176,4 +176,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) int set_memory_valid(unsigned long addr, int numpages, int enable); +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + #endif diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 373799b7982f..3d8db50d9ae2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -614,6 +614,12 @@ static inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static inline bool system_has_prio_mask_debugging(void) +{ + return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && + system_uses_irq_prio_masking(); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 6dd8a8723525..987926ed535e 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -7,6 +7,7 @@ #include <linux/irqflags.h> +#include <asm/arch_gicv3.h> #include <asm/cpufeature.h> #define DAIF_PROCCTX 0 @@ -16,11 +17,20 @@ /* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) { + WARN_ON(system_has_prio_mask_debugging() && + (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET))); + asm volatile( "msr daifset, #0xf // local_daif_mask\n" : : : "memory"); + + /* Don't really care for a dsb here, we don't intend to enable IRQs */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + trace_hardirqs_off(); } @@ -32,7 +42,7 @@ static inline unsigned long local_daif_save(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ - if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) + if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) flags |= PSR_I_BIT; } @@ -45,39 +55,50 @@ static inline void local_daif_restore(unsigned long flags) { bool irq_disabled = flags & PSR_I_BIT; + WARN_ON(system_has_prio_mask_debugging() && + !(read_sysreg(daif) & PSR_I_BIT)); + if (!irq_disabled) { trace_hardirqs_on(); - if (system_uses_irq_prio_masking()) - arch_local_irq_enable(); - } else if (!(flags & PSR_A_BIT)) { - /* - * If interrupts are disabled but we can take - * asynchronous errors, we can take NMIs - */ if (system_uses_irq_prio_masking()) { - flags &= ~PSR_I_BIT; + gic_write_pmr(GIC_PRIO_IRQON); + dsb(sy); + } + } else if (system_uses_irq_prio_masking()) { + u64 pmr; + + if (!(flags & PSR_A_BIT)) { /* - * There has been concern that the write to daif - * might be reordered before this write to PMR. - * From the ARM ARM DDI 0487D.a, section D1.7.1 - * "Accessing PSTATE fields": - * Writes to the PSTATE fields have side-effects on - * various aspects of the PE operation. All of these - * side-effects are guaranteed: - * - Not to be visible to earlier instructions in - * the execution stream. - * - To be visible to later instructions in the - * execution stream - * - * Also, writes to PMR are self-synchronizing, so no - * interrupts with a lower priority than PMR is signaled - * to the PE after the write. - * - * So we don't need additional synchronization here. + * If interrupts are disabled but we can take + * asynchronous errors, we can take NMIs */ - arch_local_irq_disable(); + flags &= ~PSR_I_BIT; + pmr = GIC_PRIO_IRQOFF; + } else { + pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; } + + /* + * There has been concern that the write to daif + * might be reordered before this write to PMR. + * From the ARM ARM DDI 0487D.a, section D1.7.1 + * "Accessing PSTATE fields": + * Writes to the PSTATE fields have side-effects on + * various aspects of the PE operation. All of these + * side-effects are guaranteed: + * - Not to be visible to earlier instructions in + * the execution stream. + * - To be visible to later instructions in the + * execution stream + * + * Also, writes to PMR are self-synchronizing, so no + * interrupts with a lower priority than PMR is signaled + * to the PE after the write. + * + * So we don't need additional synchronization here. + */ + gic_write_pmr(pmr); } write_sysreg(flags, daif); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 897029c8e9b5..b6a2c352f4c3 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -37,8 +37,6 @@ struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); extern void fpsimd_load_state(struct user_fpsimd_state *state); -extern void fpsimd_save(void); - extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); @@ -52,8 +50,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl); extern void fpsimd_flush_task_state(struct task_struct *target); -extern void fpsimd_flush_cpu_state(void); -extern void sve_flush_cpu_state(void); +extern void fpsimd_save_and_flush_cpu_state(void); /* Maximum VL that SVE VL-agnostic software can transparently support */ #define SVE_VL_ARCH_MAX 0x100 diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index e5d9420cd258..3d2f2472a36c 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -84,6 +84,8 @@ #define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM) #define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3) #define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) +#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2) +#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 66853fde60f9..7872f260c9ee 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -29,6 +29,12 @@ */ static inline void arch_local_irq_enable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifclr, #2 // arch_local_irq_enable\n" "nop", @@ -42,6 +48,12 @@ static inline void arch_local_irq_enable(void) static inline void arch_local_irq_disable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifset, #2 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), @@ -56,43 +68,46 @@ static inline void arch_local_irq_disable(void) */ static inline unsigned long arch_local_save_flags(void) { - unsigned long daif_bits; unsigned long flags; - daif_bits = read_sysreg(daif); - - /* - * The asm is logically equivalent to: - * - * if (system_uses_irq_prio_masking()) - * flags = (daif_bits & PSR_I_BIT) ? - * GIC_PRIO_IRQOFF : - * read_sysreg_s(SYS_ICC_PMR_EL1); - * else - * flags = daif_bits; - */ asm volatile(ALTERNATIVE( - "mov %0, %1\n" - "nop\n" - "nop", - __mrs_s("%0", SYS_ICC_PMR_EL1) - "ands %1, %1, " __stringify(PSR_I_BIT) "\n" - "csel %0, %0, %2, eq", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (flags), "+r" (daif_bits) - : "r" ((unsigned long) GIC_PRIO_IRQOFF) + "mrs %0, daif", + __mrs_s("%0", SYS_ICC_PMR_EL1), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (flags) + : : "memory"); return flags; } +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + int res; + + asm volatile(ALTERNATIVE( + "and %w0, %w1, #" __stringify(PSR_I_BIT), + "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (res) + : "r" ((int) flags) + : "memory"); + + return res; +} + static inline unsigned long arch_local_irq_save(void) { unsigned long flags; flags = arch_local_save_flags(); - arch_local_irq_disable(); + /* + * There are too many states with IRQs disabled, just keep the current + * state if interrupts are already disabled/masked. + */ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_disable(); return flags; } @@ -108,26 +123,10 @@ static inline void arch_local_irq_restore(unsigned long flags) __msr_s(SYS_ICC_PMR_EL1, "%0") "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) - : "+r" (flags) : + : "r" (flags) : "memory"); } -static inline int arch_irqs_disabled_flags(unsigned long flags) -{ - int res; - - asm volatile(ALTERNATIVE( - "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n" - "nop", - "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n" - "cset %w0, ls", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (res) - : "r" ((int) flags) - : "memory"); - - return res; -} #endif #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c328191aa202..9f19c354b165 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -597,11 +597,12 @@ static inline void kvm_arm_vhe_guest_enter(void) * will not signal the CPU of interrupts of lower priority, and the * only way to get out will be via guest exceptions. * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + if (system_uses_irq_prio_masking()) dsb(sy); - } } static inline void kvm_arm_vhe_guest_exit(void) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 30e5e67749e5..db92950bb1a0 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -115,7 +115,6 @@ * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) -#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) @@ -142,8 +141,8 @@ /* * Level 3 descriptor (PTE). */ +#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) -#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) #define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index c81583be034b..f318258a14be 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -13,7 +13,6 @@ /* * Software defined PTE bits definition. */ -#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index fca26759081a..3052381baaeb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -235,29 +235,42 @@ extern void __sync_icache_dcache(pte_t pteval); * * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + +static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, + pte_t pte) { pte_t old_pte; - if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte); + if (!IS_ENABLED(CONFIG_DEBUG_VM)) + return; + + old_pte = READ_ONCE(*ptep); + + if (!pte_valid(old_pte) || !pte_valid(pte)) + return; + if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1) + return; /* - * If the existing pte is valid, check for potential race with - * hardware updates of the pte (ptep_set_access_flags safely changes - * valid ptes without going through an invalid entry). + * Check for potential race with hardware updates of the pte + * (ptep_set_access_flags safely changes valid ptes without going + * through an invalid entry). */ - old_pte = READ_ONCE(*ptep); - if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) && - (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { - VM_WARN_ONCE(!pte_young(pte), - "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), - "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - } + VM_WARN_ONCE(!pte_young(pte), + "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); + VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), + "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte); + + __check_racy_pte_update(mm, ptep, pte); set_pte(ptep, pte); } @@ -324,9 +337,14 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } -static inline pgprot_t mk_sect_prot(pgprot_t prot) +static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT); +} + +static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) { - return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); + return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); } #ifdef CONFIG_NUMA_BALANCING diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index dad858b6adc6..81693244f58d 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -24,9 +24,15 @@ * means masking more IRQs (or at least that the same IRQs remain masked). * * To mask interrupts, we clear the most significant bit of PMR. + * + * Some code sections either automatically switch back to PSR.I or explicitly + * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included + * in the the priority mask, it indicates that PSR.I should be set and + * interrupt disabling temporarily does not rely on IRQ priorities. */ -#define GIC_PRIO_IRQON 0xf0 -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_IRQON 0xc0 +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ #define PSR_IL_BIT (1 << 20) diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 7e245b9e03a5..7434844036d3 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -12,9 +12,9 @@ #include <linux/preempt.h> #include <linux/types.h> -#ifdef CONFIG_KERNEL_MODE_NEON +DECLARE_PER_CPU(bool, fpsimd_context_busy); -DECLARE_PER_CPU(bool, kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * kernel_neon_busy is only set while preemption is disabled, + * fpsimd_context_busy is only set while preemption is disabled, * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy * cannot change under our feet -- if it's set we cannot be * migrated, and if it's clear we cannot be migrated to a CPU * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(kernel_neon_busy); + !this_cpu_read(fpsimd_context_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cd7f7ce1a56a..d0bd4ffcf2c4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -549,6 +549,7 @@ /* id_aa64isar1 */ #define ID_AA64ISAR1_SB_SHIFT 36 +#define ID_AA64ISAR1_FRINTTS_SHIFT 32 #define ID_AA64ISAR1_GPI_SHIFT 28 #define ID_AA64ISAR1_GPA_SHIFT 24 #define ID_AA64ISAR1_LRCPC_SHIFT 20 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 2372e97db29c..180b34ec5965 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -65,6 +65,7 @@ void arch_release_task_struct(struct task_struct *tsk); * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace * TIF_SYSCALL_AUDIT - syscall auditing * TIF_SECCOMP - syscall secure computing + * TIF_SYSCALL_EMU - syscall emulation active * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -80,6 +81,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 #define TIF_SECCOMP 11 +#define TIF_SYSCALL_EMU 12 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -98,6 +100,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) #define _TIF_32BIT (1 << TIF_32BIT) @@ -109,7 +112,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ - _TIF_NOHZ) + _TIF_NOHZ | _TIF_SYSCALL_EMU) #define INIT_THREAD_INFO(tsk) \ { \ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 1a772b162191..a1e72886b30c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -63,5 +63,7 @@ #define HWCAP2_SVEBITPERM (1 << 4) #define HWCAP2_SVESHA3 (1 << 5) #define HWCAP2_SVESM4 (1 << 6) +#define HWCAP2_FLAGM2 (1 << 7) +#define HWCAP2_FRINT (1 << 8) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index e932284993d4..7ed9294e2004 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -62,6 +62,9 @@ #define PSR_x 0x0000ff00 /* Extension */ #define PSR_c 0x000000ff /* Control */ +/* syscall emulation path in ptrace */ +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 2804330c95dc..3a58e9db5cfe 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -152,10 +152,14 @@ static int __init acpi_fadt_sanity_check(void) */ if (table->revision < 5 || (table->revision == 5 && fadt->minor_revision < 1)) { - pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n", + pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n", table->revision, fadt->minor_revision); - ret = -EINVAL; - goto out; + + if (!fadt->arm_boot_flags) { + ret = -EINVAL; + goto out; + } + pr_err("FADT has ARM boot flags set, assuming 5.1\n"); } if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) { diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 880d79904d36..7fa6828bb488 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,6 +17,15 @@ #define CLIDR_CTYPE(clidr, level) \ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) +int cache_line_size(void) +{ + if (coherency_max_size != 0) + return coherency_max_size; + + return cache_line_size_of_cpu(); +} +EXPORT_SYMBOL_GPL(cache_line_size); + static inline enum cache_type get_cache_type(int level) { u64 clidr; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index aabdabf52fdb..f29f36a65175 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1184,14 +1184,14 @@ static struct undef_hook ssbs_emulation_hook = { static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) { static bool undef_hook_registered = false; - static DEFINE_SPINLOCK(hook_lock); + static DEFINE_RAW_SPINLOCK(hook_lock); - spin_lock(&hook_lock); + raw_spin_lock(&hook_lock); if (!undef_hook_registered) { register_undef_hook(&ssbs_emulation_hook); undef_hook_registered = true; } - spin_unlock(&hook_lock); + raw_spin_unlock(&hook_lock); if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); @@ -1618,6 +1618,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -1629,6 +1630,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 0593665fc7b4..876055e37352 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -82,6 +82,8 @@ static const char *const hwcap_str[] = { "svebitperm", "svesha3", "svesm4", + "flagm2", + "frint", NULL }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2df8d0a1d980..9cdc4592da3e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -247,6 +247,7 @@ alternative_else_nop_endif /* * Registers that may be useful after this macro is invoked: * + * x20 - ICC_PMR_EL1 * x21 - aborted SP * x22 - aborted PC * x23 - aborted PSTATE @@ -424,6 +425,38 @@ tsk .req x28 // current thread_info irq_stack_exit .endm +#ifdef CONFIG_ARM64_PSEUDO_NMI + /* + * Set res to 0 if irqs were unmasked in interrupted context. + * Otherwise set res to non-0 value. + */ + .macro test_irqs_unmasked res:req, pmr:req +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + sub \res, \pmr, #GIC_PRIO_IRQON +alternative_else + mov \res, xzr +alternative_endif + .endm +#endif + + .macro gic_prio_kentry_setup, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + + .macro gic_prio_irq_setup, pmr:req, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + .text /* @@ -602,6 +635,7 @@ el1_dbg: cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x2, sp // struct pt_regs bl do_debug_exception @@ -619,20 +653,18 @@ ENDPROC(el1_sync) .align 6 el1_irq: kernel_entry 1 + gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - ldr x20, [sp, #S_PMR_SAVE] -alternative_else - mov x20, #GIC_PRIO_IRQON -alternative_endif - cmp x20, #GIC_PRIO_IRQOFF - /* Irqs were disabled, don't trace */ - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_enter +1: #endif + +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off -1: #endif irq_handler @@ -651,14 +683,23 @@ alternative_else_nop_endif bl preempt_schedule_irq // irq en/disable is done inside 1: #endif -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI /* - * if IRQs were disabled when we received the interrupt, we have an NMI - * and we are not re-enabling interrupt upon eret. Skip tracing. + * When using IRQ priority masking, we can get spurious interrupts while + * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a + * section with interrupts disabled. Skip tracing in those cases. */ - cmp x20, #GIC_PRIO_IRQOFF - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_exit +1: +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI + test_irqs_unmasked res=x0, pmr=x20 + cbnz x0, 1f #endif bl trace_hardirqs_on 1: @@ -776,6 +817,7 @@ el0_ia: * Instruction abort handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -821,6 +863,7 @@ el0_sp_pc: * Stack or PC alignment exception handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -855,11 +898,12 @@ el0_dbg: * Debug exception handling */ tbnz x24, #0, el0_inv // EL0 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x1, x25 mov x2, sp bl do_debug_exception - enable_daif + enable_da_f ct_user_exit b ret_to_user el0_inv: @@ -876,7 +920,9 @@ ENDPROC(el0_sync) el0_irq: kernel_entry 0 el0_irq_naked: + gic_prio_irq_setup pmr=x20, tmp=x0 enable_da_f + #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif @@ -898,6 +944,7 @@ ENDPROC(el0_irq) el1_error: kernel_entry 1 mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror @@ -908,10 +955,11 @@ el0_error: kernel_entry 0 el0_error_naked: mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror - enable_daif + enable_da_f ct_user_exit b ret_to_user ENDPROC(el0_error) @@ -932,6 +980,7 @@ work_pending: */ ret_to_user: disable_daif + gic_prio_kentry_setup tmp=x3 ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending @@ -948,6 +997,7 @@ ENDPROC(ret_to_user) */ .align 6 el0_svc: + gic_prio_kentry_setup tmp=x1 mov x0, sp bl el0_svc_handler b ret_to_user diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0cfcf5c237c5..eec4776ae5f0 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -82,7 +82,8 @@ * To prevent this from racing with the manipulation of the task's FPSIMD state * from task context and thereby corrupting the state, it is necessary to * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE - * flag with local_bh_disable() unless softirqs are already masked. + * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to + * run but prevent them to use FPSIMD. * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_cpu field @@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ +DEFINE_PER_CPU(bool, fpsimd_context_busy); +EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); + +static void __get_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, true); + + WARN_ON(busy); +} + +/* + * Claim ownership of the CPU FPSIMD context for use by the calling context. + * + * The caller may freely manipulate the FPSIMD context metadata until + * put_cpu_fpsimd_context() is called. + * + * The double-underscore version must only be called if you know the task + * can't be preempted. + */ +static void get_cpu_fpsimd_context(void) +{ + preempt_disable(); + __get_cpu_fpsimd_context(); +} + +static void __put_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, false); + + WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ +} + +/* + * Release the CPU FPSIMD context. + * + * Must be called from a context in which get_cpu_fpsimd_context() was + * previously called, with no call to put_cpu_fpsimd_context() in the + * meantime. + */ +static void put_cpu_fpsimd_context(void) +{ + __put_cpu_fpsimd_context(); + preempt_enable(); +} + +static bool have_cpu_fpsimd_context(void) +{ + return !preemptible() && __this_cpu_read(fpsimd_context_busy); +} + /* * Call __sve_free() directly only if you know task can't be scheduled * or preempted. @@ -215,12 +266,10 @@ static void sve_free(struct task_struct *task) * This function should be called only when the FPSIMD/SVE state in * thread_struct is known to be up to date, when preparing to enter * userspace. - * - * Softirqs (and preemption) must be disabled. */ static void task_fpsimd_load(void) { - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), @@ -233,16 +282,14 @@ static void task_fpsimd_load(void) /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to * date with respect to the CPU registers. - * - * Softirqs (and preemption) must be disabled. */ -void fpsimd_save(void) +static void fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { if (system_supports_sve() && test_thread_flag(TIF_SVE)) { @@ -364,7 +411,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x) * task->thread.sve_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.uw.fpsimd_state must be up to date before calling this @@ -393,7 +441,8 @@ static void fpsimd_to_sve(struct task_struct *task) * task->thread.uw.fpsimd_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. @@ -557,7 +606,7 @@ int sve_set_vector_length(struct task_struct *task, * non-SVE thread. */ if (task == current) { - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); } @@ -567,7 +616,7 @@ int sve_set_vector_length(struct task_struct *task, sve_to_fpsimd(task); if (task == current) - local_bh_enable(); + put_cpu_fpsimd_context(); /* * Force reallocation of task SVE state to the correct size @@ -880,7 +929,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) sve_alloc(current); - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); @@ -891,7 +940,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -935,6 +984,8 @@ void fpsimd_thread_switch(struct task_struct *next) if (!system_supports_fpsimd()) return; + __get_cpu_fpsimd_context(); + /* Save unsaved fpsimd state, if any: */ fpsimd_save(); @@ -949,6 +1000,8 @@ void fpsimd_thread_switch(struct task_struct *next) update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, wrong_task || wrong_cpu); + + __put_cpu_fpsimd_context(); } void fpsimd_flush_thread(void) @@ -958,7 +1011,7 @@ void fpsimd_flush_thread(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_flush_task_state(current); memset(¤t->thread.uw.fpsimd_state, 0, @@ -999,7 +1052,7 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1011,9 +1064,9 @@ void fpsimd_preserve_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1030,7 +1083,8 @@ void fpsimd_signal_preserve_current_state(void) /* * Associate current's FPSIMD context with this cpu - * Preemption must be disabled when calling this function. + * The caller must have ownership of the cpu FPSIMD context before calling + * this function. */ void fpsimd_bind_task_to_cpu(void) { @@ -1076,14 +1130,14 @@ void fpsimd_restore_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); fpsimd_bind_task_to_cpu(); } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1096,7 +1150,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -1107,7 +1161,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) clear_thread_flag(TIF_FOREIGN_FPSTATE); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1133,18 +1187,29 @@ void fpsimd_flush_task_state(struct task_struct *t) /* * Invalidate any task's FPSIMD state that is present on this cpu. - * This function must be called with softirqs disabled. + * The FPSIMD context should be acquired with get_cpu_fpsimd_context() + * before calling this function. */ -void fpsimd_flush_cpu_state(void) +static void fpsimd_flush_cpu_state(void) { __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } -#ifdef CONFIG_KERNEL_MODE_NEON +/* + * Save the FPSIMD state to memory and invalidate cpu view. + * This function must be called with preemption disabled. + */ +void fpsimd_save_and_flush_cpu_state(void) +{ + WARN_ON(preemptible()); + __get_cpu_fpsimd_context(); + fpsimd_save(); + fpsimd_flush_cpu_state(); + __put_cpu_fpsimd_context(); +} -DEFINE_PER_CPU(bool, kernel_neon_busy); -EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * Kernel-side NEON support functions @@ -1170,19 +1235,13 @@ void kernel_neon_begin(void) BUG_ON(!may_use_simd()); - local_bh_disable(); - - __this_cpu_write(kernel_neon_busy, true); + get_cpu_fpsimd_context(); /* Save unsaved fpsimd state, if any: */ fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); - - preempt_disable(); - - local_bh_enable(); } EXPORT_SYMBOL(kernel_neon_begin); @@ -1197,15 +1256,10 @@ EXPORT_SYMBOL(kernel_neon_begin); */ void kernel_neon_end(void) { - bool busy; - if (!system_supports_fpsimd()) return; - busy = __this_cpu_xchg(kernel_neon_busy, false); - WARN_ON(!busy); /* No matching kernel_neon_begin()? */ - - preempt_enable(); + put_cpu_fpsimd_context(); } EXPORT_SYMBOL(kernel_neon_end); @@ -1297,8 +1351,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); break; case CPU_PM_EXIT: break; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index c70034fbd4ce..04a327ccf84d 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -16,8 +16,10 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/irqchip.h> +#include <linux/kprobes.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <asm/daifflags.h> #include <asm/vmap_stack.h> unsigned long irq_err_count; @@ -64,4 +66,28 @@ void __init init_IRQ(void) irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); + + if (system_uses_irq_prio_masking()) { + /* + * Now that we have a stack for our IRQ handler, set + * the PMR/PSR pair to a consistent state. + */ + WARN_ON(read_sysreg(daif) & PSR_A_BIT); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + } +} + +/* + * Stubs to make nmi_enter/exit() code callable from ASM + */ +asmlinkage void notrace asm_nmi_enter(void) +{ + nmi_enter(); +} +NOKPROBE_SYMBOL(asm_nmi_enter); + +asmlinkage void notrace asm_nmi_exit(void) +{ + nmi_exit(); } +NOKPROBE_SYMBOL(asm_nmi_exit); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 71530e080ecc..46e643e30708 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -34,7 +34,7 @@ void *module_alloc(unsigned long size) module_alloc_end = MODULES_END; p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL_EXEC, 0, + module_alloc_end, gfp_mask, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && @@ -50,7 +50,7 @@ void *module_alloc(unsigned long size) */ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 88ce502c8e6f..bd5dfffca272 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -122,8 +122,10 @@ void *alloc_insn_page(void) void *page; page = vmalloc_exec(PAGE_SIZE); - if (page) + if (page) { set_memory_ro((unsigned long)page, 1); + set_vm_flush_reset_perms(page); + } return page; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 9856395ccdb7..6a869d9f304f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -83,7 +83,7 @@ static void __cpu_do_idle_irqprio(void) * be raised. */ pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); __cpu_do_idle(); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index da2441d7b066..3cf3b135027e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1808,8 +1808,12 @@ static void tracehook_report_syscall(struct pt_regs *regs, int syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_TRACE) || + test_thread_flag(TIF_SYSCALL_EMU)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU)) + return -1; + } /* Do the secure computing after ptrace; failures should be fast. */ if (secure_computing(NULL) == -1) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 3e53ffa07994..f5b04dd8a710 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -27,7 +27,7 @@ * aff0 = mpidr_masked & 0xff; * aff1 = mpidr_masked & 0xff00; * aff2 = mpidr_masked & 0xff0000; - * aff2 = mpidr_masked & 0xff00000000; + * aff3 = mpidr_masked & 0xff00000000; * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); *} * Input registers: rs0, rs1, rs2, rs3, mpidr, mask diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6dcf9607d770..9286ee6749e8 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -181,11 +181,7 @@ static void init_gic_priority_masking(void) WARN_ON(!(cpuflags & PSR_I_BIT)); - gic_write_pmr(GIC_PRIO_IRQOFF); - - /* We can only unmask PSR.I if we can take aborts */ - if (!(cpuflags & PSR_A_BIT)) - write_sysreg(cpuflags & ~PSR_I_BIT, daif); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } /* @@ -834,18 +830,23 @@ void arch_irq_work_raise(void) } #endif -/* - * ipi_cpu_stop - handle IPI from smp_send_stop() - */ -static void ipi_cpu_stop(unsigned int cpu) +static void local_cpu_stop(void) { - set_cpu_online(cpu, false); + set_cpu_online(smp_processor_id(), false); local_daif_mask(); sdei_mask_local_cpu(); + cpu_park_loop(); +} - while (1) - cpu_relax(); +/* + * We need to implement panic_smp_self_stop() for parallel panic() calls, so + * that cpu_online_mask gets correctly updated and smp_send_stop() can skip + * CPUs that have already stopped themselves. + */ +void panic_smp_self_stop(void) +{ + local_cpu_stop(); } #ifdef CONFIG_KEXEC_CORE @@ -898,7 +899,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) case IPI_CPU_STOP: irq_enter(); - ipi_cpu_stop(cpu); + local_cpu_stop(); irq_exit(); break; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 985721a1264c..a835a1a53826 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -55,16 +55,19 @@ static void dump_backtrace_entry(unsigned long where) printk(" %pS\n", (void *)where); } -static void __dump_instr(const char *lvl, struct pt_regs *regs) +static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; + if (user_mode(regs)) + return; + for (i = -4; i < 1; i++) { unsigned int val, bad; - bad = get_user(val, &((u32 *)addr)[i]); + bad = aarch64_insn_read(&((u32 *)addr)[i], &val); if (!bad) p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); @@ -73,19 +76,8 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) break; } } - printk("%sCode: %s\n", lvl, str); -} -static void dump_instr(const char *lvl, struct pt_regs *regs) -{ - if (!user_mode(regs)) { - mm_segment_t fs = get_fs(); - set_fs(KERNEL_DS); - __dump_instr(lvl, regs); - set_fs(fs); - } else { - __dump_instr(lvl, regs); - } + printk("%sCode: %s\n", lvl, str); } void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -171,8 +163,7 @@ static int __die(const char *str, int err, struct pt_regs *regs) print_modules(); show_regs(regs); - if (!user_mode(regs)) - dump_instr(KERN_EMERG, regs); + dump_kernel_instr(KERN_EMERG, regs); return ret; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 6e3c9c8b2df9..525010504f9d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - /* Clean guest FP state to memory and invalidate cpu view */ - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b0041812bca9..58f281b6ca4a 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -604,7 +604,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) * Naturally, we want to avoid this. */ if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); dsb(sy); } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 5992eb9a9a08..1d17dbeafe76 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -80,10 +80,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, static int __init arm64_dma_init(void) { - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), - TAINT_CPU_OUT_OF_SPEC, - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", - ARCH_DMA_MINALIGN, cache_line_size()); return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); } arch_initcall(arm64_dma_init); @@ -461,6 +457,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { + int cls = cache_line_size_of_cpu(); + + WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN, + TAINT_CPU_OUT_OF_SPEC, + "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + dev_driver_string(dev), dev_name(dev), + ARCH_DMA_MINALIGN, cls); + dev->dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2d115016feb4..c8c61b1eb479 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -384,40 +384,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags, - struct task_struct *tsk) + unsigned int mm_flags, unsigned long vm_flags) { - struct vm_area_struct *vma; - vm_fault_t fault; + struct vm_area_struct *vma = find_vma(mm, addr); - vma = find_vma(mm, addr); - fault = VM_FAULT_BADMAP; if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > addr)) - goto check_stack; + return VM_FAULT_BADMAP; /* * Ok, we have a good vm_area for this memory access, so we can handle * it. */ -good_area: + if (unlikely(vma->vm_start > addr)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + return VM_FAULT_BADMAP; + if (expand_stack(vma, addr)) + return VM_FAULT_BADMAP; + } + /* * Check that the permissions on the VMA allow for the fault which * occurred. */ - if (!(vma->vm_flags & vm_flags)) { - fault = VM_FAULT_BADACCESS; - goto out; - } - + if (!(vma->vm_flags & vm_flags)) + return VM_FAULT_BADACCESS; return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); - -check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) - goto good_area; -out: - return fault; } static bool is_el0_instruction_abort(unsigned int esr) @@ -425,12 +416,20 @@ static bool is_el0_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; } +/* + * Note: not valid for EL1 DC IVAC, but we never use that such that it + * should fault. EL0 cannot issue DC IVAC (undef). + */ +static bool is_write_abort(unsigned int esr) +{ + return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf; - struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -438,9 +437,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (notify_page_fault(regs, esr)) return 0; - tsk = current; - mm = tsk->mm; - /* * If we're in an interrupt or have no user context, we must not take * the fault. @@ -453,7 +449,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; - } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { + mm_flags |= FAULT_FLAG_INSTRUCTION; + } else if (is_write_abort(esr)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } @@ -492,12 +489,14 @@ retry: */ might_sleep(); #ifdef CONFIG_DEBUG_VM - if (!user_mode(regs) && !search_exception_tables(regs->pc)) + if (!user_mode(regs) && !search_exception_tables(regs->pc)) { + up_read(&mm->mmap_sem); goto no_context; + } #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags); major |= fault & VM_FAULT_MAJOR; if (fault & VM_FAULT_RETRY) { @@ -537,11 +536,11 @@ retry: * that point. */ if (major) { - tsk->maj_flt++; + current->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { - tsk->min_flt++; + current->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index f475e54fbc43..bbeb6a5a6ba6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -228,7 +228,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (sz == PUD_SIZE) { ptep = (pte_t *)pudp; - } else if (sz == (PAGE_SIZE * CONT_PTES)) { + } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); @@ -246,7 +246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = huge_pmd_share(mm, addr, pudp); else ptep = (pte_t *)pmd_alloc(mm, pudp, addr); - } else if (sz == (PMD_SIZE * CONT_PMDS)) { + } else if (sz == (CONT_PMD_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); return (pte_t *)pmdp; @@ -454,9 +454,9 @@ static int __init hugetlbpage_init(void) #ifdef CONFIG_ARM64_4K_PAGES add_huge_page_size(PUD_SIZE); #endif - add_huge_page_size(PMD_SIZE * CONT_PMDS); + add_huge_page_size(CONT_PMD_SIZE); add_huge_page_size(PMD_SIZE); - add_huge_page_size(PAGE_SIZE * CONT_PTES); + add_huge_page_size(CONT_PTE_SIZE); return 0; } @@ -470,9 +470,9 @@ static __init int setup_hugepagesz(char *opt) #ifdef CONFIG_ARM64_4K_PAGES case PUD_SIZE: #endif - case PMD_SIZE * CONT_PMDS: + case CONT_PMD_SIZE: case PMD_SIZE: - case PAGE_SIZE * CONT_PTES: + case CONT_PTE_SIZE: add_huge_page_size(ps); return 1; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 749c9b269f08..f3c795278def 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -180,8 +180,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#ifdef CONFIG_ZONE_DMA32 + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#endif max_zone_pfns[ZONE_NORMAL] = max; free_area_init_nodes(max_zone_pfns); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e5ae8663f230..3645f29bd814 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -765,7 +765,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } -#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { @@ -960,32 +960,28 @@ int __init arch_ioremap_pmd_supported(void) int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), pud_val(new_pud))) return 0; - BUG_ON(phys & ~PUD_MASK); + VM_BUG_ON(phys & ~PUD_MASK); set_pud(pudp, new_pud); return 1; } int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), pmd_val(new_pmd))) return 0; - BUG_ON(phys & ~PMD_MASK); + VM_BUG_ON(phys & ~PMD_MASK); set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 47b057bfa803..fcdcf6cd7677 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -151,17 +151,48 @@ int set_memory_valid(unsigned long addr, int numpages, int enable) __pgprot(PTE_VALID)); } -#ifdef CONFIG_DEBUG_PAGEALLOC +int set_direct_map_invalid_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(0), + .clear_mask = __pgprot(PTE_VALID), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + +int set_direct_map_default_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(PTE_VALID | PTE_WRITE), + .clear_mask = __pgprot(PTE_RDONLY), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + void __kernel_map_pages(struct page *page, int numpages, int enable) { + if (!debug_pagealloc_enabled() && !rodata_full) + return; + set_memory_valid((unsigned long)page_address(page), numpages, enable); } -#ifdef CONFIG_HIBERNATION + /* - * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function - * is used to determine if a linear map page has been marked as not-valid by - * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. - * This is based on kern_addr_valid(), which almost does what we need. + * This function is used to determine if a linear map page has been marked as + * not-valid. Walk the page table and check the PTE_VALID bit. This is based + * on kern_addr_valid(), which almost does what we need. * * Because this is only called on the kernel linear map, p?d_sect() implies * p?d_present(). When debug_pagealloc is enabled, sections mappings are @@ -175,6 +206,9 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); + if (!debug_pagealloc_enabled() && !rodata_full) + return true; + pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; @@ -196,5 +230,3 @@ bool kernel_page_present(struct page *page) ptep = pte_offset_kernel(pmdp, addr); return pte_valid(READ_ONCE(*ptep)); } -#endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 87c568807925..f5b437f8a22b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -970,7 +970,7 @@ void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 684b0b315c32..8c92febf5f44 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child) { /* make sure the single step bit is not set. */ user_disable_single_step(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); } #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2418804e66b4..536b574b6161 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs) struct thread_info *ti = current_thread_info(); unsigned long ret = 0; - bool emulated = false; u32 work; if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags); - if (unlikely(work & _TIF_SYSCALL_EMU)) - emulated = true; - - if ((emulated || (work & _TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - return -1L; - - if (emulated) - return -1L; + if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { + ret = tracehook_report_syscall_entry(regs); + if (ret || (work & _TIF_SYSCALL_EMU)) + return -1L; + } #ifdef CONFIG_SECCOMP /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a166c960bc9e..36998e0c3fc4 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -747,9 +747,6 @@ static int ioperm_get(struct task_struct *target, void ptrace_disable(struct task_struct *child) { user_disable_single_step(child); -#ifdef TIF_SYSCALL_EMU - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -#endif } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |