diff options
Diffstat (limited to 'arch/arm64/include/asm')
72 files changed, 1437 insertions, 646 deletions
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index b263e239cb59..bd68e1b7f29f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,6 +12,7 @@ #include <linux/efi.h> #include <linux/memblock.h> #include <linux/psci.h> +#include <linux/stddef.h> #include <asm/cputype.h> #include <asm/io.h> @@ -31,14 +32,14 @@ * is therefore used to delimit the MADT GICC structure minimum length * appropriately. */ -#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \ +#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \ struct acpi_madt_generic_interrupt, efficiency_class) #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) -#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ +#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) /* Basic configuration for ACPI */ @@ -46,20 +47,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); /* ACPI table mapping after acpi_permanent_mmap is set */ -static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, - acpi_size size) -{ - /* For normal memory we already have a cacheable mapping. */ - if (memblock_is_map_memory(phys)) - return (void __iomem *)__phys_to_virt(phys); - - /* - * We should still honor the memory's attribute here because - * crash dump kernel possibly excludes some ACPI (reclaim) - * regions from memblock list. - */ - return __ioremap(phys, size, __acpi_get_mem_attribute(phys)); -} +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap typedef u64 phys_cpuid_t; diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 5e5dc05d63a0..619db9b4c9d5 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -73,13 +73,13 @@ static inline void apply_alternatives_module(void *start, size_t length) { } ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - ".pushsection .altinstr_replacement, \"a\"\n" \ + ".subsection 1\n" \ "663:\n\t" \ newinstr "\n" \ "664:\n\t" \ - ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ - ".org . - (662b-661b) + (664b-663b)\n" \ + ".org . - (662b-661b) + (664b-663b)\n\t" \ + ".previous\n" \ ".endif\n" #define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ @@ -117,9 +117,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { } 662: .pushsection .altinstructions, "a" altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 663: \insn2 -664: .popsection +664: .previous .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) .endif @@ -160,7 +160,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .pushsection .altinstructions, "a" altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 .align 2 /* So GAS knows label 661 is suitably aligned */ 661: .endm @@ -179,9 +179,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .macro alternative_else 662: .if .Lasm_alt_mode==0 - .pushsection .altinstr_replacement, "ax" + .subsection 1 .else - .popsection + .previous .endif 663: .endm @@ -192,7 +192,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .macro alternative_endif 664: .if .Lasm_alt_mode==0 - .popsection + .previous .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index a358e97572c1..6647ae4f0231 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -109,7 +109,7 @@ static inline u32 gic_read_pmr(void) return read_sysreg_s(SYS_ICC_PMR_EL1); } -static inline void gic_write_pmr(u32 val) +static __always_inline void gic_write_pmr(u32 val) { write_sysreg_s(val, SYS_ICC_PMR_EL1); } diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 7ae54d7d333a..9f0ec21d6327 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -58,6 +58,7 @@ struct arch_timer_erratum_workaround { u64 (*read_cntvct_el0)(void); int (*set_next_event_phys)(unsigned long, struct clock_event_device *); int (*set_next_event_virt)(unsigned long, struct clock_event_device *); + bool disable_compat_vdso; }; DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index fc1594a0710e..44209f6146aa 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -6,7 +6,6 @@ #include <linux/bug.h> #include <linux/kernel.h> -#include <linux/random.h> #include <asm/cpufeature.h> static inline bool __arm64_rndr(unsigned long *v) diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index ce2a8486992b..52dead2a8640 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -39,25 +39,58 @@ alternative_if ARM64_HAS_GENERIC_AUTH alternative_else_nop_endif .endm - .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3 -alternative_if ARM64_HAS_ADDRESS_AUTH + .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 mov \tmp1, #THREAD_KEYS_KERNEL add \tmp1, \tsk, \tmp1 ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_KERNEL_KEY_APIA] msr_s SYS_APIAKEYLO_EL1, \tmp2 msr_s SYS_APIAKEYHI_EL1, \tmp3 - .if \sync == 1 + .endm + + .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 +alternative_if ARM64_HAS_ADDRESS_AUTH + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 +alternative_else_nop_endif + .endm + + .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 +alternative_if ARM64_HAS_ADDRESS_AUTH + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 isb - .endif alternative_else_nop_endif .endm + .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 + mrs \tmp1, id_aa64isar1_el1 + ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + cbz \tmp1, .Lno_addr_auth\@ + mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + mrs \tmp2, sctlr_el1 + orr \tmp2, \tmp2, \tmp1 + msr sctlr_el1, \tmp2 + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 + isb +.Lno_addr_auth\@: + .endm + + .macro ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .Lno_addr_auth\@ +alternative_else_nop_endif + __ptrauth_keys_init_cpu \tsk, \tmp1, \tmp2, \tmp3 +.Lno_addr_auth\@: + .endm + #else /* CONFIG_ARM64_PTR_AUTH */ .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 .endm - .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3 + .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 + .endm + + .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bff325117b4..54d181177656 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -736,4 +736,54 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .Lyield_out_\@ : .endm +/* + * This macro emits a program property note section identifying + * architecture features which require special handling, mainly for + * use in assembly files included in the VDSO. + */ + +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + +#ifdef CONFIG_ARM64_BTI_KERNEL +#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT \ + ((GNU_PROPERTY_AARCH64_FEATURE_1_BTI | \ + GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) +#endif + +#ifdef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT +.macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT + .pushsection .note.gnu.property, "a" + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: + .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: + /* + * This is described with an array of char in the Linux API + * spec but the text and all other usage (including binutils, + * clang and GCC) treat this as a 32 bit value so no swizzling + * is required for big endian. + */ + .long \feat +5: + .align 3 +6: + .popsection +.endm + +#else +.macro emit_aarch64_feature_1_and, feat=0 +.endm + +#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */ + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 9543b5e0534d..015ddffaf6ca 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -99,10 +99,8 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v) return __lse_ll_sc_body(atomic64_dec_if_positive, v); } -#define ATOMIC_INIT(i) { (i) } - -#define arch_atomic_read(v) READ_ONCE((v)->counter) -#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) +#define arch_atomic_read(v) __READ_ONCE((v)->counter) +#define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i)) #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed #define arch_atomic_add_return_acquire arch_atomic_add_return_acquire @@ -225,6 +223,6 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v) #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive -#include <asm-generic/atomic-instrumented.h> +#define ARCH_ATOMIC #endif /* __ASM_ATOMIC_H */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 7d9cc5ec4971..fb4c27506ef4 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -76,8 +76,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, #define __smp_store_release(p, v) \ do { \ typeof(p) __p = (p); \ - union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ + union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force __unqual_scalar_typeof(*p)) (v) }; \ compiletime_assert_atomic_type(*p); \ kasan_check_write(__p, sizeof(*p)); \ switch (sizeof(*p)) { \ @@ -110,7 +110,7 @@ do { \ #define __smp_load_acquire(p) \ ({ \ - union { typeof(*p) __val; char __c[1]; } __u; \ + union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u; \ typeof(p) __p = (p); \ compiletime_assert_atomic_type(*p); \ kasan_check_read(__p, sizeof(*p)); \ @@ -136,33 +136,33 @@ do { \ : "Q" (*__p) : "memory"); \ break; \ } \ - __u.__val; \ + (typeof(*p))__u.__val; \ }) #define smp_cond_load_relaxed(ptr, cond_expr) \ ({ \ typeof(ptr) __PTR = (ptr); \ - typeof(*ptr) VAL; \ + __unqual_scalar_typeof(*ptr) VAL; \ for (;;) { \ VAL = READ_ONCE(*__PTR); \ if (cond_expr) \ break; \ __cmpwait_relaxed(__PTR, VAL); \ } \ - VAL; \ + (typeof(*ptr))VAL; \ }) #define smp_cond_load_acquire(ptr, cond_expr) \ ({ \ typeof(ptr) __PTR = (ptr); \ - typeof(*ptr) VAL; \ + __unqual_scalar_typeof(*ptr) VAL; \ for (;;) { \ VAL = smp_load_acquire(__PTR); \ if (cond_expr) \ break; \ __cmpwait_relaxed(__PTR, VAL); \ } \ - VAL; \ + (typeof(*ptr))VAL; \ }) #include <asm-generic/barrier.h> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index e6cca3d4acf7..9384fd8fc13c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -79,7 +79,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * IPI all online CPUs so that they undergo a context synchronization * event and are forced to refetch the new instructions. */ -#ifdef CONFIG_KGDB + /* * KGDB performs cache maintenance with interrupts disabled, so we * will deadlock trying to IPI the secondary CPUs. In theory, we can @@ -89,25 +89,12 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * the patching operation, so we don't need extra IPIs here anyway. * In which case, add a KGDB-specific bodge and return early. */ - if (kgdb_connected && irqs_disabled()) + if (in_dbg_master()) return; -#endif - kick_all_cpus_sync(); -} -static inline void flush_cache_mm(struct mm_struct *mm) -{ -} - -static inline void flush_cache_page(struct vm_area_struct *vma, - unsigned long user_addr, unsigned long pfn) -{ -} - -static inline void flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ + kick_all_cpus_sync(); } +#define flush_icache_range flush_icache_range /* * Cache maintenance functions used by the DMA API. No to be used directly. @@ -123,12 +110,7 @@ extern void __dma_flush_area(const void *, size_t); */ extern void copy_to_user_page(struct vm_area_struct *, struct page *, unsigned long, void *, const void *, unsigned long); -#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ - do { \ - memcpy(dst, src, len); \ - } while (0) - -#define flush_cache_dup_mm(mm) flush_cache_mm(mm) +#define copy_to_user_page copy_to_user_page /* * flush_dcache_page is used when the kernel has written to the page @@ -154,29 +136,11 @@ static __always_inline void __flush_icache_all(void) dsb(ish); } -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) - -/* - * We don't appear to need to do anything here. In fact, if we did, we'd - * duplicate cache flushing elsewhere performed by flush_dcache_page(). - */ -#define flush_icache_page(vma,page) do { } while (0) - -/* - * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache). - */ -static inline void flush_cache_vmap(unsigned long start, unsigned long end) -{ -} - -static inline void flush_cache_vunmap(unsigned long start, unsigned long end) -{ -} - int set_memory_valid(unsigned long addr, int numpages, int enable); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); -#endif +#include <asm-generic/cacheflush.h> + +#endif /* __ASM_CACHEFLUSH_H */ diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index b6f7bc6da5fb..93a161b3bf3f 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -24,16 +24,17 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { __uint128_t tmp; u64 sum; + int n = ihl; /* we want it signed */ tmp = *(const __uint128_t *)iph; iph += 16; - ihl -= 4; + n -= 4; tmp += ((tmp >> 64) | (tmp << 64)); sum = tmp >> 64; do { sum += *(const u32 *)iph; iph += 4; - } while (--ihl); + } while (--n > 0); sum += ((sum >> 32) | (sum << 32)); return csum_fold((__force u32)(sum >> 32)); diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index eece20d2c55f..6fb2e6bcc392 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -2,7 +2,11 @@ #ifndef __ASM_COMPILER_H #define __ASM_COMPILER_H -#if defined(CONFIG_ARM64_PTR_AUTH) +#ifdef ARM64_ASM_ARCH +#define ARM64_ASM_PREAMBLE ".arch " ARM64_ASM_ARCH "\n" +#else +#define ARM64_ASM_PREAMBLE +#endif /* * The EL0/EL1 pointer bits used by a pointer authentication code. @@ -19,6 +23,4 @@ #define __builtin_return_address(val) \ (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val))) -#endif /* CONFIG_ARM64_PTR_AUTH */ - #endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b4a40535a3d8..7faae6ff3ab4 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -33,6 +33,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64zfr0; u32 reg_id_dfr0; + u32 reg_id_dfr1; u32 reg_id_isar0; u32 reg_id_isar1; u32 reg_id_isar2; @@ -44,8 +45,11 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr1; u32 reg_id_mmfr2; u32 reg_id_mmfr3; + u32 reg_id_mmfr4; + u32 reg_id_mmfr5; u32 reg_id_pfr0; u32 reg_id_pfr1; + u32 reg_id_pfr2; u32 reg_mvfr0; u32 reg_mvfr1; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8eb5a088ae65..07b643a70710 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,7 +44,7 @@ #define ARM64_SSBS 34 #define ARM64_WORKAROUND_1418040 35 #define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_SPECULATIVE_AT_VHE 37 +#define ARM64_WORKAROUND_SPECULATIVE_AT 37 #define ARM64_HAS_ADDRESS_AUTH_ARCH 38 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 #define ARM64_HAS_GENERIC_AUTH_ARCH 40 @@ -55,13 +55,16 @@ #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_1542419 47 -#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48 -#define ARM64_HAS_E0PD 49 -#define ARM64_HAS_RNG 50 -#define ARM64_HAS_AMU_EXTN 51 -#define ARM64_HAS_ADDRESS_AUTH 52 -#define ARM64_HAS_GENERIC_AUTH 53 +#define ARM64_HAS_E0PD 48 +#define ARM64_HAS_RNG 49 +#define ARM64_HAS_AMU_EXTN 50 +#define ARM64_HAS_ADDRESS_AUTH 51 +#define ARM64_HAS_GENERIC_AUTH 52 +#define ARM64_HAS_32BIT_EL1 53 +#define ARM64_BTI 54 +#define ARM64_HAS_ARMv8_4_TTL 55 +#define ARM64_HAS_TLB_RANGE 56 -#define ARM64_NCAPS 54 +#define ARM64_NCAPS 57 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index afe08251ff95..89b4f0142c28 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -551,6 +551,13 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; } +static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT); + + return val == ID_AA64PFR0_EL1_32BIT_64BIT; +} + static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); @@ -668,7 +675,7 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); } -static inline bool system_uses_irq_prio_masking(void) +static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); @@ -680,6 +687,17 @@ static inline bool system_has_prio_mask_debugging(void) system_uses_irq_prio_masking(); } +static inline bool system_supports_bti(void) +{ + return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI); +} + +static inline bool system_supports_tlb_range(void) +{ + return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) && + cpus_have_const_cap(ARM64_HAS_TLB_RANGE); +} + #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 @@ -745,6 +763,25 @@ static inline bool cpu_has_hw_af(void) extern bool cpu_has_amu_feat(int cpu); #endif +static inline unsigned int get_vmid_bits(u64 mmfr1) +{ + int vmid_bits; + + vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_VMIDBITS_SHIFT); + if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16) + return 16; + + /* + * Return the default here even if any reserved + * value is fetched from the system register. + */ + return 8; +} + +u32 get_kvm_ipa_limit(void); +void dump_cpu_features(void); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index a87a93f67671..7219cddeba66 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 +#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 #define NVIDIA_CPU_PART_DENVER 0x003 @@ -114,6 +115,7 @@ #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) +#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7619f473155f..0b298f48f5bf 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -109,6 +109,8 @@ void disable_debug_monitors(enum dbg_active_el el); void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task); void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); @@ -125,5 +127,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) int aarch32_break_handler(struct pt_regs *regs); +void debug_traps_init(void); + #endif /* __ASSEMBLY */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 12b778d55342..996498751318 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -6,9 +6,6 @@ #define __ASM_DEVICE_H struct dev_archdata { -#ifdef CONFIG_IOMMU_API - void *iommu; /* private IOMMU data */ -#endif }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 45e821222774..d4ab3f73e7a3 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -86,14 +86,6 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) -#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) -#define efi_is_native() (true) - -#define efi_table_attr(inst, attr) (inst->attr) - -#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) - #define alloc_screen_info(x...) &screen_info static inline void free_screen_info(struct screen_info *si) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index b618017205a3..8d1c8dcb87fd 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -96,7 +96,28 @@ */ #define elf_check_arch(x) ((x)->e_machine == EM_AARCH64) -#define elf_read_implies_exec(ex,stk) (stk != EXSTACK_DISABLE_X) +/* + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. + * + * The decision process for determining the results are: + * + *        CPU*: | arm32  | arm64 | + * ELF:        |       |       | + * ---------------------|------------|------------| + * missing PT_GNU_STACK | exec-all  | exec-none | + * PT_GNU_STACK == RWX  | exec-stack | exec-stack | + * PT_GNU_STACK == RW  | exec-none | exec-none | + * + * exec-all : all PROT_READ user mappings are executable, except when + * backed by files on a noexec-filesystem. + * exec-none : only PROT_EXEC user mappings are executable. + * exec-stack: only the stack and PROT_EXEC user mappings are executable. + * + * *all arm64 CPUs support NX, so there is no "lacks NX" column. + * + */ +#define compat_elf_read_implies_exec(ex, stk) (stk == EXSTACK_DEFAULT) #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE @@ -114,7 +135,11 @@ #ifndef __ASSEMBLY__ +#include <uapi/linux/elf.h> #include <linux/bug.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/types.h> #include <asm/processor.h> /* for signal_minsigstksz, used by ARCH_DLINFO */ typedef unsigned long elf_greg_t; @@ -224,6 +249,52 @@ extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, #endif /* CONFIG_COMPAT */ +struct arch_elf_state { + int flags; +}; + +#define ARM64_ELF_BTI (1 << 0) + +#define INIT_ARCH_ELF_STATE { \ + .flags = 0, \ +} + +static inline int arch_parse_elf_property(u32 type, const void *data, + size_t datasz, bool compat, + struct arch_elf_state *arch) +{ + /* No known properties for AArch32 yet */ + if (IS_ENABLED(CONFIG_COMPAT) && compat) + return 0; + + if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) { + const u32 *p = data; + + if (datasz != sizeof(*p)) + return -ENOEXEC; + + if (system_supports_bti() && + (*p & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) + arch->flags |= ARM64_ELF_BTI; + } + + return 0; +} + +static inline int arch_elf_pt_proc(void *ehdr, void *phdr, + struct file *f, bool is_interp, + struct arch_elf_state *state) +{ + return 0; +} + +static inline int arch_check_elf(void *ehdr, bool has_interp, + void *interp_ehdr, + struct arch_elf_state *state) +{ + return 0; +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 6a395a7e6707..035003acfa87 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -22,7 +22,7 @@ #define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ /* Unallocated EC: 0x0A - 0x0B */ #define ESR_ELx_EC_CP14_64 (0x0C) -/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_BTI (0x0D) #define ESR_ELx_EC_ILL (0x0E) /* Unallocated EC: 0x0F - 0x10 */ #define ESR_ELx_EC_SVC32 (0x11) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 7a6e81ca23a8..7577a754d443 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); +void do_bti(struct pt_regs *regs); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 56a4f68b262e..840a35ed92ec 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -22,5 +22,17 @@ struct exception_table_entry #define ARCH_HAS_RELATIVE_EXTABLE +#ifdef CONFIG_BPF_JIT +int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs); +#else /* !CONFIG_BPF_JIT */ +static inline +int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + return 0; +} +#endif /* !CONFIG_BPF_JIT */ + extern int fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index f987b8a8f325..4335800201c9 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -28,10 +28,9 @@ * compile time, but to set the physical address only * in the boot process. * - * These 'compile-time allocated' memory buffers are - * page-sized. Use set_fixmap(idx,phys) to associate - * physical memory with fixmap indices. - * + * Each enum increment in these 'compile-time allocated' + * memory buffers is page-sized. Use set_fixmap(idx,phys) + * to associate physical memory with a fixmap index. */ enum fixed_addresses { FIX_HOLE, diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 87ad961f3c97..985493af704b 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -32,30 +32,70 @@ u64 smp_irq_stat_cpu(unsigned int cpu); struct nmi_ctx { u64 hcr; + unsigned int cnt; }; DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); -#define arch_nmi_enter() \ - do { \ - if (is_kernel_in_hyp_mode()) { \ - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ - nmi_ctx->hcr = read_sysreg(hcr_el2); \ - if (!(nmi_ctx->hcr & HCR_TGE)) { \ - write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ - isb(); \ - } \ - } \ - } while (0) +#define arch_nmi_enter() \ +do { \ + struct nmi_ctx *___ctx; \ + u64 ___hcr; \ + \ + if (!is_kernel_in_hyp_mode()) \ + break; \ + \ + ___ctx = this_cpu_ptr(&nmi_contexts); \ + if (___ctx->cnt) { \ + ___ctx->cnt++; \ + break; \ + } \ + \ + ___hcr = read_sysreg(hcr_el2); \ + if (!(___hcr & HCR_TGE)) { \ + write_sysreg(___hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + /* \ + * Make sure the sysreg write is performed before ___ctx->cnt \ + * is set to 1. NMIs that see cnt == 1 will rely on us. \ + */ \ + barrier(); \ + ___ctx->cnt = 1; \ + /* \ + * Make sure ___ctx->cnt is set before we save ___hcr. We \ + * don't want ___ctx->hcr to be overwritten. \ + */ \ + barrier(); \ + ___ctx->hcr = ___hcr; \ +} while (0) -#define arch_nmi_exit() \ - do { \ - if (is_kernel_in_hyp_mode()) { \ - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ - if (!(nmi_ctx->hcr & HCR_TGE)) \ - write_sysreg(nmi_ctx->hcr, hcr_el2); \ - } \ - } while (0) +#define arch_nmi_exit() \ +do { \ + struct nmi_ctx *___ctx; \ + u64 ___hcr; \ + \ + if (!is_kernel_in_hyp_mode()) \ + break; \ + \ + ___ctx = this_cpu_ptr(&nmi_contexts); \ + ___hcr = ___ctx->hcr; \ + /* \ + * Make sure we read ___ctx->hcr before we release \ + * ___ctx->cnt as it makes ___ctx->hcr updatable again. \ + */ \ + barrier(); \ + ___ctx->cnt--; \ + /* \ + * Make sure ___ctx->cnt release is visible before we \ + * restore the sysreg. Otherwise a new NMI occurring \ + * right after write_sysreg() can be fooled and think \ + * we secured things for it. \ + */ \ + barrier(); \ + if (!___ctx->cnt && !(___hcr & HCR_TGE)) \ + write_sysreg(___hcr, hcr_el2); \ +} while (0) static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 2eb6c234d594..5abf91e3494c 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -17,22 +17,11 @@ extern bool arch_hugetlb_migration_supported(struct hstate *h); #endif -#define __HAVE_ARCH_HUGE_PTEP_GET -static inline pte_t huge_ptep_get(pte_t *ptep) -{ - return READ_ONCE(*ptep); -} - -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, unsigned long len) -{ - return 0; -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, struct page *page, int writable); @@ -60,6 +49,8 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); #define set_huge_swap_pte_at set_huge_swap_pte_at +void __init arm64_hugetlb_cma_reserve(void); + #include <asm-generic/hugetlb.h> #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 0f00265248b5..22f73fe09030 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -94,6 +94,8 @@ #define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16) #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) +#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) +/* reserved for KERNEL_HWCAP_MTE __khwcap2_feature(MTE) */ /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index bb313dde58a4..0bc46149e491 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -39,13 +39,37 @@ enum aarch64_insn_encoding_class { * system instructions */ }; -enum aarch64_insn_hint_op { +enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_NOP = 0x0 << 5, AARCH64_INSN_HINT_YIELD = 0x1 << 5, AARCH64_INSN_HINT_WFE = 0x2 << 5, AARCH64_INSN_HINT_WFI = 0x3 << 5, AARCH64_INSN_HINT_SEV = 0x4 << 5, AARCH64_INSN_HINT_SEVL = 0x5 << 5, + + AARCH64_INSN_HINT_XPACLRI = 0x07 << 5, + AARCH64_INSN_HINT_PACIA_1716 = 0x08 << 5, + AARCH64_INSN_HINT_PACIB_1716 = 0x0A << 5, + AARCH64_INSN_HINT_AUTIA_1716 = 0x0C << 5, + AARCH64_INSN_HINT_AUTIB_1716 = 0x0E << 5, + AARCH64_INSN_HINT_PACIAZ = 0x18 << 5, + AARCH64_INSN_HINT_PACIASP = 0x19 << 5, + AARCH64_INSN_HINT_PACIBZ = 0x1A << 5, + AARCH64_INSN_HINT_PACIBSP = 0x1B << 5, + AARCH64_INSN_HINT_AUTIAZ = 0x1C << 5, + AARCH64_INSN_HINT_AUTIASP = 0x1D << 5, + AARCH64_INSN_HINT_AUTIBZ = 0x1E << 5, + AARCH64_INSN_HINT_AUTIBSP = 0x1F << 5, + + AARCH64_INSN_HINT_ESB = 0x10 << 5, + AARCH64_INSN_HINT_PSB = 0x11 << 5, + AARCH64_INSN_HINT_TSB = 0x12 << 5, + AARCH64_INSN_HINT_CSDB = 0x14 << 5, + + AARCH64_INSN_HINT_BTI = 0x20 << 5, + AARCH64_INSN_HINT_BTIC = 0x22 << 5, + AARCH64_INSN_HINT_BTIJ = 0x24 << 5, + AARCH64_INSN_HINT_BTIJC = 0x26 << 5, }; enum aarch64_insn_imm_type { @@ -344,7 +368,7 @@ __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef __AARCH64_INSN_FUNCS -bool aarch64_insn_is_nop(u32 insn); +bool aarch64_insn_is_steppable_hint(u32 insn); bool aarch64_insn_is_branch_imm(u32 insn); static inline bool aarch64_insn_is_adr_adrp(u32 insn) @@ -370,7 +394,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_condition cond); -u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op); +u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op); u32 aarch64_insn_gen_nop(void); u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, enum aarch64_insn_branch_type type); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 6facd1308e7c..ff50dd731852 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -9,11 +9,11 @@ #define __ASM_IO_H #include <linux/types.h> +#include <linux/pgtable.h> #include <asm/byteorder.h> #include <asm/barrier.h> #include <asm/memory.h> -#include <asm/pgtable.h> #include <asm/early_ioremap.h> #include <asm/alternative.h> #include <asm/cpufeature.h> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index aa4b6521ef14..ff328e5bbb75 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -95,6 +95,11 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) return res; } +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + static inline unsigned long arch_local_irq_save(void) { unsigned long flags; diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a6e5da755359..329fb15f6bac 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -8,7 +8,7 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H -#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> #include <asm/sparsemem.h> /* diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 51c1d9918999..1da8e3dc4455 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -71,11 +71,12 @@ * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate + * PTW: Take a stage2 fault if a stage1 walk steps in device memory */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO) + HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7c7eeeaab9fa..6f98fbd0ac81 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,33 +42,83 @@ #include <linux/mm.h> -/* Translate a kernel address of @sym into its equivalent linear mapping */ -#define kvm_ksym_ref(sym) \ +/* + * Translate name of a symbol defined in nVHE hyp to the name seen + * by kernel proper. All nVHE symbols are prefixed by the build system + * to avoid clashes with the VHE variants. + */ +#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym + +#define DECLARE_KVM_VHE_SYM(sym) extern char sym[] +#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] + +/* + * Define a pair of symbols sharing the same name but one defined in + * VHE and the other in nVHE hyp implementations. + */ +#define DECLARE_KVM_HYP_SYM(sym) \ + DECLARE_KVM_VHE_SYM(sym); \ + DECLARE_KVM_NVHE_SYM(sym) + +#define CHOOSE_VHE_SYM(sym) sym +#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) + +#ifndef __KVM_NVHE_HYPERVISOR__ +/* + * BIG FAT WARNINGS: + * + * - Don't be tempted to change the following is_kernel_in_hyp_mode() + * to has_vhe(). has_vhe() is implemented as a *final* capability, + * while this is used early at boot time, when the capabilities are + * not final yet.... + * + * - Don't let the nVHE hypervisor have access to this, as it will + * pick the *wrong* symbol (yes, it runs at EL2...). + */ +#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? CHOOSE_VHE_SYM(sym) \ + : CHOOSE_NVHE_SYM(sym)) +#else +/* The nVHE hypervisor shouldn't even try to access anything */ +extern void *__nvhe_undefined_symbol; +#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol +#endif + +/* Translate a kernel address @ptr into its equivalent linear mapping */ +#define kvm_ksym_ref(ptr) \ ({ \ - void *val = &sym; \ + void *val = (ptr); \ if (!is_kernel_in_hyp_mode()) \ - val = lm_alias(&sym); \ + val = lm_alias((ptr)); \ val; \ }) +#define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym)) struct kvm; struct kvm_vcpu; +struct kvm_s2_mmu; -extern char __kvm_hyp_init[]; -extern char __kvm_hyp_init_end[]; +DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); +DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); +#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) +#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern char __kvm_hyp_vector[]; +#ifdef CONFIG_KVM_INDIRECT_VECTORS +extern atomic_t arm64_el2_vector_last_slot; +DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); +#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) +#endif extern void __kvm_flush_vm_context(void); -extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); -extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level); +extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); -extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); +extern void __kvm_timer_set_cntvoff(u64 cntvoff); -extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); -extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); +extern void __kvm_enable_ssbs(void); extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); @@ -79,12 +129,39 @@ extern u32 __kvm_get_mdcr_el2(void); extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; -/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ +/* + * Obtain the PC-relative address of a kernel symbol + * s: symbol + * + * The goal of this macro is to return a symbol's address based on a + * PC-relative computation, as opposed to a loading the VA from a + * constant pool or something similar. This works well for HYP, as an + * absolute VA is guaranteed to be wrong. Only use this if trying to + * obtain the address of a symbol (i.e. not something you obtained by + * following a pointer). + */ +#define hyp_symbol_addr(s) \ + ({ \ + typeof(s) *addr; \ + asm("adrp %0, %1\n" \ + "add %0, %0, :lo12:%1\n" \ + : "=r" (addr) : "S" (&s)); \ + addr; \ + }) + +/* + * Home-grown __this_cpu_{ptr,read} variants that always work at HYP, + * provided that sym is really a *symbol* and not a pointer obtained from + * a data structure. As for SHIFT_PERCPU_PTR(), the creative casting keeps + * sparse quiet. + */ #define __hyp_this_cpu_ptr(sym) \ ({ \ - void *__ptr = hyp_symbol_addr(sym); \ + void *__ptr; \ + __verify_pcpu_ptr(&sym); \ + __ptr = hyp_symbol_addr(sym); \ __ptr += read_sysreg(tpidr_el2); \ - (typeof(&sym))__ptr; \ + (typeof(sym) __kernel __force *)__ptr; \ }) #define __hyp_this_cpu_read(sym) \ @@ -92,6 +169,34 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; *__hyp_this_cpu_ptr(sym); \ }) +#define __KVM_EXTABLE(from, to) \ + " .pushsection __kvm_ex_table, \"a\"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + + +#define __kvm_at(at_op, addr) \ +( { \ + int __kvm_at_err = 0; \ + u64 spsr, elr; \ + asm volatile( \ + " mrs %1, spsr_el2\n" \ + " mrs %2, elr_el2\n" \ + "1: at "at_op", %3\n" \ + " isb\n" \ + " b 9f\n" \ + "2: msr spsr_el2, %1\n" \ + " msr elr_el2, %2\n" \ + " mov %w0, %4\n" \ + "9:\n" \ + __KVM_EXTABLE(1b, 2b) \ + : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \ + : "r" (addr), "i" (-EFAULT)); \ + __kvm_at_err; \ +} ) + + #else /* __ASSEMBLY__ */ .macro hyp_adr_this_cpu reg, sym, tmp @@ -114,7 +219,21 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; .macro get_vcpu_ptr vcpu, ctxt get_host_ctxt \ctxt, \vcpu ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] - kern_hyp_va \vcpu +.endm + +/* + * KVM extable for unexpected exceptions. + * In the same format _asm_extable, but output to a different section so that + * it can be mapped to EL2. The KVM version is not sorted. The caller must + * ensure: + * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented + * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. + */ +.macro _kvm_extable, from, to + .pushsection __kvm_ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection .endm #endif diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 0185ee8b8b5e..d6bb40122fdb 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -19,20 +19,12 @@ struct kvm_sys_reg_table { size_t num; }; -struct kvm_sys_reg_target_table { - struct kvm_sys_reg_table table64; - struct kvm_sys_reg_table table32; -}; - -void kvm_register_target_sys_reg_table(unsigned int target, - struct kvm_sys_reg_target_table *table); - -int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); #define kvm_coproc_table_init kvm_sys_reg_table_init void kvm_sys_reg_table_init(void); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index a30b4eec7cb4..1cc5f5f72d0b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -112,12 +112,6 @@ static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); } -static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) -{ - if (vcpu_has_ptrauth(vcpu)) - vcpu_ptrauth_disable(vcpu); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; @@ -130,33 +124,12 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; -} - -static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) -{ - return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; -} - -static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_ELR); - else - return *__vcpu_elr_el1(vcpu); -} - -static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, SYS_ELR); - else - *__vcpu_elr_el1(vcpu) = v; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pc; } static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate; } static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) @@ -185,14 +158,14 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { - return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; + return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs[reg_num]; } static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, unsigned long val) { if (reg_num != 31) - vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; + vcpu_gp_regs(vcpu)->regs[reg_num] = val; } static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) @@ -203,7 +176,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) if (vcpu->arch.sysregs_loaded_on_cpu) return read_sysreg_el1(SYS_SPSR); else - return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + return __vcpu_sys_reg(vcpu, SPSR_EL1); } static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) @@ -216,7 +189,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) if (vcpu->arch.sysregs_loaded_on_cpu) write_sysreg_el1(v, SYS_SPSR); else - vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; } /* @@ -265,14 +238,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) return mode != PSR_MODE_EL0t; } -static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; } static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); if (esr & ESR_ELx_CV) return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; @@ -297,64 +270,64 @@ static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK; } static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV); } static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); + return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); } static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE); } static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF); } static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { - return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; + return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } -static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW); } +/* Always check for S1PTW *before* using this. */ static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || - kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR; } static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM); } static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { - return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); + return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_IL); } static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) @@ -362,17 +335,22 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW; } +static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); +} + static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } -static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: @@ -393,12 +371,15 @@ static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); return ESR_ELx_SYS64_ISS_RT(esr); } static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { + if (kvm_vcpu_abt_iss1tw(vcpu)) + return true; + if (kvm_vcpu_trap_is_iabt(vcpu)) return false; @@ -507,10 +488,12 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { kvm_skip_instr32(vcpu, is_wide_instr); - else + } else { *vcpu_pc(vcpu) += 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } /* advance the singlestep state machine */ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; @@ -520,14 +503,14 @@ static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_i * Skip an instruction which has been emulated at hyp while most guest sysregs * are live. */ -static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR); + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32c8a675e5a4..905c2b87e05a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,9 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; @@ -63,19 +66,34 @@ struct kvm_vmid { u32 vmid; }; -struct kvm_arch { +struct kvm_s2_mmu { struct kvm_vmid vmid; - /* stage2 entry level table */ - pgd_t *pgd; - phys_addr_t pgd_phys; - - /* VTCR_EL2 value for this VM */ - u64 vtcr; + /* + * stage2 entry level table + * + * Two kvm_s2_mmu structures in the same VM can point to the same + * pgd here. This happens when running a guest using a + * translation regime that isn't affected by its own stage-2 + * translation, such as a non-VHE hypervisor running at vEL2, or + * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the + * canonical stage-2 page tables. + */ + pgd_t *pgd; + phys_addr_t pgd_phys; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; + struct kvm *kvm; +}; + +struct kvm_arch { + struct kvm_s2_mmu mmu; + + /* VTCR_EL2 value for this VM */ + u64 vtcr; + /* The maximum number of vCPUs depends on the used GIC model */ int max_vcpus; @@ -94,17 +112,6 @@ struct kvm_arch { bool return_nisv_io_abort_to_user; }; -#define KVM_NR_MEM_OBJS 40 - -/* - * We don't want allocation failures within the mmu code, so we preallocate - * enough memory for a single page fault in a cache. - */ -struct kvm_mmu_memory_cache { - int nobjs; - void *objects[KVM_NR_MEM_OBJS]; -}; - struct kvm_vcpu_fault_info { u32 esr_el2; /* Hyp Syndrom Register */ u64 far_el2; /* Hyp Fault Address Register */ @@ -112,12 +119,8 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; -/* - * 0 is reserved as an invalid value. - * Order should be kept in sync with the save/restore code. - */ enum vcpu_sysreg { - __INVALID_SYSREG__, + __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ CSSELR_EL1, /* Cache Size Selection Register */ SCTLR_EL1, /* System Control Register */ @@ -171,6 +174,16 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, + ELR_EL1, + SP_EL1, + SPSR_EL1, + + CNTVOFF_EL2, + CNTV_CVAL_EL0, + CNTV_CTL_EL0, + CNTP_CVAL_EL0, + CNTP_CTL_EL0, + /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -222,7 +235,15 @@ enum vcpu_sysreg { #define NR_COPRO_REGS (NR_SYS_REGS * 2) struct kvm_cpu_context { - struct kvm_regs gp_regs; + struct user_pt_regs regs; /* sp = sp_el0 */ + + u64 spsr_abt; + u64 spsr_und; + u64 spsr_irq; + u64 spsr_fiq; + + struct user_fpsimd_state fp_regs; + union { u64 sys_regs[NR_SYS_REGS]; u32 copro[NR_COPRO_REGS]; @@ -255,6 +276,9 @@ struct kvm_vcpu_arch { void *sve_state; unsigned int sve_max_vl; + /* Stage 2 paging state used by the hardware on next switch */ + struct kvm_s2_mmu *hw_mmu; + /* HYP configuration */ u64 hcr_el2; u32 mdcr_el2; @@ -285,9 +309,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - /* Pointer to host CPU context */ - struct kvm_cpu_context *host_cpu_context; - struct thread_info *host_thread_info; /* hyp VA */ struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ @@ -342,12 +363,11 @@ struct kvm_vcpu_arch { struct vcpu_reset_state reset_state; /* True when deferrable sysregs are loaded on the physical CPU, - * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ + * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */ bool sysregs_loaded_on_cpu; /* Guest PV state */ struct { - u64 steal; u64 last_steal; gpa_t base; } steal; @@ -384,19 +404,29 @@ struct kvm_vcpu_arch { #define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) -#define vcpu_has_ptrauth(vcpu) ((system_supports_address_auth() || \ - system_supports_generic_auth()) && \ - ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)) +#ifdef CONFIG_ARM64_PTR_AUTH +#define vcpu_has_ptrauth(vcpu) \ + ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \ + cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \ + (vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH) +#else +#define vcpu_has_ptrauth(vcpu) false +#endif -#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) /* - * Only use __vcpu_sys_reg if you know you want the memory backed version of a - * register, and not the one most recently accessed by a running VCPU. For - * example, for userspace access or for system registers that are never context - * switched, but only emulated. + * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the + * memory backed version of a register, and not the one most recently + * accessed by a running VCPU. For example, for userspace access or + * for system registers that are never context switched, but only + * emulated. */ -#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) + +#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) + +#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); @@ -405,8 +435,10 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); * CP14 and CP15 live in the same array, as they are backed by the * same system registers. */ -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) +#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) + +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) struct kvm_vm_stat { ulong remote_tlb_flush; @@ -415,6 +447,8 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u64 halt_successful_poll; u64 halt_attempted_poll; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 halt_poll_invalid; u64 halt_wakeup; u64 hvc_exit_stat; @@ -438,7 +472,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); @@ -448,6 +482,18 @@ void kvm_arm_resume_guest(struct kvm *kvm); u64 __kvm_call_hyp(void *hypfn, ...); +#define kvm_call_hyp_nvhe(f, ...) \ + do { \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + } while(0) + +#define kvm_call_hyp_nvhe_ret(f, ...) \ + ({ \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + }) + /* * The couple of isb() below are there to guarantee the same behaviour * on VHE as on !VHE, where the eret to EL1 acts as a context @@ -459,7 +505,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); f(__VA_ARGS__); \ isb(); \ } else { \ - __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ + kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ } while(0) @@ -471,8 +517,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); ret = f(__VA_ARGS__); \ isb(); \ } else { \ - ret = __kvm_call_hyp(kvm_ksym_ref(f), \ - ##__VA_ARGS__); \ + ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \ } \ \ ret; \ @@ -481,18 +526,15 @@ u64 __kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); -int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index); -void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index); +int handle_exit(struct kvm_vcpu *vcpu, int exception_index); +void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu); +int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa); int kvm_perf_init(void); int kvm_perf_teardown(void); @@ -501,6 +543,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); void kvm_update_stolen_time(struct kvm_vcpu *vcpu); +bool kvm_arm_pvtime_supported(void); int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, @@ -527,40 +570,7 @@ DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); -} - -void __kvm_enable_ssbs(void); - -static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, - unsigned long hyp_stack_ptr, - unsigned long vector_ptr) -{ - /* - * Calculate the raw per-cpu offset without a translation from the - * kernel's mapping to the linear mapping, and store it in tpidr_el2 - * so that we can use adr_l to access per-cpu variables in EL2. - */ - u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) - - (u64)kvm_ksym_ref(kvm_host_data)); - - /* - * Call initialization code, and switch to the full blown HYP code. - * If the cpucaps haven't been finalized yet, something has gone very - * wrong, and hyp will crash and burn when it uses any - * cpus_have_const_cap() wrapper. - */ - BUG_ON(!system_capabilities_finalized()); - __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); - - /* - * Disabling SSBD on a non-VHE system requires us to enable SSBS - * at EL2. - */ - if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && - arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - kvm_call_hyp(__kvm_enable_ssbs); - } + ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } static inline bool kvm_arch_requires_vhe(void) @@ -573,10 +583,6 @@ static inline bool kvm_arch_requires_vhe(void) if (system_supports_sve()) return true; - /* Some implementations have defects that confine them to VHE */ - if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) - return true; - return false; } @@ -598,8 +604,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -static inline void __cpu_init_stage2(void) {} - /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); @@ -667,10 +671,10 @@ static inline int kvm_arm_have_ssbd(void) } } -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); -void kvm_set_ipa_limit(void); +int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index fe57f60f06a8..46689e7db46c 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -10,11 +10,8 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> #include <asm/alternative.h> -#include <asm/kvm_mmu.h> #include <asm/sysreg.h> -#define __hyp_text __section(.hyp.text) notrace - #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ @@ -56,25 +53,28 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); -void __vgic_v3_save_state(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); -void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); +#endif +#ifdef __KVM_NVHE_HYPERVISOR__ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); +#else void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); -void __sysreg32_save_state(struct kvm_vcpu *vcpu); -void __sysreg32_restore_state(struct kvm_vcpu *vcpu); +#endif void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); @@ -82,28 +82,17 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +#ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); void deactivate_traps_vhe_put(void); +#endif u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); -void __noreturn __hyp_do_panic(unsigned long, ...); -/* - * Must be called from hyp code running at EL2 with an updated VTTBR - * and interrupts disabled. - */ -static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) -{ - write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL1/EL0 translation regime used by - * the guest. - */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); -} +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt); +#ifdef __KVM_NVHE_HYPERVISOR__ +void __noreturn __hyp_do_panic(unsigned long, ...); +#endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 30b0e8d6b895..189839c3706a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -83,11 +83,11 @@ alternative_cb_end #else +#include <linux/pgtable.h> #include <asm/pgalloc.h> #include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); @@ -108,26 +108,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) /* - * Obtain the PC-relative address of a kernel symbol - * s: symbol - * - * The goal of this macro is to return a symbol's address based on a - * PC-relative computation, as opposed to a loading the VA from a - * constant pool or something similar. This works well for HYP, as an - * absolute VA is guaranteed to be wrong. Only use this if trying to - * obtain the address of a symbol (i.e. not something you obtained by - * following a pointer). - */ -#define hyp_symbol_addr(s) \ - ({ \ - typeof(s) *addr; \ - asm("adrp %0, %1\n" \ - "add %0, %0, :lo12:%1\n" \ - : "=r" (addr) : "S" (&s)); \ - addr; \ - }) - -/* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. */ @@ -154,12 +134,12 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_alloc_stage2_pgd(struct kvm *kvm); -void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); -int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); @@ -172,8 +152,8 @@ void kvm_clear_hyp_idmap(void); __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE) #define kvm_mk_pud(pmdp) \ __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE) -#define kvm_mk_pgd(pudp) \ - __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE) +#define kvm_mk_p4d(pmdp) \ + __p4d(__phys_to_p4d_val(__pa(pmdp)) | PUD_TYPE_TABLE) #define kvm_set_pud(pudp, pud) set_pud(pudp, pud) @@ -299,6 +279,12 @@ static inline bool kvm_s2pud_young(pud_t pud) #define hyp_pud_table_empty(pudp) kvm_page_empty(pudp) #endif +#ifdef __PAGETABLE_P4D_FOLDED +#define hyp_p4d_table_empty(p4dp) (0) +#else +#define hyp_p4d_table_empty(p4dp) kvm_page_empty(p4dp) +#endif + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) @@ -363,8 +349,6 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) } } -#define kvm_virt_to_phys(x) __pa_symbol(x) - void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); @@ -416,7 +400,7 @@ static inline unsigned int kvm_get_vmid_bits(void) { int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; + return get_vmid_bits(reg); } /* @@ -473,7 +457,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -/* This is only called on a VHE system */ +/* This is called on both VHE and !VHE systems */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); @@ -593,16 +577,33 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); } -static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) +static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { - struct kvm_vmid *vmid = &kvm->arch.vmid; + struct kvm_vmid *vmid = &mmu->vmid; u64 vmid_field, baddr; u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; - baddr = kvm->arch.pgd_phys; + baddr = mmu->pgd_phys; vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } +/* + * Must be called from hyp code running at EL2 with an updated VTTBR + * and interrupts disabled. + */ +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +{ + write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2); + write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); + + /* + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL1/EL0 translation regime used by + * the guest. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 6301813dcace..0ddf98c3ba9f 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -61,44 +61,36 @@ /* * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will - * check for the presence of one of the cpufeature flag - * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and + * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as + * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and * then proceed ahead with the save/restore of Pointer Authentication - * key registers. + * key registers if enabled for the guest. */ .macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 1000f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 1001f -alternative_else_nop_endif -1000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 1001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 -1001: +.L__skip_switch\@: .endm .macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 2000f -alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 2001f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -2000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 2001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_save_state \reg1, \reg2, \reg3 add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 isb -2001: +.L__skip_switch\@: .endm #else /* !CONFIG_ARM64_PTR_AUTH */ diff --git a/arch/arm64/include/asm/kvm_types.h b/arch/arm64/include/asm/kvm_types.h new file mode 100644 index 000000000000..9a126b9e2d7c --- /dev/null +++ b/arch/arm64/include/asm/kvm_types.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_KVM_TYPES_H +#define _ASM_ARM64_KVM_TYPES_H + +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 + +#endif /* _ASM_ARM64_KVM_TYPES_H */ + diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index ebee3113a62f..ba89a9af820a 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -4,6 +4,46 @@ #define __ALIGN .align 2 #define __ALIGN_STR ".align 2" +#if defined(CONFIG_ARM64_BTI_KERNEL) && defined(__aarch64__) + +/* + * Since current versions of gas reject the BTI instruction unless we + * set the architecture version to v8.5 we use the hint instruction + * instead. + */ +#define BTI_C hint 34 ; + +/* + * When using in-kernel BTI we need to ensure that PCS-conformant assembly + * functions have suitable annotations. Override SYM_FUNC_START to insert + * a BTI landing pad at the start of everything. + */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + BTI_C + +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + BTI_C + +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + BTI_C + +#endif + /* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index a1871bb32bb1..afa722504bfd 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -10,11 +10,8 @@ #ifndef __ASM_MEMORY_H #define __ASM_MEMORY_H -#include <linux/compiler.h> #include <linux/const.h> #include <linux/sizes.h> -#include <linux/types.h> -#include <asm/bug.h> #include <asm/page-def.h> /* @@ -157,11 +154,15 @@ #endif #ifndef __ASSEMBLY__ -extern u64 vabits_actual; -#define PAGE_END (_PAGE_END(vabits_actual)) #include <linux/bitops.h> +#include <linux/compiler.h> #include <linux/mmdebug.h> +#include <linux/types.h> +#include <asm/bug.h> + +extern u64 vabits_actual; +#define PAGE_END (_PAGE_END(vabits_actual)) extern s64 physvirt_offset; extern s64 memstart_addr; @@ -322,6 +323,7 @@ static inline void *phys_to_virt(phys_addr_t x) __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \ }) +void dump_mem_limit(void); #endif /* !ASSEMBLY */ /* diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h new file mode 100644 index 000000000000..081ec8de9ea6 --- /dev/null +++ b/arch/arm64/include/asm/mman.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <linux/compiler.h> +#include <linux/types.h> +#include <uapi/asm/mman.h> + +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, + unsigned long pkey __always_unused) +{ + if (system_supports_bti() && (prot & PROT_BTI)) + return VM_ARM64_BTI; + + return 0; +} +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) + +static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) +{ + return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0); +} +#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) + +static inline bool arch_validate_prot(unsigned long prot, + unsigned long addr __always_unused) +{ + unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM; + + if (system_supports_bti()) + supported |= PROT_BTI; + + return (prot & ~supported) == 0; +} +#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr) + +#endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 68140fdd89d6..a7a5ecaa2e83 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -19,6 +19,9 @@ typedef struct { atomic64_t id; +#ifdef CONFIG_COMPAT + void *sigpage; +#endif void *vdso; unsigned long flags; } mm_context_t; @@ -42,13 +45,6 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ - defined(CONFIG_HARDEN_EL2_VECTORS)) - -extern char __bp_harden_hyp_vecs[]; -extern atomic_t arm64_el2_vector_last_slot; -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ - #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index ab46187c6300..f2d7537d6f83 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -14,13 +14,13 @@ #include <linux/sched.h> #include <linux/sched/hotplug.h> #include <linux/mm_types.h> +#include <linux/pgtable.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/proc-fns.h> #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> -#include <asm/pgtable.h> #include <asm/sysreg.h> #include <asm/tlbflush.h> @@ -175,7 +175,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) * take CPU migration into account. */ #define destroy_context(mm) do { } while(0) -void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); +void check_and_switch_context(struct mm_struct *mm); #define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) @@ -214,8 +214,6 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void __switch_mm(struct mm_struct *next) { - unsigned int cpu = smp_processor_id(); - /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. @@ -225,7 +223,7 @@ static inline void __switch_mm(struct mm_struct *next) return; } - check_and_switch_context(next, cpu); + check_and_switch_context(next); } static inline void diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index e7765b62c712..2c2d7dbe8a02 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -72,6 +72,13 @@ #define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36 #define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37 #define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B +#define ARMV8_PMUV3_PERFCTR_STALL 0x3C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F /* Statistical profiling extension microarchitectural events */ #define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 @@ -79,6 +86,26 @@ #define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 #define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 +/* AMUv1 architecture events */ +#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 +#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 + +/* long-latency read miss events */ +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B + +/* additional latency from alignment events */ +#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 +#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 +#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 + +/* Armv8.5 Memory Tagging Extension events */ +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 + /* ARMv8 recommended implementation defined event types */ #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 172d76fa0245..3c6a7f5988b1 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -13,37 +13,13 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ +#define __HAVE_ARCH_PGD_FREE +#include <asm-generic/pgalloc.h> #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - gfp_t gfp = GFP_PGTABLE_USER; - struct page *page; - - if (mm == &init_mm) - gfp = GFP_PGTABLE_KERNEL; - - page = alloc_page(gfp); - if (!page) - return NULL; - if (!pgtable_pmd_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page_address(page); -} - -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp) -{ - BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1)); - pgtable_pmd_page_dtor(virt_to_page(pmdp)); - free_page((unsigned long)pmdp); -} - static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) { set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); @@ -62,28 +38,17 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) #if CONFIG_PGTABLE_LEVELS > 3 -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - return (pud_t *)__get_free_page(GFP_PGTABLE_USER); -} - -static inline void pud_free(struct mm_struct *mm, pud_t *pudp) -{ - BUG_ON((unsigned long)pudp & (PAGE_SIZE-1)); - free_page((unsigned long)pudp); -} - -static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot) +static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { - set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot)); + set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE); + __p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE); } #else -static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot) +static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { BUILD_BUG(); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 6bf5e650da78..d400a4d9aee2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -29,7 +29,7 @@ * Size mapped by an entry at level n ( 0 <= n <= 3) * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits * in the final page. The maximum number of translation levels supported by - * the architecture is 4. Hence, starting at at level n, we have further + * the architecture is 4. Hence, starting at level n, we have further * ((4 - n) - 1) levels of translation excluding the offset within the page. * So, the total number of bits mapped by an entry at level n is : * @@ -82,23 +82,23 @@ * Contiguous page definitions. */ #ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PTE_SHIFT 5 -#define CONT_PMD_SHIFT 5 +#define CONT_PTE_SHIFT (5 + PAGE_SHIFT) +#define CONT_PMD_SHIFT (5 + PMD_SHIFT) #elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PTE_SHIFT 7 -#define CONT_PMD_SHIFT 5 +#define CONT_PTE_SHIFT (7 + PAGE_SHIFT) +#define CONT_PMD_SHIFT (5 + PMD_SHIFT) #else -#define CONT_PTE_SHIFT 4 -#define CONT_PMD_SHIFT 4 +#define CONT_PTE_SHIFT (4 + PAGE_SHIFT) +#define CONT_PMD_SHIFT (4 + PMD_SHIFT) #endif -#define CONT_PTES (1 << CONT_PTE_SHIFT) +#define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) -#define CONT_PMDS (1 << CONT_PMD_SHIFT) +#define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -/* the the numerical offset of the PTE within a range of CONT_PTES */ +/* the numerical offset of the PTE within a range of CONT_PTES */ #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) /* @@ -151,6 +151,7 @@ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ +#define PTE_GP (_AT(pteval_t, 1) << 50) /* BTI guarded */ #define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ @@ -177,10 +178,12 @@ #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ #define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */ +#define PTE_S2_SW_RESVD (_AT(pteval_t, 15) << 55) /* Reserved for SW */ #define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */ #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ #define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */ +#define PMD_S2_SW_RESVD (_AT(pmdval_t, 15) << 55) /* Reserved for SW */ #define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */ #define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */ @@ -190,7 +193,6 @@ * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) -#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2) /* * EL2/HYP PTE/PMD definitions @@ -216,6 +218,7 @@ #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) #define TCR_TxSZ_WIDTH 6 #define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) +#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET) #define TCR_EPD0_SHIFT 7 #define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 1305e28225fc..4d867c6446c4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -21,6 +21,7 @@ #ifndef __ASSEMBLY__ +#include <asm/cpufeature.h> #include <asm/pgtable-types.h> extern bool arm64_use_ng_mappings; @@ -31,6 +32,16 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +/* + * If we have userspace only BTI we don't want to mark kernel pages + * guarded even if the system does support BTI. + */ +#ifdef CONFIG_ARM64_BTI_KERNEL +#define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0) +#else +#define PTE_MAYBE_GP 0 +#endif + #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) @@ -56,7 +67,7 @@ extern bool arm64_use_ng_mappings; #define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) #define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) -#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN) #define PAGE_S2_MEMATTR(attr) \ ({ \ diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index acb0751a6606..b8f158ae2527 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -14,6 +14,7 @@ typedef u64 pteval_t; typedef u64 pmdval_t; typedef u64 pudval_t; +typedef u64 p4dval_t; typedef u64 pgdval_t; /* @@ -44,13 +45,11 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #elif CONFIG_PGTABLE_LEVELS == 3 -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #elif CONFIG_PGTABLE_LEVELS == 4 -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 538c85e62f86..d5d3fbe73953 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -40,6 +40,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE + +/* Set stride and tlb_level in flush_*_tlb_range */ +#define flush_pmd_tlb_range(vma, addr, end) \ + __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2) +#define flush_pud_tlb_range(vma, addr, end) \ + __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -298,6 +308,11 @@ static inline pte_t pgd_pte(pgd_t pgd) return __pte(pgd_val(pgd)); } +static inline pte_t p4d_pte(p4d_t p4d) +{ + return __pte(p4d_val(p4d)); +} + static inline pte_t pud_pte(pud_t pud) { return __pte(pud_val(pud)); @@ -335,7 +350,7 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) #ifdef CONFIG_NUMA_BALANCING /* - * See the comment in include/asm-generic/pgtable.h + * See the comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { @@ -366,7 +381,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) +#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) @@ -401,12 +416,18 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) +#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) +#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) + #define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd)) #define __phys_to_pgd_val(phys) __phys_to_pte_val(phys) #define __pgprot_modify(prot,mask,bits) \ __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) +#define pgprot_nx(prot) \ + __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) + /* * Mark the prot value as uncacheable and unbufferable. */ @@ -457,6 +478,7 @@ extern pgd_t init_pg_dir[PTRS_PER_PGD]; extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_end[]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); @@ -494,21 +516,19 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) return __pmd_to_phys(pmd); } -static inline void pte_unmap(pte_t *pte) { } +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + return (unsigned long)__va(pmd_page_paddr(pmd)); +} /* Find an entry in the third-level page table. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - #define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) -#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) #define pte_clear_fixmap() clear_fixmap(FIX_PTE) -#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd))) +#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd)) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) @@ -556,17 +576,19 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) return __pud_to_phys(pud); } -/* Find an entry in the second-level page table. */ -#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +static inline unsigned long pud_page_vaddr(pud_t pud) +{ + return (unsigned long)__va(pud_page_paddr(pud)); +} +/* Find an entry in the second-level page table. */ #define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t)) -#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) #define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) #define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) #define pmd_clear_fixmap() clear_fixmap(FIX_PMD) -#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud))) +#define pud_page(pud) phys_to_page(__pud_to_phys(pud)) /* use ONLY for statically allocated translation tables */ #define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) @@ -588,49 +610,52 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) -#define pgd_present(pgd) (pgd_val(pgd)) +#define p4d_none(p4d) (!p4d_val(p4d)) +#define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) +#define p4d_present(p4d) (p4d_val(p4d)) -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { - if (in_swapper_pgdir(pgdp)) { - set_swapper_pgd(pgdp, pgd); + if (in_swapper_pgdir(p4dp)) { + set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); return; } - WRITE_ONCE(*pgdp, pgd); + WRITE_ONCE(*p4dp, p4d); dsb(ishst); isb(); } -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - set_pgd(pgdp, __pgd(0)); + set_p4d(p4dp, __p4d(0)); } -static inline phys_addr_t pgd_page_paddr(pgd_t pgd) +static inline phys_addr_t p4d_page_paddr(p4d_t p4d) { - return __pgd_to_phys(pgd); + return __p4d_to_phys(p4d); } -/* Find an entry in the frst-level page table. */ -#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +static inline unsigned long p4d_page_vaddr(p4d_t p4d) +{ + return (unsigned long)__va(p4d_page_paddr(p4d)); +} -#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) -#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) +/* Find an entry in the frst-level page table. */ +#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) -#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) +#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) #define pud_clear_fixmap() clear_fixmap(FIX_PUD) -#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) +#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) /* use ONLY for statically allocated translation tables */ #define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) #else +#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;}) #define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */ @@ -644,23 +669,13 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) -/* to find an entry in a page-table-directory */ -#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) - -#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) - -#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr))) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) - #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); @@ -840,8 +855,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, extern int kern_addr_valid(unsigned long addr); -#include <asm-generic/pgtable.h> - /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index bf57308fcd63..966ed30ed5f7 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -27,7 +27,7 @@ * * Some code sections either automatically switch back to PSR.I or explicitly * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included - * in the the priority mask, it indicates that PSR.I should be set and + * in the priority mask, it indicates that PSR.I should be set and * interrupt disabling temporarily does not rely on IRQ priorities. */ #define GIC_PRIO_IRQON 0xe0 @@ -35,6 +35,7 @@ #define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ +#define PSR_MODE_THREAD_BIT (1 << 0) #define PSR_IL_BIT (1 << 20) /* AArch32-specific ptrace requests */ diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h new file mode 100644 index 000000000000..eaa2cd92e4c1 --- /dev/null +++ b/arch/arm64/include/asm/scs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifdef __ASSEMBLY__ + +#include <asm/asm-offsets.h> + +#ifdef CONFIG_SHADOW_CALL_STACK + scs_sp .req x18 + + .macro scs_load tsk, tmp + ldr scs_sp, [\tsk, #TSK_TI_SCS_SP] + .endm + + .macro scs_save tsk, tmp + str scs_sp, [\tsk, #TSK_TI_SCS_SP] + .endm +#else + .macro scs_load tsk, tmp + .endm + + .macro scs_save tsk, tmp + .endm +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* __ASSEMBLY __ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 40d5ba029615..0eadbf933e35 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -23,14 +23,6 @@ #define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT) #define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT) -/* Possible options for __cpu_setup */ -/* Option to setup primary cpu */ -#define ARM64_CPU_BOOT_PRIMARY (1) -/* Option to setup secondary cpus */ -#define ARM64_CPU_BOOT_SECONDARY (2) -/* Option to setup cpus for different cpu run time services */ -#define ARM64_CPU_RUNTIME (3) - #ifndef __ASSEMBLY__ #include <asm/percpu.h> @@ -38,7 +30,6 @@ #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/thread_info.h> -#include <asm/pointer_auth.h> DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); @@ -55,7 +46,12 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * Logical CPU mapping. */ extern u64 __cpu_logical_map[NR_CPUS]; -#define cpu_logical_map(cpu) __cpu_logical_map[cpu] +extern u64 cpu_logical_map(int cpu); + +static inline void set_cpu_logical_map(int cpu, u64 hwid) +{ + __cpu_logical_map[cpu] = hwid; +} struct seq_file; @@ -96,9 +92,6 @@ asmlinkage void secondary_start_kernel(void); struct secondary_data { void *stack; struct task_struct *task; -#ifdef CONFIG_ARM64_PTR_AUTH - struct ptrauth_keys_kernel ptrauth_key; -#endif long status; }; diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 4d9b1f48dc39..fc7613023c19 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -64,16 +64,15 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); -extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); +extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); -static inline bool on_irq_stack(unsigned long sp, +static inline bool on_stack(unsigned long sp, unsigned long low, + unsigned long high, enum stack_type type, struct stack_info *info) { - unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); - unsigned long high = low + IRQ_STACK_SIZE; - if (!low) return false; @@ -83,12 +82,20 @@ static inline bool on_irq_stack(unsigned long sp, if (info) { info->low = low; info->high = high; - info->type = STACK_TYPE_IRQ; + info->type = type; } - return true; } +static inline bool on_irq_stack(unsigned long sp, + struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); + unsigned long high = low + IRQ_STACK_SIZE; + + return on_stack(sp, low, high, STACK_TYPE_IRQ, info); +} + static inline bool on_task_stack(const struct task_struct *tsk, unsigned long sp, struct stack_info *info) @@ -96,16 +103,7 @@ static inline bool on_task_stack(const struct task_struct *tsk, unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_TASK; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_TASK, info); } #ifdef CONFIG_VMAP_STACK @@ -117,16 +115,7 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_OVERFLOW; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info); } #else static inline bool on_overflow_stack(unsigned long sp, diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 326aac658b9d..996bf98f0cab 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -9,7 +9,7 @@ #define __ARM64_S2_PGTABLE_H_ #include <linux/hugetlb.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> /* * PGDIR_SHIFT determines the size a top-level page table entry can map @@ -68,41 +68,67 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm) #define S2_PUD_SIZE (1UL << S2_PUD_SHIFT) #define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) -static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd) +#define stage2_pgd_none(kvm, pgd) pgd_none(pgd) +#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd) +#define stage2_pgd_present(kvm, pgd) pgd_present(pgd) +#define stage2_pgd_populate(kvm, pgd, p4d) pgd_populate(NULL, pgd, p4d) + +static inline p4d_t *stage2_p4d_offset(struct kvm *kvm, + pgd_t *pgd, unsigned long address) +{ + return p4d_offset(pgd, address); +} + +static inline void stage2_p4d_free(struct kvm *kvm, p4d_t *p4d) +{ +} + +static inline bool stage2_p4d_table_empty(struct kvm *kvm, p4d_t *p4dp) +{ + return false; +} + +static inline phys_addr_t stage2_p4d_addr_end(struct kvm *kvm, + phys_addr_t addr, phys_addr_t end) +{ + return end; +} + +static inline bool stage2_p4d_none(struct kvm *kvm, p4d_t p4d) { if (kvm_stage2_has_pud(kvm)) - return pgd_none(pgd); + return p4d_none(p4d); else return 0; } -static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp) +static inline void stage2_p4d_clear(struct kvm *kvm, p4d_t *p4dp) { if (kvm_stage2_has_pud(kvm)) - pgd_clear(pgdp); + p4d_clear(p4dp); } -static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd) +static inline bool stage2_p4d_present(struct kvm *kvm, p4d_t p4d) { if (kvm_stage2_has_pud(kvm)) - return pgd_present(pgd); + return p4d_present(p4d); else return 1; } -static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud) +static inline void stage2_p4d_populate(struct kvm *kvm, p4d_t *p4d, pud_t *pud) { if (kvm_stage2_has_pud(kvm)) - pgd_populate(NULL, pgd, pud); + p4d_populate(NULL, p4d, pud); } static inline pud_t *stage2_pud_offset(struct kvm *kvm, - pgd_t *pgd, unsigned long address) + p4d_t *p4d, unsigned long address) { if (kvm_stage2_has_pud(kvm)) - return pud_offset(pgd, address); + return pud_offset(p4d, address); else - return (pud_t *)pgd; + return (pud_t *)p4d; } static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud) @@ -230,4 +256,13 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) return (boundary - 1 < end - 1) ? boundary : end; } +/* + * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and + * the architectural page-table level. + */ +#define S2_NO_LEVEL_HINT 0 +#define S2_PUD_LEVEL 1 +#define S2_PMD_LEVEL 2 +#define S2_PTE_LEVEL 3 + #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 8939c87c4dce..0cde2f473971 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -2,7 +2,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 12 +#define NR_CTX_REGS 13 #define NR_CALLEE_SAVED_REGS 12 /* diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 65299a2dcf9c..cfc0672013f6 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -34,6 +34,10 @@ static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->regs[0]; + + if (is_compat_thread(task_thread_info(task))) + error = sign_extend64(error, 31); + return IS_ERR_VALUE(error) ? error : 0; } @@ -47,7 +51,13 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->regs[0] = (long) error ? error : val; + if (error) + val = error; + + if (is_compat_thread(task_thread_info(task))) + val = lower_32_bits(val); + + regs->regs[0] = val; } #define SYSCALL_MAX_ARGS 6 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c4ac0ac25a00..554a7e8ecb07 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -105,6 +105,10 @@ #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +/* + * System registers, organised loosely by encoding but grouped together + * where the architected name contains an index. e.g. ID_MMFR<n>_EL1. + */ #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) @@ -134,12 +138,16 @@ #define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) #define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) #define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) +#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) #define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) #define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) @@ -147,7 +155,6 @@ #define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) -#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) #define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) #define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) @@ -414,9 +421,9 @@ */ #define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7) -#define SYS_AMEVTYPE0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) #define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7) -#define SYS_AMEVTYPE1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) /* AMU v1: Fixed (architecturally defined) activity monitors */ #define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0) @@ -552,6 +559,8 @@ #endif /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_BT1 (BIT(36)) +#define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) #define SCTLR_EL1_E0E (BIT(24)) #define SCTLR_EL1_SPAN (BIT(23)) @@ -594,6 +603,7 @@ /* id_aa64isar0 */ #define ID_AA64ISAR0_RNDR_SHIFT 60 +#define ID_AA64ISAR0_TLB_SHIFT 56 #define ID_AA64ISAR0_TS_SHIFT 52 #define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 @@ -607,6 +617,9 @@ #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_AES_SHIFT 4 +#define ID_AA64ISAR0_TLB_RANGE_NI 0x0 +#define ID_AA64ISAR0_TLB_RANGE 0x2 + /* id_aa64isar1 */ #define ID_AA64ISAR1_I8MM_SHIFT 52 #define ID_AA64ISAR1_DGH_SHIFT 48 @@ -637,6 +650,8 @@ #define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_DIT_SHIFT 48 #define ID_AA64PFR0_AMU_SHIFT 44 +#define ID_AA64PFR0_MPAM_SHIFT 40 +#define ID_AA64PFR0_SEL2_SHIFT 36 #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_RAS_SHIFT 28 #define ID_AA64PFR0_GIC_SHIFT 24 @@ -655,15 +670,21 @@ #define ID_AA64PFR0_ASIMD_NI 0xf #define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 #define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL1_32BIT_64BIT 0x2 #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_MPAMFRAC_SHIFT 16 +#define ID_AA64PFR1_RASFRAC_SHIFT 12 +#define ID_AA64PFR1_MTE_SHIFT 8 #define ID_AA64PFR1_SSBS_SHIFT 4 +#define ID_AA64PFR1_BT_SHIFT 0 #define ID_AA64PFR1_SSBS_PSTATE_NI 0 #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 +#define ID_AA64PFR1_BT_BTI 0x1 /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 @@ -688,6 +709,12 @@ #define ID_AA64ZFR0_SVEVER_SVE2 0x1 /* id_aa64mmfr0 */ +#define ID_AA64MMFR0_ECV_SHIFT 60 +#define ID_AA64MMFR0_FGT_SHIFT 56 +#define ID_AA64MMFR0_EXS_SHIFT 44 +#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 +#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 +#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 #define ID_AA64MMFR0_TGRAN16_SHIFT 20 @@ -713,6 +740,10 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ETS_SHIFT 36 +#define ID_AA64MMFR1_TWED_SHIFT 32 +#define ID_AA64MMFR1_XNX_SHIFT 28 +#define ID_AA64MMFR1_SPECSEI_SHIFT 24 #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_LOR_SHIFT 16 #define ID_AA64MMFR1_HPD_SHIFT 12 @@ -725,8 +756,15 @@ /* id_aa64mmfr2 */ #define ID_AA64MMFR2_E0PD_SHIFT 60 +#define ID_AA64MMFR2_EVT_SHIFT 56 +#define ID_AA64MMFR2_BBM_SHIFT 52 +#define ID_AA64MMFR2_TTL_SHIFT 48 #define ID_AA64MMFR2_FWB_SHIFT 40 +#define ID_AA64MMFR2_IDS_SHIFT 36 #define ID_AA64MMFR2_AT_SHIFT 32 +#define ID_AA64MMFR2_ST_SHIFT 28 +#define ID_AA64MMFR2_NV_SHIFT 24 +#define ID_AA64MMFR2_CCIDX_SHIFT 20 #define ID_AA64MMFR2_LVA_SHIFT 16 #define ID_AA64MMFR2_IESB_SHIFT 12 #define ID_AA64MMFR2_LSM_SHIFT 8 @@ -734,6 +772,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 #define ID_AA64DFR0_PMSVER_SHIFT 32 #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_WRPS_SHIFT 20 @@ -752,6 +791,25 @@ #define ID_DFR0_PERFMON_8_1 0x4 +#define ID_ISAR4_SWP_FRAC_SHIFT 28 +#define ID_ISAR4_PSR_M_SHIFT 24 +#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20 +#define ID_ISAR4_BARRIER_SHIFT 16 +#define ID_ISAR4_SMC_SHIFT 12 +#define ID_ISAR4_WRITEBACK_SHIFT 8 +#define ID_ISAR4_WITHSHIFTS_SHIFT 4 +#define ID_ISAR4_UNPRIV_SHIFT 0 + +#define ID_DFR1_MTPMU_SHIFT 0 + +#define ID_ISAR0_DIVIDE_SHIFT 24 +#define ID_ISAR0_DEBUG_SHIFT 20 +#define ID_ISAR0_COPROC_SHIFT 16 +#define ID_ISAR0_CMPBRANCH_SHIFT 12 +#define ID_ISAR0_BITFIELD_SHIFT 8 +#define ID_ISAR0_BITCOUNT_SHIFT 4 +#define ID_ISAR0_SWAP_SHIFT 0 + #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 #define ID_ISAR5_SHA2_SHIFT 12 @@ -767,6 +825,44 @@ #define ID_ISAR6_DP_SHIFT 4 #define ID_ISAR6_JSCVT_SHIFT 0 +#define ID_MMFR0_INNERSHR_SHIFT 28 +#define ID_MMFR0_FCSE_SHIFT 24 +#define ID_MMFR0_AUXREG_SHIFT 20 +#define ID_MMFR0_TCM_SHIFT 16 +#define ID_MMFR0_SHARELVL_SHIFT 12 +#define ID_MMFR0_OUTERSHR_SHIFT 8 +#define ID_MMFR0_PMSA_SHIFT 4 +#define ID_MMFR0_VMSA_SHIFT 0 + +#define ID_MMFR4_EVT_SHIFT 28 +#define ID_MMFR4_CCIDX_SHIFT 24 +#define ID_MMFR4_LSM_SHIFT 20 +#define ID_MMFR4_HPDS_SHIFT 16 +#define ID_MMFR4_CNP_SHIFT 12 +#define ID_MMFR4_XNX_SHIFT 8 +#define ID_MMFR4_AC2_SHIFT 4 +#define ID_MMFR4_SPECSEI_SHIFT 0 + +#define ID_MMFR5_ETS_SHIFT 0 + +#define ID_PFR0_DIT_SHIFT 24 +#define ID_PFR0_CSV2_SHIFT 16 +#define ID_PFR0_STATE3_SHIFT 12 +#define ID_PFR0_STATE2_SHIFT 8 +#define ID_PFR0_STATE1_SHIFT 4 +#define ID_PFR0_STATE0_SHIFT 0 + +#define ID_DFR0_PERFMON_SHIFT 24 +#define ID_DFR0_MPROFDBG_SHIFT 20 +#define ID_DFR0_MMAPTRC_SHIFT 16 +#define ID_DFR0_COPTRC_SHIFT 12 +#define ID_DFR0_MMAPDBG_SHIFT 8 +#define ID_DFR0_COPSDBG_SHIFT 4 +#define ID_DFR0_COPDBG_SHIFT 0 + +#define ID_PFR2_SSBS_SHIFT 4 +#define ID_PFR2_CSV3_SHIFT 0 + #define MVFR0_FPROUND_SHIFT 28 #define MVFR0_FPSHVEC_SHIFT 24 #define MVFR0_FPSQRT_SHIFT 20 @@ -785,17 +881,14 @@ #define MVFR1_FPDNAN_SHIFT 4 #define MVFR1_FPFTZ_SHIFT 0 - -#define ID_AA64MMFR0_TGRAN4_SHIFT 28 -#define ID_AA64MMFR0_TGRAN64_SHIFT 24 -#define ID_AA64MMFR0_TGRAN16_SHIFT 20 - -#define ID_AA64MMFR0_TGRAN4_NI 0xf -#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN64_NI 0xf -#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN16_NI 0x0 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_PFR1_GIC_SHIFT 28 +#define ID_PFR1_VIRT_FRAC_SHIFT 24 +#define ID_PFR1_SEC_FRAC_SHIFT 20 +#define ID_PFR1_GENTIMER_SHIFT 16 +#define ID_PFR1_VIRTUALIZATION_SHIFT 12 +#define ID_PFR1_MPROGMOD_SHIFT 8 +#define ID_PFR1_SECURITY_SHIFT 4 +#define ID_PFR1_PROGMOD_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT @@ -808,6 +901,11 @@ #define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED #endif +#define MVFR2_FPMISC_SHIFT 4 +#define MVFR2_SIMDMISC_SHIFT 0 + +#define DCZID_DZP_SHIFT 4 +#define DCZID_BS_SHIFT 0 /* * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 512174a8e789..5e784e16ee89 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -41,6 +41,10 @@ struct thread_info { #endif } preempt; }; +#ifdef CONFIG_SHADOW_CALL_STACK + void *scs_base; + void *scs_sp; +#endif }; #define thread_saved_pc(tsk) \ @@ -89,6 +93,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) @@ -100,11 +105,20 @@ void arch_release_task_struct(struct task_struct *tsk); _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) +#ifdef CONFIG_SHADOW_CALL_STACK +#define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ + .scs_sp = init_shadow_call_stack, +#else +#define INIT_SCS +#endif + #define INIT_THREAD_INFO(tsk) \ { \ .flags = _TIF_FOREIGN_FPSTATE, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ + INIT_SCS \ } #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index b76df828e6b7..61c97d3b58c7 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -21,11 +21,37 @@ static void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> +/* + * get the tlbi levels in arm64. Default value is 0 if more than one + * of cleared_* is set or neither is set. + * Arm64 doesn't support p4ds now. + */ +static inline int tlb_get_level(struct mmu_gather *tlb) +{ + if (tlb->cleared_ptes && !(tlb->cleared_pmds || + tlb->cleared_puds || + tlb->cleared_p4ds)) + return 3; + + if (tlb->cleared_pmds && !(tlb->cleared_ptes || + tlb->cleared_puds || + tlb->cleared_p4ds)) + return 2; + + if (tlb->cleared_puds && !(tlb->cleared_ptes || + tlb->cleared_pmds || + tlb->cleared_p4ds)) + return 1; + + return 0; +} + static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); bool last_level = !tlb->freed_tables; unsigned long stride = tlb_get_unmap_size(tlb); + int tlb_level = tlb_get_level(tlb); /* * If we're tearing down the address space then we only care about @@ -38,7 +64,8 @@ static inline void tlb_flush(struct mmu_gather *tlb) return; } - __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, + last_level, tlb_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bc3949064725..cc3f5a33ff9c 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ +#include <linux/bitfield.h> #include <linux/mm_types.h> #include <linux/sched.h> #include <asm/cputype.h> @@ -27,14 +28,16 @@ * not. The macros handles invoking the asm with or without the * register argument as appropriate. */ -#define __TLBI_0(op, arg) asm ("tlbi " #op "\n" \ +#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \ + "tlbi " #op "\n" \ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op, \ ARM64_WORKAROUND_REPEAT_TLBI, \ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : ) -#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \ +#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \ + "tlbi " #op ", %0\n" \ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op ", %0", \ ARM64_WORKAROUND_REPEAT_TLBI, \ @@ -60,6 +63,102 @@ }) /* + * Get translation granule of the system, which is decided by + * PAGE_SIZE. Used by TTL. + * - 4KB : 1 + * - 16KB : 2 + * - 64KB : 3 + */ +#define TLBI_TTL_TG_4K 1 +#define TLBI_TTL_TG_16K 2 +#define TLBI_TTL_TG_64K 3 + +static inline unsigned long get_trans_granule(void) +{ + switch (PAGE_SIZE) { + case SZ_4K: + return TLBI_TTL_TG_4K; + case SZ_16K: + return TLBI_TTL_TG_16K; + case SZ_64K: + return TLBI_TTL_TG_64K; + default: + return 0; + } +} + +/* + * Level-based TLBI operations. + * + * When ARMv8.4-TTL exists, TLBI operations take an additional hint for + * the level at which the invalidation must take place. If the level is + * wrong, no invalidation may take place. In the case where the level + * cannot be easily determined, a 0 value for the level parameter will + * perform a non-hinted invalidation. + * + * For Stage-2 invalidation, use the level values provided to that effect + * in asm/stage2_pgtable.h. + */ +#define TLBI_TTL_MASK GENMASK_ULL(47, 44) + +#define __tlbi_level(op, addr, level) do { \ + u64 arg = addr; \ + \ + if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \ + level) { \ + u64 ttl = level & 3; \ + ttl |= get_trans_granule() << 2; \ + arg &= ~TLBI_TTL_MASK; \ + arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \ + } \ + \ + __tlbi(op, arg); \ +} while(0) + +#define __tlbi_user_level(op, arg, level) do { \ + if (arm64_kernel_unmapped_at_el0()) \ + __tlbi_level(op, (arg | USER_ASID_FLAG), level); \ +} while (0) + +/* + * This macro creates a properly formatted VA operand for the TLB RANGE. + * The value bit assignments are: + * + * +----------+------+-------+-------+-------+----------------------+ + * | ASID | TG | SCALE | NUM | TTL | BADDR | + * +-----------------+-------+-------+-------+----------------------+ + * |63 48|47 46|45 44|43 39|38 37|36 0| + * + * The address range is determined by below formula: + * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * + */ +#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = (addr) >> PAGE_SHIFT; \ + __ta &= GENMASK_ULL(36, 0); \ + __ta |= (unsigned long)(ttl) << 37; \ + __ta |= (unsigned long)(num) << 39; \ + __ta |= (unsigned long)(scale) << 44; \ + __ta |= get_trans_granule() << 46; \ + __ta |= (unsigned long)(asid) << 48; \ + __ta; \ + }) + +/* These macros are used by the TLBI RANGE feature. */ +#define __TLBI_RANGE_PAGES(num, scale) \ + ((unsigned long)((num) + 1) << (5 * (scale) + 1)) +#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) + +/* + * Generate 'num' values from -1 to 30 with -1 rejected by the + * __flush_tlb_range() loop below. + */ +#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) +#define __TLBI_RANGE_NUM(pages, scale) \ + ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) + +/* * TLB Invalidation * ================ * @@ -179,34 +278,83 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - unsigned long stride, bool last_level) + unsigned long stride, bool last_level, + int tlb_level) { + int num = 0; + int scale = 0; unsigned long asid = ASID(vma->vm_mm); unsigned long addr; + unsigned long pages; start = round_down(start, stride); end = round_up(end, stride); + pages = (end - start) >> PAGE_SHIFT; - if ((end - start) >= (MAX_TLBI_OPS * stride)) { + /* + * When not uses TLB range ops, we can handle up to + * (MAX_TLBI_OPS - 1) pages; + * When uses TLB range ops, we can handle up to + * (MAX_TLBI_RANGE_PAGES - 1) pages. + */ + if ((!system_supports_tlb_range() && + (end - start) >= (MAX_TLBI_OPS * stride)) || + pages >= MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; } - /* Convert the stride into units of 4k */ - stride >>= 12; + dsb(ishst); - start = __TLBI_VADDR(start, asid); - end = __TLBI_VADDR(end, asid); + /* + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the the TLB + * range ops are supported, then: + * + * 1. If 'pages' is odd, flush the first page through non-range + * operations; + * + * 2. For remaining pages: the minimum range granularity is decided + * by 'scale', so multiple range TLBI operations may be required. + * Start from scale = 0, flush the corresponding number of pages + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it + * until no pages left. + * + * Note that certain ranges can be represented by either num = 31 and + * scale or num = 0 and scale + 1. The loop below favours the latter + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + */ + while (pages > 0) { + if (!system_supports_tlb_range() || + pages % 2 == 1) { + addr = __TLBI_VADDR(start, asid); + if (last_level) { + __tlbi_level(vale1is, addr, tlb_level); + __tlbi_user_level(vale1is, addr, tlb_level); + } else { + __tlbi_level(vae1is, addr, tlb_level); + __tlbi_user_level(vae1is, addr, tlb_level); + } + start += stride; + pages -= stride >> PAGE_SHIFT; + continue; + } - dsb(ishst); - for (addr = start; addr < end; addr += stride) { - if (last_level) { - __tlbi(vale1is, addr); - __tlbi_user(vale1is, addr); - } else { - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); + num = __TLBI_RANGE_NUM(pages, scale); + if (num >= 0) { + addr = __TLBI_VADDR_RANGE(start, asid, scale, + num, tlb_level); + if (last_level) { + __tlbi(rvale1is, addr); + __tlbi_user(rvale1is, addr); + } else { + __tlbi(rvae1is, addr); + __tlbi_user(rvae1is, addr); + } + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; + pages -= __TLBI_RANGE_PAGES(num, scale); } + scale++; } dsb(ish); } @@ -217,8 +365,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. + * Set the tlb_level to 0 because we can not get enough information here. */ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 0cc835ddfcd1..e042f6527981 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -34,8 +34,9 @@ void topology_scale_freq_tick(void); /* Enable topology flag updates */ #define arch_update_cpu_topology topology_update_cpu_topology -/* Replace task scheduler's default thermal pressure retrieve API */ +/* Replace task scheduler's default thermal pressure API */ #define arch_scale_thermal_pressure topology_get_thermal_pressure +#define arch_set_thermal_pressure topology_set_thermal_pressure #include <asm-generic/topology.h> diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index bc5c7b091152..991dd5f031e4 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -19,6 +19,7 @@ #include <linux/string.h> #include <asm/cpufeature.h> +#include <asm/mmu.h> #include <asm/ptrace.h> #include <asm/memory.h> #include <asm/extable.h> @@ -49,7 +50,7 @@ static inline void set_fs(mm_segment_t fs) CONFIG_ARM64_UAO)); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * Test whether a block of memory is a valid user space address. diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 803039d504de..3b859596840d 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 439 +#define __NR_compat_syscalls 440 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c1c61635f89c..734860ac7cf9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -308,8 +308,8 @@ __SYSCALL(__NR_writev, compat_sys_writev) __SYSCALL(__NR_getsid, sys_getsid) #define __NR_fdatasync 148 __SYSCALL(__NR_fdatasync, sys_fdatasync) -#define __NR__sysctl 149 -__SYSCALL(__NR__sysctl, compat_sys_sysctl) + /* 149 was sys_sysctl */ +__SYSCALL(149, sys_ni_syscall) #define __NR_mlock 150 __SYSCALL(__NR_mlock, sys_mlock) #define __NR_munlock 151 @@ -599,9 +599,9 @@ __SYSCALL(__NR_recvfrom, compat_sys_recvfrom) #define __NR_shutdown 293 __SYSCALL(__NR_shutdown, sys_shutdown) #define __NR_setsockopt 294 -__SYSCALL(__NR_setsockopt, compat_sys_setsockopt) +__SYSCALL(__NR_setsockopt, sys_setsockopt) #define __NR_getsockopt 295 -__SYSCALL(__NR_getsockopt, compat_sys_getsockopt) +__SYSCALL(__NR_getsockopt, sys_getsockopt) #define __NR_sendmsg 296 __SYSCALL(__NR_sendmsg, compat_sys_sendmsg) #define __NR_recvmsg 297 @@ -879,10 +879,14 @@ __SYSCALL(__NR_fspick, sys_fspick) __SYSCALL(__NR_pidfd_open, sys_pidfd_open) #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) +#define __NR_close_range 436 +__SYSCALL(__NR_close_range, sys_close_range) #define __NR_openat2 437 __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) +#define __NR_faccessat2 439 +__SYSCALL(__NR_faccessat2, sys_faccessat2) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 07468428fd29..f99dcb94b438 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -12,6 +12,8 @@ */ #define VDSO_LBASE 0x0 +#define __VVAR_PAGES 2 + #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> diff --git a/arch/arm64/include/asm/vdso/clocksource.h b/arch/arm64/include/asm/vdso/clocksource.h index df6ea65c1dec..b054d9febfb5 100644 --- a/arch/arm64/include/asm/vdso/clocksource.h +++ b/arch/arm64/include/asm/vdso/clocksource.h @@ -2,7 +2,10 @@ #ifndef __ASM_VDSOCLOCKSOURCE_H #define __ASM_VDSOCLOCKSOURCE_H -#define VDSO_ARCH_CLOCKMODES \ - VDSO_CLOCKMODE_ARCHTIMER +#define VDSO_ARCH_CLOCKMODES \ + /* vdso clocksource for both 32 and 64bit tasks */ \ + VDSO_CLOCKMODE_ARCHTIMER, \ + /* vdso clocksource for 64bit tasks only */ \ + VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT #endif diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index b6907ae78e53..7508b0ac1d21 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> #include <asm/unistd.h> #include <asm/errno.h> @@ -102,7 +103,8 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { u64 res; @@ -111,7 +113,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) * update. Return something. Core will do another round and then * see the mode change and fallback to the syscall. */ - if (clock_mode == VDSO_CLOCKMODE_NONE) + if (clock_mode != VDSO_CLOCKMODE_ARCHTIMER) return 0; /* @@ -152,6 +154,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return ret; } +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + const struct vdso_data *ret; + + /* See __arch_get_vdso_data(). */ + asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data)); + + return ret; +} +#endif + +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; +} +#define vdso_clocksource_ok vdso_clocksource_ok + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index afba6ba332f8..631ab1281633 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> #include <asm/unistd.h> #define VDSO_HAS_CLOCK_GETRES 1 @@ -63,7 +64,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { u64 res; @@ -96,6 +98,14 @@ const struct vdso_data *__arch_get_vdso_data(void) return _vdso_data; } +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return _timens_data; +} +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 61fd26752adc..09977acc007d 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -85,10 +85,17 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { - if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN)) + /* + * The following macros are defined for code specic to VHE/nVHE. + * If has_vhe() is inlined into those compilation units, it can + * be determined statically. Otherwise fall back to caps. + */ + if (__is_defined(__KVM_VHE_HYPERVISOR__)) return true; - - return false; + else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + return false; + else + return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h index 0a12115d9638..894e031b28d2 100644 --- a/arch/arm64/include/asm/vmap_stack.h +++ b/arch/arm64/include/asm/vmap_stack.h @@ -7,8 +7,8 @@ #include <linux/gfp.h> #include <linux/kconfig.h> #include <linux/vmalloc.h> +#include <linux/pgtable.h> #include <asm/memory.h> -#include <asm/pgtable.h> #include <asm/thread_info.h> /* @@ -19,10 +19,8 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node) { BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); - return __vmalloc_node_range(stack_size, THREAD_ALIGN, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, PAGE_KERNEL, 0, node, - __builtin_return_address(0)); + return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node, + __builtin_return_address(0)); } #endif /* __ASM_VMAP_STACK_H */ |