diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2022-09-05 08:03:23 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2022-09-05 08:03:23 +0200 |
commit | a956f91247da3fe71e4d5f7da1f6dd8bae7c3c21 (patch) | |
tree | dcbfa79369938f1349af5b39a68dee698861a815 /arch | |
parent | USB: hcd-pci: Drop the unused id parameter from usb_hcd_pci_probe() (diff) | |
parent | Linux 6.0-rc4 (diff) | |
download | linux-a956f91247da3fe71e4d5f7da1f6dd8bae7c3c21.tar.xz linux-a956f91247da3fe71e4d5f7da1f6dd8bae7c3c21.zip |
Merge 6.0-rc4 into usb-next
We need the USB fixes in here and this resolves the merge issue in:
drivers/usb/dwc3/gadget.c
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'arch')
139 files changed, 1112 insertions, 742 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index f330410da63a..5dbf11a5ba4e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -53,7 +53,6 @@ config KPROBES config JUMP_LABEL bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL - depends on CC_HAS_ASM_GOTO select OBJTOOL if HAVE_JUMP_LABEL_HACK help This option enables a transparent branch optimization that @@ -1361,7 +1360,7 @@ config HAVE_PREEMPT_DYNAMIC_CALL config HAVE_PREEMPT_DYNAMIC_KEY bool - depends on HAVE_ARCH_JUMP_LABEL && CC_HAS_ASM_GOTO + depends on HAVE_ARCH_JUMP_LABEL select HAVE_PREEMPT_DYNAMIC help An architecture should select this if it can handle the preemption diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 492c7713ddae..bafb1c1f0fdc 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -283,11 +283,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & mask) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /* * ffz = Find First Zero in word. Undefined if no zero exists, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 571cc234d0b3..9fb9fff08c94 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -917,6 +917,23 @@ config ARM64_ERRATUM_1902691 If unsure, say Y. +config ARM64_ERRATUM_2457168 + bool "Cortex-A510: 2457168: workaround for AMEVCNTR01 incrementing incorrectly" + depends on ARM64_AMU_EXTN + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2457168. + + The AMU counter AMEVCNTR01 (constant counter) should increment at the same rate + as the system counter. On affected Cortex-A510 cores AMEVCNTR01 increments + incorrectly giving a significantly higher output value. + + Work around this problem by returning 0 when reading the affected counter in + key locations that results in disabling all users of this counter. This effect + is the same to firmware disabling affected counters. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index ca9b487112cc..34256bda0da9 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -71,7 +71,7 @@ static __always_inline int icache_is_vpipt(void) static inline u32 cache_type_cwg(void) { - return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK; + return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype()); } #define __read_mostly __section(".data..read_mostly") diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9bb1873f5295..6f86b7ab6c28 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -153,7 +153,7 @@ struct vl_info { #ifdef CONFIG_ARM64_SVE -extern void sve_alloc(struct task_struct *task); +extern void sve_alloc(struct task_struct *task, bool flush); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); extern void fpsimd_force_sync_to_sve(struct task_struct *task); @@ -256,7 +256,7 @@ size_t sve_state_size(struct task_struct const *task); #else /* ! CONFIG_ARM64_SVE */ -static inline void sve_alloc(struct task_struct *task) { } +static inline void sve_alloc(struct task_struct *task, bool flush) { } static inline void fpsimd_release_task(struct task_struct *task) { } static inline void sve_sync_to_fpsimd(struct task_struct *task) { } static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 02e59fa8f293..32d14f481f0c 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -64,28 +64,28 @@ #define EARLY_KASLR (0) #endif -#define EARLY_ENTRIES(vstart, vend, shift) \ - ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + EARLY_KASLR) +#define EARLY_ENTRIES(vstart, vend, shift, add) \ + ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + add) -#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT)) +#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add)) #if SWAPPER_PGTABLE_LEVELS > 3 -#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT)) +#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add)) #else -#define EARLY_PUDS(vstart, vend) (0) +#define EARLY_PUDS(vstart, vend, add) (0) #endif #if SWAPPER_PGTABLE_LEVELS > 2 -#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT)) +#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add)) #else -#define EARLY_PMDS(vstart, vend) (0) +#define EARLY_PMDS(vstart, vend, add) (0) #endif -#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \ - + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ - + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ - + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) +#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \ + + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \ + + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \ + + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */ +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR)) /* the initial ID map may need two extra pages if it needs to be extended */ #if VA_BITS < 48 @@ -93,7 +93,7 @@ #else #define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) #endif -#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE) +#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1) /* Initial memory map size */ #if ARM64_KERNEL_USES_PMD_MAPS diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f38ef299f13b..e9c9388ccc02 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -929,6 +929,10 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (system_supports_mte() && \ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) +#define kvm_supports_32bit_el0() \ + (system_supports_32bit_el0() && \ + !static_branch_unlikely(&arm64_mismatched_32bit_el0)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index 6437df661700..f4af547ef54c 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -3,6 +3,8 @@ #ifndef __ARM64_ASM_SETUP_H #define __ARM64_ASM_SETUP_H +#include <linux/string.h> + #include <uapi/asm/setup.h> void *get_early_fdt_ptr(void); @@ -14,4 +16,19 @@ void early_fdt_map(u64 dt_phys); extern phys_addr_t __fdt_pointer __initdata; extern u64 __cacheline_aligned boot_args[4]; +static inline bool arch_parse_debug_rodata(char *arg) +{ + extern bool rodata_enabled; + extern bool rodata_full; + + if (arg && !strcmp(arg, "full")) { + rodata_enabled = true; + rodata_full = true; + return true; + } + + return false; +} +#define arch_parse_debug_rodata arch_parse_debug_rodata + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7c71358d44c4..818df938a7ad 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1116,6 +1116,7 @@ #else +#include <linux/bitfield.h> #include <linux/build_bug.h> #include <linux/types.h> #include <asm/alternative.h> @@ -1209,8 +1210,6 @@ par; \ }) -#endif - #define SYS_FIELD_GET(reg, field, val) \ FIELD_GET(reg##_##field##_MASK, val) @@ -1220,4 +1219,6 @@ #define SYS_FIELD_PREP_ENUM(reg, field, val) \ FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) +#endif + #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3bb134355874..316917b98707 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -75,9 +75,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 587543c6c51c..97c42be71338 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -45,7 +45,8 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, int init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves, fw_level; + unsigned int ctype, level, leaves; + int fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -63,6 +64,9 @@ int init_cache_level(unsigned int cpu) else fw_level = acpi_find_last_cache_level(cpu); + if (fw_level < 0) + return fw_level; + if (level < fw_level) { /* * some external caches not specified in CLIDR_EL1 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7e6289e709fc..53b973b6059f 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1286807 { ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + }, + { /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, @@ -654,6 +656,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2457168 + { + .desc = "ARM erratum 2457168", + .capability = ARM64_WORKAROUND_2457168, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + + /* Cortex-A510 r0p0-r1p1 */ + CAP_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1) + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_2038923 { .desc = "ARM erratum 2038923", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 907401e4fffb..af4de817d712 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1870,7 +1870,10 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", smp_processor_id()); cpumask_set_cpu(smp_processor_id(), &amu_cpus); - update_freq_counters_refs(); + + /* 0 reference values signal broken/disabled counters */ + if (!this_cpu_has_cap(ARM64_WORKAROUND_2457168)) + update_freq_counters_refs(); } } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 254fe31c03a0..2d73b3e793b2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -502,7 +502,7 @@ tsk .req x28 // current thread_info SYM_CODE_START(vectors) kernel_ventry 1, t, 64, sync // Synchronous EL1t kernel_ventry 1, t, 64, irq // IRQ EL1t - kernel_ventry 1, t, 64, fiq // FIQ EL1h + kernel_ventry 1, t, 64, fiq // FIQ EL1t kernel_ventry 1, t, 64, error // Error EL1t kernel_ventry 1, h, 64, sync // Synchronous EL1h diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dd63ffc3a2fa..23834d96d1e7 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -715,10 +715,12 @@ size_t sve_state_size(struct task_struct const *task) * do_sve_acc() case, there is no ABI requirement to hide stale data * written previously be task. */ -void sve_alloc(struct task_struct *task) +void sve_alloc(struct task_struct *task, bool flush) { if (task->thread.sve_state) { - memset(task->thread.sve_state, 0, sve_state_size(task)); + if (flush) + memset(task->thread.sve_state, 0, + sve_state_size(task)); return; } @@ -1388,7 +1390,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) return; } - sve_alloc(current); + sve_alloc(current, true); if (!current->thread.sve_state) { force_sig(SIGKILL); return; @@ -1439,7 +1441,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) return; } - sve_alloc(current); + sve_alloc(current, false); sme_alloc(current); if (!current->thread.sve_state || !current->thread.za_state) { force_sig(SIGKILL); @@ -1460,17 +1462,6 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) fpsimd_bind_task_to_cpu(); } - /* - * If SVE was not already active initialise the SVE registers, - * any non-shared state between the streaming and regular SVE - * registers is architecturally guaranteed to be zeroed when - * we enter streaming mode. We do not need to initialize ZA - * since ZA must be disabled at this point and enabling ZA is - * architecturally defined to zero ZA. - */ - if (system_supports_sve() && !test_thread_flag(TIF_SVE)) - sve_init_regs(); - put_cpu_fpsimd_context(); } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index cefe6a73ee54..814b6587ccb7 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -371,7 +371,9 @@ SYM_FUNC_END(create_idmap) SYM_FUNC_START_LOCAL(create_kernel_mapping) adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR // compile time __va(_text) +#ifdef CONFIG_RELOCATABLE add x5, x5, x23 // add KASLR displacement +#endif adrp x6, _end // runtime __pa(_end) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 889951291cc0..a11a6e14ba89 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -47,7 +47,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) u64 i; phys_addr_t start, end; - nr_ranges = 1; /* for exclusion of crashkernel region */ + nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c index 6c3855e69395..17bff6e399e4 100644 --- a/arch/arm64/kernel/pi/kaslr_early.c +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -94,11 +94,9 @@ asmlinkage u64 kaslr_early_init(void *fdt) seed = get_kaslr_seed(fdt); if (!seed) { -#ifdef CONFIG_ARCH_RANDOM - if (!__early_cpu_has_rndr() || - !__arm64_rndr((unsigned long *)&seed)) -#endif - return 0; + if (!__early_cpu_has_rndr() || + !__arm64_rndr((unsigned long *)&seed)) + return 0; } /* diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 21da83187a60..eb7c08dfb834 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -882,7 +882,7 @@ static int sve_set_common(struct task_struct *target, * state and ensure there's storage. */ if (target->thread.svcr != old_svcr) - sve_alloc(target); + sve_alloc(target, true); } /* Registers: FPSIMD-only case */ @@ -912,7 +912,7 @@ static int sve_set_common(struct task_struct *target, goto out; } - sve_alloc(target); + sve_alloc(target, true); if (!target->thread.sve_state) { ret = -ENOMEM; clear_tsk_thread_flag(target, TIF_SVE); @@ -1082,7 +1082,7 @@ static int za_set(struct task_struct *target, /* Ensure there is some SVE storage for streaming mode */ if (!target->thread.sve_state) { - sve_alloc(target); + sve_alloc(target, false); if (!target->thread.sve_state) { clear_thread_flag(TIF_SME); ret = -ENOMEM; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 3e6d0352d7d3..9ad911f1647c 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -91,7 +91,7 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) * not taken into account. This limit is not a guarantee and is * NOT ABI. */ -#define SIGFRAME_MAXSZ SZ_64K +#define SIGFRAME_MAXSZ SZ_256K static int __sigframe_alloc(struct rt_sigframe_user_layout *user, unsigned long *offset, size_t size, bool extend) @@ -310,7 +310,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) fpsimd_flush_task_state(current); /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sve_alloc(current); + sve_alloc(current, true); if (!current->thread.sve_state) { clear_thread_flag(TIF_SVE); return -ENOMEM; @@ -926,6 +926,16 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Signal handlers are invoked with ZA and streaming mode disabled */ if (system_supports_sme()) { + /* + * If we were in streaming mode the saved register + * state was SVE but we will exit SM and use the + * FPSIMD register state - flush the saved FPSIMD + * register state in case it gets loaded. + */ + if (current->thread.svcr & SVCR_SM_MASK) + memset(¤t->thread.uw.fpsimd_state, 0, + sizeof(current->thread.uw.fpsimd_state)); + current->thread.svcr &= ~(SVCR_ZA_MASK | SVCR_SM_MASK); sme_smstop(); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 869ffc4d4484..ad2bfc794257 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -296,12 +296,25 @@ core_initcall(init_amu_fie); static void cpu_read_corecnt(void *val) { + /* + * A value of 0 can be returned if the current CPU does not support AMUs + * or if the counter is disabled for this CPU. A return value of 0 at + * counter read is properly handled as an error case by the users of the + * counter. + */ *(u64 *)val = read_corecnt(); } static void cpu_read_constcnt(void *val) { - *(u64 *)val = read_constcnt(); + /* + * Return 0 if the current CPU is affected by erratum 2457168. A value + * of 0 is also returned if the current CPU does not support AMUs or if + * the counter is disabled. A return value of 0 at counter read is + * properly handled as an error case by the users of the counter. + */ + *(u64 *)val = this_cpu_has_cap(ARM64_WORKAROUND_2457168) ? + 0UL : read_constcnt(); } static inline @@ -328,7 +341,22 @@ int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) */ bool cpc_ffh_supported(void) { - return freq_counters_valid(get_cpu_with_amu_feat()); + int cpu = get_cpu_with_amu_feat(); + + /* + * FFH is considered supported if there is at least one present CPU that + * supports AMUs. Using FFH to read core and reference counters for CPUs + * that do not support AMUs, have counters disabled or that are affected + * by errata, will result in a return value of 0. + * + * This is done to allow any enabled and valid counters to be read + * through FFH, knowing that potentially returning 0 as counter value is + * properly handled by the users of these counters. + */ + if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) + return false; + + return true; } int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 986cee6fbc7f..2ff0ef62abad 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -757,8 +757,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) if (likely(!vcpu_mode_is_32bit(vcpu))) return false; - return !system_supports_32bit_el0() || - static_branch_unlikely(&arm64_mismatched_32bit_el0); + return !kvm_supports_32bit_el0(); } /** diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8c607199cad1..f802a3b3f8db 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -242,7 +242,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) return -EINVAL; break; case PSR_AA32_MODE_FIQ: diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 87f1cd0df36e..c9a13e487187 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -993,7 +993,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, * THP doesn't start to split while we are adjusting the * refcounts. * - * We are sure this doesn't happen, because mmu_notifier_retry + * We are sure this doesn't happen, because mmu_invalidate_retry * was successful and we are holding the mmu_lock, so if this * THP is trying to split, it will be blocked in the mmu * notifier before touching any of the pages, specifically @@ -1188,9 +1188,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return ret; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq happens before we call + * Ensure the read of mmu_invalidate_seq happens before we call * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk * the page we just got a reference to gets unmapped before we have a * chance to grab the mmu_lock, which ensure that if the page gets @@ -1246,7 +1246,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else write_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c059b259aea6..3234f50b8c4b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -652,7 +652,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -701,7 +701,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index db7c4e6ae57b..e7ad44585f40 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -642,24 +642,6 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, vm_area_add_early(vma); } -static int __init parse_rodata(char *arg) -{ - int ret = strtobool(arg, &rodata_enabled); - if (!ret) { - rodata_full = false; - return 0; - } - - /* permit 'full' in addition to boolean options */ - if (strcmp(arg, "full")) - return -EINVAL; - - rodata_enabled = true; - rodata_full = true; - return 0; -} -early_param("rodata", parse_rodata); - #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 779653771507..63b2484ce6c3 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -67,6 +67,7 @@ WORKAROUND_1902691 WORKAROUND_2038923 WORKAROUND_2064142 WORKAROUND_2077057 +WORKAROUND_2457168 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index da500471ac73..160d8f37fa1a 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -179,6 +179,21 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr) return retval; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + /* * ffz - find first zero in word. * @word: The word to search diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 9f62af7fd7c4..1accb7842f58 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h @@ -331,11 +331,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & bit) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /** * ffz - find the first zero bit in a long word diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 4abc9a28aba4..c7dd6ad779af 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -39,6 +39,7 @@ config LOONGARCH select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_SPARSEMEM_ENABLE @@ -51,6 +52,7 @@ config LOONGARCH select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_NO_INSTR select BUILDTIME_TABLE_SORT select COMMON_CLK @@ -111,6 +113,7 @@ config LOONGARCH select PCI_ECAM if ACPI select PCI_LOONGSON select PCI_MSI_ARCH_FALLBACKS + select PCI_QUIRKS select PERF_USE_VMALLOC select RTC_LIB select SMP diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index c5108213876c..17162f494b9b 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -15,7 +15,7 @@ extern int acpi_pci_disabled; extern int acpi_noirq; #define acpi_os_ioremap acpi_os_ioremap -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); static inline void disable_acpi(void) { diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index b91e0733b2e5..d342935e5a72 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -109,4 +109,20 @@ extern unsigned long vm_map_base; */ #define PHYSADDR(a) ((_ACAST64_(a)) & TO_PHYS_MASK) +/* + * On LoongArch, I/O ports mappring is following: + * + * | .... | + * |-----------------------| + * | pci io ports(16K~32M) | + * |-----------------------| + * | isa io ports(0 ~16K) | + * PCI_IOBASE ->|-----------------------| + * | .... | + */ +#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) +#define PCI_IOSIZE SZ_32M +#define ISA_IOSIZE SZ_16K +#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) + #endif /* _ASM_ADDRSPACE_H */ diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 0a9b0fac1eee..ae19e33c7754 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -5,8 +5,9 @@ #ifndef __ASM_CMPXCHG_H #define __ASM_CMPXCHG_H -#include <asm/barrier.h> +#include <linux/bits.h> #include <linux/build_bug.h> +#include <asm/barrier.h> #define __xchg_asm(amswap_db, m, val) \ ({ \ @@ -21,10 +22,53 @@ __ret; \ }) +static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val, + unsigned int size) +{ + unsigned int shift; + u32 old32, mask, temp; + volatile u32 *ptr32; + + /* Mask value to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + val &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * exchange within the naturally aligned 4 byte integerthat includes + * it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " andn %1, %0, %z4 \n" + " or %1, %1, %z5 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (val << shift) + : "memory"); + + return (old32 & mask) >> shift; +} + static inline unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { + case 1: + case 2: + return __xchg_small(ptr, x, size); + case 4: return __xchg_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x); @@ -67,10 +111,62 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, __ret; \ }) +static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old, + unsigned int new, unsigned int size) +{ + unsigned int shift; + u32 old32, mask, temp; + volatile u32 *ptr32; + + /* Mask inputs to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + old &= mask; + new &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * compare & exchange within the naturally aligned 4 byte integer + * that includes it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + old <<= shift; + new <<= shift; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " and %1, %0, %z4 \n" + " bne %1, %z5, 2f \n" + " andn %1, %0, %z4 \n" + " or %1, %1, %z6 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + " b 3f \n" + "2: \n" + __WEAK_LLSC_MB + "3: \n" + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new) + : "memory"); + + return (old32 & mask) >> shift; +} + static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + case 2: + return __cmpxchg_small(ptr, old, new, size); + case 4: return __cmpxchg_asm("ll.w", "sc.w", (volatile u32 *)ptr, (u32)old, new); diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 884599739b36..999944ea1cea 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -7,35 +7,16 @@ #define ARCH_HAS_IOREMAP_WC -#include <linux/compiler.h> #include <linux/kernel.h> #include <linux/types.h> #include <asm/addrspace.h> -#include <asm/bug.h> -#include <asm/byteorder.h> #include <asm/cpu.h> #include <asm/page.h> #include <asm/pgtable-bits.h> #include <asm/string.h> /* - * On LoongArch, I/O ports mappring is following: - * - * | .... | - * |-----------------------| - * | pci io ports(64K~32M) | - * |-----------------------| - * | isa io ports(0 ~16K) | - * PCI_IOBASE ->|-----------------------| - * | .... | - */ -#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) -#define PCI_IOSIZE SZ_32M -#define ISA_IOSIZE SZ_16K -#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) - -/* * Change "struct page" to physical address. */ #define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT) diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index 4b130199ceae..d06d4542b634 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -81,7 +81,6 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS]; #define GSI_MIN_PCH_IRQ LOONGSON_PCH_IRQ_BASE #define GSI_MAX_PCH_IRQ (LOONGSON_PCH_IRQ_BASE + 256 - 1) -extern int find_pch_pic(u32 gsi); struct acpi_madt_lio_pic; struct acpi_madt_eio_pic; struct acpi_madt_ht_pic; diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index a37324ac460b..53f284a96182 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -95,7 +95,7 @@ static inline int pfn_valid(unsigned long pfn) #endif -#define virt_to_pfn(kaddr) PFN_DOWN(virt_to_phys((void *)(kaddr))) +#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) extern int __virt_addr_valid(volatile void *kaddr); diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index e6569f18c6dd..0bd6b0110198 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -123,6 +123,10 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { + case 1: + case 2: + return __xchg_small((volatile void *)ptr, val, size); + case 4: return __xchg_asm("amswap.w", (volatile u32 *)ptr, (u32)val); @@ -204,9 +208,13 @@ do { \ #define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) #define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) +#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) #define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) #define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index e03443abaf7d..8ea57e2f0e04 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -59,7 +59,6 @@ #include <linux/mm_types.h> #include <linux/mmzone.h> #include <asm/fixmap.h> -#include <asm/io.h> struct mm_struct; struct vm_area_struct; @@ -145,7 +144,7 @@ static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) *p4d = p4dval; } -#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) +#define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) #define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT)) #endif @@ -188,7 +187,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) -#define pud_phys(pud) virt_to_phys((void *)pud_val(pud)) +#define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) #endif @@ -221,7 +220,7 @@ static inline void pmd_clear(pmd_t *pmdp) #define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) -#define pmd_phys(pmd) virt_to_phys((void *)pmd_val(pmd)) +#define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) diff --git a/arch/loongarch/include/asm/reboot.h b/arch/loongarch/include/asm/reboot.h deleted file mode 100644 index 51151749d8f0..000000000000 --- a/arch/loongarch/include/asm/reboot.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_REBOOT_H -#define _ASM_REBOOT_H - -extern void (*pm_restart)(void); - -#endif /* _ASM_REBOOT_H */ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index f1c928648a4a..335398482038 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -48,7 +48,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) early_memunmap(map, size); } -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { if (!memblock_is_memory(phys)) return ioremap(phys, size); diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 800c965a17ea..8c82021eb2f4 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -15,10 +15,16 @@ #include <acpi/reboot.h> #include <asm/idle.h> #include <asm/loongarch.h> -#include <asm/reboot.h> -static void default_halt(void) +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +void machine_halt(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif local_irq_disable(); clear_csr_ecfg(ECFG0_IM); @@ -30,18 +36,29 @@ static void default_halt(void) } } -static void default_poweroff(void) +void machine_power_off(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif + do_kernel_power_off(); #ifdef CONFIG_EFI efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); #endif + while (true) { __arch_cpu_idle(); } } -static void default_restart(void) +void machine_restart(char *command) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif + do_kernel_restart(command); #ifdef CONFIG_EFI if (efi_capsule_pending(NULL)) efi_reboot(REBOOT_WARM, NULL); @@ -55,47 +72,3 @@ static void default_restart(void) __arch_cpu_idle(); } } - -void (*pm_restart)(void); -EXPORT_SYMBOL(pm_restart); - -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void machine_halt(void) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - default_halt(); -} - -void machine_power_off(void) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - pm_power_off(); -} - -void machine_restart(char *command) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - do_kernel_restart(command); - pm_restart(); -} - -static int __init loongarch_reboot_setup(void) -{ - pm_restart = default_restart; - pm_power_off = default_poweroff; - - return 0; -} - -arch_initcall(loongarch_reboot_setup); diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 7f4889df4a17..8f5b7986374b 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -529,11 +529,11 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) signal_setup_done(ret, ksig, 0); } -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) +void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; - if (has_signal && get_signal(&ksig)) { + if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); return; diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 69c76f26c1c5..f4831df5d2f9 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -77,6 +77,8 @@ SECTIONS PERCPU_SECTION(1 << CONFIG_L1_CACHE_SHIFT) #endif + .rela.dyn : ALIGN(8) { *(.rela.dyn) *(.rela*) } + .init.bss : { *(.init.bss) } diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c index cda2c6bc7f09..c2cc7ce343c9 100644 --- a/arch/loongarch/lib/dump_tlb.c +++ b/arch/loongarch/lib/dump_tlb.c @@ -18,11 +18,11 @@ void dump_tlb_regs(void) { const int field = 2 * sizeof(unsigned long); - pr_info("Index : %0x\n", read_csr_tlbidx()); - pr_info("PageSize : %0x\n", read_csr_pagesize()); - pr_info("EntryHi : %0*llx\n", field, read_csr_entryhi()); - pr_info("EntryLo0 : %0*llx\n", field, read_csr_entrylo0()); - pr_info("EntryLo1 : %0*llx\n", field, read_csr_entrylo1()); + pr_info("Index : 0x%0x\n", read_csr_tlbidx()); + pr_info("PageSize : 0x%0x\n", read_csr_pagesize()); + pr_info("EntryHi : 0x%0*llx\n", field, read_csr_entryhi()); + pr_info("EntryLo0 : 0x%0*llx\n", field, read_csr_entrylo0()); + pr_info("EntryLo1 : 0x%0*llx\n", field, read_csr_entrylo1()); } static void dump_tlb(int first, int last) @@ -33,8 +33,8 @@ static void dump_tlb(int first, int last) unsigned int s_index, s_asid; unsigned int pagesize, c0, c1, i; unsigned long asidmask = cpu_asid_mask(¤t_cpu_data); - int pwidth = 11; - int vwidth = 11; + int pwidth = 16; + int vwidth = 16; int asidwidth = DIV_ROUND_UP(ilog2(asidmask) + 1, 4); s_entryhi = read_csr_entryhi(); @@ -64,22 +64,22 @@ static void dump_tlb(int first, int last) /* * Only print entries in use */ - pr_info("Index: %2d pgsize=%x ", i, (1 << pagesize)); + pr_info("Index: %4d pgsize=0x%x ", i, (1 << pagesize)); c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; - pr_cont("va=%0*lx asid=%0*lx", + pr_cont("va=0x%0*lx asid=0x%0*lx", vwidth, (entryhi & ~0x1fffUL), asidwidth, asid & asidmask); /* NR/NX are in awkward places, so mask them off separately */ pa = entrylo0 & ~(ENTRYLO_NR | ENTRYLO_NX); pa = pa & PAGE_MASK; pr_cont("\n\t["); - pr_cont("ri=%d xi=%d ", + pr_cont("nr=%d nx=%d ", (entrylo0 & ENTRYLO_NR) ? 1 : 0, (entrylo0 & ENTRYLO_NX) ? 1 : 0); - pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld] [", + pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld] [", pwidth, pa, c0, (entrylo0 & ENTRYLO_D) ? 1 : 0, (entrylo0 & ENTRYLO_V) ? 1 : 0, @@ -88,10 +88,10 @@ static void dump_tlb(int first, int last) /* NR/NX are in awkward places, so mask them off separately */ pa = entrylo1 & ~(ENTRYLO_NR | ENTRYLO_NX); pa = pa & PAGE_MASK; - pr_cont("ri=%d xi=%d ", + pr_cont("nr=%d nx=%d ", (entrylo1 & ENTRYLO_NR) ? 1 : 0, (entrylo1 & ENTRYLO_NX) ? 1 : 0); - pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n", + pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n", pwidth, pa, c1, (entrylo1 & ENTRYLO_D) ? 1 : 0, (entrylo1 & ENTRYLO_V) ? 1 : 0, diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 605579b19a00..1ccd53655cab 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -216,6 +216,10 @@ good_area: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 7094a68c9b83..0532ed5ba43d 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -131,18 +131,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) return ret; } -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 start) -{ - int nid; - - nid = pa_to_nid(start); - return nid; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; @@ -154,6 +142,13 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) page += vmem_altmap_offset(altmap); __remove_pages(start_pfn, nr_pages, altmap); } + +#ifdef CONFIG_NUMA +int memory_add_physaddr_to_nid(u64 start) +{ + return pa_to_nid(start); +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 52e40f0ba732..381a569635a9 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -2,16 +2,9 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ -#include <linux/compiler.h> -#include <linux/elf-randomize.h> -#include <linux/errno.h> +#include <linux/export.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/export.h> -#include <linux/personality.h> -#include <linux/random.h> -#include <linux/sched/signal.h> -#include <linux/sched/mm.h> unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); @@ -120,6 +113,6 @@ int __virt_addr_valid(volatile void *kaddr) if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) return 0; - return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); + return pfn_valid(PFN_DOWN(PHYSADDR(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c index 43a0078e4418..e02e775f5360 100644 --- a/arch/loongarch/vdso/vgetcpu.c +++ b/arch/loongarch/vdso/vgetcpu.c @@ -24,6 +24,8 @@ static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void) return (struct vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE); } +extern +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) { int cpu_id; diff --git a/arch/loongarch/vdso/vgettimeofday.c b/arch/loongarch/vdso/vgettimeofday.c index b1f4548dae92..8f22863bd7ea 100644 --- a/arch/loongarch/vdso/vgettimeofday.c +++ b/arch/loongarch/vdso/vgettimeofday.c @@ -6,20 +6,23 @@ */ #include <linux/types.h> -int __vdso_clock_gettime(clockid_t clock, - struct __kernel_timespec *ts) +extern +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { return __cvdso_clock_gettime(clock, ts); } -int __vdso_gettimeofday(struct __kernel_old_timeval *tv, - struct timezone *tz) +extern +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } -int __vdso_clock_getres(clockid_t clock_id, - struct __kernel_timespec *res) +extern +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res); +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { return __cvdso_clock_getres(clock_id, res); } diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 470aed978590..e984af71df6b 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -157,11 +157,8 @@ arch___change_bit(unsigned long nr, volatile unsigned long *addr) change_bit(nr, addr); } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (addr[nr >> 5] & (1UL << (nr & 31))) != 0; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 717716cc51c5..5cedb28e8a40 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -84,8 +84,6 @@ #define KVM_MAX_VCPUS 16 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index db17e870bdff..74cd64a24d05 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -615,17 +615,17 @@ retry: * Used to check for invalidations in progress, of the pfn that is * returned by pfn_to_pfn_prot below. */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in - * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads + * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't * risk the page we get a reference to getting unmapped before we have a - * chance to grab the mmu_lock without mmu_notifier_retry() noticing. + * chance to grab the mmu_lock without mmu_invalidate_retry() noticing. * * This smp_rmb() pairs with the effective smp_wmb() of the combination * of the pte_unmap_unlock() after the PTE is zapped, and the * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before - * mmu_notifier_seq is incremented. + * mmu_invalidate_seq is incremented. */ smp_rmb(); @@ -638,7 +638,7 @@ retry: spin_lock(&kvm->mmu_lock); /* Check if an invalidation has taken place since we got pfn */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* * This can happen when mappings are changed asynchronously, but * also synchronously if a COW is triggered by diff --git a/arch/nios2/include/asm/entry.h b/arch/nios2/include/asm/entry.h index cf37f55efbc2..bafb7b2ca59f 100644 --- a/arch/nios2/include/asm/entry.h +++ b/arch/nios2/include/asm/entry.h @@ -50,7 +50,8 @@ stw r13, PT_R13(sp) stw r14, PT_R14(sp) stw r15, PT_R15(sp) - stw r2, PT_ORIG_R2(sp) + movi r24, -1 + stw r24, PT_ORIG_R2(sp) stw r7, PT_ORIG_R7(sp) stw ra, PT_RA(sp) diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h index 642462144872..9da34c3022a2 100644 --- a/arch/nios2/include/asm/ptrace.h +++ b/arch/nios2/include/asm/ptrace.h @@ -74,6 +74,8 @@ extern void show_regs(struct pt_regs *); ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE)\ - 1) +#define force_successful_syscall_return() (current_pt_regs()->orig_r2 = -1) + int do_syscall_trace_enter(void); void do_syscall_trace_exit(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S index 0794cd7803df..99f0a65e6234 100644 --- a/arch/nios2/kernel/entry.S +++ b/arch/nios2/kernel/entry.S @@ -185,6 +185,7 @@ ENTRY(handle_system_call) ldw r5, PT_R5(sp) local_restart: + stw r2, PT_ORIG_R2(sp) /* Check that the requested system call is within limits */ movui r1, __NR_syscalls bgeu r2, r1, ret_invsyscall @@ -192,7 +193,6 @@ local_restart: movhi r11, %hiadj(sys_call_table) add r1, r1, r11 ldw r1, %lo(sys_call_table)(r1) - beq r1, r0, ret_invsyscall /* Check if we are being traced */ GET_THREAD_INFO r11 @@ -213,6 +213,9 @@ local_restart: translate_rc_and_ret: movi r1, 0 bge r2, zero, 3f + ldw r1, PT_ORIG_R2(sp) + addi r1, r1, 1 + beq r1, zero, 3f sub r2, zero, r2 movi r1, 1 3: @@ -255,9 +258,9 @@ traced_system_call: ldw r6, PT_R6(sp) ldw r7, PT_R7(sp) - /* Fetch the syscall function, we don't need to check the boundaries - * since this is already done. - */ + /* Fetch the syscall function. */ + movui r1, __NR_syscalls + bgeu r2, r1, traced_invsyscall slli r1, r2, 2 movhi r11,%hiadj(sys_call_table) add r1, r1, r11 @@ -276,6 +279,9 @@ traced_system_call: translate_rc_and_ret2: movi r1, 0 bge r2, zero, 4f + ldw r1, PT_ORIG_R2(sp) + addi r1, r1, 1 + beq r1, zero, 4f sub r2, zero, r2 movi r1, 1 4: @@ -287,6 +293,11 @@ end_translate_rc_and_ret2: RESTORE_SWITCH_STACK br ret_from_exception + /* If the syscall number was invalid return ENOSYS */ +traced_invsyscall: + movi r2, -ENOSYS + br translate_rc_and_ret2 + Luser_return: GET_THREAD_INFO r11 /* get thread_info pointer */ ldw r10, TI_FLAGS(r11) /* get thread_info->flags */ @@ -336,9 +347,6 @@ external_interrupt: /* skip if no interrupt is pending */ beq r12, r0, ret_from_interrupt - movi r24, -1 - stw r24, PT_ORIG_R2(sp) - /* * Process an external hardware interrupt. */ diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index cb0b91589cf2..a5b93a30c6eb 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -242,7 +242,7 @@ static int do_signal(struct pt_regs *regs) /* * If we were from a system call, check for system call restarting... */ - if (regs->orig_r2 >= 0) { + if (regs->orig_r2 >= 0 && regs->r1) { continue_addr = regs->ea; restart_addr = continue_addr - 4; retval = regs->r2; @@ -264,6 +264,7 @@ static int do_signal(struct pt_regs *regs) regs->ea = restart_addr; break; } + regs->orig_r2 = -1; } if (get_signal(&ksig)) { diff --git a/arch/nios2/kernel/syscall_table.c b/arch/nios2/kernel/syscall_table.c index 6176d63023c1..c2875a6dd5a4 100644 --- a/arch/nios2/kernel/syscall_table.c +++ b/arch/nios2/kernel/syscall_table.c @@ -13,5 +13,6 @@ #define __SYSCALL(nr, call) [nr] = (call), void *sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, #include <asm/unistd.h> }; diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 7f059cd1196a..9aede2447011 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -146,10 +146,10 @@ menu "Processor type and features" choice prompt "Processor type" - default PA7000 + default PA7000 if "$(ARCH)" = "parisc" config PA7000 - bool "PA7000/PA7100" + bool "PA7000/PA7100" if "$(ARCH)" = "parisc" help This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel @@ -160,21 +160,21 @@ config PA7000 which is required on some machines. config PA7100LC - bool "PA7100LC" + bool "PA7100LC" if "$(ARCH)" = "parisc" help Select this option for the PCX-L processor, as used in the 712, 715/64, 715/80, 715/100, 715/100XC, 725/100, 743, 748, D200, D210, D300, D310 and E-class config PA7200 - bool "PA7200" + bool "PA7200" if "$(ARCH)" = "parisc" help Select this option for the PCX-T' processor, as used in the C100, C110, J100, J110, J210XC, D250, D260, D350, D360, K100, K200, K210, K220, K400, K410 and K420 config PA7300LC - bool "PA7300LC" + bool "PA7300LC" if "$(ARCH)" = "parisc" help Select this option for the PCX-L2 processor, as used in the 744, A180, B132L, B160L, B180L, C132L, C160L, C180L, @@ -224,17 +224,8 @@ config MLONGCALLS Enabling this option will probably slow down your kernel. config 64BIT - bool "64-bit kernel" + def_bool "$(ARCH)" = "parisc64" depends on PA8X00 - help - Enable this if you want to support 64bit kernel on PA-RISC platform. - - At the moment, only people willing to use more than 2GB of RAM, - or having a 64bit-only capable PA-RISC machine should say Y here. - - Since there is no 64bit userland on PA-RISC, there is no point to - enable this option otherwise. The 64bit kernel is significantly bigger - and slower than the 32bit one. choice prompt "Kernel page size" diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 56ffd260c669..0ec9cfc5131f 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -12,14 +12,6 @@ #include <asm/barrier.h> #include <linux/atomic.h> -/* compiler build environment sanity checks: */ -#if !defined(CONFIG_64BIT) && defined(__LP64__) -#error "Please use 'ARCH=parisc' to build the 32-bit kernel." -#endif -#if defined(CONFIG_64BIT) && !defined(__LP64__) -#error "Please use 'ARCH=parisc64' to build the 64-bit kernel." -#endif - /* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion * on use of volatile and __*_bit() (set/clear/change): * *_bit() want use of volatile. diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index e0a9e9657622..fd15fd4bbb61 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -22,7 +22,7 @@ #include <linux/init.h> #include <linux/pgtable.h> - .level PA_ASM_LEVEL + .level 1.1 __INITDATA ENTRY(boot_args) @@ -70,6 +70,47 @@ $bss_loop: stw,ma %arg2,4(%r1) stw,ma %arg3,4(%r1) +#if !defined(CONFIG_64BIT) && defined(CONFIG_PA20) + /* This 32-bit kernel was compiled for PA2.0 CPUs. Check current CPU + * and halt kernel if we detect a PA1.x CPU. */ + ldi 32,%r10 + mtctl %r10,%cr11 + .level 2.0 + mfctl,w %cr11,%r10 + .level 1.1 + comib,<>,n 0,%r10,$cpu_ok + + load32 PA(msg1),%arg0 + ldi msg1_end-msg1,%arg1 +$iodc_panic: + copy %arg0, %r10 + copy %arg1, %r11 + load32 PA(init_stack),%sp +#define MEM_CONS 0x3A0 + ldw MEM_CONS+32(%r0),%arg0 // HPA + ldi ENTRY_IO_COUT,%arg1 + ldw MEM_CONS+36(%r0),%arg2 // SPA + ldw MEM_CONS+8(%r0),%arg3 // layers + load32 PA(__bss_start),%r1 + stw %r1,-52(%sp) // arg4 + stw %r0,-56(%sp) // arg5 + stw %r10,-60(%sp) // arg6 = ptr to text + stw %r11,-64(%sp) // arg7 = len + stw %r0,-68(%sp) // arg8 + load32 PA(.iodc_panic_ret), %rp + ldw MEM_CONS+40(%r0),%r1 // ENTRY_IODC + bv,n (%r1) +.iodc_panic_ret: + b . /* wait endless with ... */ + or %r10,%r10,%r10 /* qemu idle sleep */ +msg1: .ascii "Can't boot kernel which was built for PA8x00 CPUs on this machine.\r\n" +msg1_end: + +$cpu_ok: +#endif + + .level PA_ASM_LEVEL + /* Initialize startup VM. Just map first 16/32 MB of memory */ load32 PA(swapper_pg_dir),%r4 mtctl %r4,%cr24 /* Initialize kernel root pointer */ diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index bac581b5ecfc..e8a4d77cff53 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -93,7 +93,7 @@ #define R1(i) (((i)>>21)&0x1f) #define R2(i) (((i)>>16)&0x1f) #define R3(i) ((i)&0x1f) -#define FR3(i) ((((i)<<1)&0x1f)|(((i)>>6)&1)) +#define FR3(i) ((((i)&0x1f)<<1)|(((i)>>6)&1)) #define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0)) #define IM5_2(i) IM((i)>>16,5) #define IM5_3(i) IM((i),5) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 398e0b5e485f..ed6db13a1d7c 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -83,6 +83,8 @@ enum { FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, + FW_FEATURE_NATIVE_POSSIBLE = 0, + FW_FEATURE_NATIVE_ALWAYS = 0, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES FW_FEATURE_PSERIES_POSSIBLE | @@ -93,6 +95,9 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_POSSIBLE | #endif +#ifdef CONFIG_PPC_HASH_MMU_NATIVE + FW_FEATURE_NATIVE_ALWAYS | +#endif 0, FW_FEATURE_ALWAYS = #ifdef CONFIG_PPC_PSERIES @@ -104,6 +109,9 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_ALWAYS & #endif +#ifdef CONFIG_PPC_HASH_MMU_NATIVE + FW_FEATURE_NATIVE_ALWAYS & +#endif FW_FEATURE_POSSIBLE, #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 26ede09c521d..983551859891 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -113,7 +113,14 @@ static inline void __hard_RI_enable(void) static inline notrace unsigned long irq_soft_mask_return(void) { - return READ_ONCE(local_paca->irq_soft_mask); + unsigned long flags; + + asm volatile( + "lbz %0,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, irq_soft_mask))); + + return flags; } /* @@ -140,24 +147,46 @@ static inline notrace void irq_soft_mask_set(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON(mask && !(mask & IRQS_DISABLED)); - WRITE_ONCE(local_paca->irq_soft_mask, mask); - barrier(); + asm volatile( + "stb %0,%1(13)" + : + : "r" (mask), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "memory"); } static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) { - unsigned long flags = irq_soft_mask_return(); + unsigned long flags; - irq_soft_mask_set(mask); +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=&r" (flags) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) + : "memory"); return flags; } static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) { - unsigned long flags = irq_soft_mask_return(); + unsigned long flags, tmp; + + asm volatile( + "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" + : "=&r" (flags), "=r" (tmp) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) + : "memory"); - irq_soft_mask_set(flags | mask); +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); +#endif return flags; } @@ -282,7 +311,8 @@ static inline bool pmi_irq_pending(void) flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (!arch_irqs_disabled_flags(flags)) { \ - WRITE_ONCE(local_paca->saved_r1, current_stack_pointer);\ + asm volatile("std%X0 %1,%0" : "=m" (local_paca->saved_r1) \ + : "r" (current_stack_pointer)); \ trace_hardirqs_off(); \ } \ } while(0) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 4def2bd17b9b..d49065af08e9 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -666,7 +666,7 @@ static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq, VM_WARN(!spin_is_locked(&kvm->mmu_lock), "%s called with kvm mmu_lock not held \n", __func__); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) return NULL; pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bdd3332200c5..31de91c8359c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -68,10 +68,6 @@ void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) pci_dma_ops = dma_ops; } -/* - * This function should run under locking protection, specifically - * hose_spinlock. - */ static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; @@ -108,15 +104,20 @@ static int get_phb_number(struct device_node *dn) if (!ret) phb_id = (int)(prop & (MAX_PHBS - 1)); + spin_lock(&hose_spinlock); + /* We need to be sure to not use the same PHB number twice. */ if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) - return phb_id; + goto out_unlock; /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); +out_unlock: + spin_unlock(&hose_spinlock); + return phb_id; } @@ -127,10 +128,13 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); if (phb == NULL) return NULL; - spin_lock(&hose_spinlock); + phb->global_number = get_phb_number(dev); + + spin_lock(&hose_spinlock); list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); + phb->dn = dev; phb->is_dynamic = slab_is_available(); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 433965bf37b4..855b59892c5c 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -245,6 +245,15 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); +#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT + /* + * Enable PCI domains in /proc when PCI bus numbers are not unique + * across all PCI domains to prevent conflicts. And keep PCI domain 0 + * backward compatible in /proc for video cards. + */ + pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0); +#endif + if (pci_has_flag(PCI_REASSIGN_ALL_BUS)) pci_assign_all_buses = 1; diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S index 9a434d42e660..6ce95ddadbcd 100644 --- a/arch/powerpc/kernel/rtas_entry.S +++ b/arch/powerpc/kernel/rtas_entry.S @@ -109,8 +109,12 @@ __enter_rtas: * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S] * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if * MSR[S] is set, it will remain when entering RTAS. + * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV + * from the saved MSR value and insert into the value RTAS will use. */ + extrdi r0, r6, 1, 63 - MSR_HV_LG LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI) + insrdi r6, r0, 1, 63 - MSR_HV_LG li r0,0 mtmsrd r0,1 /* disable RI before using SRR0/1 */ diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index cb3358886203..6c1db3b6de2d 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -18,6 +18,7 @@ .p2align 3 #define __SYSCALL(nr, entry) .8byte entry #else + .p2align 2 #define __SYSCALL(nr, entry) .long entry #endif diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 1ae09992c9ea..bc6a381b5346 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -90,7 +90,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, unsigned long pfn; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Get host physical address for gpa */ @@ -151,7 +151,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, cpte = kvmppc_mmu_hpte_cache_next(vcpu); spin_lock(&kvm->mmu_lock); - if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { + if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) { r = -EAGAIN; goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 514fd45c1994..e9744b41a226 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -578,7 +578,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, return -EFAULT; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); ret = -EFAULT; @@ -693,7 +693,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, /* Check if we might have been invalidated; let the guest retry if so */ ret = RESUME_GUEST; - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { + if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) { unlock_rmap(rmap); goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 9d4b3feda3b6..5d5e12f3bf86 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -640,7 +640,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Check if we might have been invalidated; let the guest retry if so */ spin_lock(&kvm->mmu_lock); ret = -EAGAIN; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* Now traverse again under the lock and change the tree */ @@ -830,7 +830,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, bool large_enable; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -1191,7 +1191,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, * Increase the mmu notifier sequence number to prevent any page * fault that read the memslot earlier from writing a PTE. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; spin_unlock(&kvm->mmu_lock); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index be8249cc6107..5a64a1341e6f 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1580,7 +1580,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, /* 2. Find the host pte for this L1 guest real address */ /* Used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* See if can find translation in our partition scoped tables for L1 */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 2257fb18cb72..5a05953ae13f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -219,7 +219,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, g_ptel = ptel; /* used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Find the memslot (if any) for this address */ @@ -366,7 +366,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -932,7 +932,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, int i; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); @@ -960,7 +960,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, long ret = H_SUCCESS; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 7f16afc331ef..05668e964140 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -339,7 +339,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned long flags; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -460,7 +460,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { ret = -EAGAIN; goto out; } diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 20f6ed813bff..2f8385523a13 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -124,9 +124,6 @@ struct papr_scm_priv { /* The bits which needs to be overridden */ u64 health_bitmap_inject_mask; - - /* array to have event_code and stat_id mappings */ - u8 *nvdimm_events_map; }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -350,6 +347,25 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, #ifdef CONFIG_PERF_EVENTS #define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) +static const char * const nvdimm_events_map[] = { + [1] = "CtlResCt", + [2] = "CtlResTm", + [3] = "PonSecs ", + [4] = "MemLife ", + [5] = "CritRscU", + [6] = "HostLCnt", + [7] = "HostSCnt", + [8] = "HostSDur", + [9] = "HostLDur", + [10] = "MedRCnt ", + [11] = "MedWCnt ", + [12] = "MedRDur ", + [13] = "MedWDur ", + [14] = "CchRHCnt", + [15] = "CchWHCnt", + [16] = "FastWCnt", +}; + static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) { struct papr_scm_perf_stat *stat; @@ -357,11 +373,15 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, struct papr_scm_priv *p = dev_get_drvdata(dev); int rc, size; + /* Invalid eventcode */ + if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map)) + return -EINVAL; + /* Allocate request buffer enough to hold single performance stat */ size = sizeof(struct papr_scm_perf_stats) + sizeof(struct papr_scm_perf_stat); - if (!p || !p->nvdimm_events_map) + if (!p) return -EINVAL; stats = kzalloc(size, GFP_KERNEL); @@ -370,7 +390,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, stat = &stats->scm_statistic[0]; memcpy(&stat->stat_id, - &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)], + nvdimm_events_map[event->attr.config], sizeof(stat->stat_id)); stat->stat_val = 0; @@ -458,56 +478,6 @@ static void papr_scm_pmu_del(struct perf_event *event, int flags) papr_scm_pmu_read(event); } -static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu) -{ - struct papr_scm_perf_stat *stat; - struct papr_scm_perf_stats *stats; - u32 available_events; - int index, rc = 0; - - if (!p->stat_buffer_len) - return -ENOENT; - - available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) - / sizeof(struct papr_scm_perf_stat); - if (available_events == 0) - return -EOPNOTSUPP; - - /* Allocate the buffer for phyp where stats are written */ - stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); - if (!stats) { - rc = -ENOMEM; - return rc; - } - - /* Called to get list of events supported */ - rc = drc_pmem_query_stats(p, stats, 0); - if (rc) - goto out; - - /* - * Allocate memory and populate nvdimm_event_map. - * Allocate an extra element for NULL entry - */ - p->nvdimm_events_map = kcalloc(available_events + 1, - sizeof(stat->stat_id), - GFP_KERNEL); - if (!p->nvdimm_events_map) { - rc = -ENOMEM; - goto out; - } - - /* Copy all stat_ids to event map */ - for (index = 0, stat = stats->scm_statistic; - index < available_events; index++, ++stat) { - memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)], - &stat->stat_id, sizeof(stat->stat_id)); - } -out: - kfree(stats); - return rc; -} - static void papr_scm_pmu_register(struct papr_scm_priv *p) { struct nvdimm_pmu *nd_pmu; @@ -519,9 +489,10 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) goto pmu_err_print; } - rc = papr_scm_pmu_check_events(p, nd_pmu); - if (rc) + if (!p->stat_buffer_len) { + rc = -ENOENT; goto pmu_check_events_err; + } nd_pmu->pmu.task_ctx_nr = perf_invalid_context; nd_pmu->pmu.name = nvdimm_name(p->nvdimm); @@ -539,7 +510,7 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) rc = register_nvdimm_pmu(nd_pmu, p->pdev); if (rc) - goto pmu_register_err; + goto pmu_check_events_err; /* * Set archdata.priv value to nvdimm_pmu structure, to handle the @@ -548,8 +519,6 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) p->pdev->archdata.priv = nd_pmu; return; -pmu_register_err: - kfree(p->nvdimm_events_map); pmu_check_events_err: kfree(nd_pmu); pmu_err_print: @@ -1560,7 +1529,6 @@ static int papr_scm_remove(struct platform_device *pdev) unregister_nvdimm_pmu(pdev->archdata.priv); pdev->archdata.priv = NULL; - kfree(p->nvdimm_events_map); kfree(p->bus_desc.provider_name); kfree(p); diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 044982a11df5..f3f87ed2007f 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -84,12 +84,10 @@ phy1: ethernet-phy@9 { reg = <9>; - ti,fifo-depth = <0x1>; }; phy0: ethernet-phy@8 { reg = <8>; - ti,fifo-depth = <0x1>; }; }; @@ -102,7 +100,6 @@ disable-wp; cap-sd-highspeed; cap-mmc-highspeed; - card-detect-delay = <200>; mmc-ddr-1_8v; mmc-hs200-1_8v; sd-uhs-sdr12; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index 82c93c8f5c17..c87cc2d8fe29 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -54,12 +54,10 @@ phy1: ethernet-phy@5 { reg = <5>; - ti,fifo-depth = <0x01>; }; phy0: ethernet-phy@4 { reg = <4>; - ti,fifo-depth = <0x01>; }; }; @@ -72,7 +70,6 @@ disable-wp; cap-sd-highspeed; cap-mmc-highspeed; - card-detect-delay = <200>; mmc-ddr-1_8v; mmc-hs200-1_8v; sd-uhs-sdr12; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 499c2e63ad35..74493344ea41 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -193,7 +193,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic>; - interrupts = <1>, <2>, <3>; + interrupts = <1>, <3>, <4>, <2>; }; clint: clint@2000000 { @@ -485,9 +485,8 @@ ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; msi-parent = <&pcie>; msi-controller; - microchip,axi-m-atr0 = <0x10 0x0>; status = "disabled"; - pcie_intc: legacy-interrupt-controller { + pcie_intc: interrupt-controller { #address-cells = <0>; #interrupt-cells = <1>; interrupt-controller; diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 83d6d4d2b1df..26a446a34057 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -33,4 +33,16 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, u32 type, u64 flags); const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid); +#ifdef CONFIG_RISCV_SBI_V01 +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; +#endif +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; + #endif /* __RISCV_KVM_VCPU_SBI_H__ */ diff --git a/arch/riscv/include/asm/signal.h b/arch/riscv/include/asm/signal.h new file mode 100644 index 000000000000..532c29ef0376 --- /dev/null +++ b/arch/riscv/include/asm/signal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SIGNAL_H +#define __ASM_SIGNAL_H + +#include <uapi/asm/signal.h> +#include <uapi/asm/ptrace.h> + +asmlinkage __visible +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); + +#endif diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 78933ac04995..67322f878e0d 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -42,6 +42,8 @@ #ifndef __ASSEMBLY__ +extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; + #include <asm/processor.h> #include <asm/csr.h> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 553d755483ed..3b5583db9d80 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -28,7 +28,7 @@ unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; -__ro_after_init DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); +DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); EXPORT_SYMBOL(riscv_isa_ext_keys); /** diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 38b05ca6fe66..5a2de6b6f882 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -15,6 +15,7 @@ #include <asm/ucontext.h> #include <asm/vdso.h> +#include <asm/signal.h> #include <asm/signal32.h> #include <asm/switch_to.h> #include <asm/csr.h> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 39d0f8bba4b4..635e6ec26938 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -20,9 +20,10 @@ #include <asm/asm-prototypes.h> #include <asm/bug.h> +#include <asm/csr.h> #include <asm/processor.h> #include <asm/ptrace.h> -#include <asm/csr.h> +#include <asm/thread_info.h> int show_unhandled_signals = 1; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 3a35b2d95697..3620ecac2fa1 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -666,7 +666,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return ret; } - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); if (hfn == KVM_PFN_ERR_HWPOISON) { @@ -686,7 +686,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; if (writable) { diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index d45e7da3f0d3..f96991d230bf 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -32,23 +32,13 @@ static int kvm_linux_err_map_sbi(int err) }; } -#ifdef CONFIG_RISCV_SBI_V01 -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; -#else +#ifndef CONFIG_RISCV_SBI_V01 static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = { .extid_start = -1UL, .extid_end = -1UL, .handler = NULL, }; #endif -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { &vcpu_sbi_ext_v01, diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 16f50c46ba39..185f2386a747 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -299,7 +299,6 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) { - struct kvm_vcpu_csr *csr; struct kvm_vcpu_timer *t = &vcpu->arch.timer; kvm_riscv_vcpu_update_timedelta(vcpu); @@ -307,7 +306,6 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) if (!t->sstc_enabled) return; - csr = &vcpu->arch.guest_csr; #if defined(CONFIG_32BIT) csr_write(CSR_VSTIMECMP, (u32)t->next_cycles); csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); @@ -324,13 +322,11 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) { - struct kvm_vcpu_csr *csr; struct kvm_vcpu_timer *t = &vcpu->arch.timer; if (!t->sstc_enabled) return; - csr = &vcpu->arch.guest_csr; t = &vcpu->arch.timer; #if defined(CONFIG_32BIT) t->next_cycles = csr_read(CSR_VSTIMECMP); diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 5e49e4b4a4cc..86c56616e5de 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -118,10 +118,10 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, if (!numpages) return 0; - mmap_read_lock(&init_mm); + mmap_write_lock(&init_mm); ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); - mmap_read_unlock(&init_mm); + mmap_write_unlock(&init_mm); flush_tlb_kernel_range(start, end); diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f6dfde577ce8..2a827002934b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,8 +40,6 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y @@ -74,6 +72,7 @@ CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y @@ -93,6 +92,10 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m +CONFIG_ZSWAP=y +CONFIG_ZSMALLOC_STAT=y +CONFIG_SLUB_STATS=y +# CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -102,14 +105,12 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y -CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y CONFIG_ANON_VMA_NAME=y +CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -167,6 +168,7 @@ CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y @@ -493,7 +495,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set # CONFIG_NET_VENDOR_CHELSIO is not set @@ -509,7 +510,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m @@ -518,16 +519,18 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETERION is not set # CONFIG_NET_VENDOR_NETRONOME is not set -# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set # CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set # CONFIG_NET_VENDOR_REALTEK is not set @@ -535,9 +538,9 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SILAN is not set # CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_SOCIONEXT is not set # CONFIG_NET_VENDOR_STMICRO is not set @@ -570,6 +573,8 @@ CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +# CONFIG_RANDOM_TRUST_CPU is not set +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set CONFIG_PPS=m # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set @@ -727,18 +732,26 @@ CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m +CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m @@ -746,11 +759,14 @@ CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_842=m @@ -766,16 +782,6 @@ CONFIG_CRYPTO_STATS=y CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m @@ -797,6 +803,7 @@ CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_SLUB_DEBUG_ON=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_RODATA_TEST=y CONFIG_DEBUG_WX=y @@ -808,8 +815,6 @@ CONFIG_DEBUG_OBJECTS_TIMERS=y CONFIG_DEBUG_OBJECTS_WORK=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y -CONFIG_SLUB_DEBUG_ON=y -CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 706df3a4a867..fb780e80e4c8 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,8 +38,6 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y @@ -69,6 +67,7 @@ CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y @@ -88,6 +87,9 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m +CONFIG_ZSWAP=y +CONFIG_ZSMALLOC_STAT=y +# CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -95,13 +97,11 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y -CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_ANON_VMA_NAME=y +CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -159,6 +159,7 @@ CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y @@ -484,7 +485,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set # CONFIG_NET_VENDOR_CHELSIO is not set @@ -500,7 +500,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m @@ -509,16 +509,18 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETERION is not set # CONFIG_NET_VENDOR_NETRONOME is not set -# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set # CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set # CONFIG_NET_VENDOR_REALTEK is not set @@ -526,9 +528,9 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SILAN is not set # CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_SOCIONEXT is not set # CONFIG_NET_VENDOR_STMICRO is not set @@ -561,6 +563,8 @@ CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +# CONFIG_RANDOM_TRUST_CPU is not set +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set CONFIG_WATCHDOG=y @@ -713,18 +717,26 @@ CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m +CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m @@ -732,11 +744,14 @@ CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_842=m @@ -752,16 +767,6 @@ CONFIG_CRYPTO_STATS=y CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index f4976f611b94..a5576b8d4081 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,4 +1,3 @@ -# CONFIG_SWAP is not set CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BPF_SYSCALL=y @@ -9,7 +8,6 @@ CONFIG_BPF_SYSCALL=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_COMPAT_BRK is not set CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set @@ -28,6 +26,8 @@ CONFIG_CRASH_DUMP=y # CONFIG_BLOCK_LEGACY_AUTOLOAD is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_SWAP is not set +# CONFIG_COMPAT_BRK is not set # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set CONFIG_NET=y @@ -53,10 +53,12 @@ CONFIG_ZFCP=y # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set # CONFIG_HMC_DRV is not set +# CONFIG_S390_UV_UAPI is not set # CONFIG_S390_TAPE is not set # CONFIG_VMCP is not set # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index f0bc4dc3e9bf..6511d15ace45 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -437,7 +437,7 @@ __init int hypfs_diag_init(void) int rc; if (diag204_probe()) { - pr_err("The hardware system does not support hypfs\n"); + pr_info("The hardware system does not support hypfs\n"); return -ENODATA; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 5c97f48cea91..ee919bfc8186 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -496,9 +496,9 @@ fail_hypfs_sprp_exit: hypfs_vm_exit(); fail_hypfs_diag_exit: hypfs_diag_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); fail_dbfs_exit: hypfs_dbfs_exit(); - pr_err("Initialization of hypfs failed with rc=%i\n", rc); return rc; } device_initcall(hypfs_init) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 9a7d15da966e..2de74fcd0578 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -176,14 +176,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return old & mask; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - const volatile unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - return *p & mask; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index f22beda9e6d5..ccdbccfde148 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -28,9 +28,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f39092e0ceaa..b1e98a9ed152 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1038,16 +1038,11 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); -#ifdef CONFIG_VFIO_PCI_ZDEV_KVM -int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm); -void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev); -#else -static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev, - struct kvm *kvm) -{ - return -EPERM; -} -static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {} -#endif +struct zpci_kvm_hook { + int (*kvm_register)(void *opaque, struct kvm *kvm); + void (*kvm_unregister)(void *opaque); +}; + +extern struct zpci_kvm_hook zpci_kvm_hook; #endif diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 89949b9f3cf8..d5119e039d85 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) memcpy(dst, src, arch_task_struct_size); dst->thread.fpu.regs = dst->thread.fpu.fprs; + + /* + * Don't transfer over the runtime instrumentation or the guarded + * storage control block pointers. These fields are cleared here instead + * of in copy_thread() to avoid premature freeing of associated memory + * on fork() failure. Wait to clear the RI flag because ->stack still + * refers to the source thread. + */ + dst->thread.ri_cb = NULL; + dst->thread.gs_cb = NULL; + dst->thread.gs_bc_cb = NULL; + return 0; } @@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->childregs.flags = 0; if (new_stackp) frame->childregs.gprs[15] = new_stackp; - - /* Don't copy runtime instrumentation info */ - p->thread.ri_cb = NULL; + /* + * Clear the runtime instrumentation flag after the above childregs + * copy. The CB pointer was already cleared in arch_dup_task_struct(). + */ frame->childregs.psw.mask &= ~PSW_MASK_RI; - /* Don't copy guarded storage control block */ - p->thread.gs_cb = NULL; - p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 2e526f11b91e..5ea3830af0cc 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -131,6 +131,7 @@ SECTIONS /* * Table with the patch locations to undo expolines */ + . = ALIGN(4); .nospec_call_table : { __nospec_call_start = . ; *(.s390_indirect*) diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 4946fb7757d6..bb8c335d17b9 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -431,8 +431,9 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) * available, enable them and let userspace indicate whether or not they will * be used (specify SHM bit to disable). */ -int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm) +static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { + struct zpci_dev *zdev = opaque; int rc; if (!zdev) @@ -510,10 +511,10 @@ err: kvm_put_kvm(kvm); return rc; } -EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm); -void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev) +static void kvm_s390_pci_unregister_kvm(void *opaque) { + struct zpci_dev *zdev = opaque; struct kvm *kvm; if (!zdev) @@ -566,7 +567,6 @@ out: kvm_put_kvm(kvm); } -EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm); void kvm_s390_pci_init_list(struct kvm *kvm) { @@ -678,6 +678,8 @@ int kvm_s390_pci_init(void) spin_lock_init(&aift->gait_lock); mutex_init(&aift->aift_lock); + zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; + zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; return 0; } @@ -685,6 +687,8 @@ int kvm_s390_pci_init(void) void kvm_s390_pci_exit(void) { mutex_destroy(&aift->aift_lock); + zpci_kvm_hook.kvm_register = NULL; + zpci_kvm_hook.kvm_unregister = NULL; kfree(aift); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 13449941516c..9ab6ca6f7f59 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) flags = FAULT_FLAG_DEFAULT; if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if (access == VM_WRITE || is_write) + if (is_write) + access = VM_WRITE; + if (access == VM_WRITE) flags |= FAULT_FLAG_WRITE; mmap_read_lock(mm); @@ -419,8 +421,6 @@ retry: if (unlikely(!(vma->vm_flags & access))) goto out_up; - if (is_vm_hugetlb_page(vma)) - address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index bf557a1b789c..5ae31ca9dd44 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o + pci_bus.o pci_kvm_hook.o obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c new file mode 100644 index 000000000000..ff34baf50a3e --- /dev/null +++ b/arch/s390/pci/pci_kvm_hook.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VFIO ZPCI devices support + * + * Copyright (C) IBM Corp. 2022. All rights reserved. + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + */ +#include <linux/kvm_host.h> + +struct zpci_kvm_hook zpci_kvm_hook; +EXPORT_SYMBOL_GPL(zpci_kvm_hook); diff --git a/arch/sh/include/asm/bitops-op32.h b/arch/sh/include/asm/bitops-op32.h index 565a85d8b7fb..5ace89b46507 100644 --- a/arch/sh/include/asm/bitops-op32.h +++ b/arch/sh/include/asm/bitops-op32.h @@ -135,16 +135,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & mask) != 0; } -/** - * arch_test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include <asm-generic/bitops/non-instrumented-non-atomic.h> diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 79e38afd4b91..e719af8bdf56 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1011,7 +1011,7 @@ error_kzalloc: static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 19cd7ed6ec3c..4b6d1b526bc1 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -65,20 +65,6 @@ extern void setup_clear_cpu_cap(unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) - -/* - * Workaround for the sake of BPF compilation which utilizes kernel - * headers, but clang does not support ASM GOTO and fails the build. - */ -#ifndef __BPF_TRACING__ -#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" -#endif - -#define static_cpu_has(bit) boot_cpu_has(bit) - -#else - /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use @@ -137,7 +123,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 7854685c5f25..bafbd905e6e7 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -286,10 +286,6 @@ vdso_install: archprepare: checkbin checkbin: -ifndef CONFIG_CC_HAS_ASM_GOTO - @echo Compiler lacks asm-goto support. - @exit 1 -endif ifdef CONFIG_RETPOLINE ifeq ($(RETPOLINE_CFLAGS),) @echo "You are building kernel with non-retpoline compiler." >&2 diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 4910bf230d7b..62208ec04ca4 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr); void snp_set_page_shared(unsigned long paddr); void sev_prep_identity_maps(unsigned long top_level_pgt); #else -static inline void sev_enable(struct boot_params *bp) { } +static inline void sev_enable(struct boot_params *bp) +{ + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 unconditionally (thus here in this stub too) to + * ensure that uninitialized values from buggy bootloaders aren't + * propagated. + */ + if (bp) + bp->cc_blob_address = 0; +} static inline void sev_es_shutdown_ghcb(void) { } static inline bool sev_es_check_ghcb_fault(unsigned long address) { diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 52f989f6acc2..c93930d5ccbd 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -277,6 +277,14 @@ void sev_enable(struct boot_params *bp) bool snp; /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + + /* * Setup/preliminary detection of SNP. This will be sanity-checked * against CPUID/MSR values later. */ diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config index d9fc7139fd46..581296255b39 100644 --- a/arch/x86/configs/xen.config +++ b/arch/x86/configs/xen.config @@ -14,7 +14,6 @@ CONFIG_CPU_FREQ=y # x86 xen specific config options CONFIG_XEN_PVH=y -CONFIG_XEN_MAX_DOMAIN_MEMORY=500 CONFIG_XEN_SAVE_RESTORE=y # CONFIG_XEN_DEBUG_FS is not set CONFIG_XEN_MCE_LOG=y diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 682338e7e2a3..4dd19819053a 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat) * Interrupts are off on entry. */ ASM_CLAC /* Do this early to minimize exposure */ - SWAPGS + ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV /* * User tracing code (ptrace or signal handlers) might assume that diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2db93498ff71..c601939a74b1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4052,8 +4052,9 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) /* Disable guest PEBS if host PEBS is enabled. */ arr[pebs_enable].guest = 0; } else { - /* Disable guest PEBS for cross-mapped PEBS counters. */ + /* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */ arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask; + arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask; /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */ arr[global_ctrl].guest |= arr[pebs_enable].guest; } @@ -6291,10 +6292,8 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; - x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; @@ -6337,10 +6336,8 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; - x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.lbr_pt_coexist = true; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ba60427caa6d..de1f55d51784 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -291,6 +291,7 @@ static u64 load_latency_data(struct perf_event *event, u64 status) static u64 store_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; + union perf_mem_data_src src; u64 val; dse.val = status; @@ -304,7 +305,14 @@ static u64 store_latency_data(struct perf_event *event, u64 status) val |= P(BLK, NA); - return val; + /* + * the pebs_data_source table is only for loads + * so override the mem_op to say STORE instead + */ + src.val = val; + src.mem_op = P(OP,STORE); + + return src.val; } struct pebs_record_core { @@ -822,7 +830,7 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { struct event_constraint intel_grt_pebs_event_constraints[] = { /* Allow all events as PEBS with no flags */ - INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf), + INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), EVENT_CONSTRAINT_END }; @@ -2262,6 +2270,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.pebs_capable = ~0ULL; pebs_qual = "-baseline"; x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 4f70fb6c2c1e..47fca6a7a8bc 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { reg->config = mask; + + /* + * The Arch LBR HW can retrieve the common branch types + * from the LBR_INFO. It doesn't require the high overhead + * SW disassemble. + * Enable the branch type by default for the Arch LBR. + */ + reg->reg |= X86_BR_TYPE_SAVE; return 0; } diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index ce440011cc4e..1ef4f7861e2e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid) return 0; } +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * SNB IMC counters are 32-bit and are laid out back to back + * in MMIO space. Therefore we must use a 32-bit accessor function + * using readq() from uncore_mmio_read_counter() causes problems + * because it is reading 64-bit at a time. This is okay for the + * uncore_perf_event_update() function because it drops the upper + * 32-bits but not okay for plain uncore_read_counter() as invoked + * in uncore_pmu_event_start(). + */ + return (u64)readl(box->io_addr + hwc->event_base); +} + static struct pmu snb_uncore_imc_pmu = { .task_ctx_nr = perf_invalid_context, .event_init = snb_uncore_imc_event_init, @@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = uncore_mmio_read_counter, + .read_counter = snb_uncore_imc_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 973c6bd17f98..0fe9de58af31 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -207,6 +207,20 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr) variable_test_bit(nr, addr); } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea34cc31b047..1a85e1fb0922 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -155,20 +155,6 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) - -/* - * Workaround for the sake of BPF compilation which utilizes kernel - * headers, but clang does not support ASM GOTO and fails the build. - */ -#ifndef __BPF_TRACING__ -#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" -#endif - -#define static_cpu_has(bit) boot_cpu_has(bit) - -#else - /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use @@ -208,7 +194,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 235dc85c91c3..ef4775c6db01 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -457,7 +457,8 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 503622627400..991e31cfde94 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -64,4 +64,6 @@ #define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4)) #define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8)) +#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */ + #endif diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 689880eca9ba..9b08082a5d9f 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -31,6 +31,16 @@ #define __noendbr __attribute__((nocf_check)) +/* + * Create a dummy function pointer reference to prevent objtool from marking + * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by + * apply_ibt_endbr()). + */ +#define IBT_NOSEAL(fname) \ + ".pushsection .discard.ibt_endbr_noseal\n\t" \ + _ASM_PTR fname "\n\t" \ + ".popsection\n\t" + static inline __attribute_const__ u32 gen_endbr(void) { u32 endbr; @@ -84,6 +94,7 @@ extern __noendbr void ibt_restore(u64 save); #ifndef __ASSEMBLY__ #define ASM_ENDBR +#define IBT_NOSEAL(name) #define __noendbr diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index def6ca121111..aeb38023a703 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -27,6 +27,7 @@ * _X - regular server parts * _D - micro server parts * _N,_P - other mobile parts + * _S - other client parts * * Historical OPTDIFFs: * @@ -112,6 +113,7 @@ #define INTEL_FAM6_RAPTORLAKE 0xB7 #define INTEL_FAM6_RAPTORLAKE_P 0xBA +#define INTEL_FAM6_RAPTORLAKE_S 0xBF /* "Small Core" Processors (Atom) */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5ffa578cafe1..2c96c43c313a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -53,7 +53,7 @@ #define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) /* memory slots that are not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 3 +#define KVM_INTERNAL_MEM_SLOTS 3 #define KVM_HALT_POLL_NS_DEFAULT 200000 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e64fd20778b6..c936ce9f0c47 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -35,33 +35,56 @@ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ /* + * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. + */ +#define __FILL_RETURN_SLOT \ + ANNOTATE_INTRA_FUNCTION_CALL; \ + call 772f; \ + int3; \ +772: + +/* + * Stuff the entire RSB. + * * Google experimented with loop-unrolling and this turned out to be * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - ANNOTATE_INTRA_FUNCTION_CALL; \ - call 772f; \ -773: /* speculation trap */ \ - UNWIND_HINT_EMPTY; \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - ANNOTATE_INTRA_FUNCTION_CALL; \ - call 774f; \ -775: /* speculation trap */ \ - UNWIND_HINT_EMPTY; \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - add $(BITS_PER_LONG/8) * 2, sp; \ - dec reg; \ - jnz 771b; \ - /* barrier for jnz misprediction */ \ +#ifdef CONFIG_X86_64 +#define __FILL_RETURN_BUFFER(reg, nr) \ + mov $(nr/2), reg; \ +771: \ + __FILL_RETURN_SLOT \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ + dec reg; \ + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; +#else +/* + * i386 doesn't unconditionally have LFENCE, as such it can't + * do a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr) \ + .rept nr; \ + __FILL_RETURN_SLOT; \ + .endr; \ + add $(BITS_PER_LONG/8) * nr, %_ASM_SP; +#endif + +/* + * Stuff a single RSB slot. + * + * To mitigate Post-Barrier RSB speculation, one CALL instruction must be + * forced to retire before letting a RET instruction execute. + * + * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed + * before this point. + */ +#define __FILL_ONE_RETURN \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8), %_ASM_SP; \ lfence; #ifdef __ASSEMBLY__ @@ -132,28 +155,15 @@ #endif .endm -.macro ISSUE_UNBALANCED_RET_GUARD - ANNOTATE_INTRA_FUNCTION_CALL - call .Lunbalanced_ret_guard_\@ - int3 -.Lunbalanced_ret_guard_\@: - add $(BITS_PER_LONG/8), %_ASM_SP - lfence -.endm - /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 -.ifb \ftr2 - ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr -.else - ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 -.endif - __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) -.Lunbalanced_\@: - ISSUE_UNBALANCED_RET_GUARD +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ + __stringify(__FILL_ONE_RETURN), \ftr2 + .Lskip_rsb_\@: .endm diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8a9eba191516..7fa611216417 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -11,7 +11,7 @@ #define __CLOBBERS_MEM(clb...) "memory", ## clb -#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO) +#ifndef __GCC_ASM_FLAG_OUTPUTS__ /* Use asm goto */ @@ -27,7 +27,7 @@ cc_label: c = true; \ c; \ }) -#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ /* Use flags output or a set instruction */ @@ -40,7 +40,7 @@ cc_label: c = true; \ c; \ }) -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ #define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 4a23e52fe0ee..ebc271bb6d8e 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -195,7 +195,7 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void snp_abort(void); +void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 8338b0432b50..46b4f1f7f354 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -77,58 +77,18 @@ static inline unsigned long find_zero(unsigned long mask) * and the next page not being mapped, take the exception and * return zeroes in the non-existing part. */ -#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long offset, data; unsigned long ret; - asm_volatile_goto( + asm volatile( "1: mov %[mem], %[ret]\n" - - _ASM_EXTABLE(1b, %l[do_exception]) - - : [ret] "=r" (ret) - : [mem] "m" (*(unsigned long *)addr) - : : do_exception); - - return ret; - -do_exception: - offset = (unsigned long)addr & (sizeof(long) - 1); - addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1)); - data = *(unsigned long *)addr; - ret = data >> offset * 8; - - return ret; -} - -#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ - -static inline unsigned long load_unaligned_zeropad(const void *addr) -{ - unsigned long offset, data; - unsigned long ret, err = 0; - - asm( "1: mov %[mem], %[ret]\n" "2:\n" - - _ASM_EXTABLE_FAULT(1b, 2b) - - : [ret] "=&r" (ret), "+a" (err) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD) + : [ret] "=r" (ret) : [mem] "m" (*(unsigned long *)addr)); - if (unlikely(err)) { - offset = (unsigned long)addr & (sizeof(long) - 1); - addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1)); - data = *(unsigned long *)addr; - ret = data >> offset * 8; - } - return ret; } -#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ - #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 510d85261132..da7c361f47e0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -433,7 +433,8 @@ static void __init mmio_select_mitigation(void) u64 ia32_cap; if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || - cpu_mitigations_off()) { + boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || + cpu_mitigations_off()) { mmio_mitigation = MMIO_MITIGATION_OFF; return; } @@ -538,6 +539,8 @@ out: pr_info("TAA: %s\n", taa_strings[taa_mitigation]); if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); + else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + pr_info("MMIO Stale Data: Unknown: No mitigations\n"); } static void __init md_clear_select_mitigation(void) @@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_state(char *buf) static ssize_t mmio_stale_data_show_state(char *buf) { + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return sysfs_emit(buf, "Unknown: No mitigations\n"); + if (mmio_mitigation == MMIO_MITIGATION_OFF) return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); @@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return srbds_show_state(buf); case X86_BUG_MMIO_STALE_DATA: + case X86_BUG_MMIO_UNKNOWN: return mmio_stale_data_show_state(buf); case X86_BUG_RETBLEED: @@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) { - return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); + else + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64a73f415f03..3e508f239098 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) #define NO_SPECTRE_V2 BIT(8) -#define NO_EIBRS_PBRSB BIT(9) +#define NO_MMIO BIT(9) +#define NO_EIBRS_PBRSB BIT(10) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ + VULNWL_INTEL(TIGERLAKE, NO_MMIO), + VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), + VULNWL_INTEL(ALDERLAKE, NO_MMIO), + VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), @@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), {} }; @@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Affected CPU list is generally enough to enumerate the vulnerability, * but for virtualization case check for ARCH_CAP MSR bits also, VMM may * not want the guest to enumerate the bug. + * + * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, + * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (cpu_matches(cpu_vuln_blacklist, MMIO) && - !arch_cap_mmio_immune(ia32_cap)) - setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (!arch_cap_mmio_immune(ia32_cap)) { + if (cpu_matches(cpu_vuln_blacklist, MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); + } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 74167dc5f55e..4c3c27b6aea3 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -505,7 +505,7 @@ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); if (p->ainsn.jcc.type >= 0xe) - match = match && (regs->flags & X86_EFLAGS_ZF); + match = match || (regs->flags & X86_EFLAGS_ZF); } __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 63dc626627a0..a428c62330d3 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -701,7 +701,13 @@ e_term: void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + /* + * This can be invoked in early boot while running identity mapped, so + * use an open coded check for SNP instead of using cc_platform_has(). + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; /* @@ -717,7 +723,13 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + /* + * This can be invoked in early boot while running identity mapped, so + * use an open coded check for SNP instead of using cc_platform_has(). + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; /* Invalidate the memory pages before they are marked shared in the RMP table. */ @@ -2100,7 +2112,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init snp_abort(void) +void __init __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 38185aedf7d1..0ea57da92940 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip); static struct orc_entry *orc_ftrace_find(unsigned long ip) { struct ftrace_ops *ops; - unsigned long caller; + unsigned long tramp_addr, offset; ops = ftrace_ops_trampoline(ip); if (!ops) return NULL; + /* Set tramp_addr to the start of the code copied by the trampoline */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) - caller = (unsigned long)ftrace_regs_call; + tramp_addr = (unsigned long)ftrace_regs_caller; else - caller = (unsigned long)ftrace_call; + tramp_addr = (unsigned long)ftrace_caller; + + /* Now place tramp_addr to the location within the trampoline ip is at */ + offset = ip - ops->trampoline; + tramp_addr += offset; /* Prevent unlikely recursion */ - if (ip == caller) + if (ip == tramp_addr) return NULL; - return orc_find(caller); + return orc_find(tramp_addr); } #else static struct orc_entry *orc_ftrace_find(unsigned long ip) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4eeb7c75dfa..d5ec3a2ed5a4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -326,7 +326,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ name ":\n\t" \ - ASM_ENDBR + ASM_ENDBR \ + IBT_NOSEAL(name) #define FOP_FUNC(name) \ __FOP_FUNC(#name) @@ -446,27 +447,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); FOP_END /* Special case for SETcc - 1 instruction per cc */ - -/* - * Depending on .config the SETcc functions look like: - * - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] - * SETcc %al [3 bytes] - * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] - * INT3 [1 byte; CONFIG_SLS] - */ -#define SETCC_ALIGN 16 - #define FOP_SETCC(op) \ - ".align " __stringify(SETCC_ALIGN) " \n\t" \ - ".type " #op ", @function \n\t" \ - #op ": \n\t" \ - ASM_ENDBR \ + FOP_FUNC(op) \ #op " %al \n\t" \ - __FOP_RET(#op) \ - ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" + FOP_RET(op) -__FOP_START(setcc, SETCC_ALIGN) +FOP_START(setcc) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) @@ -493,7 +479,7 @@ FOP_END; /* * XXX: inoutclob user must know where the argument is being expanded. - * Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault. + * Using asm goto would allow us to remove _fault. */ #define asm_safe(insn, inoutclob...) \ ({ \ @@ -1079,7 +1065,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; - void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf); + void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; asm("push %[flags]; popf; " CALL_NOSPEC diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index eccddb136954..e418ef3ecfcb 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2914,7 +2914,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; __direct_pte_prefetch(vcpu, sp, sptep); @@ -2928,7 +2928,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * * There are several ways to safely use this helper: * - * - Check mmu_notifier_retry_hva() after grabbing the mapping level, before + * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * @@ -3056,7 +3056,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return; /* - * mmu_notifier_retry() was successful and mmu_lock is held, so + * mmu_invalidate_retry() was successful and mmu_lock is held, so * the pmd can't be split from under us. */ fault->goal_level = fault->req_level; @@ -4203,7 +4203,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, return true; return fault->slot && - mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva); + mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) @@ -4227,7 +4227,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r) return r; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); @@ -5361,19 +5361,6 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } -static bool need_remote_flush(u64 old, u64 new) -{ - if (!is_shadow_present_pte(old)) - return false; - if (!is_shadow_present_pte(new)) - return true; - if ((old ^ new) & SPTE_BASE_ADDR_MASK) - return true; - old ^= shadow_nx_mask; - new ^= shadow_nx_mask; - return (old & ~new & SPTE_PERM_MASK) != 0; -} - static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) { @@ -5519,7 +5506,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL); if (gentry && sp->role.level != PG_LEVEL_4K) ++vcpu->kvm->stat.mmu_pde_zapped; - if (need_remote_flush(entry, *spte)) + if (is_shadow_present_pte(entry)) flush = true; ++spte; } @@ -6055,7 +6042,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); - kvm_inc_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); @@ -6069,7 +6056,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end - gfn_start); - kvm_dec_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); write_unlock(&kvm->mmu_lock); } @@ -6085,47 +6072,18 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, const struct kvm_memory_slot *memslot, int start_level) { - bool flush = false; - if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); - flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, - start_level, KVM_MAX_HUGEPAGE_LEVEL, - false); + slot_handle_level(kvm, memslot, slot_rmap_write_protect, + start_level, KVM_MAX_HUGEPAGE_LEVEL, false); write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { read_lock(&kvm->mmu_lock); - flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); + kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); read_unlock(&kvm->mmu_lock); } - - /* - * Flush TLBs if any SPTEs had to be write-protected to ensure that - * guest writes are reflected in the dirty bitmap before the memslot - * update completes, i.e. before enabling dirty logging is visible to - * userspace. - * - * Perform the TLB flush outside the mmu_lock to reduce the amount of - * time the lock is held. However, this does mean that another CPU can - * now grab mmu_lock and encounter a write-protected SPTE while CPUs - * still have a writable mapping for the associated GFN in their TLB. - * - * This is safe but requires KVM to be careful when making decisions - * based on the write-protection status of an SPTE. Specifically, KVM - * also write-protects SPTEs to monitor changes to guest page tables - * during shadow paging, and must guarantee no CPUs can write to those - * page before the lock is dropped. As mentioned in the previous - * paragraph, a write-protected SPTE is no guarantee that CPU cannot - * perform writes. So to determine if a TLB flush is truly required, KVM - * will clear a separate software-only bit (MMU-writable) and skip the - * flush if-and-only-if this bit was already clear. - * - * See is_writable_pte() for more details. - */ - if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); } static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min) @@ -6493,32 +6451,30 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { - bool flush = false; - if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); /* * Clear dirty bits only on 4k SPTEs since the legacy MMU only * support dirty logging at a 4k granularity. */ - flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); + slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { read_lock(&kvm->mmu_lock); - flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); + kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); read_unlock(&kvm->mmu_lock); } /* + * The caller will flush the TLBs after this function returns. + * * It's also safe to flush TLBs out of mmu lock here as currently this * function is only used for dirty logging, in which case flushing TLB * out of mmu lock also guarantees no dirty pages will be lost in * dirty_bitmap. */ - if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); } void kvm_mmu_zap_all(struct kvm *kvm) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f5958071220c..39e0205e7300 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -589,7 +589,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; if (sp->role.direct) @@ -838,7 +838,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else fault->max_level = walker.level; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index f3744eea45f5..7670c13ce251 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -343,7 +343,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, } /* - * An shadow-present leaf SPTE may be non-writable for 3 possible reasons: + * A shadow-present leaf SPTE may be non-writable for 4 possible reasons: * * 1. To intercept writes for dirty logging. KVM write-protects huge pages * so that they can be split be split down into the dirty logging @@ -361,8 +361,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, * read-only memslot or guest memory backed by a read-only VMA. Writes to * such pages are disallowed entirely. * - * To keep track of why a given SPTE is write-protected, KVM uses 2 - * software-only bits in the SPTE: + * 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this + * case, the SPTE is access-protected, not just write-protected! + * + * For cases #1 and #4, KVM can safely make such SPTEs writable without taking + * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it. + * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits + * in the SPTE: * * shadow_mmu_writable_mask, aka MMU-writable - * Cleared on SPTEs that KVM is currently write-protecting for shadow paging @@ -391,7 +396,8 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging * (which does not clear the MMU-writable bit), does not flush TLBs before * dropping the lock, as it only needs to synchronize guest writes with the - * dirty bitmap. + * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for + * access-tracking via the clear_young() MMU notifier also does not flush TLBs. * * So, there is the problem: clearing the MMU-writable bit can encounter a * write-protected SPTE while CPUs still have writable mappings for that SPTE diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d7f8331d6f7e..c9b49a09e6b5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -843,8 +843,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) return true; - return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, - MSR_IA32_SPEC_CTRL); + return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr); } unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 205ebdc2b11b..43a6a7efc6ec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1557,12 +1557,32 @@ static const u32 msr_based_features_all[] = { static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; static unsigned int num_msr_based_features; +/* + * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM + * does not yet virtualize. These include: + * 10 - MISC_PACKAGE_CTRLS + * 11 - ENERGY_FILTERING_CTL + * 12 - DOITM + * 18 - FB_CLEAR_CTRL + * 21 - XAPIC_DISABLE_STATUS + * 23 - OVERCLOCKING_STATUS + */ + +#define KVM_SUPPORTED_ARCH_CAP \ + (ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \ + ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ + ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ + ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO) + static u64 kvm_get_arch_capabilities(void) { u64 data = 0; - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); + data &= KVM_SUPPORTED_ARCH_CAP; + } /* * If nx_huge_pages is enabled, KVM's shadow paging will ensure that @@ -1610,9 +1630,6 @@ static u64 kvm_get_arch_capabilities(void) */ } - /* Guests don't need to know "Fill buffer clear control" exists */ - data &= ~ARCH_CAP_FB_CLEAR_CTRL; - return data; } @@ -10652,7 +10669,8 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) case KVM_MP_STATE_INIT_RECEIVED: break; default: - return -EINTR; + WARN_ON_ONCE(1); + break; } return 1; } @@ -11093,9 +11111,22 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_load(vcpu); - if (!lapic_in_kernel(vcpu) && - mp_state->mp_state != KVM_MP_STATE_RUNNABLE) + switch (mp_state->mp_state) { + case KVM_MP_STATE_UNINITIALIZED: + case KVM_MP_STATE_HALTED: + case KVM_MP_STATE_AP_RESET_HOLD: + case KVM_MP_STATE_INIT_RECEIVED: + case KVM_MP_STATE_SIPI_RECEIVED: + if (!lapic_in_kernel(vcpu)) + goto out; + break; + + case KVM_MP_STATE_RUNNABLE: + break; + + default: goto out; + } /* * KVM_MP_STATE_INIT_RECEIVED means the processor is in @@ -11563,7 +11594,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64), GFP_KERNEL_ACCOUNT); if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks) - goto fail_free_pio_data; + goto fail_free_mce_banks; vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, @@ -11617,7 +11648,6 @@ free_wbinvd_dirty_mask: fail_free_mce_banks: kfree(vcpu->arch.mce_banks); kfree(vcpu->arch.mci_ctl2_banks); -fail_free_pio_data: free_page((unsigned long)vcpu->arch.pio_data); fail_free_lapic: kvm_free_lapic(vcpu); @@ -12473,6 +12503,50 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, } else { kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K); } + + /* + * Unconditionally flush the TLBs after enabling dirty logging. + * A flush is almost always going to be necessary (see below), + * and unconditionally flushing allows the helpers to omit + * the subtly complex checks when removing write access. + * + * Do the flush outside of mmu_lock to reduce the amount of + * time mmu_lock is held. Flushing after dropping mmu_lock is + * safe as KVM only needs to guarantee the slot is fully + * write-protected before returning to userspace, i.e. before + * userspace can consume the dirty status. + * + * Flushing outside of mmu_lock requires KVM to be careful when + * making decisions based on writable status of an SPTE, e.g. a + * !writable SPTE doesn't guarantee a CPU can't perform writes. + * + * Specifically, KVM also write-protects guest page tables to + * monitor changes when using shadow paging, and must guarantee + * no CPUs can write to those page before mmu_lock is dropped. + * Because CPUs may have stale TLB entries at this point, a + * !writable SPTE doesn't guarantee CPUs can't perform writes. + * + * KVM also allows making SPTES writable outside of mmu_lock, + * e.g. to allow dirty logging without taking mmu_lock. + * + * To handle these scenarios, KVM uses a separate software-only + * bit (MMU-writable) to track if a SPTE is !writable due to + * a guest page table being write-protected (KVM clears the + * MMU-writable flag when write-protecting for shadow paging). + * + * The use of MMU-writable is also the primary motivation for + * the unconditional flush. Because KVM must guarantee that a + * CPU doesn't contain stale, writable TLB entries for a + * !MMU-writable SPTE, KVM must flush if it encounters any + * MMU-writable SPTE regardless of whether the actual hardware + * writable bit was set. I.e. KVM is almost guaranteed to need + * to flush, while unconditionally flushing allows the "remove + * write access" helpers to ignore MMU-writable entirely. + * + * See is_writable_pte() for more details (the case involving + * access-tracked SPTEs is particularly relevant). + */ + kvm_arch_flush_remote_tlbs_memslot(kvm, new); } } diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 331310c29349..60814e110a54 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -41,6 +41,59 @@ static bool ex_handler_default(const struct exception_table_entry *e, return true; } +/* + * This is the *very* rare case where we do a "load_unaligned_zeropad()" + * and it's a page crosser into a non-existent page. + * + * This happens when we optimistically load a pathname a word-at-a-time + * and the name is less than the full word and the next page is not + * mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC. + * + * NOTE! The faulting address is always a 'mov mem,reg' type instruction + * of size 'long', and the exception fixup must always point to right + * after the instruction. + */ +static bool ex_handler_zeropad(const struct exception_table_entry *e, + struct pt_regs *regs, + unsigned long fault_addr) +{ + struct insn insn; + const unsigned long mask = sizeof(long) - 1; + unsigned long offset, addr, next_ip, len; + unsigned long *reg; + + next_ip = ex_fixup_addr(e); + len = next_ip - regs->ip; + if (len > MAX_INSN_SIZE) + return false; + + if (insn_decode(&insn, (void *) regs->ip, len, INSN_MODE_KERN)) + return false; + if (insn.length != len) + return false; + + if (insn.opcode.bytes[0] != 0x8b) + return false; + if (insn.opnd_bytes != sizeof(long)) + return false; + + addr = (unsigned long) insn_get_addr_ref(&insn, regs); + if (addr == ~0ul) + return false; + + offset = addr & mask; + addr = addr & ~mask; + if (fault_addr != addr + sizeof(long)) + return false; + + reg = insn_get_modrm_reg_ptr(&insn, regs); + if (!reg) + return false; + + *reg = *(unsigned long *)addr >> (offset * 8); + return ex_handler_default(e, regs); +} + static bool ex_handler_fault(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { @@ -217,6 +270,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, return ex_handler_sgx(e, regs, trapnr); case EX_TYPE_UCOPY_LEN: return ex_handler_ucopy_len(e, regs, trapnr, reg, imm); + case EX_TYPE_ZEROPAD: + return ex_handler_zeropad(e, regs, fault_addr); } BUG(); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 39c5246964a9..0fe690ebc269 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -645,7 +645,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, pages++; spin_lock(&init_mm.page_table_lock); - prot = __pgprot(pgprot_val(prot) | __PAGE_KERNEL_LARGE); + prot = __pgprot(pgprot_val(prot) | _PAGE_PSE); set_pte_init((pte_t *)pud, pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index d5ef64ddd35e..66a209f7eb86 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -62,6 +62,7 @@ static bool __read_mostly pat_bp_initialized; static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); +static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT); static bool __read_mostly pat_bp_enabled; static bool __read_mostly pat_cm_initialized; @@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason) static int __init nopat(char *str) { pat_disable("PAT support disabled via boot option."); + pat_force_disabled = true; return 0; } early_param("nopat", nopat); @@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -void init_cache_modes(void) +void __init init_cache_modes(void) { u64 pat = 0; @@ -313,6 +315,12 @@ void init_cache_modes(void) */ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) { + /* + * Clearly PAT is enabled underneath. Allow pat_enabled() to + * reflect this. + */ + pat_bp_enabled = true; } __init_cache_modes(pat); |