diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/boot/string.c | 8 | ||||
-rw-r--r-- | arch/x86/entry/calling.h | 17 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 21 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/vdso/gettimeofday.h | 36 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 105 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/cyrix.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/debugfs.c | 46 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 16 | ||||
-rw-r--r-- | arch/x86/lib/cpu.c | 1 | ||||
-rw-r--r-- | arch/x86/purgatory/Makefile | 36 | ||||
-rw-r--r-- | arch/x86/purgatory/purgatory.c | 6 | ||||
-rw-r--r-- | arch/x86/purgatory/string.c | 23 |
20 files changed, 290 insertions, 105 deletions
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 401e30ca0a75..8272a4492844 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -37,6 +37,14 @@ int memcmp(const void *s1, const void *s2, size_t len) return diff; } +/* + * Clang may lower `memcmp == 0` to `bcmp == 0`. + */ +int bcmp(const void *s1, const void *s2, size_t len) +{ + return memcmp(s1, s2, len); +} + int strcmp(const char *str1, const char *str2) { const unsigned char *s1 = (const unsigned char *)str1; diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 830bd984182b..515c0ceeb4a3 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -314,6 +314,23 @@ For 32-bit we have the following conventions - kernel is built with #endif +/* + * Mitigate Spectre v1 for conditional swapgs code paths. + * + * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to + * prevent a speculative swapgs when coming from kernel space. + * + * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path, + * to prevent the swapgs from getting speculatively skipped when coming from + * user space. + */ +.macro FENCE_SWAPGS_USER_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER +.endm +.macro FENCE_SWAPGS_KERNEL_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL +.endm + .macro STACKLEAK_ERASE_NOCLOBBER #ifdef CONFIG_GCC_PLUGIN_STACKLEAK PUSH_AND_CLEAR_REGS diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3f5a978a02a7..be9ca198c581 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -519,7 +519,7 @@ ENTRY(interrupt_entry) testb $3, CS-ORIG_RAX+8(%rsp) jz 1f SWAPGS - + FENCE_SWAPGS_USER_ENTRY /* * Switch to the thread stack. The IRET frame and orig_ax are * on the stack, as well as the return address. RDI..R12 are @@ -549,8 +549,10 @@ ENTRY(interrupt_entry) UNWIND_HINT_FUNC movq (%rdi), %rdi + jmp 2f 1: - + FENCE_SWAPGS_KERNEL_ENTRY +2: PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -1238,6 +1240,13 @@ ENTRY(paranoid_entry) */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + /* + * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an + * unconditional CR3 write, even in the PTI case. So do an lfence + * to prevent GS speculation, regardless of whether PTI is enabled. + */ + FENCE_SWAPGS_KERNEL_ENTRY + ret END(paranoid_entry) @@ -1288,6 +1297,7 @@ ENTRY(error_entry) * from user mode due to an IRET fault. */ SWAPGS + FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax @@ -1301,6 +1311,8 @@ ENTRY(error_entry) pushq %r12 ret +.Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY .Lerror_entry_done: ret @@ -1318,7 +1330,7 @@ ENTRY(error_entry) cmpq %rax, RIP+8(%rsp) je .Lbstep_iret cmpq $.Lgs_change, RIP+8(%rsp) - jne .Lerror_entry_done + jne .Lerror_entry_done_lfence /* * hack: .Lgs_change can fail with user gsbase. If this happens, fix up @@ -1326,6 +1338,7 @@ ENTRY(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS + FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax jmp .Lerror_entry_done @@ -1340,6 +1353,7 @@ ENTRY(error_entry) * gsbase and CR3. Switch to kernel gsbase and CR3: */ SWAPGS + FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax /* @@ -1431,6 +1445,7 @@ ENTRY(nmi) swapgs cld + FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 998c2cc08363..e880f2408e29 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -281,6 +281,8 @@ #define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ #define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -394,5 +396,6 @@ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7b0a4ee77313..74e88e5edd9c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,8 @@ #include <asm/kvm_vcpu_regs.h> #include <asm/hyperv-tlfs.h> +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 #define KVM_MAX_VCPU_ID 1023 @@ -1175,6 +1177,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, bool *expired); diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index ae91429129a6..ba71a63cdac4 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -96,6 +96,8 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #else +#define VDSO_HAS_32BIT_FALLBACK 1 + static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { @@ -114,6 +116,23 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) } static __always_inline +long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline long gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) { @@ -148,6 +167,23 @@ clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } +static __always_inline +long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + #endif #ifdef CONFIG_PARAVIRT_CLOCK diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 801ecd1c3fd5..c6fa3ef10b4e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,6 +34,7 @@ #include "cpu.h" +static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); @@ -98,17 +99,11 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_STIBP)) x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper spectre mitigation before patching alternatives */ + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); - - /* - * Select proper mitigation for any exposure to the Speculative Store - * Bypass vulnerability. - */ ssb_select_mitigation(); - l1tf_select_mitigation(); - mds_select_mitigation(); arch_smt_update(); @@ -274,6 +269,98 @@ static int __init mds_cmdline(char *str) early_param("mds", mds_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "Spectre V1 : " fmt + +enum spectre_v1_mitigation { + SPECTRE_V1_MITIGATION_NONE, + SPECTRE_V1_MITIGATION_AUTO, +}; + +static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init = + SPECTRE_V1_MITIGATION_AUTO; + +static const char * const spectre_v1_strings[] = { + [SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers", + [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization", +}; + +/* + * Does SMAP provide full mitigation against speculative kernel access to + * userspace? + */ +static bool smap_works_speculatively(void) +{ + if (!boot_cpu_has(X86_FEATURE_SMAP)) + return false; + + /* + * On CPUs which are vulnerable to Meltdown, SMAP does not + * prevent speculative access to user data in the L1 cache. + * Consider SMAP to be non-functional as a mitigation on these + * CPUs. + */ + if (boot_cpu_has(X86_BUG_CPU_MELTDOWN)) + return false; + + return true; +} + +static void __init spectre_v1_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) { + spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; + return; + } + + if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) { + /* + * With Spectre v1, a user can speculatively control either + * path of a conditional swapgs with a user-controlled GS + * value. The mitigation is to add lfences to both code paths. + * + * If FSGSBASE is enabled, the user can put a kernel address in + * GS, in which case SMAP provides no protection. + * + * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the + * FSGSBASE enablement patches have been merged. ] + * + * If FSGSBASE is disabled, the user can only put a user space + * address in GS. That makes an attack harder, but still + * possible if there's no SMAP protection. + */ + if (!smap_works_speculatively()) { + /* + * Mitigation can be provided from SWAPGS itself or + * PTI as the CR3 write in the Meltdown mitigation + * is serializing. + * + * If neither is there, mitigate with an LFENCE to + * stop speculation through swapgs. + */ + if (boot_cpu_has_bug(X86_BUG_SWAPGS) && + !boot_cpu_has(X86_FEATURE_PTI)) + setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER); + + /* + * Enable lfences in the kernel entry (non-swapgs) + * paths, to prevent user entry from speculatively + * skipping swapgs. + */ + setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL); + } + } + + pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]); +} + +static int __init nospectre_v1_cmdline(char *str) +{ + spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; + return 0; +} +early_param("nospectre_v1", nospectre_v1_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = @@ -1290,7 +1377,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr break; case X86_BUG_SPECTRE_V1: - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 11472178e17f..f125bf7ecb6f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1022,6 +1022,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_L1TF BIT(3) #define NO_MDS BIT(4) #define MSBDS_ONLY BIT(5) +#define NO_SWAPGS BIT(6) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -1048,30 +1049,38 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), VULNWL_INTEL(CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously + * being documented as such in the APM). But according to AMD, %gs is + * updated non-speculatively, and the issuing of %gs-relative memory + * operands will be blocked until the %gs update completes, which is + * good enough for our purposes. + */ /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), {} }; @@ -1108,6 +1117,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); } + if (!cpu_matches(NO_SWAPGS)) + setup_force_cpu_bug(X86_BUG_SWAPGS); + if (cpu_matches(NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 4296c702a3f7..72182809b333 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -98,6 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) case 7: if (size < 0x40) break; + /* Else, fall through */ case 6: case 5: case 4: diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b7f34fe2171e..4ab377c9fffe 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -308,9 +308,6 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) static void kvm_guest_cpu_init(void) { - if (!kvm_para_available()) - return; - if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); @@ -625,9 +622,6 @@ static void __init kvm_guest_init(void) { int i; - if (!kvm_para_available()) - return; - paravirt_ops_setup(); register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) @@ -848,8 +842,6 @@ asm( */ void __init kvm_spinlock_init(void) { - if (!kvm_para_available()) - return; /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0fdbe89d0754..3c5bbe8e4120 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -201,6 +201,7 @@ static int set_segment_reg(struct task_struct *task, case offsetof(struct user_regs_struct, ss): if (unlikely(value == 0)) return -EIO; + /* Else, fall through */ default: *pt_regs_access(task_pt_regs(task), offset) = value; diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 329361b69d5e..018aebce33ff 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -8,11 +8,6 @@ #include <linux/debugfs.h> #include "lapic.h" -bool kvm_arch_has_vcpu_debugfs(void) -{ - return true; -} - static int vcpu_get_timer_advance_ns(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; @@ -48,37 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n"); -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { - struct dentry *ret; - - ret = debugfs_create_file("tsc-offset", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_offset_fops); - if (!ret) - return -ENOMEM; + debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_offset_fops); - if (lapic_in_kernel(vcpu)) { - ret = debugfs_create_file("lapic_timer_advance_ns", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_timer_advance_ns_fops); - if (!ret) - return -ENOMEM; - } + if (lapic_in_kernel(vcpu)) + debugfs_create_file("lapic_timer_advance_ns", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_timer_advance_ns_fops); if (kvm_has_tsc_control) { - ret = debugfs_create_file("tsc-scaling-ratio", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_scaling_fops); - if (!ret) - return -ENOMEM; - ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_scaling_frac_fops); - if (!ret) - return -ENOMEM; - + debugfs_create_file("tsc-scaling-ratio", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_scaling_fops); + debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_scaling_frac_fops); } - - return 0; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0aa158657f20..685d17c11461 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1548,7 +1548,6 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) static void apic_timer_expired(struct kvm_lapic *apic) { struct kvm_vcpu *vcpu = apic->vcpu; - struct swait_queue_head *q = &vcpu->wq; struct kvm_timer *ktimer = &apic->lapic_timer; if (atomic_read(&apic->lapic_timer.pending)) @@ -1566,13 +1565,6 @@ static void apic_timer_expired(struct kvm_lapic *apic) atomic_inc(&apic->lapic_timer.pending); kvm_set_pending_timer(vcpu); - - /* - * For x86, the atomic_inc() is serialized, thus - * using swait_active() is safe. - */ - if (swait_active(q)) - swake_up_one(q); } static void start_sw_tscdeadline(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7eafc6907861..d685491fce4d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5190,6 +5190,11 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_vcpu_wake_up(vcpu); } +static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return false; +} + static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) { unsigned long flags; @@ -7314,6 +7319,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .pmu_ops = &amd_pmu_ops, .deliver_posted_interrupt = svm_deliver_avic_intr, + .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, .setup_mce = svm_setup_mce, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 074385c86c09..42ed3faa6af8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6117,6 +6117,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return pi_test_on(vcpu_to_pi_desc(vcpu)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -7726,6 +7731,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, .deliver_posted_interrupt = vmx_deliver_posted_interrupt, + .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, .set_tss_addr = vmx_set_tss_addr, .set_identity_map_addr = vmx_set_identity_map_addr, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c6d951cbd76c..93b0bd45ac73 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9698,6 +9698,22 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) + return true; + + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + kvm_test_request(KVM_REQ_SMI, vcpu) || + kvm_test_request(KVM_REQ_EVENT, vcpu)) + return true; + + if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu)) + return true; + + return false; +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return vcpu->arch.preempted_in_kernel; diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index 04967cdce5d1..7ad68917a51e 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/types.h> #include <linux/export.h> +#include <asm/cpu.h> unsigned int x86_family(unsigned int sig) { diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 3cf302b26332..8901a1f89cf5 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -6,6 +6,9 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) +$(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE + $(call if_changed_rule,cc_o_c) + $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) @@ -17,11 +20,34 @@ KCOV_INSTRUMENT := n # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not -# sure how to relocate those. Like kexec-tools, use custom flags. - -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large -KBUILD_CFLAGS += -m$(BITS) -KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +# sure how to relocate those. +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_sha256.o += $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_purgatory.o += $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_string.o += $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_kexec-purgatory.o += $(CC_FLAGS_FTRACE) +endif + +ifdef CONFIG_STACKPROTECTOR +CFLAGS_REMOVE_sha256.o += -fstack-protector +CFLAGS_REMOVE_purgatory.o += -fstack-protector +CFLAGS_REMOVE_string.o += -fstack-protector +CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector +endif + +ifdef CONFIG_STACKPROTECTOR_STRONG +CFLAGS_REMOVE_sha256.o += -fstack-protector-strong +CFLAGS_REMOVE_purgatory.o += -fstack-protector-strong +CFLAGS_REMOVE_string.o += -fstack-protector-strong +CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector-strong +endif + +ifdef CONFIG_RETPOLINE +CFLAGS_REMOVE_sha256.o += $(RETPOLINE_CFLAGS) +CFLAGS_REMOVE_purgatory.o += $(RETPOLINE_CFLAGS) +CFLAGS_REMOVE_string.o += $(RETPOLINE_CFLAGS) +CFLAGS_REMOVE_kexec-purgatory.o += $(RETPOLINE_CFLAGS) +endif $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 6d8d5a34c377..b607bda786f6 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -68,3 +68,9 @@ void purgatory(void) } copy_backup_region(); } + +/* + * Defined in order to reuse memcpy() and memset() from + * arch/x86/boot/compressed/string.c + */ +void warn(const char *msg) {} diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c deleted file mode 100644 index 01ad43873ad9..000000000000 --- a/arch/x86/purgatory/string.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Simple string functions. - * - * Copyright (C) 2014 Red Hat Inc. - * - * Author: - * Vivek Goyal <vgoyal@redhat.com> - */ - -#include <linux/types.h> - -#include "../boot/string.c" - -void *memcpy(void *dst, const void *src, size_t len) -{ - return __builtin_memcpy(dst, src, len); -} - -void *memset(void *dst, int c, size_t len) -{ - return __builtin_memset(dst, c, len); -} |