diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/cpuid.c | 26 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 19 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 66 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 31 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 30 |
6 files changed, 119 insertions, 69 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 82055b90a8b3..92bf2f2e7cdd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB) | F(IBRS); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -408,7 +408,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(ARCH_CAPABILITIES); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->ecx = 0; @@ -647,13 +652,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); - if (boot_cpu_has(X86_FEATURE_IBRS)) - entry->ebx |= F(IBRS); + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5708e951a5c6..46ff64da44ca 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1260,14 +1260,18 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) } } -static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { - struct kvm_run *run = vcpu->run; - - kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); + kvm_hv_hypercall_set_result(vcpu, result); + ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +{ + return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result); +} + static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) { struct eventfd_ctx *eventfd; @@ -1350,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) /* Hypercall continuation is not supported yet */ if (rep_cnt || rep_idx) { ret = HV_STATUS_INVALID_HYPERCALL_CODE; - goto set_result; + goto out; } switch (code) { @@ -1381,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) break; } -set_result: - kvm_hv_hypercall_set_result(vcpu, ret); - return 1; +out: + return kvm_hv_hypercall_complete(vcpu, ret); } void kvm_hv_init_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b74c9c1405b9..3773c4625114 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1522,11 +1522,23 @@ static bool set_target_expiration(struct kvm_lapic *apic) static void advance_periodic_target_expiration(struct kvm_lapic *apic) { - apic->lapic_timer.tscdeadline += - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); + ktime_t now = ktime_get(); + u64 tscl = rdtsc(); + ktime_t delta; + + /* + * Synchronize both deadlines to the same time source or + * differences in the periods (caused by differences in the + * underlying clocks or numerical approximation errors) will + * cause the two to drift apart over time as the errors + * accumulate. + */ apic->lapic_timer.target_expiration = ktime_add_ns(apic->lapic_timer.target_expiration, apic->lapic_timer.period); + delta = ktime_sub(apic->lapic_timer.target_expiration, now); + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + + nsec_to_cycles(apic->vcpu, delta); } static void start_sw_period(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1fc05e428aba..26110c202b19 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,7 +49,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/virtext.h> #include "trace.h" @@ -213,6 +213,12 @@ struct vcpu_svm { } host; u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; u32 *msrpm; @@ -2060,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | @@ -4108,11 +4115,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; msr_info->data = svm->spec_ctrl; break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_F15H_IC_CFG: { int family, model; @@ -4203,7 +4217,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ @@ -4230,7 +4244,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) return 1; if (data & ~PRED_CMD_IBPB) @@ -4244,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -5557,8 +5581,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5652,6 +5675,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -5670,20 +5705,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); reload_tss(vcpu); @@ -5786,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -7012,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3f1696570b41..40aa29204baf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,7 +51,7 @@ #include <asm/apic.h> #include <asm/irq_remapping.h> #include <asm/mmu_context.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/mshyperv.h> #include "trace.h" @@ -3529,7 +3529,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -3648,12 +3647,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; vmx->spec_ctrl = data; @@ -3679,7 +3677,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -9488,9 +9485,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -9722,8 +9731,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); vmx->__launched = vmx->loaded_vmcs->launched; @@ -9871,8 +9879,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -12632,7 +12639,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vm_init = vmx_vm_init, .vm_alloc = vmx_vm_alloc, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59371de5d722..71e7cda6d014 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1061,6 +1061,7 @@ static u32 emulated_msrs[] = { MSR_SMI_COUNT, MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2906,7 +2907,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); @@ -4606,14 +4607,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; @@ -6676,11 +6671,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int op_64_bit; - if (kvm_hv_hypercall_enabled(vcpu->kvm)) { - if (!kvm_hv_hypercall(vcpu)) - return 0; - goto out; - } + if (kvm_hv_hypercall_enabled(vcpu->kvm)) + return kvm_hv_hypercall(vcpu); nr = kvm_register_read(vcpu, VCPU_REGS_RAX); a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); @@ -6701,7 +6693,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) if (kvm_x86_ops->get_cpl(vcpu) != 0) { ret = -KVM_EPERM; - goto out_error; + goto out; } switch (nr) { @@ -6721,12 +6713,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ret = -KVM_ENOSYS; break; } -out_error: +out: if (!op_64_bit) ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); -out: ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } @@ -7985,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; + int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; @@ -8023,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; + cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & + (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops->set_cr4(vcpu, sregs->cr4); - if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + if (cpuid_update_needed) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); |