author     Linus Torvalds <torvalds@linux-foundation.org>   2020-06-04 00:13:47 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>   2020-06-04 00:13:47 +0200
commit     039aeb9deb9291f3b19c375a8bc6fa7f768996cc (patch)
tree       d98d5ddf276843995aa214157b587bb88270c530 /arch/x86/kvm/svm
parent     Merge tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff)
parent     KVM: selftests: fix rdtsc() for vmx_tsc_adjust_test (diff)
download   linux-039aeb9deb9291f3b19c375a8bc6fa7f768996cc.tar.xz
           linux-039aeb9deb9291f3b19c375a8bc6fa7f768996cc.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:

 "ARM:
   - Move the arch-specific code into arch/arm64/kvm
   - Start the post-32bit cleanup
   - Cherry-pick a few non-invasive pre-NV patches

  x86:
   - Rework of TLB flushing
   - Rework of event injection, especially with respect to nested virtualization
   - Nested AMD event injection facelift, building on the rework of generic code
     and fixing a lot of corner cases
   - Nested AMD live migration support
   - Optimization for TSC deadline MSR writes and IPIs
   - Various cleanups
   - Asynchronous page fault cleanups (from tglx, common topic branch with tip tree)
   - Interrupt-based delivery of asynchronous "page ready" events (host side)
   - Hyper-V MSRs and hypercalls for guest debugging
   - VMX preemption timer fixes

  s390:
   - Cleanups

  Generic:
   - switch vCPU thread wakeup from swait to rcuwait

  The other architectures, and the guest side of the asynchronous page
  fault work, will come next week"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (256 commits)
KVM: selftests: fix rdtsc() for vmx_tsc_adjust_test
KVM: check userspace_addr for all memslots
KVM: selftests: update hyperv_cpuid with SynDBG tests
x86/kvm/hyper-v: Add support for synthetic debugger via hypercalls
x86/kvm/hyper-v: enable hypercalls regardless of hypercall page
x86/kvm/hyper-v: Add support for synthetic debugger interface
x86/hyper-v: Add synthetic debugger definitions
KVM: selftests: VMX preemption timer migration test
KVM: nVMX: Fix VMX preemption timer migration
x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit
KVM: x86/pmu: Support full width counting
KVM: x86/pmu: Tweak kvm_pmu_get_msr to pass 'struct msr_data' in
KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT
KVM: x86: acknowledgment mechanism for async pf page ready notifications
KVM: x86: interrupt based APF 'page ready' event delivery
KVM: introduce kvm_read_guest_offset_cached()
KVM: rename kvm_arch_can_inject_async_page_present() to kvm_arch_can_dequeue_async_page_present()
KVM: x86: extend struct kvm_vcpu_pv_apf_data with token info
Revert "KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously"
KVM: VMX: Replace zero-length array with flexible-array
...
Diffstat (limited to 'arch/x86/kvm/svm')

 -rw-r--r--  arch/x86/kvm/svm/nested.c | 698
 -rw-r--r--  arch/x86/kvm/svm/pmu.c    |   7
 -rw-r--r--  arch/x86/kvm/svm/svm.c    | 368
 -rw-r--r--  arch/x86/kvm/svm/svm.h    |  57

4 files changed, 714 insertions, 416 deletions
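A large part of the nested.c diff below replaces the old scattered fields (svm->nested.intercept, vmcb_msrpm, vmcb_iopm, nested_cr3, ...) with a single cached copy of the guest's VMCB control area, svm->nested.ctl, filled once at VMRUN by load_nested_vmcb_control() and synced back at #VMEXIT by sync_nested_vmcb_control(). As a rough illustration of that idea only, here is a stand-alone user-space sketch; the structures and helpers are simplified, invented stand-ins, not the kernel's definitions:

/*
 * Minimal model of the "cache the guest's VMCB control area" refactoring.
 * The real code keeps the cached copy in svm->nested.ctl; later intercept
 * checks consult only the cache, never the guest's memory again.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vmcb_ctl {                 /* tiny subset of the control area */
	uint64_t intercept;
	uint32_t int_ctl;
	uint32_t asid;
	uint64_t msrpm_base_pa;
};

struct nested_state {
	struct vmcb_ctl ctl;      /* cached copy of L1's controls */
};

/* On VMRUN: snapshot L1's controls once, then sanitize the cached copy. */
static void load_nested_vmcb_control(struct nested_state *n,
				     const struct vmcb_ctl *guest)
{
	n->ctl = *guest;
	n->ctl.msrpm_base_pa &= ~0x0fffULL;   /* force 4K alignment, as in the diff */
}

/* Intercept checks read the cache, mirroring nested_exit_on_*() helpers. */
static bool nested_exit_on_intercept(const struct nested_state *n, int bit)
{
	return n->ctl.intercept & (1ULL << bit);
}

int main(void)
{
	struct vmcb_ctl guest = { .intercept = 1ULL << 0, .asid = 1,
				  .msrpm_base_pa = 0x123456 };
	struct nested_state n;

	load_nested_vmcb_control(&n, &guest);
	guest.intercept = 0;   /* L1 rewriting its VMCB after VMRUN has no effect */

	printf("exit on bit 0: %d, msrpm base: %#llx\n",
	       nested_exit_on_intercept(&n, 0),
	       (unsigned long long)n.ctl.msrpm_base_pa);
	return 0;
}

Caching the controls in one struct is also what makes the new KVM_GET/SET_NESTED_STATE support for SVM straightforward: svm_get_nested_state() copies svm->nested.ctl out wholesale, and svm_set_nested_state() validates and reloads it.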
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9a2a62e5afeb..8a6db11dcb43 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -25,6 +25,8 @@ #include "trace.h" #include "mmu.h" #include "x86.h" +#include "cpuid.h" +#include "lapic.h" #include "svm.h" static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, @@ -59,7 +61,7 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) { struct vcpu_svm *svm = to_svm(vcpu); - u64 cr3 = svm->nested.nested_cr3; + u64 cr3 = svm->nested.ctl.nested_cr3; u64 pdpte; int ret; @@ -74,19 +76,22 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - return svm->nested.nested_cr3; + return svm->nested.ctl.nested_cr3; } static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *hsave = svm->nested.hsave; + WARN_ON(mmu_is_nested(vcpu)); vcpu->arch.mmu = &vcpu->arch.guest_mmu; - kvm_init_shadow_mmu(vcpu); + kvm_init_shadow_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer); vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3; vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit; - vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu); + vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level; reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -99,8 +104,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) void recalc_intercepts(struct vcpu_svm *svm) { - struct vmcb_control_area *c, *h; - struct nested_state *g; + struct vmcb_control_area *c, *h, *g; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -109,14 +113,16 @@ void recalc_intercepts(struct vcpu_svm *svm) c = &svm->vmcb->control; h = &svm->nested.hsave->control; - g = &svm->nested; + g = &svm->nested.ctl; + + svm->nested.host_intercept_exceptions = h->intercept_exceptions; c->intercept_cr = h->intercept_cr; c->intercept_dr = h->intercept_dr; c->intercept_exceptions = h->intercept_exceptions; c->intercept = h->intercept; - if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { + if (g->int_ctl & V_INTR_MASKING_MASK) { /* We only want the cr8 intercept bits of L1 */ c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ); c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE); @@ -138,11 +144,9 @@ void recalc_intercepts(struct vcpu_svm *svm) c->intercept |= g->intercept; } -static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) +static void copy_vmcb_control_area(struct vmcb_control_area *dst, + struct vmcb_control_area *from) { - struct vmcb_control_area *dst = &dst_vmcb->control; - struct vmcb_control_area *from = &from_vmcb->control; - dst->intercept_cr = from->intercept_cr; dst->intercept_dr = from->intercept_dr; dst->intercept_exceptions = from->intercept_exceptions; @@ -150,7 +154,7 @@ static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb dst->iopm_base_pa = from->iopm_base_pa; dst->msrpm_base_pa = from->msrpm_base_pa; dst->tsc_offset = from->tsc_offset; - dst->asid = from->asid; + /* asid not copied, it is handled manually for svm->vmcb. 
*/ dst->tlb_ctl = from->tlb_ctl; dst->int_ctl = from->int_ctl; dst->int_vector = from->int_vector; @@ -179,7 +183,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) */ int i; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT))) return true; for (i = 0; i < MSRPM_OFFSETS; i++) { @@ -190,7 +194,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) break; p = msrpm_offsets[i]; - offset = svm->nested.vmcb_msrpm + (p * 4); + offset = svm->nested.ctl.msrpm_base_pa + (p * 4); if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) return false; @@ -203,41 +207,111 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) return true; } -static bool nested_vmcb_checks(struct vmcb *vmcb) +static bool nested_vmcb_check_controls(struct vmcb_control_area *control) { - if ((vmcb->save.efer & EFER_SVME) == 0) + if ((control->intercept & (1ULL << INTERCEPT_VMRUN)) == 0) return false; - if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) + if (control->asid == 0) return false; - if (vmcb->control.asid == 0) - return false; - - if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && + if ((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled) return false; return true; } -void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, - struct vmcb *nested_vmcb, struct kvm_host_map *map) +static bool nested_vmcb_checks(struct vmcb *vmcb) { - bool evaluate_pending_interrupts = - is_intercept(svm, INTERCEPT_VINTR) || - is_intercept(svm, INTERCEPT_IRET); + if ((vmcb->save.efer & EFER_SVME) == 0) + return false; - if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) - svm->vcpu.arch.hflags |= HF_HIF_MASK; - else - svm->vcpu.arch.hflags &= ~HF_HIF_MASK; + if (((vmcb->save.cr0 & X86_CR0_CD) == 0) && + (vmcb->save.cr0 & X86_CR0_NW)) + return false; - if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) { - svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; - nested_svm_init_mmu_context(&svm->vcpu); + return nested_vmcb_check_controls(&vmcb->control); +} + +static void load_nested_vmcb_control(struct vcpu_svm *svm, + struct vmcb_control_area *control) +{ + copy_vmcb_control_area(&svm->nested.ctl, control); + + /* Copy it here because nested_svm_check_controls will check it. */ + svm->nested.ctl.asid = control->asid; + svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL; + svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; +} + +/* + * Synchronize fields that are written by the processor, so that + * they can be copied back into the nested_vmcb. + */ +void sync_nested_vmcb_control(struct vcpu_svm *svm) +{ + u32 mask; + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; + + /* Only a few fields of int_ctl are written by the processor. */ + mask = V_IRQ_MASK | V_TPR_MASK; + if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && + is_intercept(svm, SVM_EXIT_VINTR)) { + /* + * In order to request an interrupt window, L0 is usurping + * svm->vmcb->control.int_ctl and possibly setting V_IRQ + * even if it was clear in L1's VMCB. Restoring it would be + * wrong. However, in this case V_IRQ will remain true until + * interrupt_window_interception calls svm_clear_vintr and + * restores int_ctl. We can just leave it aside. 
+ */ + mask &= ~V_IRQ_MASK; } + svm->nested.ctl.int_ctl &= ~mask; + svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask; +} + +/* + * Transfer any event that L0 or L1 wanted to inject into L2 to + * EXIT_INT_INFO. + */ +static void nested_vmcb_save_pending_event(struct vcpu_svm *svm, + struct vmcb *nested_vmcb) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 exit_int_info = 0; + unsigned int nr; + + if (vcpu->arch.exception.injected) { + nr = vcpu->arch.exception.nr; + exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; + + if (vcpu->arch.exception.has_error_code) { + exit_int_info |= SVM_EVTINJ_VALID_ERR; + nested_vmcb->control.exit_int_info_err = + vcpu->arch.exception.error_code; + } + + } else if (vcpu->arch.nmi_injected) { + exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; + } else if (vcpu->arch.interrupt.injected) { + nr = vcpu->arch.interrupt.nr; + exit_int_info = nr | SVM_EVTINJ_VALID; + + if (vcpu->arch.interrupt.soft) + exit_int_info |= SVM_EVTINJ_TYPE_SOFT; + else + exit_int_info |= SVM_EVTINJ_TYPE_INTR; + } + + nested_vmcb->control.exit_int_info = exit_int_info; +} + +static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb) +{ /* Load the nested guest state */ svm->vmcb->save.es = nested_vmcb->save.es; svm->vmcb->save.cs = nested_vmcb->save.cs; @@ -249,14 +323,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); - if (npt_enabled) { - svm->vmcb->save.cr3 = nested_vmcb->save.cr3; - svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; - } else - (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); - - /* Guest paging mode is active - reset mmu */ - kvm_mmu_reset_context(&svm->vcpu); + (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax); @@ -270,38 +337,34 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->vmcb->save.dr7 = nested_vmcb->save.dr7; svm->vcpu.arch.dr6 = nested_vmcb->save.dr6; svm->vmcb->save.cpl = nested_vmcb->save.cpl; +} - svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; - svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; +static void nested_prepare_vmcb_control(struct vcpu_svm *svm) +{ + const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK; + if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) + nested_svm_init_mmu_context(&svm->vcpu); - /* cache intercepts */ - svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; - svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; - svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; - svm->nested.intercept = nested_vmcb->control.intercept; + /* Guest paging mode is active - reset mmu */ + kvm_mmu_reset_context(&svm->vcpu); - svm_flush_tlb(&svm->vcpu, true); - svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; - if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) - svm->vcpu.arch.hflags |= HF_VINTR_MASK; - else - svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; + svm_flush_tlb(&svm->vcpu); - svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; - svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = + svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; - 
svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; - svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; - svm->vmcb->control.int_state = nested_vmcb->control.int_state; - svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; - svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; + svm->vmcb->control.int_ctl = + (svm->nested.ctl.int_ctl & ~mask) | + (svm->nested.hsave->control.int_ctl & mask); - svm->vmcb->control.pause_filter_count = - nested_vmcb->control.pause_filter_count; - svm->vmcb->control.pause_filter_thresh = - nested_vmcb->control.pause_filter_thresh; + svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; + svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; + svm->vmcb->control.int_state = svm->nested.ctl.int_state; + svm->vmcb->control.event_inj = svm->nested.ctl.event_inj; + svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; - kvm_vcpu_unmap(&svm->vcpu, map, true); + svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count; + svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh; /* Enter Guest-Mode */ enter_guest_mode(&svm->vcpu); @@ -312,25 +375,18 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, */ recalc_intercepts(svm); - svm->nested.vmcb = vmcb_gpa; + mark_all_dirty(svm->vmcb); +} - /* - * If L1 had a pending IRQ/NMI before executing VMRUN, - * which wasn't delivered because it was disallowed (e.g. - * interrupts disabled), L0 needs to evaluate if this pending - * event should cause an exit from L2 to L1 or be delivered - * directly to L2. - * - * Usually this would be handled by the processor noticing an - * IRQ/NMI window request. However, VMRUN can unblock interrupts - * by implicitly setting GIF, so force L0 to perform pending event - * evaluation by requesting a KVM_REQ_EVENT. 
- */ - enable_gif(svm); - if (unlikely(evaluate_pending_interrupts)) - kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); +void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, + struct vmcb *nested_vmcb) +{ + svm->nested.vmcb = vmcb_gpa; + load_nested_vmcb_control(svm, &nested_vmcb->control); + nested_prepare_vmcb_save(svm, nested_vmcb); + nested_prepare_vmcb_control(svm); - mark_all_dirty(svm->vmcb); + svm_set_gif(svm, true); } int nested_svm_vmrun(struct vcpu_svm *svm) @@ -342,8 +398,12 @@ int nested_svm_vmrun(struct vcpu_svm *svm) struct kvm_host_map map; u64 vmcb_gpa; - vmcb_gpa = svm->vmcb->save.rax; + if (is_smm(&svm->vcpu)) { + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } + vmcb_gpa = svm->vmcb->save.rax; ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); if (ret == -EINVAL) { kvm_inject_gp(&svm->vcpu, 0); @@ -361,10 +421,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm) nested_vmcb->control.exit_code_hi = 0; nested_vmcb->control.exit_info_1 = 0; nested_vmcb->control.exit_info_2 = 0; - - kvm_vcpu_unmap(&svm->vcpu, &map, true); - - return ret; + goto out; } trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, @@ -404,9 +461,10 @@ int nested_svm_vmrun(struct vcpu_svm *svm) else hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); - copy_vmcb_control_area(hsave, vmcb); + copy_vmcb_control_area(&hsave->control, &vmcb->control); - enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map); + svm->nested.nested_run_pending = 1; + enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb); if (!nested_svm_vmrun_msrpm(svm)) { svm->vmcb->control.exit_code = SVM_EXIT_ERR; @@ -417,6 +475,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm) nested_svm_vmexit(svm); } +out: + kvm_vcpu_unmap(&svm->vcpu, &map, true); + return ret; } @@ -444,13 +505,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *vmcb = svm->vmcb; struct kvm_host_map map; - trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, - vmcb->control.exit_info_1, - vmcb->control.exit_info_2, - vmcb->control.exit_int_info, - vmcb->control.exit_int_info_err, - KVM_ISA_SVM); - rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map); if (rc) { if (rc == -EINVAL) @@ -463,9 +517,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm) /* Exit Guest-Mode */ leave_guest_mode(&svm->vcpu); svm->nested.vmcb = 0; + WARN_ON_ONCE(svm->nested.nested_run_pending); + + /* in case we halted in L2 */ + svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; /* Give the current vmcb to the guest */ - disable_gif(svm); + svm_set_gif(svm, false); nested_vmcb->save.es = vmcb->save.es; nested_vmcb->save.cs = vmcb->save.cs; @@ -479,62 +537,42 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); - nested_vmcb->save.rip = vmcb->save.rip; - nested_vmcb->save.rsp = vmcb->save.rsp; - nested_vmcb->save.rax = vmcb->save.rax; + nested_vmcb->save.rip = kvm_rip_read(&svm->vcpu); + nested_vmcb->save.rsp = kvm_rsp_read(&svm->vcpu); + nested_vmcb->save.rax = kvm_rax_read(&svm->vcpu); nested_vmcb->save.dr7 = vmcb->save.dr7; nested_vmcb->save.dr6 = svm->vcpu.arch.dr6; nested_vmcb->save.cpl = vmcb->save.cpl; - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; - nested_vmcb->control.int_vector = vmcb->control.int_vector; nested_vmcb->control.int_state = vmcb->control.int_state; nested_vmcb->control.exit_code = vmcb->control.exit_code; nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; nested_vmcb->control.exit_info_1 = 
vmcb->control.exit_info_1; nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; - nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; - nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; + + if (nested_vmcb->control.exit_code != SVM_EXIT_ERR) + nested_vmcb_save_pending_event(svm, nested_vmcb); if (svm->nrips_enabled) nested_vmcb->control.next_rip = vmcb->control.next_rip; - /* - * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have - * to make sure that we do not lose injected events. So check event_inj - * here and copy it to exit_int_info if it is valid. - * Exit_int_info and event_inj can't be both valid because the case - * below only happens on a VMRUN instruction intercept which has - * no valid exit_int_info set. - */ - if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { - struct vmcb_control_area *nc = &nested_vmcb->control; - - nc->exit_int_info = vmcb->control.event_inj; - nc->exit_int_info_err = vmcb->control.event_inj_err; - } - - nested_vmcb->control.tlb_ctl = 0; - nested_vmcb->control.event_inj = 0; - nested_vmcb->control.event_inj_err = 0; + nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl; + nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl; + nested_vmcb->control.event_inj = svm->nested.ctl.event_inj; + nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; nested_vmcb->control.pause_filter_count = svm->vmcb->control.pause_filter_count; nested_vmcb->control.pause_filter_thresh = svm->vmcb->control.pause_filter_thresh; - /* We always set V_INTR_MASKING and remember the old value in hflags */ - if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) - nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; - /* Restore the original control entries */ - copy_vmcb_control_area(vmcb, hsave); + copy_vmcb_control_area(&vmcb->control, &hsave->control); - svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset; - kvm_clear_exception_queue(&svm->vcpu); - kvm_clear_interrupt_queue(&svm->vcpu); + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = + svm->vcpu.arch.l1_tsc_offset; - svm->nested.nested_cr3 = 0; + svm->nested.ctl.nested_cr3 = 0; /* Restore selected save entries */ svm->vmcb->save.es = hsave->save.es; @@ -562,6 +600,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm) mark_all_dirty(svm->vmcb); + trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code, + nested_vmcb->control.exit_info_1, + nested_vmcb->control.exit_info_2, + nested_vmcb->control.exit_int_info, + nested_vmcb->control.exit_int_info_err, + KVM_ISA_SVM); + kvm_vcpu_unmap(&svm->vcpu, &map, true); nested_svm_uninit_mmu_context(&svm->vcpu); @@ -579,12 +624,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm) return 0; } +/* + * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
+ */ +void svm_leave_nested(struct vcpu_svm *svm) +{ + if (is_guest_mode(&svm->vcpu)) { + struct vmcb *hsave = svm->nested.hsave; + struct vmcb *vmcb = svm->vmcb; + + svm->nested.nested_run_pending = 0; + leave_guest_mode(&svm->vcpu); + copy_vmcb_control_area(&vmcb->control, &hsave->control); + nested_svm_uninit_mmu_context(&svm->vcpu); + } +} + static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) { u32 offset, msr, value; int write, mask; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT))) return NESTED_EXIT_HOST; msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; @@ -598,56 +659,12 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) /* Offset is in 32 bit units but need in 8 bit units */ offset *= 4; - if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4)) + if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4)) return NESTED_EXIT_DONE; return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } -/* DB exceptions for our internal use must not cause vmexit */ -static int nested_svm_intercept_db(struct vcpu_svm *svm) -{ - unsigned long dr6 = svm->vmcb->save.dr6; - - /* Always catch it and pass it to userspace if debugging. */ - if (svm->vcpu.guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return NESTED_EXIT_HOST; - - /* if we're not singlestepping, it's not ours */ - if (!svm->nmi_singlestep) - goto reflected_db; - - /* if it's not a singlestep exception, it's not ours */ - if (!(dr6 & DR6_BS)) - goto reflected_db; - - /* if the guest is singlestepping, it should get the vmexit */ - if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { - disable_nmi_singlestep(svm); - goto reflected_db; - } - - /* it's ours, the nested hypervisor must not see this one */ - return NESTED_EXIT_HOST; - -reflected_db: - /* - * Synchronize guest DR6 here just like in kvm_deliver_exception_payload; - * it will be moved into the nested VMCB by nested_svm_vmexit. Once - * exceptions will be moved to svm_check_nested_events, all this stuff - * will just go away and we could just return NESTED_EXIT_HOST - * unconditionally. db_interception will queue the exception, which - * will be processed by svm_check_nested_events if a nested vmexit is - * required, and we will just use kvm_deliver_exception_payload to copy - * the payload to DR6 before vmexit. - */ - WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT); - svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM); - svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1; - return NESTED_EXIT_DONE; -} - static int nested_svm_intercept_ioio(struct vcpu_svm *svm) { unsigned port, size, iopm_len; @@ -655,13 +672,13 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm) u8 start_bit; u64 gpa; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_IOIO_PROT))) return NESTED_EXIT_HOST; port = svm->vmcb->control.exit_info_1 >> 16; size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - gpa = svm->nested.vmcb_iopm + (port / 8); + gpa = svm->nested.ctl.iopm_base_pa + (port / 8); start_bit = port % 8; iopm_len = (start_bit + size > 8) ? 2 : 1; mask = (0xf >> (4 - size)) << start_bit; @@ -687,31 +704,23 @@ static int nested_svm_intercept(struct vcpu_svm *svm) break; case SVM_EXIT_READ_CR0 ... 
SVM_EXIT_WRITE_CR8: { u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); - if (svm->nested.intercept_cr & bit) + if (svm->nested.ctl.intercept_cr & bit) vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); - if (svm->nested.intercept_dr & bit) + if (svm->nested.ctl.intercept_dr & bit) vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { - u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); - if (svm->nested.intercept_exceptions & excp_bits) { - if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) - vmexit = nested_svm_intercept_db(svm); - else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR && - svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP) - vmexit = NESTED_EXIT_HOST; - else - vmexit = NESTED_EXIT_DONE; - } - /* async page fault always cause vmexit */ - else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && - svm->vcpu.arch.exception.nested_apf != 0) - vmexit = NESTED_EXIT_DONE; + /* + * Host-intercepted exceptions have been checked already in + * nested_svm_exit_special. There is nothing to do here, + * the vmexit is injected by svm_check_nested_events. + */ + vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_ERR: { @@ -720,7 +729,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } default: { u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); - if (svm->nested.intercept & exit_bits) + if (svm->nested.ctl.intercept & exit_bits) vmexit = NESTED_EXIT_DONE; } } @@ -756,62 +765,140 @@ int nested_svm_check_permissions(struct vcpu_svm *svm) return 0; } -int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, - bool has_error_code, u32 error_code) +static bool nested_exit_on_exception(struct vcpu_svm *svm) { - int vmexit; + unsigned int nr = svm->vcpu.arch.exception.nr; - if (!is_guest_mode(&svm->vcpu)) - return 0; + return (svm->nested.ctl.intercept_exceptions & (1 << nr)); +} - vmexit = nested_svm_intercept(svm); - if (vmexit != NESTED_EXIT_DONE) - return 0; +static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) +{ + unsigned int nr = svm->vcpu.arch.exception.nr; svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; - svm->vmcb->control.exit_info_1 = error_code; + + if (svm->vcpu.arch.exception.has_error_code) + svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code; /* * EXITINFO2 is undefined for all exception intercepts other * than #PF. */ - if (svm->vcpu.arch.exception.nested_apf) - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; - else if (svm->vcpu.arch.exception.has_payload) - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; - else - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + if (nr == PF_VECTOR) { + if (svm->vcpu.arch.exception.nested_apf) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; + else if (svm->vcpu.arch.exception.has_payload) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; + else + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + } else if (nr == DB_VECTOR) { + /* See inject_pending_event. 
*/ + kvm_deliver_exception_payload(&svm->vcpu); + if (svm->vcpu.arch.dr7 & DR7_GD) { + svm->vcpu.arch.dr7 &= ~DR7_GD; + kvm_update_dr7(&svm->vcpu); + } + } else + WARN_ON(svm->vcpu.arch.exception.has_payload); - svm->nested.exit_required = true; - return vmexit; + nested_svm_vmexit(svm); +} + +static void nested_svm_smi(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_SMI; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); +} + +static void nested_svm_nmi(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_NMI; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); } static void nested_svm_intr(struct vcpu_svm *svm) { + trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); + svm->vmcb->control.exit_code = SVM_EXIT_INTR; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; - /* nested_svm_vmexit this gets called afterwards from handle_exit */ - svm->nested.exit_required = true; - trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); + nested_svm_vmexit(svm); } -static bool nested_exit_on_intr(struct vcpu_svm *svm) +static inline bool nested_exit_on_init(struct vcpu_svm *svm) { - return (svm->nested.intercept & 1ULL); + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INIT)); +} + +static void nested_svm_init(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_INIT; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); } -int svm_check_nested_events(struct kvm_vcpu *vcpu) + +static int svm_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); bool block_nested_events = - kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required; + kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending; + struct kvm_lapic *apic = vcpu->arch.apic; + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_INIT, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_init(svm)) + return 0; + nested_svm_init(svm); + return 0; + } + + if (vcpu->arch.exception.pending) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_exception(svm)) + return 0; + nested_svm_inject_exception_vmexit(svm); + return 0; + } + + if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_smi(svm)) + return 0; + nested_svm_smi(svm); + return 0; + } + + if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_nmi(svm)) + return 0; + nested_svm_nmi(svm); + return 0; + } - if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) { + if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_exit_on_intr(svm)) + return 0; nested_svm_intr(svm); return 0; } @@ -826,21 +913,170 @@ int nested_svm_exit_special(struct vcpu_svm *svm) switch (exit_code) { case SVM_EXIT_INTR: case SVM_EXIT_NMI: - case SVM_EXIT_EXCP_BASE + MC_VECTOR: - return NESTED_EXIT_HOST; case SVM_EXIT_NPF: - /* For now we are always handling NPFs when using them */ - if (npt_enabled) + return NESTED_EXIT_HOST; + case SVM_EXIT_EXCP_BASE ... 
SVM_EXIT_EXCP_BASE + 0x1f: { + u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); + + if (get_host_vmcb(svm)->control.intercept_exceptions & excp_bits) return NESTED_EXIT_HOST; - break; - case SVM_EXIT_EXCP_BASE + PF_VECTOR: - /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) + else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR && + svm->vcpu.arch.apf.host_apf_flags) + /* Trap async PF even if not shadowing */ return NESTED_EXIT_HOST; break; + } default: break; } return NESTED_EXIT_CONTINUE; } + +static int svm_get_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + u32 user_data_size) +{ + struct vcpu_svm *svm; + struct kvm_nested_state kvm_state = { + .flags = 0, + .format = KVM_STATE_NESTED_FORMAT_SVM, + .size = sizeof(kvm_state), + }; + struct vmcb __user *user_vmcb = (struct vmcb __user *) + &user_kvm_nested_state->data.svm[0]; + + if (!vcpu) + return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE; + + svm = to_svm(vcpu); + + if (user_data_size < kvm_state.size) + goto out; + + /* First fill in the header and copy it out. */ + if (is_guest_mode(vcpu)) { + kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb; + kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE; + kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; + + if (svm->nested.nested_run_pending) + kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; + } + + if (gif_set(svm)) + kvm_state.flags |= KVM_STATE_NESTED_GIF_SET; + + if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) + return -EFAULT; + + if (!is_guest_mode(vcpu)) + goto out; + + /* + * Copy over the full size of the VMCB rather than just the size + * of the structs. + */ + if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE)) + return -EFAULT; + if (copy_to_user(&user_vmcb->control, &svm->nested.ctl, + sizeof(user_vmcb->control))) + return -EFAULT; + if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save, + sizeof(user_vmcb->save))) + return -EFAULT; + +out: + return kvm_state.size; +} + +static int svm_set_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *hsave = svm->nested.hsave; + struct vmcb __user *user_vmcb = (struct vmcb __user *) + &user_kvm_nested_state->data.svm[0]; + struct vmcb_control_area ctl; + struct vmcb_save_area save; + u32 cr0; + + if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM) + return -EINVAL; + + if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE | + KVM_STATE_NESTED_RUN_PENDING | + KVM_STATE_NESTED_GIF_SET)) + return -EINVAL; + + /* + * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's + * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed. + */ + if (!(vcpu->arch.efer & EFER_SVME)) { + /* GIF=1 and no guest mode are required if SVME=0. */ + if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET) + return -EINVAL; + } + + /* SMM temporarily disables SVM, so we cannot be in guest mode. 
*/ + if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) + return -EINVAL; + + if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { + svm_leave_nested(svm); + goto out_set_gif; + } + + if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa)) + return -EINVAL; + if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE) + return -EINVAL; + if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl))) + return -EFAULT; + if (copy_from_user(&save, &user_vmcb->save, sizeof(save))) + return -EFAULT; + + if (!nested_vmcb_check_controls(&ctl)) + return -EINVAL; + + /* + * Processor state contains L2 state. Check that it is + * valid for guest mode (see nested_vmcb_checks). + */ + cr0 = kvm_read_cr0(vcpu); + if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW)) + return -EINVAL; + + /* + * Validate host state saved from before VMRUN (see + * nested_svm_check_permissions). + * TODO: validate reserved bits for all saved state. + */ + if (!(save.cr0 & X86_CR0_PG)) + return -EINVAL; + + /* + * All checks done, we can enter guest mode. L1 control fields + * come from the nested save state. Guest state is already + * in the registers, the save area of the nested state instead + * contains saved L1 state. + */ + copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); + hsave->save = save; + + svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa; + load_nested_vmcb_control(svm, &ctl); + nested_prepare_vmcb_control(svm); + +out_set_gif: + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + return 0; +} + +struct kvm_x86_nested_ops svm_nested_ops = { + .check_events = svm_check_nested_events, + .get_state = svm_get_nested_state, + .set_state = svm_set_nested_state, +}; diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index ce0b10fe5e2b..035da07500e8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -215,21 +215,22 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) return pmc; } -static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; + u32 msr = msr_info->index; /* MSR_PERFCTRn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { - *data = pmc_read_counter(pmc); + msr_info->data = pmc_read_counter(pmc); return 0; } /* MSR_EVNTSELn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { - *data = pmc->eventsel; + msr_info->data = pmc->eventsel; return 0; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a862c768fd54..9e333b91ff78 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -33,6 +33,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> +#include <asm/mce.h> #include <asm/spec-ctrl.h> #include <asm/cpu_device_id.h> @@ -264,6 +265,7 @@ static int get_npt_level(struct kvm_vcpu *vcpu) void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { + struct vcpu_svm *svm = to_svm(vcpu); vcpu->arch.efer = efer; if (!npt_enabled) { @@ -274,8 +276,13 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) efer &= ~EFER_LME; } - to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; - mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); + if (!(efer & EFER_SVME)) { + svm_leave_nested(svm); + svm_set_gif(svm, true); + } + + svm->vmcb->save.efer = efer | EFER_SVME; + mark_dirty(svm->vmcb, VMCB_CR); } static int is_external_interrupt(u32 info) @@ -318,9 +325,6 @@ static int 
skip_emulated_instruction(struct kvm_vcpu *vcpu) if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) return 0; } else { - if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) - pr_err("%s: ip 0x%lx next 0x%llx\n", - __func__, kvm_rip_read(vcpu), svm->next_rip); kvm_rip_write(vcpu, svm->next_rip); } svm_set_interrupt_shadow(vcpu, 0); @@ -333,17 +337,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.injected; u32 error_code = vcpu->arch.exception.error_code; - /* - * If we are within a nested VM we'd better #VMEXIT and let the guest - * handle the exception - */ - if (!reinject && - nested_svm_check_exception(svm, nr, has_error_code, error_code)) - return; - kvm_deliver_exception_payload(&svm->vcpu); if (nr == BP_VECTOR && !nrips) { @@ -780,7 +775,7 @@ static __init void svm_adjust_mmio_mask(void) */ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; - kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); + kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK); } static void svm_hardware_teardown(void) @@ -890,7 +885,7 @@ static __init int svm_hardware_setup(void) if (npt_enabled && !npt) npt_enabled = false; - kvm_configure_mmu(npt_enabled, PT_PDPE_LEVEL); + kvm_configure_mmu(npt_enabled, PG_LEVEL_1G); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); if (nrips) { @@ -953,16 +948,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - if (is_guest_mode(vcpu)) - return svm->nested.hsave->control.tsc_offset; - - return vcpu->arch.tsc_offset; -} - static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1208,6 +1193,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->nested.hsave = page_address(hsave_page); + clear_page(svm->nested.hsave); svm->msrpm = page_address(msrpm_pages); svm_vcpu_init_msrpm(svm->msrpm); @@ -1364,12 +1350,13 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static inline void svm_enable_vintr(struct vcpu_svm *svm) +static void svm_set_vintr(struct vcpu_svm *svm) { struct vmcb_control_area *control; /* The following fields are ignored when AVIC is enabled */ WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + set_intercept(svm, INTERCEPT_VINTR); /* * This is just a dummy VINTR to actually cause a vmexit to happen. @@ -1383,18 +1370,19 @@ static inline void svm_enable_vintr(struct vcpu_svm *svm) mark_dirty(svm->vmcb, VMCB_INTR); } -static void svm_set_vintr(struct vcpu_svm *svm) -{ - set_intercept(svm, INTERCEPT_VINTR); - if (is_intercept(svm, INTERCEPT_VINTR)) - svm_enable_vintr(svm); -} - static void svm_clear_vintr(struct vcpu_svm *svm) { + const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK; clr_intercept(svm, INTERCEPT_VINTR); - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + /* Drop int_ctl fields related to VINTR injection. 
*/ + svm->vmcb->control.int_ctl &= mask; + if (is_guest_mode(&svm->vcpu)) { + WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) != + (svm->nested.ctl.int_ctl & V_TPR_MASK)); + svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; + } + mark_dirty(svm->vmcb, VMCB_INTR); } @@ -1533,14 +1521,6 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) mark_dirty(svm->vmcb, VMCB_DT); } -static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) -{ -} - -static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) -{ -} - static void update_cr0_intercept(struct vcpu_svm *svm) { ulong gcr0 = svm->vcpu.arch.cr0; @@ -1603,7 +1583,7 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb(vcpu, true); + svm_flush_tlb(vcpu); vcpu->arch.cr4 = cr4; if (!npt_enabled) @@ -1842,6 +1822,25 @@ static bool is_erratum_383(void) return true; } +/* + * Trigger machine check on the host. We assume all the MSRs are already set up + * by the CPU and that we still run on the same CPU as the MCE occurred on. + * We pass a fake environment to the machine check handler because we want + * the guest to be always treated like user space, no matter what context + * it used internally. + */ +static void kvm_machine_check(void) +{ +#if defined(CONFIG_X86_MCE) + struct pt_regs regs = { + .cs = 3, /* Fake ring 3 no matter what the guest ran on */ + .flags = X86_EFLAGS_IF, + }; + + do_machine_check(®s, 0); +#endif +} + static void svm_handle_mce(struct vcpu_svm *svm) { if (is_erratum_383()) { @@ -1860,11 +1859,7 @@ static void svm_handle_mce(struct vcpu_svm *svm) * On an #MC intercept the MCE handler is not called automatically in * the host. So do it by hand here. */ - asm volatile ( - "int $0x12\n"); - /* not sure if we ever come back to this point */ - - return; + kvm_machine_check(); } static int mc_interception(struct vcpu_svm *svm) @@ -1993,6 +1988,38 @@ static int vmrun_interception(struct vcpu_svm *svm) return nested_svm_vmrun(svm); } +void svm_set_gif(struct vcpu_svm *svm, bool value) +{ + if (value) { + /* + * If VGIF is enabled, the STGI intercept is only added to + * detect the opening of the SMI/NMI window; remove it now. + * Likewise, clear the VINTR intercept, we will set it + * again while processing KVM_REQ_EVENT if needed. + */ + if (vgif_enabled(svm)) + clr_intercept(svm, INTERCEPT_STGI); + if (is_intercept(svm, SVM_EXIT_VINTR)) + svm_clear_vintr(svm); + + enable_gif(svm); + if (svm->vcpu.arch.smi_pending || + svm->vcpu.arch.nmi_pending || + kvm_cpu_has_injectable_intr(&svm->vcpu)) + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); + } else { + disable_gif(svm); + + /* + * After a CLGI no interrupts should come. But if vGIF is + * in use, we still rely on the VINTR intercept (rather than + * STGI) to detect an open interrupt window. + */ + if (!vgif_enabled(svm)) + svm_clear_vintr(svm); + } +} + static int stgi_interception(struct vcpu_svm *svm) { int ret; @@ -2000,18 +2027,8 @@ static int stgi_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - /* - * If VGIF is enabled, the STGI intercept is only added to - * detect the opening of the SMI/NMI window; remove it now. 
- */ - if (vgif_enabled(svm)) - clr_intercept(svm, INTERCEPT_STGI); - ret = kvm_skip_emulated_instruction(&svm->vcpu); - kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - - enable_gif(svm); - + svm_set_gif(svm, true); return ret; } @@ -2023,13 +2040,7 @@ static int clgi_interception(struct vcpu_svm *svm) return 1; ret = kvm_skip_emulated_instruction(&svm->vcpu); - - disable_gif(svm); - - /* After a CLGI no interrupts should come */ - if (!kvm_vcpu_apicv_active(&svm->vcpu)) - svm_clear_vintr(svm); - + svm_set_gif(svm, false); return ret; } @@ -2193,7 +2204,7 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, bool ret = false; u64 intercept; - intercept = svm->nested.intercept; + intercept = svm->nested.ctl.intercept; if (!is_guest_mode(&svm->vcpu) || (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) @@ -2671,8 +2682,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm) */ svm_toggle_avic_for_irq_window(&svm->vcpu, true); - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; - mark_dirty(svm->vmcb, VMCB_INTR); ++svm->vcpu.stat.irq_window_exits; return 1; } @@ -2898,8 +2907,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = control->exit_info_2; } -static int handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) +static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -2912,12 +2920,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, if (npt_enabled) vcpu->arch.cr3 = svm->vmcb->save.cr3; - if (unlikely(svm->nested.exit_required)) { - nested_svm_vmexit(svm); - svm->nested.exit_required = false; - - return 1; - } + svm_complete_interrupts(svm); if (is_guest_mode(vcpu)) { int vmexit; @@ -2938,8 +2941,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, return 1; } - svm_complete_interrupts(svm); - if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2957,10 +2958,10 @@ static int handle_exit(struct kvm_vcpu *vcpu, __func__, svm->vmcb->control.exit_int_info, exit_code); - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); + if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers) + + if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); dump_vmcb(vcpu); @@ -3049,18 +3050,37 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } -static int svm_nmi_allowed(struct kvm_vcpu *vcpu) +bool svm_nmi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - int ret; - ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && - !(svm->vcpu.arch.hflags & HF_NMI_MASK); - ret = ret && gif_set(svm) && nested_svm_nmi(svm); + bool ret; + + if (!gif_set(svm)) + return true; + + if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) + return false; + + ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || + (svm->vcpu.arch.hflags & HF_NMI_MASK); return ret; } +static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* An NMI must not be injected into L2 if it's supposed to VM-Exit. 
*/ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) + return -EBUSY; + + return !svm_nmi_blocked(vcpu); +} + static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3081,19 +3101,46 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) } } -static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) +bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - if (!gif_set(svm) || - (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) - return 0; + if (!gif_set(svm)) + return true; - if (is_guest_mode(vcpu) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)) - return !!(svm->vcpu.arch.hflags & HF_HIF_MASK); - else - return !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); + if (is_guest_mode(vcpu)) { + /* As long as interrupts are being delivered... */ + if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) + ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF) + : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + return true; + + /* ... vmexits aren't blocked by the interrupt shadow */ + if (nested_exit_on_intr(svm)) + return false; + } else { + if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + return true; + } + + return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK); +} + +static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* + * An IRQ must not be injected into L2 if it's supposed to VM-Exit, + * e.g. if the IRQ arrived asynchronously after checking nested events. + */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm)) + return -EBUSY; + + return !svm_interrupt_blocked(vcpu); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -3134,9 +3181,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; /* STGI will cause a vm exit */ } - if (svm->nested.exit_required) - return; /* we're not going to run the guest yet */ - /* * Something prevents NMI from been injected. Single step over possible * problem (IRET or exception injection or interrupt shadow) @@ -3156,10 +3200,17 @@ static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) return 0; } -void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) +void svm_flush_tlb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * Flush only the current ASID even if the TLB flush was invoked via + * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all + * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and + * unconditionally does a TLB flush on both nested VM-Enter and nested + * VM-Exit (via kvm_mmu_reset_context()). 
+ */ if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; else @@ -3279,10 +3330,21 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } +static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) +{ + if (!is_guest_mode(vcpu) && + to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && + to_svm(vcpu)->vmcb->control.exit_info_1) + return handle_fastpath_set_msr_irqoff(vcpu); + + return EXIT_FASTPATH_NONE; +} + void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); -static void svm_vcpu_run(struct kvm_vcpu *vcpu) +static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { + fastpath_t exit_fastpath; struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; @@ -3290,13 +3352,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; /* - * A vmexit emulation is required before the vcpu can be executed - * again. - */ - if (unlikely(svm->nested.exit_required)) - return; - - /* * Disable singlestep if we're injecting an interrupt/exception. * We don't want our modified rflags to be pushed on the stack where * we might not be able to easily reset them if we disabled NMI @@ -3387,6 +3442,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) stgi(); /* Any pending NMI will happen here */ + exit_fastpath = svm_exit_handlers_fastpath(vcpu); if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_after_interrupt(&svm->vcpu); @@ -3394,12 +3450,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) sync_cr8_to_lapic(vcpu); svm->next_rip = 0; + if (is_guest_mode(&svm->vcpu)) { + sync_nested_vmcb_control(svm); + svm->nested.nested_run_pending = 0; + } svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; /* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) - svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); + svm->vcpu.arch.apf.host_apf_flags = + kvm_read_and_reset_apf_flags(); if (npt_enabled) { vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); @@ -3415,12 +3476,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm_handle_mce(svm); mark_all_clean(svm->vmcb); + return exit_fastpath; } static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - bool update_guest_cr3 = true; unsigned long cr3; cr3 = __sme_set(root); @@ -3429,18 +3490,13 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) mark_dirty(svm->vmcb, VMCB_NPT); /* Loading L2's CR3 is handled by enter_svm_guest_mode. */ - if (is_guest_mode(vcpu)) - update_guest_cr3 = false; - else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) - cr3 = vcpu->arch.cr3; - else /* CR3 is already up-to-date. 
*/ - update_guest_cr3 = false; + if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + return; + cr3 = vcpu->arch.cr3; } - if (update_guest_cr3) { - svm->vmcb->save.cr3 = cr3; - mark_dirty(svm->vmcb, VMCB_CR); - } + svm->vmcb->save.cr3 = cr3; + mark_dirty(svm->vmcb, VMCB_CR); } static int is_disabled(void) @@ -3475,7 +3531,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_emulated_msr(int index) +static bool svm_has_emulated_msr(u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: @@ -3628,7 +3684,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, info->intercept == x86_intercept_clts) break; - intercept = svm->nested.intercept; + intercept = svm->nested.ctl.intercept; if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) break; @@ -3716,13 +3772,8 @@ out: return ret; } -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(vcpu) && - to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && - to_svm(vcpu)->vmcb->control.exit_info_1) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) @@ -3737,23 +3788,28 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu) vcpu->arch.mcg_cap &= 0x1ff; } -static int svm_smi_allowed(struct kvm_vcpu *vcpu) +bool svm_smi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); /* Per APM Vol.2 15.22.2 "Response to SMI" */ if (!gif_set(svm)) - return 0; + return true; - if (is_guest_mode(&svm->vcpu) && - svm->nested.intercept & (1ULL << INTERCEPT_SMI)) { - /* TODO: Might need to set exit_info_1 and exit_info_2 here */ - svm->vmcb->control.exit_code = SVM_EXIT_SMI; - svm->nested.exit_required = true; - return 0; - } + return is_smm(vcpu); +} - return 1; +static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm)) + return -EBUSY; + + return !svm_smi_blocked(vcpu); } static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) @@ -3793,12 +3849,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) return 1; nested_vmcb = map.hva; - enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map); + enter_svm_guest_mode(svm, vmcb, nested_vmcb); + kvm_vcpu_unmap(&svm->vcpu, &map, true); } return 0; } -static int enable_smi_window(struct kvm_vcpu *vcpu) +static void enable_smi_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3806,9 +3863,9 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) if (vgif_enabled(svm)) set_intercept(svm, INTERCEPT_STGI); /* STGI will cause a vm exit */ - return 1; + } else { + /* We must be in SMM; RSM will cause a vmexit anyway. */ } - return 0; } static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) @@ -3819,6 +3876,13 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) bool is_user = svm_get_cpl(vcpu) == 3; /* + * If RIP is invalid, go ahead with emulation which will cause an + * internal error exit. + */ + if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) + return true; + + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. 
* * Errata: @@ -3876,9 +3940,9 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) /* * TODO: Last condition latch INIT signals on vCPU when * vCPU is in guest-mode and vmcb12 defines intercept on INIT. - * To properly emulate the INIT intercept, SVM should implement - * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit() - * there if an INIT signal is pending. + * To properly emulate the INIT intercept, + * svm_check_nested_events() should call nested_svm_vmexit() + * if an INIT signal is pending. */ return !gif_set(svm) || (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT)); @@ -3932,8 +3996,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_segment = svm_set_segment, .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, - .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, - .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, @@ -3947,8 +4009,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .tlb_flush = svm_flush_tlb, + .tlb_flush_all = svm_flush_tlb, + .tlb_flush_current = svm_flush_tlb, .tlb_flush_gva = svm_flush_tlb_gva, + .tlb_flush_guest = svm_flush_tlb, .run = svm_vcpu_run, .handle_exit = handle_exit, @@ -3989,7 +4053,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .has_wbinvd_exit = svm_has_wbinvd_exit, - .read_l1_tsc_offset = svm_read_l1_tsc_offset, .write_l1_tsc_offset = svm_write_l1_tsc_offset, .load_mmu_pgd = svm_load_mmu_pgd, @@ -4002,6 +4065,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .sched_in = svm_sched_in, .pmu_ops = &amd_pmu_ops, + .nested_ops = &svm_nested_ops, + .deliver_posted_interrupt = svm_deliver_avic_intr, .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, @@ -4016,14 +4081,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, - .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, .apic_init_signal_blocked = svm_apic_init_signal_blocked, - - .check_nested_events = svm_check_nested_events, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index df3474f4fb02..6ac4c00a5d82 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -86,25 +86,17 @@ struct nested_state { u64 hsave_msr; u64 vm_cr_msr; u64 vmcb; + u32 host_intercept_exceptions; /* These are the merged vectors */ u32 *msrpm; - /* gpa pointers to the real vectors */ - u64 vmcb_msrpm; - u64 vmcb_iopm; + /* A VMRUN has started but has not yet been performed, so + * we cannot inject a nested vmexit yet. 
*/ + bool nested_run_pending; - /* A VMEXIT is required but not yet emulated */ - bool exit_required; - - /* cache for intercepts of the guest */ - u32 intercept_cr; - u32 intercept_dr; - u32 intercept_exceptions; - u64 intercept; - - /* Nested Paging related state */ - u64 nested_cr3; + /* cache for control fields of the guest */ + struct vmcb_control_area ctl; }; struct vcpu_svm { @@ -360,8 +352,12 @@ u32 svm_msrpm_offset(u32 msr); void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); -void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); +void svm_flush_tlb(struct kvm_vcpu *vcpu); void disable_nmi_singlestep(struct vcpu_svm *svm); +bool svm_smi_blocked(struct kvm_vcpu *vcpu); +bool svm_nmi_blocked(struct kvm_vcpu *vcpu); +bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); +void svm_set_gif(struct vcpu_svm *svm, bool value); /* nested.c */ @@ -369,28 +365,31 @@ void disable_nmi_singlestep(struct vcpu_svm *svm); #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ -/* This function returns true if it is save to enable the nmi window */ -static inline bool nested_svm_nmi(struct vcpu_svm *svm) +static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(&svm->vcpu)) - return true; + struct vcpu_svm *svm = to_svm(vcpu); - if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) - return true; + return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK); +} - svm->vmcb->control.exit_code = SVM_EXIT_NMI; - svm->nested.exit_required = true; +static inline bool nested_exit_on_smi(struct vcpu_svm *svm) +{ + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_SMI)); +} - return false; +static inline bool nested_exit_on_intr(struct vcpu_svm *svm) +{ + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INTR)); } -static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) +static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) { - return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK); + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI)); } void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, - struct vmcb *nested_vmcb, struct kvm_host_map *map); + struct vmcb *nested_vmcb); +void svm_leave_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct vcpu_svm *svm); void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); @@ -398,8 +397,10 @@ int nested_svm_exit_handled(struct vcpu_svm *svm); int nested_svm_check_permissions(struct vcpu_svm *svm); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); -int svm_check_nested_events(struct kvm_vcpu *vcpu); int nested_svm_exit_special(struct vcpu_svm *svm); +void sync_nested_vmcb_control(struct vcpu_svm *svm); + +extern struct kvm_x86_nested_ops svm_nested_ops; /* avic.c */ |
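One of the behavioral changes spread across the svm.c and nested.c hunks above is that pending INIT, exception, SMI, NMI and interrupt events are now routed through a single svm_check_nested_events() with a fixed priority order, instead of the old deferred "exit_required" flag. The following stand-alone sketch illustrates that ordering only; all types and helpers are invented stand-ins, not KVM code:

/*
 * The highest-priority pending event is either deferred with -EBUSY while a
 * nested VMRUN is still pending, turned into a synthesized #VMEXIT if L1
 * intercepts it, or left for normal injection into L2.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct vcpu {
	bool nested_run_pending;
	bool init_pending, exception_pending, smi_pending, nmi_pending, irq_pending;
	bool exit_on_init, exit_on_exception, exit_on_smi, exit_on_nmi, exit_on_intr;
};

static void synth_vmexit(const char *reason)
{
	printf("inject nested #VMEXIT for %s\n", reason);
}

static int check_nested_events(struct vcpu *v)
{
	/* Same order as the kernel function: INIT, exception, SMI, NMI, INTR. */
	const struct { bool pending; bool exit_to_l1; const char *name; } ev[] = {
		{ v->init_pending,      v->exit_on_init,      "INIT"      },
		{ v->exception_pending, v->exit_on_exception, "exception" },
		{ v->smi_pending,       v->exit_on_smi,       "SMI"       },
		{ v->nmi_pending,       v->exit_on_nmi,       "NMI"       },
		{ v->irq_pending,       v->exit_on_intr,      "INTR"      },
	};

	for (unsigned int i = 0; i < sizeof(ev) / sizeof(ev[0]); i++) {
		if (!ev[i].pending)
			continue;
		if (v->nested_run_pending)
			return -EBUSY;          /* finish the pending VMRUN first */
		if (ev[i].exit_to_l1)
			synth_vmexit(ev[i].name);
		return 0;                       /* only the highest-priority event */
	}
	return 0;
}

int main(void)
{
	struct vcpu v = { .nmi_pending = true, .exit_on_nmi = true };

	return check_nested_events(&v) ? 1 : 0;
}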