summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/svm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-04 00:13:47 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-04 00:13:47 +0200
commit039aeb9deb9291f3b19c375a8bc6fa7f768996cc (patch)
treed98d5ddf276843995aa214157b587bb88270c530 /arch/x86/kvm/svm
parentMerge tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff)
parentKVM: selftests: fix rdtsc() for vmx_tsc_adjust_test (diff)
downloadlinux-039aeb9deb9291f3b19c375a8bc6fa7f768996cc.tar.xz
linux-039aeb9deb9291f3b19c375a8bc6fa7f768996cc.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM: - Move the arch-specific code into arch/arm64/kvm - Start the post-32bit cleanup - Cherry-pick a few non-invasive pre-NV patches x86: - Rework of TLB flushing - Rework of event injection, especially with respect to nested virtualization - Nested AMD event injection facelift, building on the rework of generic code and fixing a lot of corner cases - Nested AMD live migration support - Optimization for TSC deadline MSR writes and IPIs - Various cleanups - Asynchronous page fault cleanups (from tglx, common topic branch with tip tree) - Interrupt-based delivery of asynchronous "page ready" events (host side) - Hyper-V MSRs and hypercalls for guest debugging - VMX preemption timer fixes s390: - Cleanups Generic: - switch vCPU thread wakeup from swait to rcuwait The other architectures, and the guest side of the asynchronous page fault work, will come next week" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (256 commits) KVM: selftests: fix rdtsc() for vmx_tsc_adjust_test KVM: check userspace_addr for all memslots KVM: selftests: update hyperv_cpuid with SynDBG tests x86/kvm/hyper-v: Add support for synthetic debugger via hypercalls x86/kvm/hyper-v: enable hypercalls regardless of hypercall page x86/kvm/hyper-v: Add support for synthetic debugger interface x86/hyper-v: Add synthetic debugger definitions KVM: selftests: VMX preemption timer migration test KVM: nVMX: Fix VMX preemption timer migration x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit KVM: x86/pmu: Support full width counting KVM: x86/pmu: Tweak kvm_pmu_get_msr to pass 'struct msr_data' in KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT KVM: x86: acknowledgment mechanism for async pf page ready notifications KVM: x86: interrupt based APF 'page ready' event delivery KVM: introduce kvm_read_guest_offset_cached() KVM: rename kvm_arch_can_inject_async_page_present() to kvm_arch_can_dequeue_async_page_present() KVM: x86: extend struct kvm_vcpu_pv_apf_data with token info Revert "KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously" KVM: VMX: Replace zero-length array with flexible-array ...
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--arch/x86/kvm/svm/nested.c698
-rw-r--r--arch/x86/kvm/svm/pmu.c7
-rw-r--r--arch/x86/kvm/svm/svm.c368
-rw-r--r--arch/x86/kvm/svm/svm.h57
4 files changed, 714 insertions, 416 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 9a2a62e5afeb..8a6db11dcb43 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,8 @@
#include "trace.h"
#include "mmu.h"
#include "x86.h"
+#include "cpuid.h"
+#include "lapic.h"
#include "svm.h"
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -59,7 +61,7 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 cr3 = svm->nested.nested_cr3;
+ u64 cr3 = svm->nested.ctl.nested_cr3;
u64 pdpte;
int ret;
@@ -74,19 +76,22 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- return svm->nested.nested_cr3;
+ return svm->nested.ctl.nested_cr3;
}
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *hsave = svm->nested.hsave;
+
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
- kvm_init_shadow_mmu(vcpu);
+ kvm_init_shadow_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
- vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
+ vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level;
reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@@ -99,8 +104,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
void recalc_intercepts(struct vcpu_svm *svm)
{
- struct vmcb_control_area *c, *h;
- struct nested_state *g;
+ struct vmcb_control_area *c, *h, *g;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -109,14 +113,16 @@ void recalc_intercepts(struct vcpu_svm *svm)
c = &svm->vmcb->control;
h = &svm->nested.hsave->control;
- g = &svm->nested;
+ g = &svm->nested.ctl;
+
+ svm->nested.host_intercept_exceptions = h->intercept_exceptions;
c->intercept_cr = h->intercept_cr;
c->intercept_dr = h->intercept_dr;
c->intercept_exceptions = h->intercept_exceptions;
c->intercept = h->intercept;
- if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
+ if (g->int_ctl & V_INTR_MASKING_MASK) {
/* We only want the cr8 intercept bits of L1 */
c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);
@@ -138,11 +144,9 @@ void recalc_intercepts(struct vcpu_svm *svm)
c->intercept |= g->intercept;
}
-static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
+static void copy_vmcb_control_area(struct vmcb_control_area *dst,
+ struct vmcb_control_area *from)
{
- struct vmcb_control_area *dst = &dst_vmcb->control;
- struct vmcb_control_area *from = &from_vmcb->control;
-
dst->intercept_cr = from->intercept_cr;
dst->intercept_dr = from->intercept_dr;
dst->intercept_exceptions = from->intercept_exceptions;
@@ -150,7 +154,7 @@ static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb
dst->iopm_base_pa = from->iopm_base_pa;
dst->msrpm_base_pa = from->msrpm_base_pa;
dst->tsc_offset = from->tsc_offset;
- dst->asid = from->asid;
+ /* asid not copied, it is handled manually for svm->vmcb. */
dst->tlb_ctl = from->tlb_ctl;
dst->int_ctl = from->int_ctl;
dst->int_vector = from->int_vector;
@@ -179,7 +183,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
*/
int i;
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT)))
return true;
for (i = 0; i < MSRPM_OFFSETS; i++) {
@@ -190,7 +194,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
break;
p = msrpm_offsets[i];
- offset = svm->nested.vmcb_msrpm + (p * 4);
+ offset = svm->nested.ctl.msrpm_base_pa + (p * 4);
if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
return false;
@@ -203,41 +207,111 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
return true;
}
-static bool nested_vmcb_checks(struct vmcb *vmcb)
+static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
{
- if ((vmcb->save.efer & EFER_SVME) == 0)
+ if ((control->intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
return false;
- if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
+ if (control->asid == 0)
return false;
- if (vmcb->control.asid == 0)
- return false;
-
- if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+ if ((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
!npt_enabled)
return false;
return true;
}
-void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct kvm_host_map *map)
+static bool nested_vmcb_checks(struct vmcb *vmcb)
{
- bool evaluate_pending_interrupts =
- is_intercept(svm, INTERCEPT_VINTR) ||
- is_intercept(svm, INTERCEPT_IRET);
+ if ((vmcb->save.efer & EFER_SVME) == 0)
+ return false;
- if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
- svm->vcpu.arch.hflags |= HF_HIF_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+ if (((vmcb->save.cr0 & X86_CR0_CD) == 0) &&
+ (vmcb->save.cr0 & X86_CR0_NW))
+ return false;
- if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
- svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
- nested_svm_init_mmu_context(&svm->vcpu);
+ return nested_vmcb_check_controls(&vmcb->control);
+}
+
+static void load_nested_vmcb_control(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
+{
+ copy_vmcb_control_area(&svm->nested.ctl, control);
+
+ /* Copy it here because nested_svm_check_controls will check it. */
+ svm->nested.ctl.asid = control->asid;
+ svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
+ svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
+}
+
+/*
+ * Synchronize fields that are written by the processor, so that
+ * they can be copied back into the nested_vmcb.
+ */
+void sync_nested_vmcb_control(struct vcpu_svm *svm)
+{
+ u32 mask;
+ svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
+ svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;
+
+ /* Only a few fields of int_ctl are written by the processor. */
+ mask = V_IRQ_MASK | V_TPR_MASK;
+ if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
+ is_intercept(svm, SVM_EXIT_VINTR)) {
+ /*
+ * In order to request an interrupt window, L0 is usurping
+ * svm->vmcb->control.int_ctl and possibly setting V_IRQ
+ * even if it was clear in L1's VMCB. Restoring it would be
+ * wrong. However, in this case V_IRQ will remain true until
+ * interrupt_window_interception calls svm_clear_vintr and
+ * restores int_ctl. We can just leave it aside.
+ */
+ mask &= ~V_IRQ_MASK;
}
+ svm->nested.ctl.int_ctl &= ~mask;
+ svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
+}
+
+/*
+ * Transfer any event that L0 or L1 wanted to inject into L2 to
+ * EXIT_INT_INFO.
+ */
+static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
+ struct vmcb *nested_vmcb)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u32 exit_int_info = 0;
+ unsigned int nr;
+
+ if (vcpu->arch.exception.injected) {
+ nr = vcpu->arch.exception.nr;
+ exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+
+ if (vcpu->arch.exception.has_error_code) {
+ exit_int_info |= SVM_EVTINJ_VALID_ERR;
+ nested_vmcb->control.exit_int_info_err =
+ vcpu->arch.exception.error_code;
+ }
+
+ } else if (vcpu->arch.nmi_injected) {
+ exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ } else if (vcpu->arch.interrupt.injected) {
+ nr = vcpu->arch.interrupt.nr;
+ exit_int_info = nr | SVM_EVTINJ_VALID;
+
+ if (vcpu->arch.interrupt.soft)
+ exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
+ else
+ exit_int_info |= SVM_EVTINJ_TYPE_INTR;
+ }
+
+ nested_vmcb->control.exit_int_info = exit_int_info;
+}
+
+static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+{
/* Load the nested guest state */
svm->vmcb->save.es = nested_vmcb->save.es;
svm->vmcb->save.cs = nested_vmcb->save.cs;
@@ -249,14 +323,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
- if (npt_enabled) {
- svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
- svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
- } else
- (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
-
- /* Guest paging mode is active - reset mmu */
- kvm_mmu_reset_context(&svm->vcpu);
+ (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
@@ -270,38 +337,34 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+}
- svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
- svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
+static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
+{
+ const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
+ nested_svm_init_mmu_context(&svm->vcpu);
- /* cache intercepts */
- svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
- svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
- svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
- svm->nested.intercept = nested_vmcb->control.intercept;
+ /* Guest paging mode is active - reset mmu */
+ kvm_mmu_reset_context(&svm->vcpu);
- svm_flush_tlb(&svm->vcpu, true);
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
- svm->vcpu.arch.hflags |= HF_VINTR_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+ svm_flush_tlb(&svm->vcpu);
- svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
- svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
+ svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
- svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
- svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
- svm->vmcb->control.int_state = nested_vmcb->control.int_state;
- svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
- svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+ svm->vmcb->control.int_ctl =
+ (svm->nested.ctl.int_ctl & ~mask) |
+ (svm->nested.hsave->control.int_ctl & mask);
- svm->vmcb->control.pause_filter_count =
- nested_vmcb->control.pause_filter_count;
- svm->vmcb->control.pause_filter_thresh =
- nested_vmcb->control.pause_filter_thresh;
+ svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
+ svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
+ svm->vmcb->control.int_state = svm->nested.ctl.int_state;
+ svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
+ svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
- kvm_vcpu_unmap(&svm->vcpu, map, true);
+ svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count;
+ svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -312,25 +375,18 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
*/
recalc_intercepts(svm);
- svm->nested.vmcb = vmcb_gpa;
+ mark_all_dirty(svm->vmcb);
+}
- /*
- * If L1 had a pending IRQ/NMI before executing VMRUN,
- * which wasn't delivered because it was disallowed (e.g.
- * interrupts disabled), L0 needs to evaluate if this pending
- * event should cause an exit from L2 to L1 or be delivered
- * directly to L2.
- *
- * Usually this would be handled by the processor noticing an
- * IRQ/NMI window request. However, VMRUN can unblock interrupts
- * by implicitly setting GIF, so force L0 to perform pending event
- * evaluation by requesting a KVM_REQ_EVENT.
- */
- enable_gif(svm);
- if (unlikely(evaluate_pending_interrupts))
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+ struct vmcb *nested_vmcb)
+{
+ svm->nested.vmcb = vmcb_gpa;
+ load_nested_vmcb_control(svm, &nested_vmcb->control);
+ nested_prepare_vmcb_save(svm, nested_vmcb);
+ nested_prepare_vmcb_control(svm);
- mark_all_dirty(svm->vmcb);
+ svm_set_gif(svm, true);
}
int nested_svm_vmrun(struct vcpu_svm *svm)
@@ -342,8 +398,12 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
struct kvm_host_map map;
u64 vmcb_gpa;
- vmcb_gpa = svm->vmcb->save.rax;
+ if (is_smm(&svm->vcpu)) {
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+ }
+ vmcb_gpa = svm->vmcb->save.rax;
ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
if (ret == -EINVAL) {
kvm_inject_gp(&svm->vcpu, 0);
@@ -361,10 +421,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.exit_code_hi = 0;
nested_vmcb->control.exit_info_1 = 0;
nested_vmcb->control.exit_info_2 = 0;
-
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
-
- return ret;
+ goto out;
}
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
@@ -404,9 +461,10 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
else
hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
- copy_vmcb_control_area(hsave, vmcb);
+ copy_vmcb_control_area(&hsave->control, &vmcb->control);
- enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
+ svm->nested.nested_run_pending = 1;
+ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb);
if (!nested_svm_vmrun_msrpm(svm)) {
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
@@ -417,6 +475,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_svm_vmexit(svm);
}
+out:
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
+
return ret;
}
@@ -444,13 +505,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
- trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
- vmcb->control.exit_info_1,
- vmcb->control.exit_info_2,
- vmcb->control.exit_int_info,
- vmcb->control.exit_int_info_err,
- KVM_ISA_SVM);
-
rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
if (rc) {
if (rc == -EINVAL)
@@ -463,9 +517,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
svm->nested.vmcb = 0;
+ WARN_ON_ONCE(svm->nested.nested_run_pending);
+
+ /* in case we halted in L2 */
+ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
/* Give the current vmcb to the guest */
- disable_gif(svm);
+ svm_set_gif(svm, false);
nested_vmcb->save.es = vmcb->save.es;
nested_vmcb->save.cs = vmcb->save.cs;
@@ -479,62 +537,42 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.cr2 = vmcb->save.cr2;
nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
- nested_vmcb->save.rip = vmcb->save.rip;
- nested_vmcb->save.rsp = vmcb->save.rsp;
- nested_vmcb->save.rax = vmcb->save.rax;
+ nested_vmcb->save.rip = kvm_rip_read(&svm->vcpu);
+ nested_vmcb->save.rsp = kvm_rsp_read(&svm->vcpu);
+ nested_vmcb->save.rax = kvm_rax_read(&svm->vcpu);
nested_vmcb->save.dr7 = vmcb->save.dr7;
nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
nested_vmcb->save.cpl = vmcb->save.cpl;
- nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
- nested_vmcb->control.int_vector = vmcb->control.int_vector;
nested_vmcb->control.int_state = vmcb->control.int_state;
nested_vmcb->control.exit_code = vmcb->control.exit_code;
nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
- nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
- nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+ if (nested_vmcb->control.exit_code != SVM_EXIT_ERR)
+ nested_vmcb_save_pending_event(svm, nested_vmcb);
if (svm->nrips_enabled)
nested_vmcb->control.next_rip = vmcb->control.next_rip;
- /*
- * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
- * to make sure that we do not lose injected events. So check event_inj
- * here and copy it to exit_int_info if it is valid.
- * Exit_int_info and event_inj can't be both valid because the case
- * below only happens on a VMRUN instruction intercept which has
- * no valid exit_int_info set.
- */
- if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
- struct vmcb_control_area *nc = &nested_vmcb->control;
-
- nc->exit_int_info = vmcb->control.event_inj;
- nc->exit_int_info_err = vmcb->control.event_inj_err;
- }
-
- nested_vmcb->control.tlb_ctl = 0;
- nested_vmcb->control.event_inj = 0;
- nested_vmcb->control.event_inj_err = 0;
+ nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl;
+ nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
+ nested_vmcb->control.event_inj = svm->nested.ctl.event_inj;
+ nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
nested_vmcb->control.pause_filter_count =
svm->vmcb->control.pause_filter_count;
nested_vmcb->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
- /* We always set V_INTR_MASKING and remember the old value in hflags */
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
/* Restore the original control entries */
- copy_vmcb_control_area(vmcb, hsave);
+ copy_vmcb_control_area(&vmcb->control, &hsave->control);
- svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
+ svm->vcpu.arch.l1_tsc_offset;
- svm->nested.nested_cr3 = 0;
+ svm->nested.ctl.nested_cr3 = 0;
/* Restore selected save entries */
svm->vmcb->save.es = hsave->save.es;
@@ -562,6 +600,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
mark_all_dirty(svm->vmcb);
+ trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code,
+ nested_vmcb->control.exit_info_1,
+ nested_vmcb->control.exit_info_2,
+ nested_vmcb->control.exit_int_info,
+ nested_vmcb->control.exit_int_info_err,
+ KVM_ISA_SVM);
+
kvm_vcpu_unmap(&svm->vcpu, &map, true);
nested_svm_uninit_mmu_context(&svm->vcpu);
@@ -579,12 +624,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
return 0;
}
+/*
+ * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+ */
+void svm_leave_nested(struct vcpu_svm *svm)
+{
+ if (is_guest_mode(&svm->vcpu)) {
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
+
+ svm->nested.nested_run_pending = 0;
+ leave_guest_mode(&svm->vcpu);
+ copy_vmcb_control_area(&vmcb->control, &hsave->control);
+ nested_svm_uninit_mmu_context(&svm->vcpu);
+ }
+}
+
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
u32 offset, msr, value;
int write, mask;
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT)))
return NESTED_EXIT_HOST;
msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
@@ -598,56 +659,12 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
/* Offset is in 32 bit units but need in 8 bit units */
offset *= 4;
- if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
+ if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
return NESTED_EXIT_DONE;
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
-/* DB exceptions for our internal use must not cause vmexit */
-static int nested_svm_intercept_db(struct vcpu_svm *svm)
-{
- unsigned long dr6 = svm->vmcb->save.dr6;
-
- /* Always catch it and pass it to userspace if debugging. */
- if (svm->vcpu.guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return NESTED_EXIT_HOST;
-
- /* if we're not singlestepping, it's not ours */
- if (!svm->nmi_singlestep)
- goto reflected_db;
-
- /* if it's not a singlestep exception, it's not ours */
- if (!(dr6 & DR6_BS))
- goto reflected_db;
-
- /* if the guest is singlestepping, it should get the vmexit */
- if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
- disable_nmi_singlestep(svm);
- goto reflected_db;
- }
-
- /* it's ours, the nested hypervisor must not see this one */
- return NESTED_EXIT_HOST;
-
-reflected_db:
- /*
- * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
- * it will be moved into the nested VMCB by nested_svm_vmexit. Once
- * exceptions will be moved to svm_check_nested_events, all this stuff
- * will just go away and we could just return NESTED_EXIT_HOST
- * unconditionally. db_interception will queue the exception, which
- * will be processed by svm_check_nested_events if a nested vmexit is
- * required, and we will just use kvm_deliver_exception_payload to copy
- * the payload to DR6 before vmexit.
- */
- WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
- svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
- svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
- return NESTED_EXIT_DONE;
-}
-
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
unsigned port, size, iopm_len;
@@ -655,13 +672,13 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
u8 start_bit;
u64 gpa;
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
+ if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
return NESTED_EXIT_HOST;
port = svm->vmcb->control.exit_info_1 >> 16;
size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
SVM_IOIO_SIZE_SHIFT;
- gpa = svm->nested.vmcb_iopm + (port / 8);
+ gpa = svm->nested.ctl.iopm_base_pa + (port / 8);
start_bit = port % 8;
iopm_len = (start_bit + size > 8) ? 2 : 1;
mask = (0xf >> (4 - size)) << start_bit;
@@ -687,31 +704,23 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
break;
case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
- if (svm->nested.intercept_cr & bit)
+ if (svm->nested.ctl.intercept_cr & bit)
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
- if (svm->nested.intercept_dr & bit)
+ if (svm->nested.ctl.intercept_dr & bit)
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
- u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (svm->nested.intercept_exceptions & excp_bits) {
- if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
- vmexit = nested_svm_intercept_db(svm);
- else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
- svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
- vmexit = NESTED_EXIT_HOST;
- else
- vmexit = NESTED_EXIT_DONE;
- }
- /* async page fault always cause vmexit */
- else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
- svm->vcpu.arch.exception.nested_apf != 0)
- vmexit = NESTED_EXIT_DONE;
+ /*
+ * Host-intercepted exceptions have been checked already in
+ * nested_svm_exit_special. There is nothing to do here,
+ * the vmexit is injected by svm_check_nested_events.
+ */
+ vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_ERR: {
@@ -720,7 +729,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
}
default: {
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- if (svm->nested.intercept & exit_bits)
+ if (svm->nested.ctl.intercept & exit_bits)
vmexit = NESTED_EXIT_DONE;
}
}
@@ -756,62 +765,140 @@ int nested_svm_check_permissions(struct vcpu_svm *svm)
return 0;
}
-int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code)
+static bool nested_exit_on_exception(struct vcpu_svm *svm)
{
- int vmexit;
+ unsigned int nr = svm->vcpu.arch.exception.nr;
- if (!is_guest_mode(&svm->vcpu))
- return 0;
+ return (svm->nested.ctl.intercept_exceptions & (1 << nr));
+}
- vmexit = nested_svm_intercept(svm);
- if (vmexit != NESTED_EXIT_DONE)
- return 0;
+static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
+{
+ unsigned int nr = svm->vcpu.arch.exception.nr;
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = error_code;
+
+ if (svm->vcpu.arch.exception.has_error_code)
+ svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
/*
* EXITINFO2 is undefined for all exception intercepts other
* than #PF.
*/
- if (svm->vcpu.arch.exception.nested_apf)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
- else if (svm->vcpu.arch.exception.has_payload)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
- else
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ if (nr == PF_VECTOR) {
+ if (svm->vcpu.arch.exception.nested_apf)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+ else if (svm->vcpu.arch.exception.has_payload)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
+ else
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ } else if (nr == DB_VECTOR) {
+ /* See inject_pending_event. */
+ kvm_deliver_exception_payload(&svm->vcpu);
+ if (svm->vcpu.arch.dr7 & DR7_GD) {
+ svm->vcpu.arch.dr7 &= ~DR7_GD;
+ kvm_update_dr7(&svm->vcpu);
+ }
+ } else
+ WARN_ON(svm->vcpu.arch.exception.has_payload);
- svm->nested.exit_required = true;
- return vmexit;
+ nested_svm_vmexit(svm);
+}
+
+static void nested_svm_smi(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.exit_code = SVM_EXIT_SMI;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
+}
+
+static void nested_svm_nmi(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.exit_code = SVM_EXIT_NMI;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
}
static void nested_svm_intr(struct vcpu_svm *svm)
{
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
- /* nested_svm_vmexit this gets called afterwards from handle_exit */
- svm->nested.exit_required = true;
- trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+ nested_svm_vmexit(svm);
}
-static bool nested_exit_on_intr(struct vcpu_svm *svm)
+static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
- return (svm->nested.intercept & 1ULL);
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INIT));
+}
+
+static void nested_svm_init(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.exit_code = SVM_EXIT_INIT;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
}
-int svm_check_nested_events(struct kvm_vcpu *vcpu)
+
+static int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool block_nested_events =
- kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;
+ kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (lapic_in_kernel(vcpu) &&
+ test_bit(KVM_APIC_INIT, &apic->pending_events)) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_init(svm))
+ return 0;
+ nested_svm_init(svm);
+ return 0;
+ }
+
+ if (vcpu->arch.exception.pending) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_exception(svm))
+ return 0;
+ nested_svm_inject_exception_vmexit(svm);
+ return 0;
+ }
+
+ if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_smi(svm))
+ return 0;
+ nested_svm_smi(svm);
+ return 0;
+ }
+
+ if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_nmi(svm))
+ return 0;
+ nested_svm_nmi(svm);
+ return 0;
+ }
- if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
+ if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
if (block_nested_events)
return -EBUSY;
+ if (!nested_exit_on_intr(svm))
+ return 0;
nested_svm_intr(svm);
return 0;
}
@@ -826,21 +913,170 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
switch (exit_code) {
case SVM_EXIT_INTR:
case SVM_EXIT_NMI:
- case SVM_EXIT_EXCP_BASE + MC_VECTOR:
- return NESTED_EXIT_HOST;
case SVM_EXIT_NPF:
- /* For now we are always handling NPFs when using them */
- if (npt_enabled)
+ return NESTED_EXIT_HOST;
+ case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+ u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+
+ if (get_host_vmcb(svm)->control.intercept_exceptions & excp_bits)
return NESTED_EXIT_HOST;
- break;
- case SVM_EXIT_EXCP_BASE + PF_VECTOR:
- /* When we're shadowing, trap PFs, but not async PF */
- if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
+ else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
+ svm->vcpu.arch.apf.host_apf_flags)
+ /* Trap async PF even if not shadowing */
return NESTED_EXIT_HOST;
break;
+ }
default:
break;
}
return NESTED_EXIT_CONTINUE;
}
+
+static int svm_get_nested_state(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ u32 user_data_size)
+{
+ struct vcpu_svm *svm;
+ struct kvm_nested_state kvm_state = {
+ .flags = 0,
+ .format = KVM_STATE_NESTED_FORMAT_SVM,
+ .size = sizeof(kvm_state),
+ };
+ struct vmcb __user *user_vmcb = (struct vmcb __user *)
+ &user_kvm_nested_state->data.svm[0];
+
+ if (!vcpu)
+ return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
+
+ svm = to_svm(vcpu);
+
+ if (user_data_size < kvm_state.size)
+ goto out;
+
+ /* First fill in the header and copy it out. */
+ if (is_guest_mode(vcpu)) {
+ kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb;
+ kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
+ kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
+
+ if (svm->nested.nested_run_pending)
+ kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+ }
+
+ if (gif_set(svm))
+ kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
+
+ if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
+ return -EFAULT;
+
+ if (!is_guest_mode(vcpu))
+ goto out;
+
+ /*
+ * Copy over the full size of the VMCB rather than just the size
+ * of the structs.
+ */
+ if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
+ return -EFAULT;
+ if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
+ sizeof(user_vmcb->control)))
+ return -EFAULT;
+ if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save,
+ sizeof(user_vmcb->save)))
+ return -EFAULT;
+
+out:
+ return kvm_state.size;
+}
+
+static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb __user *user_vmcb = (struct vmcb __user *)
+ &user_kvm_nested_state->data.svm[0];
+ struct vmcb_control_area ctl;
+ struct vmcb_save_area save;
+ u32 cr0;
+
+ if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
+ return -EINVAL;
+
+ if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
+ KVM_STATE_NESTED_RUN_PENDING |
+ KVM_STATE_NESTED_GIF_SET))
+ return -EINVAL;
+
+ /*
+ * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
+ * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
+ */
+ if (!(vcpu->arch.efer & EFER_SVME)) {
+ /* GIF=1 and no guest mode are required if SVME=0. */
+ if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
+ return -EINVAL;
+ }
+
+ /* SMM temporarily disables SVM, so we cannot be in guest mode. */
+ if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+ return -EINVAL;
+
+ if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+ svm_leave_nested(svm);
+ goto out_set_gif;
+ }
+
+ if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
+ return -EINVAL;
+ if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
+ return -EINVAL;
+ if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl)))
+ return -EFAULT;
+ if (copy_from_user(&save, &user_vmcb->save, sizeof(save)))
+ return -EFAULT;
+
+ if (!nested_vmcb_check_controls(&ctl))
+ return -EINVAL;
+
+ /*
+ * Processor state contains L2 state. Check that it is
+ * valid for guest mode (see nested_vmcb_checks).
+ */
+ cr0 = kvm_read_cr0(vcpu);
+ if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
+ return -EINVAL;
+
+ /*
+ * Validate host state saved from before VMRUN (see
+ * nested_svm_check_permissions).
+ * TODO: validate reserved bits for all saved state.
+ */
+ if (!(save.cr0 & X86_CR0_PG))
+ return -EINVAL;
+
+ /*
+ * All checks done, we can enter guest mode. L1 control fields
+ * come from the nested save state. Guest state is already
+ * in the registers, the save area of the nested state instead
+ * contains saved L1 state.
+ */
+ copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
+ hsave->save = save;
+
+ svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa;
+ load_nested_vmcb_control(svm, &ctl);
+ nested_prepare_vmcb_control(svm);
+
+out_set_gif:
+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+ return 0;
+}
+
+struct kvm_x86_nested_ops svm_nested_ops = {
+ .check_events = svm_check_nested_events,
+ .get_state = svm_get_nested_state,
+ .set_state = svm_set_nested_state,
+};
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index ce0b10fe5e2b..035da07500e8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -215,21 +215,22 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
return pmc;
}
-static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
+ u32 msr = msr_info->index;
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
- *data = pmc_read_counter(pmc);
+ msr_info->data = pmc_read_counter(pmc);
return 0;
}
/* MSR_EVNTSELn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
if (pmc) {
- *data = pmc->eventsel;
+ msr_info->data = pmc->eventsel;
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a862c768fd54..9e333b91ff78 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -33,6 +33,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/mce.h>
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
@@ -264,6 +265,7 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
vcpu->arch.efer = efer;
if (!npt_enabled) {
@@ -274,8 +276,13 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
efer &= ~EFER_LME;
}
- to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
- mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
+ if (!(efer & EFER_SVME)) {
+ svm_leave_nested(svm);
+ svm_set_gif(svm, true);
+ }
+
+ svm->vmcb->save.efer = efer | EFER_SVME;
+ mark_dirty(svm->vmcb, VMCB_CR);
}
static int is_external_interrupt(u32 info)
@@ -318,9 +325,6 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
return 0;
} else {
- if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
- pr_err("%s: ip 0x%lx next 0x%llx\n",
- __func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip);
}
svm_set_interrupt_shadow(vcpu, 0);
@@ -333,17 +337,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
- bool reinject = vcpu->arch.exception.injected;
u32 error_code = vcpu->arch.exception.error_code;
- /*
- * If we are within a nested VM we'd better #VMEXIT and let the guest
- * handle the exception
- */
- if (!reinject &&
- nested_svm_check_exception(svm, nr, has_error_code, error_code))
- return;
-
kvm_deliver_exception_payload(&svm->vcpu);
if (nr == BP_VECTOR && !nrips) {
@@ -780,7 +775,7 @@ static __init void svm_adjust_mmio_mask(void)
*/
mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
- kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+ kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK);
}
static void svm_hardware_teardown(void)
@@ -890,7 +885,7 @@ static __init int svm_hardware_setup(void)
if (npt_enabled && !npt)
npt_enabled = false;
- kvm_configure_mmu(npt_enabled, PT_PDPE_LEVEL);
+ kvm_configure_mmu(npt_enabled, PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
if (nrips) {
@@ -953,16 +948,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
-static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (is_guest_mode(vcpu))
- return svm->nested.hsave->control.tsc_offset;
-
- return vcpu->arch.tsc_offset;
-}
-
static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1208,6 +1193,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm->avic_is_running = true;
svm->nested.hsave = page_address(hsave_page);
+ clear_page(svm->nested.hsave);
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
@@ -1364,12 +1350,13 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static inline void svm_enable_vintr(struct vcpu_svm *svm)
+static void svm_set_vintr(struct vcpu_svm *svm)
{
struct vmcb_control_area *control;
/* The following fields are ignored when AVIC is enabled */
WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+ set_intercept(svm, INTERCEPT_VINTR);
/*
* This is just a dummy VINTR to actually cause a vmexit to happen.
@@ -1383,18 +1370,19 @@ static inline void svm_enable_vintr(struct vcpu_svm *svm)
mark_dirty(svm->vmcb, VMCB_INTR);
}
-static void svm_set_vintr(struct vcpu_svm *svm)
-{
- set_intercept(svm, INTERCEPT_VINTR);
- if (is_intercept(svm, INTERCEPT_VINTR))
- svm_enable_vintr(svm);
-}
-
static void svm_clear_vintr(struct vcpu_svm *svm)
{
+ const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
clr_intercept(svm, INTERCEPT_VINTR);
- svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+ /* Drop int_ctl fields related to VINTR injection. */
+ svm->vmcb->control.int_ctl &= mask;
+ if (is_guest_mode(&svm->vcpu)) {
+ WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
+ (svm->nested.ctl.int_ctl & V_TPR_MASK));
+ svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
+ }
+
mark_dirty(svm->vmcb, VMCB_INTR);
}
@@ -1533,14 +1521,6 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
mark_dirty(svm->vmcb, VMCB_DT);
}
-static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
-{
-}
-
-static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
-{
-}
-
static void update_cr0_intercept(struct vcpu_svm *svm)
{
ulong gcr0 = svm->vcpu.arch.cr0;
@@ -1603,7 +1583,7 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
- svm_flush_tlb(vcpu, true);
+ svm_flush_tlb(vcpu);
vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -1842,6 +1822,25 @@ static bool is_erratum_383(void)
return true;
}
+/*
+ * Trigger machine check on the host. We assume all the MSRs are already set up
+ * by the CPU and that we still run on the same CPU as the MCE occurred on.
+ * We pass a fake environment to the machine check handler because we want
+ * the guest to be always treated like user space, no matter what context
+ * it used internally.
+ */
+static void kvm_machine_check(void)
+{
+#if defined(CONFIG_X86_MCE)
+ struct pt_regs regs = {
+ .cs = 3, /* Fake ring 3 no matter what the guest ran on */
+ .flags = X86_EFLAGS_IF,
+ };
+
+ do_machine_check(&regs, 0);
+#endif
+}
+
static void svm_handle_mce(struct vcpu_svm *svm)
{
if (is_erratum_383()) {
@@ -1860,11 +1859,7 @@ static void svm_handle_mce(struct vcpu_svm *svm)
* On an #MC intercept the MCE handler is not called automatically in
* the host. So do it by hand here.
*/
- asm volatile (
- "int $0x12\n");
- /* not sure if we ever come back to this point */
-
- return;
+ kvm_machine_check();
}
static int mc_interception(struct vcpu_svm *svm)
@@ -1993,6 +1988,38 @@ static int vmrun_interception(struct vcpu_svm *svm)
return nested_svm_vmrun(svm);
}
+void svm_set_gif(struct vcpu_svm *svm, bool value)
+{
+ if (value) {
+ /*
+ * If VGIF is enabled, the STGI intercept is only added to
+ * detect the opening of the SMI/NMI window; remove it now.
+ * Likewise, clear the VINTR intercept, we will set it
+ * again while processing KVM_REQ_EVENT if needed.
+ */
+ if (vgif_enabled(svm))
+ clr_intercept(svm, INTERCEPT_STGI);
+ if (is_intercept(svm, SVM_EXIT_VINTR))
+ svm_clear_vintr(svm);
+
+ enable_gif(svm);
+ if (svm->vcpu.arch.smi_pending ||
+ svm->vcpu.arch.nmi_pending ||
+ kvm_cpu_has_injectable_intr(&svm->vcpu))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ } else {
+ disable_gif(svm);
+
+ /*
+ * After a CLGI no interrupts should come. But if vGIF is
+ * in use, we still rely on the VINTR intercept (rather than
+ * STGI) to detect an open interrupt window.
+ */
+ if (!vgif_enabled(svm))
+ svm_clear_vintr(svm);
+ }
+}
+
static int stgi_interception(struct vcpu_svm *svm)
{
int ret;
@@ -2000,18 +2027,8 @@ static int stgi_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
- /*
- * If VGIF is enabled, the STGI intercept is only added to
- * detect the opening of the SMI/NMI window; remove it now.
- */
- if (vgif_enabled(svm))
- clr_intercept(svm, INTERCEPT_STGI);
-
ret = kvm_skip_emulated_instruction(&svm->vcpu);
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
-
- enable_gif(svm);
-
+ svm_set_gif(svm, true);
return ret;
}
@@ -2023,13 +2040,7 @@ static int clgi_interception(struct vcpu_svm *svm)
return 1;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
-
- disable_gif(svm);
-
- /* After a CLGI no interrupts should come */
- if (!kvm_vcpu_apicv_active(&svm->vcpu))
- svm_clear_vintr(svm);
-
+ svm_set_gif(svm, false);
return ret;
}
@@ -2193,7 +2204,7 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
bool ret = false;
u64 intercept;
- intercept = svm->nested.intercept;
+ intercept = svm->nested.ctl.intercept;
if (!is_guest_mode(&svm->vcpu) ||
(!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
@@ -2671,8 +2682,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
*/
svm_toggle_avic_for_irq_window(&svm->vcpu, true);
- svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
- mark_dirty(svm->vmcb, VMCB_INTR);
++svm->vcpu.stat.irq_window_exits;
return 1;
}
@@ -2898,8 +2907,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = control->exit_info_2;
}
-static int handle_exit(struct kvm_vcpu *vcpu,
- enum exit_fastpath_completion exit_fastpath)
+static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_run *kvm_run = vcpu->run;
@@ -2912,12 +2920,7 @@ static int handle_exit(struct kvm_vcpu *vcpu,
if (npt_enabled)
vcpu->arch.cr3 = svm->vmcb->save.cr3;
- if (unlikely(svm->nested.exit_required)) {
- nested_svm_vmexit(svm);
- svm->nested.exit_required = false;
-
- return 1;
- }
+ svm_complete_interrupts(svm);
if (is_guest_mode(vcpu)) {
int vmexit;
@@ -2938,8 +2941,6 @@ static int handle_exit(struct kvm_vcpu *vcpu,
return 1;
}
- svm_complete_interrupts(svm);
-
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2957,10 +2958,10 @@ static int handle_exit(struct kvm_vcpu *vcpu,
__func__, svm->vmcb->control.exit_int_info,
exit_code);
- if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
- kvm_skip_emulated_instruction(vcpu);
+ if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
- } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
+
+ if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
dump_vmcb(vcpu);
@@ -3049,18 +3050,37 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}
-static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
+bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- int ret;
- ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- !(svm->vcpu.arch.hflags & HF_NMI_MASK);
- ret = ret && gif_set(svm) && nested_svm_nmi(svm);
+ bool ret;
+
+ if (!gif_set(svm))
+ return true;
+
+ if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
+ return false;
+
+ ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
+ (svm->vcpu.arch.hflags & HF_NMI_MASK);
return ret;
}
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (svm->nested.nested_run_pending)
+ return -EBUSY;
+
+ /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
+ return -EBUSY;
+
+ return !svm_nmi_blocked(vcpu);
+}
+
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3081,19 +3101,46 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
}
}
-static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
+bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- if (!gif_set(svm) ||
- (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
- return 0;
+ if (!gif_set(svm))
+ return true;
- if (is_guest_mode(vcpu) && (svm->vcpu.arch.hflags & HF_VINTR_MASK))
- return !!(svm->vcpu.arch.hflags & HF_HIF_MASK);
- else
- return !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
+ if (is_guest_mode(vcpu)) {
+ /* As long as interrupts are being delivered... */
+ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
+ ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
+ : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
+ return true;
+
+ /* ... vmexits aren't blocked by the interrupt shadow */
+ if (nested_exit_on_intr(svm))
+ return false;
+ } else {
+ if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
+ return true;
+ }
+
+ return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK);
+}
+
+static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (svm->nested.nested_run_pending)
+ return -EBUSY;
+
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
+ return -EBUSY;
+
+ return !svm_interrupt_blocked(vcpu);
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -3134,9 +3181,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return; /* STGI will cause a vm exit */
}
- if (svm->nested.exit_required)
- return; /* we're not going to run the guest yet */
-
/*
* Something prevents NMI from been injected. Single step over possible
* problem (IRET or exception injection or interrupt shadow)
@@ -3156,10 +3200,17 @@ static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
return 0;
}
-void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * Flush only the current ASID even if the TLB flush was invoked via
+ * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all
+ * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and
+ * unconditionally does a TLB flush on both nested VM-Enter and nested
+ * VM-Exit (via kvm_mmu_reset_context()).
+ */
if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
else
@@ -3279,10 +3330,21 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
svm_complete_interrupts(svm);
}
+static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+ if (!is_guest_mode(vcpu) &&
+ to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ to_svm(vcpu)->vmcb->control.exit_info_1)
+ return handle_fastpath_set_msr_irqoff(vcpu);
+
+ return EXIT_FASTPATH_NONE;
+}
+
void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
-static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
+ fastpath_t exit_fastpath;
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
@@ -3290,13 +3352,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
/*
- * A vmexit emulation is required before the vcpu can be executed
- * again.
- */
- if (unlikely(svm->nested.exit_required))
- return;
-
- /*
* Disable singlestep if we're injecting an interrupt/exception.
* We don't want our modified rflags to be pushed on the stack where
* we might not be able to easily reset them if we disabled NMI
@@ -3387,6 +3442,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
stgi();
/* Any pending NMI will happen here */
+ exit_fastpath = svm_exit_handlers_fastpath(vcpu);
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_after_interrupt(&svm->vcpu);
@@ -3394,12 +3450,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
sync_cr8_to_lapic(vcpu);
svm->next_rip = 0;
+ if (is_guest_mode(&svm->vcpu)) {
+ sync_nested_vmcb_control(svm);
+ svm->nested.nested_run_pending = 0;
+ }
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
/* if exit due to PF check for async PF */
if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
- svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
+ svm->vcpu.arch.apf.host_apf_flags =
+ kvm_read_and_reset_apf_flags();
if (npt_enabled) {
vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
@@ -3415,12 +3476,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm_handle_mce(svm);
mark_all_clean(svm->vmcb);
+ return exit_fastpath;
}
static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
- bool update_guest_cr3 = true;
unsigned long cr3;
cr3 = __sme_set(root);
@@ -3429,18 +3490,13 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
mark_dirty(svm->vmcb, VMCB_NPT);
/* Loading L2's CR3 is handled by enter_svm_guest_mode. */
- if (is_guest_mode(vcpu))
- update_guest_cr3 = false;
- else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
- cr3 = vcpu->arch.cr3;
- else /* CR3 is already up-to-date. */
- update_guest_cr3 = false;
+ if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+ return;
+ cr3 = vcpu->arch.cr3;
}
- if (update_guest_cr3) {
- svm->vmcb->save.cr3 = cr3;
- mark_dirty(svm->vmcb, VMCB_CR);
- }
+ svm->vmcb->save.cr3 = cr3;
+ mark_dirty(svm->vmcb, VMCB_CR);
}
static int is_disabled(void)
@@ -3475,7 +3531,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static bool svm_has_emulated_msr(int index)
+static bool svm_has_emulated_msr(u32 index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
@@ -3628,7 +3684,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
info->intercept == x86_intercept_clts)
break;
- intercept = svm->nested.intercept;
+ intercept = svm->nested.ctl.intercept;
if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
break;
@@ -3716,13 +3772,8 @@ out:
return ret;
}
-static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
- enum exit_fastpath_completion *exit_fastpath)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
- if (!is_guest_mode(vcpu) &&
- to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
- to_svm(vcpu)->vmcb->control.exit_info_1)
- *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@ -3737,23 +3788,28 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
vcpu->arch.mcg_cap &= 0x1ff;
}
-static int svm_smi_allowed(struct kvm_vcpu *vcpu)
+bool svm_smi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
/* Per APM Vol.2 15.22.2 "Response to SMI" */
if (!gif_set(svm))
- return 0;
+ return true;
- if (is_guest_mode(&svm->vcpu) &&
- svm->nested.intercept & (1ULL << INTERCEPT_SMI)) {
- /* TODO: Might need to set exit_info_1 and exit_info_2 here */
- svm->vmcb->control.exit_code = SVM_EXIT_SMI;
- svm->nested.exit_required = true;
- return 0;
- }
+ return is_smm(vcpu);
+}
- return 1;
+static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (svm->nested.nested_run_pending)
+ return -EBUSY;
+
+ /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
+ return -EBUSY;
+
+ return !svm_smi_blocked(vcpu);
}
static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
@@ -3793,12 +3849,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
return 1;
nested_vmcb = map.hva;
- enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
}
return 0;
}
-static int enable_smi_window(struct kvm_vcpu *vcpu)
+static void enable_smi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3806,9 +3863,9 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
if (vgif_enabled(svm))
set_intercept(svm, INTERCEPT_STGI);
/* STGI will cause a vm exit */
- return 1;
+ } else {
+ /* We must be in SMM; RSM will cause a vmexit anyway. */
}
- return 0;
}
static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
@@ -3819,6 +3876,13 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
bool is_user = svm_get_cpl(vcpu) == 3;
/*
+ * If RIP is invalid, go ahead with emulation which will cause an
+ * internal error exit.
+ */
+ if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
+ return true;
+
+ /*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
*
* Errata:
@@ -3876,9 +3940,9 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
/*
* TODO: Last condition latch INIT signals on vCPU when
* vCPU is in guest-mode and vmcb12 defines intercept on INIT.
- * To properly emulate the INIT intercept, SVM should implement
- * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit()
- * there if an INIT signal is pending.
+ * To properly emulate the INIT intercept,
+ * svm_check_nested_events() should call nested_svm_vmexit()
+ * if an INIT signal is pending.
*/
return !gif_set(svm) ||
(svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
@@ -3932,8 +3996,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
- .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
- .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
.set_cr4 = svm_set_cr4,
.set_efer = svm_set_efer,
@@ -3947,8 +4009,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .tlb_flush = svm_flush_tlb,
+ .tlb_flush_all = svm_flush_tlb,
+ .tlb_flush_current = svm_flush_tlb,
.tlb_flush_gva = svm_flush_tlb_gva,
+ .tlb_flush_guest = svm_flush_tlb,
.run = svm_vcpu_run,
.handle_exit = handle_exit,
@@ -3989,7 +4053,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
- .read_l1_tsc_offset = svm_read_l1_tsc_offset,
.write_l1_tsc_offset = svm_write_l1_tsc_offset,
.load_mmu_pgd = svm_load_mmu_pgd,
@@ -4002,6 +4065,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.sched_in = svm_sched_in,
.pmu_ops = &amd_pmu_ops,
+ .nested_ops = &svm_nested_ops,
+
.deliver_posted_interrupt = svm_deliver_avic_intr,
.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
.update_pi_irte = svm_update_pi_irte,
@@ -4016,14 +4081,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
- .nested_enable_evmcs = NULL,
- .nested_get_evmcs_version = NULL,
-
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
-
- .check_nested_events = svm_check_nested_events,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index df3474f4fb02..6ac4c00a5d82 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -86,25 +86,17 @@ struct nested_state {
u64 hsave_msr;
u64 vm_cr_msr;
u64 vmcb;
+ u32 host_intercept_exceptions;
/* These are the merged vectors */
u32 *msrpm;
- /* gpa pointers to the real vectors */
- u64 vmcb_msrpm;
- u64 vmcb_iopm;
+ /* A VMRUN has started but has not yet been performed, so
+ * we cannot inject a nested vmexit yet. */
+ bool nested_run_pending;
- /* A VMEXIT is required but not yet emulated */
- bool exit_required;
-
- /* cache for intercepts of the guest */
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
-
- /* Nested Paging related state */
- u64 nested_cr3;
+ /* cache for control fields of the guest */
+ struct vmcb_control_area ctl;
};
struct vcpu_svm {
@@ -360,8 +352,12 @@ u32 svm_msrpm_offset(u32 msr);
void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
-void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+void svm_flush_tlb(struct kvm_vcpu *vcpu);
void disable_nmi_singlestep(struct vcpu_svm *svm);
+bool svm_smi_blocked(struct kvm_vcpu *vcpu);
+bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
+bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
+void svm_set_gif(struct vcpu_svm *svm, bool value);
/* nested.c */
@@ -369,28 +365,31 @@ void disable_nmi_singlestep(struct vcpu_svm *svm);
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
-/* This function returns true if it is save to enable the nmi window */
-static inline bool nested_svm_nmi(struct vcpu_svm *svm)
+static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
{
- if (!is_guest_mode(&svm->vcpu))
- return true;
+ struct vcpu_svm *svm = to_svm(vcpu);
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
- return true;
+ return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK);
+}
- svm->vmcb->control.exit_code = SVM_EXIT_NMI;
- svm->nested.exit_required = true;
+static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
+{
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_SMI));
+}
- return false;
+static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
+{
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INTR));
}
-static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
+static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
{
- return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI));
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct kvm_host_map *map);
+ struct vmcb *nested_vmcb);
+void svm_leave_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct vcpu_svm *svm);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
@@ -398,8 +397,10 @@ int nested_svm_exit_handled(struct vcpu_svm *svm);
int nested_svm_check_permissions(struct vcpu_svm *svm);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
-int svm_check_nested_events(struct kvm_vcpu *vcpu);
int nested_svm_exit_special(struct vcpu_svm *svm);
+void sync_nested_vmcb_control(struct vcpu_svm *svm);
+
+extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */