Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--  arch/x86/kvm/svm/avic.c      24
-rw-r--r--  arch/x86/kvm/svm/nested.c   573
-rw-r--r--  arch/x86/kvm/svm/sev.c      922
-rw-r--r--  arch/x86/kvm/svm/svm.c     1107
-rw-r--r--  arch/x86/kvm/svm/svm.h       91
-rw-r--r--  arch/x86/kvm/svm/vmenter.S   47
6 files changed, 1672 insertions, 1092 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 3e55674098be..712b4e0de481 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -270,7 +270,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
return -EINVAL;
- if (!svm->vcpu.arch.apic->regs)
+ if (!vcpu->arch.apic->regs)
return -EINVAL;
if (kvm_apicv_activated(vcpu->kvm)) {
@@ -281,7 +281,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return ret;
}
- svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
+ svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);
/* Setting AVIC backing page address in the phy APIC ID table */
entry = avic_get_physical_id_entry(vcpu, id);
@@ -315,15 +315,16 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
}
}
-int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
+int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
u32 icrl = svm->vmcb->control.exit_info_1;
u32 id = svm->vmcb->control.exit_info_2 >> 32;
u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
- struct kvm_lapic *apic = svm->vcpu.arch.apic;
+ struct kvm_lapic *apic = vcpu->arch.apic;
- trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
+ trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
switch (id) {
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
@@ -347,11 +348,11 @@ int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
* set the appropriate IRR bits on the valid target
* vcpus. So, we just need to kick the appropriate vcpu.
*/
- avic_kick_target_vcpus(svm->vcpu.kvm, apic, icrl, icrh);
+ avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
break;
case AVIC_IPI_FAILURE_INVALID_TARGET:
WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
- index, svm->vcpu.vcpu_id, icrh, icrl);
+ index, vcpu->vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
@@ -539,8 +540,9 @@ static bool is_avic_unaccelerated_access_trap(u32 offset)
return ret;
}
-int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
+int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
int ret = 0;
u32 offset = svm->vmcb->control.exit_info_1 &
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
@@ -550,7 +552,7 @@ int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
AVIC_UNACCEL_ACCESS_WRITE_MASK;
bool trap = is_avic_unaccelerated_access_trap(offset);
- trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
+ trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset,
trap, write, vector);
if (trap) {
/* Handling Trap */
@@ -558,7 +560,7 @@ int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
ret = avic_unaccel_trap_write(svm);
} else {
/* Handling Fault */
- ret = kvm_emulate_instruction(&svm->vcpu, 0);
+ ret = kvm_emulate_instruction(vcpu, 0);
}
return ret;
@@ -572,7 +574,7 @@ int avic_init_vcpu(struct vcpu_svm *svm)
if (!avic || !irqchip_in_kernel(vcpu->kvm))
return 0;
- ret = avic_init_backing_page(&svm->vcpu);
+ ret = avic_init_backing_page(vcpu);
if (ret)
return ret;
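
The recurring change in the avic.c hunks above (and throughout this series) is a handler signature conversion: exit handlers now take a struct kvm_vcpu * and derive the SVM container locally with to_svm(). The sketch below only illustrates the shape of the converted handlers; the handler name and body are hypothetical, not code from the patch.

static int example_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);	/* container-of wrapper from svm.h */

	/* VMCB fields are still reached through the vcpu_svm container ... */
	u32 offset = svm->vmcb->control.exit_info_1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK;

	/* ... while generic helpers now take the kvm_vcpu directly, not &svm->vcpu */
	trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset, false, false, 0);
	return kvm_emulate_instruction(vcpu, 0);
}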
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index fb204eaa8bb3..540d43ba2cf4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -29,6 +29,8 @@
#include "lapic.h"
#include "svm.h"
+#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
+
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
@@ -92,12 +94,12 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *hsave = svm->nested.hsave;
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
- kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer,
+ kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
+ svm->vmcb01.ptr->save.efer,
svm->nested.ctl.nested_cr3);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
@@ -123,7 +125,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
return;
c = &svm->vmcb->control;
- h = &svm->nested.hsave->control;
+ h = &svm->vmcb01.ptr->control;
g = &svm->nested.ctl;
for (i = 0; i < MAX_INTERCEPT; i++)
@@ -213,44 +215,64 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
return true;
}
-static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
+/*
+ * Bits 11:0 of bitmap address are ignored by hardware
+ */
+static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
{
- struct vcpu_svm *svm = to_svm(vcpu);
+ u64 addr = PAGE_ALIGN(pa);
- if (WARN_ON(!is_guest_mode(vcpu)))
- return true;
-
- if (!nested_svm_vmrun_msrpm(svm)) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror =
- KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return false;
- }
-
- return true;
+ return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
+ kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}
-static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
+static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+ struct vmcb_control_area *control)
{
- if ((vmcb_is_intercept(control, INTERCEPT_VMRUN)) == 0)
+ if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
return false;
- if (control->asid == 0)
+ if (CC(control->asid == 0))
return false;
- if ((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
- !npt_enabled)
+ if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
+ return false;
+
+ if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+ MSRPM_SIZE)))
+ return false;
+ if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
+ IOPM_SIZE)))
return false;
return true;
}
-static bool nested_vmcb_check_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
+ struct vmcb_save_area *save)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
- bool vmcb12_lma;
+ /*
+ * These checks are also performed by KVM_SET_SREGS,
+ * except that EFER.LMA is not checked by SVM against
+ * CR0.PG && EFER.LME.
+ */
+ if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
+ if (CC(!(save->cr4 & X86_CR4_PAE)) ||
+ CC(!(save->cr0 & X86_CR0_PE)) ||
+ CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
+ return false;
+ }
+
+ if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
+ return false;
+
+ return true;
+}
+/* Common checks that apply to both L1 and L2 state. */
+static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
+ struct vmcb_save_area *save)
+{
/*
* FIXME: these should be done after copying the fields,
* to avoid TOC/TOU races. For these save area checks
@@ -258,31 +280,27 @@ static bool nested_vmcb_check_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
* kvm_set_cr4 handle failure; EFER_SVME is an exception
* so it is force-set later in nested_prepare_vmcb_save.
*/
- if ((vmcb12->save.efer & EFER_SVME) == 0)
+ if (CC(!(save->efer & EFER_SVME)))
return false;
- if (((vmcb12->save.cr0 & X86_CR0_CD) == 0) && (vmcb12->save.cr0 & X86_CR0_NW))
+ if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
+ CC(save->cr0 & ~0xffffffffULL))
return false;
- if (!kvm_dr6_valid(vmcb12->save.dr6) || !kvm_dr7_valid(vmcb12->save.dr7))
+ if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
return false;
- vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG);
+ if (!nested_vmcb_check_cr3_cr4(vcpu, save))
+ return false;
- if (vmcb12_lma) {
- if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
- !(vmcb12->save.cr0 & X86_CR0_PE) ||
- kvm_vcpu_is_illegal_gpa(vcpu, vmcb12->save.cr3))
- return false;
- }
- if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
+ if (CC(!kvm_valid_efer(vcpu, save->efer)))
return false;
return true;
}
-static void load_nested_vmcb_control(struct vcpu_svm *svm,
- struct vmcb_control_area *control)
+static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
{
copy_vmcb_control_area(&svm->nested.ctl, control);
@@ -294,9 +312,9 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
/*
* Synchronize fields that are written by the processor, so that
- * they can be copied back into the nested_vmcb.
+ * they can be copied back into the vmcb12.
*/
-void sync_nested_vmcb_control(struct vcpu_svm *svm)
+void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
{
u32 mask;
svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
@@ -324,8 +342,8 @@ void sync_nested_vmcb_control(struct vcpu_svm *svm)
* Transfer any event that L0 or L1 wanted to inject into L2 to
* EXIT_INT_INFO.
*/
-static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
- struct vmcb *vmcb12)
+static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
+ struct vmcb *vmcb12)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
u32 exit_int_info = 0;
@@ -369,12 +387,12 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
bool nested_npt)
{
- if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
+ if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
return -EINVAL;
if (!nested_npt && is_pae_paging(vcpu) &&
(cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
- if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+ if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
return -EINVAL;
}
@@ -393,15 +411,42 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
return 0;
}
-static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
{
+ if (!svm->nested.vmcb02.ptr)
+ return;
+
+ /* FIXME: merge g_pat from vmcb01 and vmcb12. */
+ svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
+}
+
+static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+{
+ bool new_vmcb12 = false;
+
+ nested_vmcb02_compute_g_pat(svm);
+
/* Load the nested guest state */
- svm->vmcb->save.es = vmcb12->save.es;
- svm->vmcb->save.cs = vmcb12->save.cs;
- svm->vmcb->save.ss = vmcb12->save.ss;
- svm->vmcb->save.ds = vmcb12->save.ds;
- svm->vmcb->save.gdtr = vmcb12->save.gdtr;
- svm->vmcb->save.idtr = vmcb12->save.idtr;
+ if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
+ new_vmcb12 = true;
+ svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
+ }
+
+ if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
+ svm->vmcb->save.es = vmcb12->save.es;
+ svm->vmcb->save.cs = vmcb12->save.cs;
+ svm->vmcb->save.ss = vmcb12->save.ss;
+ svm->vmcb->save.ds = vmcb12->save.ds;
+ svm->vmcb->save.cpl = vmcb12->save.cpl;
+ vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
+ }
+
+ if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
+ svm->vmcb->save.gdtr = vmcb12->save.gdtr;
+ svm->vmcb->save.idtr = vmcb12->save.idtr;
+ vmcb_mark_dirty(svm->vmcb, VMCB_DT);
+ }
+
kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
/*
@@ -413,7 +458,9 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
- svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = vmcb12->save.cr2;
+
+ svm->vcpu.arch.cr2 = vmcb12->save.cr2;
+
kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
@@ -422,15 +469,41 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
svm->vmcb->save.rax = vmcb12->save.rax;
svm->vmcb->save.rsp = vmcb12->save.rsp;
svm->vmcb->save.rip = vmcb12->save.rip;
- svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
- svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
- svm->vmcb->save.cpl = vmcb12->save.cpl;
+
+ /* These bits will be set properly on the first execution when new_vmcb12 is true */
+ if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
+ svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
+ svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
+ vmcb_mark_dirty(svm->vmcb, VMCB_DR);
+ }
}
-static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
+static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ /*
+ * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
+ * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
+ */
+
+ /*
+ * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
+ * avic_physical_id.
+ */
+ WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+
+ /* Copied from vmcb01. msrpm_base can be overwritten later. */
+ svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
+ svm->vmcb->control.iopm_base_pa = svm->vmcb01.ptr->control.iopm_base_pa;
+ svm->vmcb->control.msrpm_base_pa = svm->vmcb01.ptr->control.msrpm_base_pa;
+
+ /* Done at vmrun: asid. */
+
+ /* Also overwritten later if necessary. */
+ svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+
+ /* nested_cr3. */
if (nested_npt_enabled(svm))
nested_svm_init_mmu_context(&svm->vcpu);
@@ -439,7 +512,7 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl =
(svm->nested.ctl.int_ctl & ~mask) |
- (svm->nested.hsave->control.int_ctl & mask);
+ (svm->vmcb01.ptr->control.int_ctl & mask);
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
@@ -454,17 +527,28 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
enter_guest_mode(&svm->vcpu);
/*
- * Merge guest and host intercepts - must be called with vcpu in
- * guest-mode to take affect here
+ * Merge guest and host intercepts - must be called with vcpu in
+ * guest-mode to take effect.
*/
recalc_intercepts(svm);
+}
- vmcb_mark_all_dirty(svm->vmcb);
+static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+{
+ /*
+ * Some VMCB state is shared between L1 and L2 and thus has to be
+ * moved at the time of nested vmrun and vmexit.
+ *
+ * VMLOAD/VMSAVE state would also belong in this category, but KVM
+ * always performs VMLOAD and VMSAVE from the VMCB01.
+ */
+ to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
}
-int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
struct vmcb *vmcb12)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
int ret;
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
@@ -482,8 +566,14 @@ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
svm->nested.vmcb12_gpa = vmcb12_gpa;
- nested_prepare_vmcb_control(svm);
- nested_prepare_vmcb_save(svm, vmcb12);
+
+ WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
+
+ nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
+ nested_vmcb02_prepare_control(svm);
+ nested_vmcb02_prepare_save(svm, vmcb12);
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
nested_npt_enabled(svm));
@@ -491,47 +581,48 @@ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
return ret;
if (!npt_enabled)
- svm->vcpu.arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
+ vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
svm_set_gif(svm, true);
return 0;
}
-int nested_svm_vmrun(struct vcpu_svm *svm)
+int nested_svm_vmrun(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
int ret;
struct vmcb *vmcb12;
- struct vmcb *hsave = svm->nested.hsave;
- struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
u64 vmcb12_gpa;
- if (is_smm(&svm->vcpu)) {
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ ++vcpu->stat.nested_run;
+
+ if (is_smm(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
vmcb12_gpa = svm->vmcb->save.rax;
- ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb12_gpa), &map);
+ ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
if (ret == -EINVAL) {
- kvm_inject_gp(&svm->vcpu, 0);
+ kvm_inject_gp(vcpu, 0);
return 1;
} else if (ret) {
- return kvm_skip_emulated_instruction(&svm->vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
}
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
+ ret = kvm_skip_emulated_instruction(vcpu);
vmcb12 = map.hva;
if (WARN_ON_ONCE(!svm->nested.initialized))
return -EINVAL;
- load_nested_vmcb_control(svm, &vmcb12->control);
+ nested_load_control_from_vmcb12(svm, &vmcb12->control);
- if (!nested_vmcb_check_save(svm, vmcb12) ||
- !nested_vmcb_check_controls(&svm->nested.ctl)) {
+ if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
+ !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
vmcb12->control.exit_code = SVM_EXIT_ERR;
vmcb12->control.exit_code_hi = 0;
vmcb12->control.exit_info_1 = 0;
@@ -541,36 +632,25 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
/* Clear internal status */
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
+ kvm_clear_exception_queue(vcpu);
+ kvm_clear_interrupt_queue(vcpu);
/*
- * Save the old vmcb, so we don't need to pick what we save, but can
- * restore everything when a VMEXIT occurs
+ * Since vmcb01 is not in use, we can use it to store some of the L1
+ * state.
*/
- hsave->save.es = vmcb->save.es;
- hsave->save.cs = vmcb->save.cs;
- hsave->save.ss = vmcb->save.ss;
- hsave->save.ds = vmcb->save.ds;
- hsave->save.gdtr = vmcb->save.gdtr;
- hsave->save.idtr = vmcb->save.idtr;
- hsave->save.efer = svm->vcpu.arch.efer;
- hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
- hsave->save.cr4 = svm->vcpu.arch.cr4;
- hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
- hsave->save.rip = kvm_rip_read(&svm->vcpu);
- hsave->save.rsp = vmcb->save.rsp;
- hsave->save.rax = vmcb->save.rax;
- if (npt_enabled)
- hsave->save.cr3 = vmcb->save.cr3;
- else
- hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
-
- copy_vmcb_control_area(&hsave->control, &vmcb->control);
+ svm->vmcb01.ptr->save.efer = vcpu->arch.efer;
+ svm->vmcb01.ptr->save.cr0 = kvm_read_cr0(vcpu);
+ svm->vmcb01.ptr->save.cr4 = vcpu->arch.cr4;
+ svm->vmcb01.ptr->save.rflags = kvm_get_rflags(vcpu);
+ svm->vmcb01.ptr->save.rip = kvm_rip_read(vcpu);
+
+ if (!npt_enabled)
+ svm->vmcb01.ptr->save.cr3 = kvm_read_cr3(vcpu);
svm->nested.nested_run_pending = 1;
- if (enter_svm_guest_mode(svm, vmcb12_gpa, vmcb12))
+ if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
goto out_exit_err;
if (nested_svm_vmrun_msrpm(svm))
@@ -587,7 +667,7 @@ out_exit_err:
nested_svm_vmexit(svm);
out:
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ kvm_vcpu_unmap(vcpu, &map, true);
return ret;
}
@@ -610,27 +690,30 @@ void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
int nested_svm_vmexit(struct vcpu_svm *svm)
{
- int rc;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
struct vmcb *vmcb12;
- struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
+ int rc;
- rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
+ /* Triple faults in L2 should never escape. */
+ WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
+
+ rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
if (rc) {
if (rc == -EINVAL)
- kvm_inject_gp(&svm->vcpu, 0);
+ kvm_inject_gp(vcpu, 0);
return 1;
}
vmcb12 = map.hva;
/* Exit Guest-Mode */
- leave_guest_mode(&svm->vcpu);
+ leave_guest_mode(vcpu);
svm->nested.vmcb12_gpa = 0;
WARN_ON_ONCE(svm->nested.nested_run_pending);
- kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
/* in case we halted in L2 */
svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -644,14 +727,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->save.gdtr = vmcb->save.gdtr;
vmcb12->save.idtr = vmcb->save.idtr;
vmcb12->save.efer = svm->vcpu.arch.efer;
- vmcb12->save.cr0 = kvm_read_cr0(&svm->vcpu);
- vmcb12->save.cr3 = kvm_read_cr3(&svm->vcpu);
+ vmcb12->save.cr0 = kvm_read_cr0(vcpu);
+ vmcb12->save.cr3 = kvm_read_cr3(vcpu);
vmcb12->save.cr2 = vmcb->save.cr2;
vmcb12->save.cr4 = svm->vcpu.arch.cr4;
- vmcb12->save.rflags = kvm_get_rflags(&svm->vcpu);
- vmcb12->save.rip = kvm_rip_read(&svm->vcpu);
- vmcb12->save.rsp = kvm_rsp_read(&svm->vcpu);
- vmcb12->save.rax = kvm_rax_read(&svm->vcpu);
+ vmcb12->save.rflags = kvm_get_rflags(vcpu);
+ vmcb12->save.rip = kvm_rip_read(vcpu);
+ vmcb12->save.rsp = kvm_rsp_read(vcpu);
+ vmcb12->save.rax = kvm_rax_read(vcpu);
vmcb12->save.dr7 = vmcb->save.dr7;
vmcb12->save.dr6 = svm->vcpu.arch.dr6;
vmcb12->save.cpl = vmcb->save.cpl;
@@ -663,7 +746,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.exit_info_2 = vmcb->control.exit_info_2;
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
- nested_vmcb_save_pending_event(svm, vmcb12);
+ nested_save_pending_event_to_vmcb12(svm, vmcb12);
if (svm->nrips_enabled)
vmcb12->control.next_rip = vmcb->control.next_rip;
@@ -678,37 +761,39 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
- /* Restore the original control entries */
- copy_vmcb_control_area(&vmcb->control, &hsave->control);
+ nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
+
+ svm_switch_vmcb(svm, &svm->vmcb01);
+ WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
- /* On vmexit the GIF is set to false */
+ /*
+ * On vmexit the GIF is set to false and
+ * no event can be injected in L1.
+ */
svm_set_gif(svm, false);
+ svm->vmcb->control.exit_int_info = 0;
- svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
- svm->vcpu.arch.l1_tsc_offset;
+ svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
+ if (svm->vmcb->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+ vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+ }
svm->nested.ctl.nested_cr3 = 0;
- /* Restore selected save entries */
- svm->vmcb->save.es = hsave->save.es;
- svm->vmcb->save.cs = hsave->save.cs;
- svm->vmcb->save.ss = hsave->save.ss;
- svm->vmcb->save.ds = hsave->save.ds;
- svm->vmcb->save.gdtr = hsave->save.gdtr;
- svm->vmcb->save.idtr = hsave->save.idtr;
- kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
- kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED);
- svm_set_efer(&svm->vcpu, hsave->save.efer);
- svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
- svm_set_cr4(&svm->vcpu, hsave->save.cr4);
- kvm_rax_write(&svm->vcpu, hsave->save.rax);
- kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
- kvm_rip_write(&svm->vcpu, hsave->save.rip);
- svm->vmcb->save.dr7 = DR7_FIXED_1;
- svm->vmcb->save.cpl = 0;
- svm->vmcb->control.exit_int_info = 0;
+ /*
+ * Restore processor state that had been saved in vmcb01
+ */
+ kvm_set_rflags(vcpu, svm->vmcb->save.rflags);
+ svm_set_efer(vcpu, svm->vmcb->save.efer);
+ svm_set_cr0(vcpu, svm->vmcb->save.cr0 | X86_CR0_PE);
+ svm_set_cr4(vcpu, svm->vmcb->save.cr4);
+ kvm_rax_write(vcpu, svm->vmcb->save.rax);
+ kvm_rsp_write(vcpu, svm->vmcb->save.rsp);
+ kvm_rip_write(vcpu, svm->vmcb->save.rip);
- vmcb_mark_all_dirty(svm->vmcb);
+ svm->vcpu.arch.dr7 = DR7_FIXED_1;
+ kvm_update_dr7(&svm->vcpu);
trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
vmcb12->control.exit_info_1,
@@ -717,50 +802,62 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.exit_int_info_err,
KVM_ISA_SVM);
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ kvm_vcpu_unmap(vcpu, &map, true);
- nested_svm_uninit_mmu_context(&svm->vcpu);
+ nested_svm_uninit_mmu_context(vcpu);
- rc = nested_svm_load_cr3(&svm->vcpu, hsave->save.cr3, false);
+ rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false);
if (rc)
return 1;
- if (npt_enabled)
- svm->vmcb->save.cr3 = hsave->save.cr3;
-
/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it
* doesn't end up in L1.
*/
svm->vcpu.arch.nmi_injected = false;
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
+ kvm_clear_exception_queue(vcpu);
+ kvm_clear_interrupt_queue(vcpu);
+
+ /*
+ * If we are here following the completion of a VMRUN that
+ * is being single-stepped, queue the pending #DB intercept
+ * right now so that it can be accounted for before we execute
+ * L1's next instruction.
+ */
+ if (unlikely(svm->vmcb->save.rflags & X86_EFLAGS_TF))
+ kvm_queue_exception(&(svm->vcpu), DB_VECTOR);
return 0;
}
+static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
+{
+ nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
+}
+
int svm_allocate_nested(struct vcpu_svm *svm)
{
- struct page *hsave_page;
+ struct page *vmcb02_page;
if (svm->nested.initialized)
return 0;
- hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!hsave_page)
+ vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!vmcb02_page)
return -ENOMEM;
- svm->nested.hsave = page_address(hsave_page);
+ svm->nested.vmcb02.ptr = page_address(vmcb02_page);
+ svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);
svm->nested.msrpm = svm_vcpu_alloc_msrpm();
if (!svm->nested.msrpm)
- goto err_free_hsave;
+ goto err_free_vmcb02;
svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
svm->nested.initialized = true;
return 0;
-err_free_hsave:
- __free_page(hsave_page);
+err_free_vmcb02:
+ __free_page(vmcb02_page);
return -ENOMEM;
}
@@ -772,8 +869,8 @@ void svm_free_nested(struct vcpu_svm *svm)
svm_vcpu_free_msrpm(svm->nested.msrpm);
svm->nested.msrpm = NULL;
- __free_page(virt_to_page(svm->nested.hsave));
- svm->nested.hsave = NULL;
+ __free_page(virt_to_page(svm->nested.vmcb02.ptr));
+ svm->nested.vmcb02.ptr = NULL;
svm->nested.initialized = false;
}
@@ -783,18 +880,19 @@ void svm_free_nested(struct vcpu_svm *svm)
*/
void svm_leave_nested(struct vcpu_svm *svm)
{
- if (is_guest_mode(&svm->vcpu)) {
- struct vmcb *hsave = svm->nested.hsave;
- struct vmcb *vmcb = svm->vmcb;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ if (is_guest_mode(vcpu)) {
svm->nested.nested_run_pending = 0;
- leave_guest_mode(&svm->vcpu);
- copy_vmcb_control_area(&vmcb->control, &hsave->control);
- nested_svm_uninit_mmu_context(&svm->vcpu);
+ leave_guest_mode(vcpu);
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
+
+ nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
}
- kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
}
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
@@ -903,16 +1001,15 @@ int nested_svm_exit_handled(struct vcpu_svm *svm)
return vmexit;
}
-int nested_svm_check_permissions(struct vcpu_svm *svm)
+int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
{
- if (!(svm->vcpu.arch.efer & EFER_SVME) ||
- !is_paging(&svm->vcpu)) {
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- if (svm->vmcb->save.cpl) {
- kvm_inject_gp(&svm->vcpu, 0);
+ if (to_svm(vcpu)->vmcb->save.cpl) {
+ kvm_inject_gp(vcpu, 0);
return 1;
}
@@ -960,50 +1057,11 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
nested_svm_vmexit(svm);
}
-static void nested_svm_smi(struct vcpu_svm *svm)
-{
- svm->vmcb->control.exit_code = SVM_EXIT_SMI;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
- nested_svm_vmexit(svm);
-}
-
-static void nested_svm_nmi(struct vcpu_svm *svm)
-{
- svm->vmcb->control.exit_code = SVM_EXIT_NMI;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
- nested_svm_vmexit(svm);
-}
-
-static void nested_svm_intr(struct vcpu_svm *svm)
-{
- trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
-
- svm->vmcb->control.exit_code = SVM_EXIT_INTR;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
- nested_svm_vmexit(svm);
-}
-
static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
}
-static void nested_svm_init(struct vcpu_svm *svm)
-{
- svm->vmcb->control.exit_code = SVM_EXIT_INIT;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
- nested_svm_vmexit(svm);
-}
-
-
static int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1017,12 +1075,18 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
return -EBUSY;
if (!nested_exit_on_init(svm))
return 0;
- nested_svm_init(svm);
+ nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
return 0;
}
if (vcpu->arch.exception.pending) {
- if (block_nested_events)
+ /*
+ * Only a pending nested run can block a pending exception.
+ * Otherwise an injected NMI/interrupt should either be
+ * lost or delivered to the nested hypervisor in the EXITINTINFO
+ * vmcb field, while delivering the pending exception.
+ */
+ if (svm->nested.nested_run_pending)
return -EBUSY;
if (!nested_exit_on_exception(svm))
return 0;
@@ -1035,7 +1099,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
return -EBUSY;
if (!nested_exit_on_smi(svm))
return 0;
- nested_svm_smi(svm);
+ nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
return 0;
}
@@ -1044,7 +1108,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
return -EBUSY;
if (!nested_exit_on_nmi(svm))
return 0;
- nested_svm_nmi(svm);
+ nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
return 0;
}
@@ -1053,7 +1117,8 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
return -EBUSY;
if (!nested_exit_on_intr(svm))
return 0;
- nested_svm_intr(svm);
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+ nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
return 0;
}
@@ -1072,8 +1137,8 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (get_host_vmcb(svm)->control.intercepts[INTERCEPT_EXCEPTION] &
- excp_bits)
+ if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
+ excp_bits)
return NESTED_EXIT_HOST;
else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
svm->vcpu.arch.apf.host_apf_flags)
@@ -1137,10 +1202,9 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
sizeof(user_vmcb->control)))
return -EFAULT;
- if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save,
+ if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
sizeof(user_vmcb->save)))
return -EFAULT;
-
out:
return kvm_state.size;
}
@@ -1150,7 +1214,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
struct kvm_nested_state *kvm_state)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *hsave = svm->nested.hsave;
struct vmcb __user *user_vmcb = (struct vmcb __user *)
&user_kvm_nested_state->data.svm[0];
struct vmcb_control_area *ctl;
@@ -1195,8 +1258,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
ret = -ENOMEM;
- ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
- save = kzalloc(sizeof(*save), GFP_KERNEL);
+ ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT);
+ save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
if (!ctl || !save)
goto out_free;
@@ -1207,12 +1270,12 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
goto out_free;
ret = -EINVAL;
- if (!nested_vmcb_check_controls(ctl))
+ if (!nested_vmcb_check_controls(vcpu, ctl))
goto out_free;
/*
* Processor state contains L2 state. Check that it is
- * valid for guest mode (see nested_vmcb_checks).
+ * valid for guest mode (see nested_vmcb_check_save).
*/
cr0 = kvm_read_cr0(vcpu);
if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
@@ -1221,29 +1284,48 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
/*
* Validate host state saved from before VMRUN (see
* nested_svm_check_permissions).
- * TODO: validate reserved bits for all saved state.
*/
- if (!(save->cr0 & X86_CR0_PG))
- goto out_free;
- if (!(save->efer & EFER_SVME))
+ if (!(save->cr0 & X86_CR0_PG) ||
+ !(save->cr0 & X86_CR0_PE) ||
+ (save->rflags & X86_EFLAGS_VM) ||
+ !nested_vmcb_valid_sregs(vcpu, save))
goto out_free;
/*
- * All checks done, we can enter guest mode. L1 control fields
- * come from the nested save state. Guest state is already
- * in the registers, the save area of the nested state instead
- * contains saved L1 state.
+ * All checks done, we can enter guest mode. Userspace provides
+ * vmcb12.control, which will be combined with L1 and stored into
+ * vmcb02, and the L1 save state which we store in vmcb01.
+ * L2 registers if needed are moved from the current VMCB to VMCB02.
*/
svm->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
- copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
- hsave->save = *save;
-
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- load_nested_vmcb_control(svm, ctl);
- nested_prepare_vmcb_control(svm);
+ if (svm->current_vmcb == &svm->vmcb01)
+ svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+ svm->vmcb01.ptr->save.es = save->es;
+ svm->vmcb01.ptr->save.cs = save->cs;
+ svm->vmcb01.ptr->save.ss = save->ss;
+ svm->vmcb01.ptr->save.ds = save->ds;
+ svm->vmcb01.ptr->save.gdtr = save->gdtr;
+ svm->vmcb01.ptr->save.idtr = save->idtr;
+ svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
+ svm->vmcb01.ptr->save.efer = save->efer;
+ svm->vmcb01.ptr->save.cr0 = save->cr0;
+ svm->vmcb01.ptr->save.cr3 = save->cr3;
+ svm->vmcb01.ptr->save.cr4 = save->cr4;
+ svm->vmcb01.ptr->save.rax = save->rax;
+ svm->vmcb01.ptr->save.rsp = save->rsp;
+ svm->vmcb01.ptr->save.rip = save->rip;
+ svm->vmcb01.ptr->save.cpl = 0;
+
+ nested_load_control_from_vmcb12(svm, ctl);
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
+
+ nested_vmcb02_prepare_control(svm);
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
@@ -1254,8 +1336,31 @@ out_free:
return ret;
}
+static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (WARN_ON(!is_guest_mode(vcpu)))
+ return true;
+
+ if (nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+ nested_npt_enabled(svm)))
+ return false;
+
+ if (!nested_svm_vmrun_msrpm(svm)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return false;
+ }
+
+ return true;
+}
+
struct kvm_x86_nested_ops svm_nested_ops = {
.check_events = svm_check_nested_events,
+ .triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
.get_state = svm_get_nested_state,
.set_state = svm_set_nested_state,
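
The nested.c changes above replace the old hsave copy-in/copy-out scheme with two per-vCPU VMCBs: vmcb01 keeps L1 state, vmcb02 is built from vmcb12 to run L2, and a nested VMEXIT switches back instead of restoring fields one by one. The real switch helper, svm_switch_vmcb(), lives in the svm.c/svm.h parts of this series that are not shown in this excerpt; the sketch below is a simplified approximation based only on the fields visible above (ptr, pa, current_vmcb), and the kvm_vmcb_info type name is assumed from the svm.h side.

static void example_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target)
{
	/*
	 * Simplified approximation (type name assumed, see above): the info
	 * struct pairs the kernel mapping of a VMCB page (ptr) with the
	 * physical address handed to VMRUN (pa).  Switching just re-points
	 * the active VMCB, so later svm->vmcb accesses hit the other copy.
	 */
	svm->current_vmcb = target;
	svm->vmcb = target->ptr;
}

In the hunks above, enter_svm_guest_mode() switches to &svm->nested.vmcb02 before preparing L2 control and save state, and nested_svm_vmexit() switches back to &svm->vmcb01.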
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 415a49b8b8f8..1356ee095cd5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -44,12 +44,25 @@
#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
#endif
+#ifdef CONFIG_KVM_AMD_SEV
+/* enable/disable SEV support */
+static bool sev_enabled = true;
+module_param_named(sev, sev_enabled, bool, 0444);
+
+/* enable/disable SEV-ES support */
+static bool sev_es_enabled = true;
+module_param_named(sev_es, sev_es_enabled, bool, 0444);
+#else
+#define sev_enabled false
+#define sev_es_enabled false
+#endif /* CONFIG_KVM_AMD_SEV */
+
static u8 sev_enc_bit;
-static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
+static unsigned long sev_me_mask;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
@@ -61,9 +74,15 @@ struct enc_region {
unsigned long size;
};
-static int sev_flush_asids(void)
+/* Called with the sev_bitmap_lock held, or on shutdown */
+static int sev_flush_asids(int min_asid, int max_asid)
{
- int ret, error = 0;
+ int ret, pos, error = 0;
+
+ /* Check if there are any ASIDs to reclaim before performing a flush */
+ pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
+ if (pos >= max_asid)
+ return -EBUSY;
/*
* DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
@@ -82,17 +101,15 @@ static int sev_flush_asids(void)
return ret;
}
+static inline bool is_mirroring_enc_context(struct kvm *kvm)
+{
+ return !!to_kvm_svm(kvm)->sev_info.enc_context_owner;
+}
+
/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(int min_asid, int max_asid)
{
- int pos;
-
- /* Check if there are any ASIDs to reclaim before performing a flush */
- pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
- if (pos >= max_asid)
- return false;
-
- if (sev_flush_asids())
+ if (sev_flush_asids(min_asid, max_asid))
return false;
/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
@@ -184,49 +201,41 @@ static void sev_asid_free(struct kvm_sev_info *sev)
static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
{
- struct sev_data_decommission *decommission;
- struct sev_data_deactivate *data;
+ struct sev_data_decommission decommission;
+ struct sev_data_deactivate deactivate;
if (!handle)
return;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return;
-
- /* deactivate handle */
- data->handle = handle;
+ deactivate.handle = handle;
/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
down_read(&sev_deactivate_lock);
- sev_guest_deactivate(data, NULL);
+ sev_guest_deactivate(&deactivate, NULL);
up_read(&sev_deactivate_lock);
- kfree(data);
-
- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
- if (!decommission)
- return;
-
/* decommission handle */
- decommission->handle = handle;
- sev_guest_decommission(decommission, NULL);
-
- kfree(decommission);
+ decommission.handle = handle;
+ sev_guest_decommission(&decommission, NULL);
}
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ bool es_active = argp->id == KVM_SEV_ES_INIT;
int asid, ret;
+ if (kvm->created_vcpus)
+ return -EINVAL;
+
ret = -EBUSY;
if (unlikely(sev->active))
return ret;
+ sev->es_active = es_active;
asid = sev_asid_new(sev);
if (asid < 0)
- return ret;
+ goto e_no_asid;
sev->asid = asid;
ret = sev_platform_init(&argp->error);
@@ -234,6 +243,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free;
sev->active = true;
+ sev->asid = asid;
INIT_LIST_HEAD(&sev->regions_list);
return 0;
@@ -241,34 +251,21 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
e_free:
sev_asid_free(sev);
sev->asid = 0;
+e_no_asid:
+ sev->es_active = false;
return ret;
}
-static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
-{
- if (!sev_es)
- return -ENOTTY;
-
- to_kvm_svm(kvm)->sev_info.es_active = true;
-
- return sev_guest_init(kvm, argp);
-}
-
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
- struct sev_data_activate *data;
+ struct sev_data_activate activate;
int asid = sev_get_asid(kvm);
int ret;
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- return -ENOMEM;
-
/* activate ASID on the given handle */
- data->handle = handle;
- data->asid = asid;
- ret = sev_guest_activate(data, error);
- kfree(data);
+ activate.handle = handle;
+ activate.asid = asid;
+ ret = sev_guest_activate(&activate, error);
return ret;
}
@@ -298,7 +295,7 @@ static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct sev_data_launch_start *start;
+ struct sev_data_launch_start start;
struct kvm_sev_launch_start params;
void *dh_blob, *session_blob;
int *error = &argp->error;
@@ -310,20 +307,16 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
return -EFAULT;
- start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
- if (!start)
- return -ENOMEM;
+ memset(&start, 0, sizeof(start));
dh_blob = NULL;
if (params.dh_uaddr) {
dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
- if (IS_ERR(dh_blob)) {
- ret = PTR_ERR(dh_blob);
- goto e_free;
- }
+ if (IS_ERR(dh_blob))
+ return PTR_ERR(dh_blob);
- start->dh_cert_address = __sme_set(__pa(dh_blob));
- start->dh_cert_len = params.dh_len;
+ start.dh_cert_address = __sme_set(__pa(dh_blob));
+ start.dh_cert_len = params.dh_len;
}
session_blob = NULL;
@@ -334,40 +327,38 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_dh;
}
- start->session_address = __sme_set(__pa(session_blob));
- start->session_len = params.session_len;
+ start.session_address = __sme_set(__pa(session_blob));
+ start.session_len = params.session_len;
}
- start->handle = params.handle;
- start->policy = params.policy;
+ start.handle = params.handle;
+ start.policy = params.policy;
/* create memory encryption context */
- ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
+ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error);
if (ret)
goto e_free_session;
/* Bind ASID to this guest */
- ret = sev_bind_asid(kvm, start->handle, error);
+ ret = sev_bind_asid(kvm, start.handle, error);
if (ret)
goto e_free_session;
/* return handle to userspace */
- params.handle = start->handle;
+ params.handle = start.handle;
if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
- sev_unbind_asid(kvm, start->handle);
+ sev_unbind_asid(kvm, start.handle);
ret = -EFAULT;
goto e_free_session;
}
- sev->handle = start->handle;
+ sev->handle = start.handle;
sev->fd = argp->sev_fd;
e_free_session:
kfree(session_blob);
e_free_dh:
kfree(dh_blob);
-e_free:
- kfree(start);
return ret;
}
@@ -486,7 +477,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
- struct sev_data_launch_update_data *data;
+ struct sev_data_launch_update_data data;
struct page **inpages;
int ret;
@@ -496,20 +487,14 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
return -EFAULT;
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- return -ENOMEM;
-
vaddr = params.uaddr;
size = params.len;
vaddr_end = vaddr + size;
/* Lock the user memory. */
inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
- if (IS_ERR(inpages)) {
- ret = PTR_ERR(inpages);
- goto e_free;
- }
+ if (IS_ERR(inpages))
+ return PTR_ERR(inpages);
/*
* Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
@@ -517,6 +502,9 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
*/
sev_clflush_pages(inpages, npages);
+ data.reserved = 0;
+ data.handle = sev->handle;
+
for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
int offset, len;
@@ -531,10 +519,9 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
- data->handle = sev->handle;
- data->len = len;
- data->address = __sme_page_pa(inpages[i]) + offset;
- ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
+ data.len = len;
+ data.address = __sme_page_pa(inpages[i]) + offset;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
if (ret)
goto e_unpin;
@@ -550,8 +537,6 @@ e_unpin:
}
/* unlock the user pages */
sev_unpin_memory(kvm, inpages, npages);
-e_free:
- kfree(data);
return ret;
}
@@ -603,23 +588,22 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct sev_data_launch_update_vmsa *vmsa;
+ struct sev_data_launch_update_vmsa vmsa;
+ struct kvm_vcpu *vcpu;
int i, ret;
if (!sev_es_guest(kvm))
return -ENOTTY;
- vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
- if (!vmsa)
- return -ENOMEM;
+ vmsa.reserved = 0;
- for (i = 0; i < kvm->created_vcpus; i++) {
- struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct vcpu_svm *svm = to_svm(vcpu);
/* Perform some pre-encryption checks against the VMSA */
ret = sev_es_sync_vmsa(svm);
if (ret)
- goto e_free;
+ return ret;
/*
* The LAUNCH_UPDATE_VMSA command will perform in-place
@@ -629,27 +613,25 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
*/
clflush_cache_range(svm->vmsa, PAGE_SIZE);
- vmsa->handle = sev->handle;
- vmsa->address = __sme_pa(svm->vmsa);
- vmsa->len = PAGE_SIZE;
- ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
+ vmsa.handle = sev->handle;
+ vmsa.address = __sme_pa(svm->vmsa);
+ vmsa.len = PAGE_SIZE;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa,
&argp->error);
if (ret)
- goto e_free;
+ return ret;
svm->vcpu.arch.guest_state_protected = true;
}
-e_free:
- kfree(vmsa);
- return ret;
+ return 0;
}
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *measure = (void __user *)(uintptr_t)argp->data;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct sev_data_launch_measure *data;
+ struct sev_data_launch_measure data;
struct kvm_sev_launch_measure params;
void __user *p = NULL;
void *blob = NULL;
@@ -661,9 +643,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, measure, sizeof(params)))
return -EFAULT;
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- return -ENOMEM;
+ memset(&data, 0, sizeof(data));
/* User wants to query the blob length */
if (!params.len)
@@ -671,23 +651,20 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
p = (void __user *)(uintptr_t)params.uaddr;
if (p) {
- if (params.len > SEV_FW_BLOB_MAX_SIZE) {
- ret = -EINVAL;
- goto e_free;
- }
+ if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ return -EINVAL;
- ret = -ENOMEM;
- blob = kmalloc(params.len, GFP_KERNEL);
+ blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
if (!blob)
- goto e_free;
+ return -ENOMEM;
- data->address = __psp_pa(blob);
- data->len = params.len;
+ data.address = __psp_pa(blob);
+ data.len = params.len;
}
cmd:
- data->handle = sev->handle;
- ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
+ data.handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);
/*
* If we query the session length, FW responded with expected data.
@@ -704,63 +681,50 @@ cmd:
}
done:
- params.len = data->len;
+ params.len = data.len;
if (copy_to_user(measure, &params, sizeof(params)))
ret = -EFAULT;
e_free_blob:
kfree(blob);
-e_free:
- kfree(data);
return ret;
}
static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct sev_data_launch_finish *data;
- int ret;
+ struct sev_data_launch_finish data;
if (!sev_guest(kvm))
return -ENOTTY;
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- return -ENOMEM;
-
- data->handle = sev->handle;
- ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
-
- kfree(data);
- return ret;
+ data.handle = sev->handle;
+ return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
}
static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_guest_status params;
- struct sev_data_guest_status *data;
+ struct sev_data_guest_status data;
int ret;
if (!sev_guest(kvm))
return -ENOTTY;
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- return -ENOMEM;
+ memset(&data, 0, sizeof(data));
- data->handle = sev->handle;
- ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
+ data.handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
if (ret)
- goto e_free;
+ return ret;
- params.policy = data->policy;
- params.state = data->state;
- params.handle = data->handle;
+ params.policy = data.policy;
+ params.state = data.state;
+ params.handle = data.handle;
if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
ret = -EFAULT;
-e_free:
- kfree(data);
+
return ret;
}
@@ -769,23 +733,17 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
int *error, bool enc)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct sev_data_dbg *data;
- int ret;
-
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- return -ENOMEM;
+ struct sev_data_dbg data;
- data->handle = sev->handle;
- data->dst_addr = dst;
- data->src_addr = src;
- data->len = size;
+ data.reserved = 0;
+ data.handle = sev->handle;
+ data.dst_addr = dst;
+ data.src_addr = src;
+ data.len = size;
- ret = sev_issue_cmd(kvm,
- enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
- data, error);
- kfree(data);
- return ret;
+ return sev_issue_cmd(kvm,
+ enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
+ &data, error);
}
static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
@@ -1005,7 +963,7 @@ err:
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct sev_data_launch_secret *data;
+ struct sev_data_launch_secret data;
struct kvm_sev_launch_secret params;
struct page **pages;
void *blob, *hdr;
@@ -1037,41 +995,36 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_unpin_memory;
}
- ret = -ENOMEM;
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- goto e_unpin_memory;
+ memset(&data, 0, sizeof(data));
offset = params.guest_uaddr & (PAGE_SIZE - 1);
- data->guest_address = __sme_page_pa(pages[0]) + offset;
- data->guest_len = params.guest_len;
+ data.guest_address = __sme_page_pa(pages[0]) + offset;
+ data.guest_len = params.guest_len;
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
if (IS_ERR(blob)) {
ret = PTR_ERR(blob);
- goto e_free;
+ goto e_unpin_memory;
}
- data->trans_address = __psp_pa(blob);
- data->trans_len = params.trans_len;
+ data.trans_address = __psp_pa(blob);
+ data.trans_len = params.trans_len;
hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
if (IS_ERR(hdr)) {
ret = PTR_ERR(hdr);
goto e_free_blob;
}
- data->hdr_address = __psp_pa(hdr);
- data->hdr_len = params.hdr_len;
+ data.hdr_address = __psp_pa(hdr);
+ data.hdr_len = params.hdr_len;
- data->handle = sev->handle;
- ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+ data.handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);
kfree(hdr);
e_free_blob:
kfree(blob);
-e_free:
- kfree(data);
e_unpin_memory:
/* content of memory is updated, mark pages dirty */
for (i = 0; i < n; i++) {
@@ -1086,7 +1039,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *report = (void __user *)(uintptr_t)argp->data;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- struct sev_data_attestation_report *data;
+ struct sev_data_attestation_report data;
struct kvm_sev_attestation_report params;
void __user *p;
void *blob = NULL;
@@ -1098,9 +1051,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
return -EFAULT;
- data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- if (!data)
- return -ENOMEM;
+ memset(&data, 0, sizeof(data));
/* User wants to query the blob length */
if (!params.len)
@@ -1108,23 +1059,20 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
p = (void __user *)(uintptr_t)params.uaddr;
if (p) {
- if (params.len > SEV_FW_BLOB_MAX_SIZE) {
- ret = -EINVAL;
- goto e_free;
- }
+ if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ return -EINVAL;
- ret = -ENOMEM;
- blob = kmalloc(params.len, GFP_KERNEL);
+ blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
if (!blob)
- goto e_free;
+ return -ENOMEM;
- data->address = __psp_pa(blob);
- data->len = params.len;
- memcpy(data->mnonce, params.mnonce, sizeof(params.mnonce));
+ data.address = __psp_pa(blob);
+ data.len = params.len;
+ memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
}
cmd:
- data->handle = sev->handle;
- ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, data, &argp->error);
+ data.handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
/*
* If we query the session length, FW responded with expected data.
*/
@@ -1140,22 +1088,417 @@ cmd:
}
done:
- params.len = data->len;
+ params.len = data.len;
if (copy_to_user(report, &params, sizeof(params)))
ret = -EFAULT;
e_free_blob:
kfree(blob);
-e_free:
- kfree(data);
return ret;
}
+/* Userspace wants to query session length. */
+static int
+__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
+ struct kvm_sev_send_start *params)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_send_start data;
+ int ret;
+
+ data.handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
+ if (ret < 0)
+ return ret;
+
+ params->session_len = data.session_len;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
+ sizeof(struct kvm_sev_send_start)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_send_start data;
+ struct kvm_sev_send_start params;
+ void *amd_certs, *session_data;
+ void *pdh_cert, *plat_certs;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ sizeof(struct kvm_sev_send_start)))
+ return -EFAULT;
+
+ /* if session_len is zero, userspace wants to query the session length */
+ if (!params.session_len)
+ return __sev_send_start_query_session_length(kvm, argp,
+ &params);
+
+ /* some sanity checks */
+ if (!params.pdh_cert_uaddr || !params.pdh_cert_len ||
+ !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE)
+ return -EINVAL;
+
+ /* allocate the memory to hold the session data blob */
+ session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT);
+ if (!session_data)
+ return -ENOMEM;
+
+ /* copy the certificate blobs from userspace */
+ pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr,
+ params.pdh_cert_len);
+ if (IS_ERR(pdh_cert)) {
+ ret = PTR_ERR(pdh_cert);
+ goto e_free_session;
+ }
+
+ plat_certs = psp_copy_user_blob(params.plat_certs_uaddr,
+ params.plat_certs_len);
+ if (IS_ERR(plat_certs)) {
+ ret = PTR_ERR(plat_certs);
+ goto e_free_pdh;
+ }
+
+ amd_certs = psp_copy_user_blob(params.amd_certs_uaddr,
+ params.amd_certs_len);
+ if (IS_ERR(amd_certs)) {
+ ret = PTR_ERR(amd_certs);
+ goto e_free_plat_cert;
+ }
+
+ /* populate the FW SEND_START field with system physical address */
+ memset(&data, 0, sizeof(data));
+ data.pdh_cert_address = __psp_pa(pdh_cert);
+ data.pdh_cert_len = params.pdh_cert_len;
+ data.plat_certs_address = __psp_pa(plat_certs);
+ data.plat_certs_len = params.plat_certs_len;
+ data.amd_certs_address = __psp_pa(amd_certs);
+ data.amd_certs_len = params.amd_certs_len;
+ data.session_address = __psp_pa(session_data);
+ data.session_len = params.session_len;
+ data.handle = sev->handle;
+
+ ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
+
+ if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
+ session_data, params.session_len)) {
+ ret = -EFAULT;
+ goto e_free_amd_cert;
+ }
+
+ params.policy = data.policy;
+ params.session_len = data.session_len;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params,
+ sizeof(struct kvm_sev_send_start)))
+ ret = -EFAULT;
+
+e_free_amd_cert:
+ kfree(amd_certs);
+e_free_plat_cert:
+ kfree(plat_certs);
+e_free_pdh:
+ kfree(pdh_cert);
+e_free_session:
+ kfree(session_data);
+ return ret;
+}
+
+/* Userspace wants to query either header or trans length. */
+static int
+__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
+ struct kvm_sev_send_update_data *params)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_send_update_data data;
+ int ret;
+
+ data.handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
+ if (ret < 0)
+ return ret;
+
+ params->hdr_len = data.hdr_len;
+ params->trans_len = data.trans_len;
+
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
+ sizeof(struct kvm_sev_send_update_data)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_send_update_data data;
+ struct kvm_sev_send_update_data params;
+ void *hdr, *trans_data;
+ struct page **guest_page;
+ unsigned long n;
+ int ret, offset;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ sizeof(struct kvm_sev_send_update_data)))
+ return -EFAULT;
+
+ /* userspace wants to query either header or trans length */
+ if (!params.trans_len || !params.hdr_len)
+ return __sev_send_update_data_query_lengths(kvm, argp, &params);
+
+ if (!params.trans_uaddr || !params.guest_uaddr ||
+ !params.guest_len || !params.hdr_uaddr)
+ return -EINVAL;
+
+ /* Check if we are crossing the page boundary */
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ if ((params.guest_len + offset > PAGE_SIZE))
+ return -EINVAL;
+
+ /* Pin guest memory */
+ guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
+ PAGE_SIZE, &n, 0);
+ if (!guest_page)
+ return -EFAULT;
+
+ /* allocate memory for header and transport buffer */
+ ret = -ENOMEM;
+ hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+ if (!hdr)
+ goto e_unpin;
+
+ trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+ if (!trans_data)
+ goto e_free_hdr;
+
+ memset(&data, 0, sizeof(data));
+ data.hdr_address = __psp_pa(hdr);
+ data.hdr_len = params.hdr_len;
+ data.trans_address = __psp_pa(trans_data);
+ data.trans_len = params.trans_len;
+
+ /* The SEND_UPDATE_DATA command requires C-bit to be always set. */
+ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
+ data.guest_address |= sev_me_mask;
+ data.guest_len = params.guest_len;
+ data.handle = sev->handle;
+
+ ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
+
+ if (ret)
+ goto e_free_trans_data;
+
+ /* copy transport buffer to user space */
+ if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
+ trans_data, params.trans_len)) {
+ ret = -EFAULT;
+ goto e_free_trans_data;
+ }
+
+ /* Copy packet header to userspace. */
+ ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+ params.hdr_len);
+
+e_free_trans_data:
+ kfree(trans_data);
+e_free_hdr:
+ kfree(hdr);
+e_unpin:
+ sev_unpin_memory(kvm, guest_page, n);
+
+ return ret;
+}
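
In the same illustrative spirit, a sketch of the per-page send step: guest_len stays within a single 4K page to satisfy the page-boundary check above, and hdr_len/trans_len are assumed to have been obtained from the length-query path. The helper name and its error handling are hypothetical.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_send_page(int vm_fd, int sev_fd, uint64_t guest_uaddr,
			     void *hdr, uint32_t hdr_len,
			     void *trans, uint32_t trans_len)
{
	struct kvm_sev_send_update_data params = {
		.hdr_uaddr   = (uint64_t)(uintptr_t)hdr,
		.hdr_len     = hdr_len,
		.guest_uaddr = guest_uaddr,	/* assumed page aligned */
		.guest_len   = 4096,
		.trans_uaddr = (uint64_t)(uintptr_t)trans,
		.trans_len   = trans_len,
	};
	struct kvm_sev_cmd cmd = {
		.id	= KVM_SEV_SEND_UPDATE_DATA,
		.data	= (uint64_t)(uintptr_t)&params,
		.sev_fd	= sev_fd,
	};

	/* On success, hdr and trans hold the packet to ship to the target. */
	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}
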
+
+static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_send_finish data;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data.handle = sev->handle;
+ return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
+}
+
+static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_send_cancel data;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data.handle = sev->handle;
+ return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
+}
+
+static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_receive_start start;
+ struct kvm_sev_receive_start params;
+ int *error = &argp->error;
+ void *session_data;
+ void *pdh_data;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ /* Get the parameters from userspace */
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ sizeof(struct kvm_sev_receive_start)))
+ return -EFAULT;
+
+ /* some sanity checks */
+ if (!params.pdh_uaddr || !params.pdh_len ||
+ !params.session_uaddr || !params.session_len)
+ return -EINVAL;
+
+ pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len);
+ if (IS_ERR(pdh_data))
+ return PTR_ERR(pdh_data);
+
+ session_data = psp_copy_user_blob(params.session_uaddr,
+ params.session_len);
+ if (IS_ERR(session_data)) {
+ ret = PTR_ERR(session_data);
+ goto e_free_pdh;
+ }
+
+ memset(&start, 0, sizeof(start));
+ start.handle = params.handle;
+ start.policy = params.policy;
+ start.pdh_cert_address = __psp_pa(pdh_data);
+ start.pdh_cert_len = params.pdh_len;
+ start.session_address = __psp_pa(session_data);
+ start.session_len = params.session_len;
+
+ /* create memory encryption context */
+ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start,
+ error);
+ if (ret)
+ goto e_free_session;
+
+ /* Bind ASID to this guest */
+ ret = sev_bind_asid(kvm, start.handle, error);
+ if (ret)
+ goto e_free_session;
+
+ params.handle = start.handle;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data,
+ &params, sizeof(struct kvm_sev_receive_start))) {
+ ret = -EFAULT;
+ sev_unbind_asid(kvm, start.handle);
+ goto e_free_session;
+ }
+
+ sev->handle = start.handle;
+ sev->fd = argp->sev_fd;
+
+e_free_session:
+ kfree(session_data);
+e_free_pdh:
+ kfree(pdh_data);
+
+ return ret;
+}
+
+static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_receive_update_data params;
+ struct sev_data_receive_update_data data;
+ void *hdr = NULL, *trans = NULL;
+ struct page **guest_page;
+ unsigned long n;
+ int ret, offset;
+
+ if (!sev_guest(kvm))
+ return -EINVAL;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ sizeof(struct kvm_sev_receive_update_data)))
+ return -EFAULT;
+
+ if (!params.hdr_uaddr || !params.hdr_len ||
+ !params.guest_uaddr || !params.guest_len ||
+ !params.trans_uaddr || !params.trans_len)
+ return -EINVAL;
+
+ /* Check if we are crossing the page boundary */
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ if ((params.guest_len + offset > PAGE_SIZE))
+ return -EINVAL;
+
+ hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+ if (IS_ERR(hdr))
+ return PTR_ERR(hdr);
+
+ trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto e_free_hdr;
+ }
+
+ memset(&data, 0, sizeof(data));
+ data.hdr_address = __psp_pa(hdr);
+ data.hdr_len = params.hdr_len;
+ data.trans_address = __psp_pa(trans);
+ data.trans_len = params.trans_len;
+
+ /* Pin guest memory */
+ ret = -EFAULT;
+ guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
+ PAGE_SIZE, &n, 0);
+ if (!guest_page)
+ goto e_free_trans;
+
+ /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
+ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
+ data.guest_address |= sev_me_mask;
+ data.guest_len = params.guest_len;
+ data.handle = sev->handle;
+
+ ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
+ &argp->error);
+
+ sev_unpin_memory(kvm, guest_page, n);
+
+e_free_trans:
+ kfree(trans);
+e_free_hdr:
+ kfree(hdr);
+
+ return ret;
+}
+
+static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_receive_finish data;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data.handle = sev->handle;
+ return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
+}
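
And a matching sketch of the receive side on the target host; moving the session blob and the per-page packets between the two hosts is assumed to happen out of band, and the caller is assumed to have populated *start and *pkt.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_receive(int vm_fd, int sev_fd,
			   struct kvm_sev_receive_start *start,
			   struct kvm_sev_receive_update_data *pkt)
{
	struct kvm_sev_cmd cmd = { .sev_fd = sev_fd };
	int ret;

	/* RECEIVE_START consumes the pdh/session blobs produced by SEND_START. */
	cmd.id = KVM_SEV_RECEIVE_START;
	cmd.data = (uint64_t)(uintptr_t)start;
	ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
	if (ret)
		return ret;

	/* One call per packet; a real migration loops over all guest pages. */
	cmd.id = KVM_SEV_RECEIVE_UPDATE_DATA;
	cmd.data = (uint64_t)(uintptr_t)pkt;
	ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
	if (ret)
		return ret;

	cmd.id = KVM_SEV_RECEIVE_FINISH;
	cmd.data = 0;
	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}
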
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
int r;
- if (!svm_sev_enabled() || !sev)
+ if (!sev_enabled)
return -ENOTTY;
if (!argp)
@@ -1166,13 +1509,22 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
mutex_lock(&kvm->lock);
+ /* enc_context_owner handles all memory enc operations */
+ if (is_mirroring_enc_context(kvm)) {
+ r = -EINVAL;
+ goto out;
+ }
+
switch (sev_cmd.id) {
+ case KVM_SEV_ES_INIT:
+ if (!sev_es_enabled) {
+ r = -ENOTTY;
+ goto out;
+ }
+ fallthrough;
case KVM_SEV_INIT:
r = sev_guest_init(kvm, &sev_cmd);
break;
- case KVM_SEV_ES_INIT:
- r = sev_es_guest_init(kvm, &sev_cmd);
- break;
case KVM_SEV_LAUNCH_START:
r = sev_launch_start(kvm, &sev_cmd);
break;
@@ -1203,6 +1555,27 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
case KVM_SEV_GET_ATTESTATION_REPORT:
r = sev_get_attestation_report(kvm, &sev_cmd);
break;
+ case KVM_SEV_SEND_START:
+ r = sev_send_start(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_SEND_UPDATE_DATA:
+ r = sev_send_update_data(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_SEND_FINISH:
+ r = sev_send_finish(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_SEND_CANCEL:
+ r = sev_send_cancel(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_RECEIVE_START:
+ r = sev_receive_start(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_RECEIVE_UPDATE_DATA:
+ r = sev_receive_update_data(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_RECEIVE_FINISH:
+ r = sev_receive_finish(kvm, &sev_cmd);
+ break;
default:
r = -EINVAL;
goto out;
@@ -1226,6 +1599,10 @@ int svm_register_enc_region(struct kvm *kvm,
if (!sev_guest(kvm))
return -ENOTTY;
+ /* If kvm is mirroring the encryption context it isn't responsible for it */
+ if (is_mirroring_enc_context(kvm))
+ return -EINVAL;
+
if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
return -EINVAL;
@@ -1292,6 +1669,10 @@ int svm_unregister_enc_region(struct kvm *kvm,
struct enc_region *region;
int ret;
+ /* If kvm is mirroring encryption context it isn't responsible for it */
+ if (is_mirroring_enc_context(kvm))
+ return -EINVAL;
+
mutex_lock(&kvm->lock);
if (!sev_guest(kvm)) {
@@ -1322,6 +1703,71 @@ failed:
return ret;
}
+int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
+{
+ struct file *source_kvm_file;
+ struct kvm *source_kvm;
+ struct kvm_sev_info *mirror_sev;
+ unsigned int asid;
+ int ret;
+
+ source_kvm_file = fget(source_fd);
+ if (!file_is_kvm(source_kvm_file)) {
+ ret = -EBADF;
+ goto e_source_put;
+ }
+
+ source_kvm = source_kvm_file->private_data;
+ mutex_lock(&source_kvm->lock);
+
+ if (!sev_guest(source_kvm)) {
+ ret = -EINVAL;
+ goto e_source_unlock;
+ }
+
+ /* Mirrors of mirrors should work, but let's not get silly */
+ if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
+ ret = -EINVAL;
+ goto e_source_unlock;
+ }
+
+ asid = to_kvm_svm(source_kvm)->sev_info.asid;
+
+ /*
+ * The mirror kvm holds an enc_context_owner ref so its asid can't
+ * disappear until we're done with it
+ */
+ kvm_get_kvm(source_kvm);
+
+ fput(source_kvm_file);
+ mutex_unlock(&source_kvm->lock);
+ mutex_lock(&kvm->lock);
+
+ if (sev_guest(kvm)) {
+ ret = -EINVAL;
+ goto e_mirror_unlock;
+ }
+
+ /* Set enc_context_owner and copy its encryption context over */
+ mirror_sev = &to_kvm_svm(kvm)->sev_info;
+ mirror_sev->enc_context_owner = source_kvm;
+ mirror_sev->asid = asid;
+ mirror_sev->active = true;
+
+ mutex_unlock(&kvm->lock);
+ return 0;
+
+e_mirror_unlock:
+ mutex_unlock(&kvm->lock);
+ kvm_put_kvm(source_kvm);
+ return ret;
+e_source_unlock:
+ mutex_unlock(&source_kvm->lock);
+e_source_put:
+ fput(source_kvm_file);
+ return ret;
+}
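
A sketch of how a mirror VM might be set up from userspace, assuming this handler is exposed as KVM_CAP_VM_COPY_ENC_CONTEXT_FROM via KVM_ENABLE_CAP with the source VM fd in args[0]; that wiring is not part of this hunk, so treat the capability name and calling convention as assumptions.

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_mirror_enc_context(int mirror_vm_fd, int source_vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM,	/* assumed cap name */
		.args[0] = source_vm_fd,
	};

	return ioctl(mirror_vm_fd, KVM_ENABLE_CAP, &cap);
}
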
+
void sev_vm_destroy(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -1331,6 +1777,12 @@ void sev_vm_destroy(struct kvm *kvm)
if (!sev_guest(kvm))
return;
+ /* If this is a mirror_kvm, release the enc_context_owner and skip SEV cleanup */
+ if (is_mirroring_enc_context(kvm)) {
+ kvm_put_kvm(sev->enc_context_owner);
+ return;
+ }
+
mutex_lock(&kvm->lock);
/*
@@ -1358,12 +1810,24 @@ void sev_vm_destroy(struct kvm *kvm)
sev_asid_free(sev);
}
+void __init sev_set_cpu_caps(void)
+{
+ if (!sev_enabled)
+ kvm_cpu_cap_clear(X86_FEATURE_SEV);
+ if (!sev_es_enabled)
+ kvm_cpu_cap_clear(X86_FEATURE_SEV_ES);
+}
+
void __init sev_hardware_setup(void)
{
+#ifdef CONFIG_KVM_AMD_SEV
unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
bool sev_es_supported = false;
bool sev_supported = false;
+ if (!sev_enabled || !npt_enabled)
+ goto out;
+
/* Does the CPU support SEV? */
if (!boot_cpu_has(X86_FEATURE_SEV))
goto out;
@@ -1376,12 +1840,12 @@ void __init sev_hardware_setup(void)
/* Maximum number of encrypted guests supported simultaneously */
max_sev_asid = ecx;
-
- if (!svm_sev_enabled())
+ if (!max_sev_asid)
goto out;
/* Minimum ASID value that should be used for SEV guest */
min_sev_asid = edx;
+ sev_me_mask = 1UL << (ebx & 0x3f);
/* Initialize SEV ASID bitmaps */
sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
@@ -1389,8 +1853,11 @@ void __init sev_hardware_setup(void)
goto out;
sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
- if (!sev_reclaim_asid_bitmap)
+ if (!sev_reclaim_asid_bitmap) {
+ bitmap_free(sev_asid_bitmap);
+ sev_asid_bitmap = NULL;
goto out;
+ }
sev_asid_count = max_sev_asid - min_sev_asid + 1;
if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count))
@@ -1400,7 +1867,7 @@ void __init sev_hardware_setup(void)
sev_supported = true;
/* SEV-ES support requested? */
- if (!sev_es)
+ if (!sev_es_enabled)
goto out;
/* Does the CPU support SEV-ES? */
@@ -1419,21 +1886,36 @@ void __init sev_hardware_setup(void)
sev_es_supported = true;
out:
- sev = sev_supported;
- sev_es = sev_es_supported;
+ sev_enabled = sev_supported;
+ sev_es_enabled = sev_es_supported;
+#endif
}
void sev_hardware_teardown(void)
{
- if (!svm_sev_enabled())
+ if (!sev_enabled)
return;
+ /* No need to take sev_bitmap_lock, all VMs have been destroyed. */
+ sev_flush_asids(0, max_sev_asid);
+
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
+
misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
+}
- sev_flush_asids();
+int sev_cpu_init(struct svm_cpu_data *sd)
+{
+ if (!sev_enabled)
+ return 0;
+
+ sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
+ if (!sd->sev_vmcbs)
+ return -ENOMEM;
+
+ return 0;
}
/*
@@ -1825,7 +2307,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
len, GHCB_SCRATCH_AREA_LIMIT);
return false;
}
- scratch_va = kzalloc(len, GFP_KERNEL);
+ scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT);
if (!scratch_va)
return false;
@@ -1899,7 +2381,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
vcpu->arch.regs[VCPU_REGS_RCX] = 0;
- ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
+ ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
if (!ret) {
ret = -EINVAL;
break;
@@ -1949,8 +2431,9 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
return ret;
}
-int sev_handle_vmgexit(struct vcpu_svm *svm)
+int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
u64 ghcb_gpa, exit_code;
struct ghcb *ghcb;
@@ -1962,13 +2445,13 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
return sev_handle_vmgexit_msr_protocol(svm);
if (!ghcb_gpa) {
- vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
+ vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
return -EINVAL;
}
- if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+ if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
/* Unable to map GHCB from guest */
- vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
+ vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
ghcb_gpa);
return -EINVAL;
}
@@ -1976,7 +2459,7 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
svm->ghcb = svm->ghcb_map.hva;
ghcb = svm->ghcb_map.hva;
- trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
+ trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
exit_code = ghcb_get_sw_exit_code(ghcb);
@@ -1994,7 +2477,7 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
break;
- ret = kvm_sev_es_mmio_read(&svm->vcpu,
+ ret = kvm_sev_es_mmio_read(vcpu,
control->exit_info_1,
control->exit_info_2,
svm->ghcb_sa);
@@ -2003,19 +2486,19 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
break;
- ret = kvm_sev_es_mmio_write(&svm->vcpu,
+ ret = kvm_sev_es_mmio_write(vcpu,
control->exit_info_1,
control->exit_info_2,
svm->ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
- ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
+ ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
break;
case SVM_VMGEXIT_AP_HLT_LOOP:
- ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+ ret = kvm_emulate_ap_reset_hold(vcpu);
break;
case SVM_VMGEXIT_AP_JUMP_TABLE: {
- struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
switch (control->exit_info_1) {
case 0:
@@ -2040,12 +2523,12 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
break;
}
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
- vcpu_unimpl(&svm->vcpu,
+ vcpu_unimpl(vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
control->exit_info_1, control->exit_info_2);
break;
default:
- ret = svm_invoke_exit_handler(svm, exit_code);
+ ret = svm_invoke_exit_handler(vcpu, exit_code);
}
return ret;
@@ -2154,5 +2637,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
* the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
* non-zero value.
*/
+ if (!svm->ghcb)
+ return;
+
ghcb_set_sw_exit_info_2(svm->ghcb, 1);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6dad89248312..9790c73f2a32 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -56,9 +56,6 @@ static const struct x86_cpu_id svm_cpu_id[] = {
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#endif
-#define IOPM_ALLOC_ORDER 2
-#define MSRPM_ALLOC_ORDER 1
-
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
@@ -95,6 +92,8 @@ static const struct svm_direct_access_msrs {
} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
{ .index = MSR_STAR, .always = true },
{ .index = MSR_IA32_SYSENTER_CS, .always = true },
+ { .index = MSR_IA32_SYSENTER_EIP, .always = false },
+ { .index = MSR_IA32_SYSENTER_ESP, .always = false },
#ifdef CONFIG_X86_64
{ .index = MSR_GS_BASE, .always = true },
{ .index = MSR_FS_BASE, .always = true },
@@ -186,14 +185,6 @@ module_param(vls, int, 0444);
static int vgif = true;
module_param(vgif, int, 0444);
-/* enable/disable SEV support */
-int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
-module_param(sev, int, 0444);
-
-/* enable/disable SEV-ES support */
-int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
-module_param(sev_es, int, 0444);
-
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
@@ -214,6 +205,15 @@ struct kvm_ldttss_desc {
DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+/*
+ * Only MSR_TSC_AUX is switched via the user return hook. EFER is switched via
+ * the VMCB, and the SYSCALL/SYSENTER MSRs are handled by VMLOAD/VMSAVE.
+ *
+ * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
+ * defer the restoration of TSC_AUX until the CPU returns to userspace.
+ */
+#define TSC_AUX_URET_SLOT 0
+
static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
@@ -279,7 +279,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
* In this case we will return to the nested guest
* as soon as we leave SMM.
*/
- if (!is_smm(&svm->vcpu))
+ if (!is_smm(vcpu))
svm_free_nested(svm);
} else {
@@ -363,10 +363,10 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
bool has_error_code = vcpu->arch.exception.has_error_code;
u32 error_code = vcpu->arch.exception.error_code;
- kvm_deliver_exception_payload(&svm->vcpu);
+ kvm_deliver_exception_payload(vcpu);
if (nr == BP_VECTOR && !nrips) {
- unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
+ unsigned long rip, old_rip = kvm_rip_read(vcpu);
/*
* For guest debugging where we have to reinject #BP if some
@@ -375,8 +375,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
* raises a fault that is not intercepted. Still better than
* failing in all cases.
*/
- (void)skip_emulated_instruction(&svm->vcpu);
- rip = kvm_rip_read(&svm->vcpu);
+ (void)skip_emulated_instruction(vcpu);
+ rip = kvm_rip_read(vcpu);
svm->int3_rip = rip + svm->vmcb->save.cs.base;
svm->int3_injected = rip - old_rip;
}
@@ -553,23 +553,21 @@ static void svm_cpu_uninit(int cpu)
static int svm_cpu_init(int cpu)
{
struct svm_cpu_data *sd;
+ int ret = -ENOMEM;
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
if (!sd)
- return -ENOMEM;
+ return ret;
sd->cpu = cpu;
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
goto free_cpu_data;
+
clear_page(page_address(sd->save_area));
- if (svm_sev_enabled()) {
- sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
- sizeof(void *),
- GFP_KERNEL);
- if (!sd->sev_vmcbs)
- goto free_save_area;
- }
+ ret = sev_cpu_init(sd);
+ if (ret)
+ goto free_save_area;
per_cpu(svm_data, cpu) = sd;
@@ -579,7 +577,7 @@ free_save_area:
__free_page(sd->save_area);
free_cpu_data:
kfree(sd);
- return -ENOMEM;
+ return ret;
}
@@ -681,14 +679,15 @@ void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
u32 *svm_vcpu_alloc_msrpm(void)
{
- struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
+ unsigned int order = get_order(MSRPM_SIZE);
+ struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, order);
u32 *msrpm;
if (!pages)
return NULL;
msrpm = page_address(pages);
- memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
+ memset(msrpm, 0xff, PAGE_SIZE * (1 << order));
return msrpm;
}
@@ -707,7 +706,7 @@ void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
void svm_vcpu_free_msrpm(u32 *msrpm)
{
- __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER);
+ __free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
}
static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
@@ -881,20 +880,20 @@ static __init void svm_adjust_mmio_mask(void)
*/
mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
- kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK);
+ kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
}
static void svm_hardware_teardown(void)
{
int cpu;
- if (svm_sev_enabled())
- sev_hardware_teardown();
+ sev_hardware_teardown();
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
- __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
+ __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT),
+ get_order(IOPM_SIZE));
iopm_base = 0;
}
@@ -922,6 +921,9 @@ static __init void svm_set_cpu_caps(void)
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+ /* CPUID 0x8000001F (SME/SEV features) */
+ sev_set_cpu_caps();
}
static __init int svm_hardware_setup(void)
@@ -930,14 +932,15 @@ static __init int svm_hardware_setup(void)
struct page *iopm_pages;
void *iopm_va;
int r;
+ unsigned int order = get_order(IOPM_SIZE);
- iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
+ iopm_pages = alloc_pages(GFP_KERNEL, order);
if (!iopm_pages)
return -ENOMEM;
iopm_va = page_address(iopm_pages);
- memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
+ memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
init_msrpm_offsets();
@@ -956,6 +959,9 @@ static __init int svm_hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 32;
}
+ if (boot_cpu_has(X86_FEATURE_RDTSCP))
+ kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+
/* Check for pause filtering support */
if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
pause_filter_count = 0;
@@ -969,21 +975,6 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
}
- if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
- sev_hardware_setup();
- } else {
- sev = false;
- sev_es = false;
- }
-
- svm_adjust_mmio_mask();
-
- for_each_possible_cpu(cpu) {
- r = svm_cpu_init(cpu);
- if (r)
- goto err;
- }
-
/*
* KVM's MMU doesn't support using 2-level paging for itself, and thus
* NPT isn't supported if the host is using 2-level paging since host
@@ -998,6 +989,17 @@ static __init int svm_hardware_setup(void)
kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
+ /* Note, SEV setup consumes npt_enabled. */
+ sev_hardware_setup();
+
+ svm_adjust_mmio_mask();
+
+ for_each_possible_cpu(cpu) {
+ r = svm_cpu_init(cpu);
+ if (r)
+ goto err;
+ }
+
if (nrips) {
if (!boot_cpu_has(X86_FEATURE_NRIPS))
nrips = false;
@@ -1084,8 +1086,8 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
if (is_guest_mode(vcpu)) {
/* Write L1's TSC offset. */
g_tsc_offset = svm->vmcb->control.tsc_offset -
- svm->nested.hsave->control.tsc_offset;
- svm->nested.hsave->control.tsc_offset = offset;
+ svm->vmcb01.ptr->control.tsc_offset;
+ svm->vmcb01.ptr->control.tsc_offset = offset;
}
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
@@ -1113,12 +1115,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
}
}
-static void init_vmcb(struct vcpu_svm *svm)
+static void init_vmcb(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
- svm->vcpu.arch.hflags = 0;
+ vcpu->arch.hflags = 0;
svm_set_intercept(svm, INTERCEPT_CR0_READ);
svm_set_intercept(svm, INTERCEPT_CR3_READ);
@@ -1126,7 +1129,7 @@ static void init_vmcb(struct vcpu_svm *svm)
svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
svm_set_intercept(svm, INTERCEPT_CR3_WRITE);
svm_set_intercept(svm, INTERCEPT_CR4_WRITE);
- if (!kvm_vcpu_apicv_active(&svm->vcpu))
+ if (!kvm_vcpu_apicv_active(vcpu))
svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
set_dr_intercepts(svm);
@@ -1170,12 +1173,12 @@ static void init_vmcb(struct vcpu_svm *svm)
svm_set_intercept(svm, INTERCEPT_RDPRU);
svm_set_intercept(svm, INTERCEPT_RSM);
- if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
+ if (!kvm_mwait_in_guest(vcpu->kvm)) {
svm_set_intercept(svm, INTERCEPT_MONITOR);
svm_set_intercept(svm, INTERCEPT_MWAIT);
}
- if (!kvm_hlt_in_guest(svm->vcpu.kvm))
+ if (!kvm_hlt_in_guest(vcpu->kvm))
svm_set_intercept(svm, INTERCEPT_HLT);
control->iopm_base_pa = __sme_set(iopm_base);
@@ -1201,19 +1204,19 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- svm_set_cr4(&svm->vcpu, 0);
- svm_set_efer(&svm->vcpu, 0);
+ svm_set_cr4(vcpu, 0);
+ svm_set_efer(vcpu, 0);
save->dr6 = 0xffff0ff0;
- kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
save->rip = 0x0000fff0;
- svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
+ vcpu->arch.regs[VCPU_REGS_RIP] = save->rip;
/*
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
* It also updates the guest-visible cr0 value.
*/
- svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
- kvm_mmu_reset_context(&svm->vcpu);
+ svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+ kvm_mmu_reset_context(vcpu);
save->cr4 = X86_CR4_PAE;
/* rdx = ?? */
@@ -1225,17 +1228,18 @@ static void init_vmcb(struct vcpu_svm *svm)
clr_exception_intercept(svm, PF_VECTOR);
svm_clr_intercept(svm, INTERCEPT_CR3_READ);
svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
- save->g_pat = svm->vcpu.arch.pat;
+ save->g_pat = vcpu->arch.pat;
save->cr3 = 0;
save->cr4 = 0;
}
- svm->asid_generation = 0;
+ svm->current_vmcb->asid_generation = 0;
svm->asid = 0;
svm->nested.vmcb12_gpa = 0;
- svm->vcpu.arch.hflags = 0;
+ svm->nested.last_vmcb12_gpa = 0;
+ vcpu->arch.hflags = 0;
- if (!kvm_pause_in_guest(svm->vcpu.kvm)) {
+ if (!kvm_pause_in_guest(vcpu->kvm)) {
control->pause_filter_count = pause_filter_count;
if (pause_filter_thresh)
control->pause_filter_thresh = pause_filter_thresh;
@@ -1246,18 +1250,15 @@ static void init_vmcb(struct vcpu_svm *svm)
svm_check_invpcid(svm);
- if (kvm_vcpu_apicv_active(&svm->vcpu))
- avic_init_vmcb(svm);
-
/*
- * If hardware supports Virtual VMLOAD VMSAVE then enable it
- * in VMCB and clear intercepts to avoid #VMEXIT.
+ * If the host supports V_SPEC_CTRL then disable the interception
+ * of MSR_IA32_SPEC_CTRL.
*/
- if (vls) {
- svm_clr_intercept(svm, INTERCEPT_VMLOAD);
- svm_clr_intercept(svm, INTERCEPT_VMSAVE);
- svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
- }
+ if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+
+ if (kvm_vcpu_apicv_active(vcpu))
+ avic_init_vmcb(svm);
if (vgif) {
svm_clr_intercept(svm, INTERCEPT_STGI);
@@ -1265,11 +1266,11 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
}
- if (sev_guest(svm->vcpu.kvm)) {
+ if (sev_guest(vcpu->kvm)) {
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);
- if (sev_es_guest(svm->vcpu.kvm)) {
+ if (sev_es_guest(vcpu->kvm)) {
/* Perform SEV-ES specific VMCB updates */
sev_es_init_vmcb(svm);
}
@@ -1291,12 +1292,12 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
svm->virt_spec_ctrl = 0;
if (!init_event) {
- svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
- MSR_IA32_APICBASE_ENABLE;
- if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
- svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
+ if (kvm_vcpu_is_reset_bsp(vcpu))
+ vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
}
- init_vmcb(svm);
+ init_vmcb(vcpu);
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
kvm_rdx_write(vcpu, eax);
@@ -1305,10 +1306,16 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
}
+void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
+{
+ svm->current_vmcb = target_vmcb;
+ svm->vmcb = target_vmcb->ptr;
+}
+
static int svm_create_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
- struct page *vmcb_page;
+ struct page *vmcb01_page;
struct page *vmsa_page = NULL;
int err;
@@ -1316,11 +1323,11 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
err = -ENOMEM;
- vmcb_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!vmcb_page)
+ vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!vmcb01_page)
goto out;
- if (sev_es_guest(svm->vcpu.kvm)) {
+ if (sev_es_guest(vcpu->kvm)) {
/*
* SEV-ES guests require a separate VMSA page used to contain
* the encrypted register state of the guest.
@@ -1356,20 +1363,21 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
- svm->vmcb = page_address(vmcb_page);
- svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+ svm->vmcb01.ptr = page_address(vmcb01_page);
+ svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
if (vmsa_page)
svm->vmsa = page_address(vmsa_page);
- svm->asid_generation = 0;
svm->guest_state_loaded = false;
- init_vmcb(svm);
+
+ svm_switch_vmcb(svm, &svm->vmcb01);
+ init_vmcb(vcpu);
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
- if (sev_es_guest(svm->vcpu.kvm))
+ if (sev_es_guest(vcpu->kvm))
/* Perform SEV-ES specific VMCB creation updates */
sev_es_create_vcpu(svm);
@@ -1379,7 +1387,7 @@ error_free_vmsa_page:
if (vmsa_page)
__free_page(vmsa_page);
error_free_vmcb_page:
- __free_page(vmcb_page);
+ __free_page(vmcb01_page);
out:
return err;
}
@@ -1407,32 +1415,23 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
sev_free_vcpu(vcpu);
- __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
- __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+ __free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT));
+ __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
}
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
- unsigned int i;
if (svm->guest_state_loaded)
return;
/*
- * Certain MSRs are restored on VMEXIT (sev-es), or vmload of host save
- * area (non-sev-es). Save ones that aren't so we can restore them
- * individually later.
- */
- for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
- rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
-
- /*
* Save additional host state that will be restored on VMEXIT (sev-es)
* or subsequent vmload of host save area.
*/
- if (sev_es_guest(svm->vcpu.kvm)) {
+ if (sev_es_guest(vcpu->kvm)) {
sev_es_prepare_guest_switch(svm, vcpu->cpu);
} else {
vmsave(__sme_page_pa(sd->save_area));
@@ -1446,29 +1445,15 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
}
}
- /* This assumes that the kernel never uses MSR_TSC_AUX */
if (static_cpu_has(X86_FEATURE_RDTSCP))
- wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
svm->guest_state_loaded = true;
}
static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
- unsigned int i;
-
- if (!svm->guest_state_loaded)
- return;
-
- /*
- * Certain MSRs are restored on VMEXIT (sev-es), or vmload of host save
- * area (non-sev-es). Restore the ones that weren't.
- */
- for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
- wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
-
- svm->guest_state_loaded = false;
+ to_svm(vcpu)->guest_state_loaded = false;
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1476,11 +1461,6 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
- if (unlikely(cpu != vcpu->cpu)) {
- svm->asid_generation = 0;
- vmcb_mark_all_dirty(svm->vmcb);
- }
-
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
indirect_branch_prediction_barrier();
@@ -1564,7 +1544,7 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
/* Drop int_ctl fields related to VINTR injection. */
svm->vmcb->control.int_ctl &= mask;
if (is_guest_mode(&svm->vcpu)) {
- svm->nested.hsave->control.int_ctl &= mask;
+ svm->vmcb01.ptr->control.int_ctl &= mask;
WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
(svm->nested.ctl.int_ctl & V_TPR_MASK));
@@ -1577,16 +1557,17 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
+ struct vmcb_save_area *save01 = &to_svm(vcpu)->vmcb01.ptr->save;
switch (seg) {
case VCPU_SREG_CS: return &save->cs;
case VCPU_SREG_DS: return &save->ds;
case VCPU_SREG_ES: return &save->es;
- case VCPU_SREG_FS: return &save->fs;
- case VCPU_SREG_GS: return &save->gs;
+ case VCPU_SREG_FS: return &save01->fs;
+ case VCPU_SREG_GS: return &save01->gs;
case VCPU_SREG_SS: return &save->ss;
- case VCPU_SREG_TR: return &save->tr;
- case VCPU_SREG_LDTR: return &save->ldtr;
+ case VCPU_SREG_TR: return &save01->tr;
+ case VCPU_SREG_LDTR: return &save01->ldtr;
}
BUG();
return NULL;
@@ -1709,37 +1690,10 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
vmcb_mark_dirty(svm->vmcb, VMCB_DT);
}
-static void update_cr0_intercept(struct vcpu_svm *svm)
-{
- ulong gcr0;
- u64 *hcr0;
-
- /*
- * SEV-ES guests must always keep the CR intercepts cleared. CR
- * tracking is done using the CR write traps.
- */
- if (sev_es_guest(svm->vcpu.kvm))
- return;
-
- gcr0 = svm->vcpu.arch.cr0;
- hcr0 = &svm->vmcb->save.cr0;
- *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
- | (gcr0 & SVM_CR0_SELECTIVE_MASK);
-
- vmcb_mark_dirty(svm->vmcb, VMCB_CR);
-
- if (gcr0 == *hcr0) {
- svm_clr_intercept(svm, INTERCEPT_CR0_READ);
- svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
- } else {
- svm_set_intercept(svm, INTERCEPT_CR0_READ);
- svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
- }
-}
-
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ u64 hcr0 = cr0;
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
@@ -1757,7 +1711,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vcpu->arch.cr0 = cr0;
if (!npt_enabled)
- cr0 |= X86_CR0_PG | X86_CR0_WP;
+ hcr0 |= X86_CR0_PG | X86_CR0_WP;
/*
* re-enable caching here because the QEMU bios
@@ -1765,10 +1719,26 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* reboot
*/
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
- cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
- svm->vmcb->save.cr0 = cr0;
+ hcr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
+ svm->vmcb->save.cr0 = hcr0;
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
- update_cr0_intercept(svm);
+
+ /*
+ * SEV-ES guests must always keep the CR intercepts cleared. CR
+ * tracking is done using the CR write traps.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ return;
+
+ if (hcr0 == cr0) {
+ /* Selective CR0 write remains on. */
+ svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+ svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
+ } else {
+ svm_set_intercept(svm, INTERCEPT_CR0_READ);
+ svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
+ }
}
static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1847,7 +1817,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}
- svm->asid_generation = sd->asid_generation;
+ svm->current_vmcb->asid_generation = sd->asid_generation;
svm->asid = sd->next_asid++;
}
@@ -1896,39 +1866,43 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
vmcb_mark_dirty(svm->vmcb, VMCB_DR);
}
-static int pf_interception(struct vcpu_svm *svm)
+static int pf_interception(struct kvm_vcpu *vcpu)
{
- u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
- return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
+ return kvm_handle_page_fault(vcpu, error_code, fault_address,
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
-static int npf_interception(struct vcpu_svm *svm)
+static int npf_interception(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
trace_kvm_page_fault(fault_address, error_code);
- return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
+ return kvm_mmu_page_fault(vcpu, fault_address, error_code,
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
-static int db_interception(struct vcpu_svm *svm)
+static int db_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_run *kvm_run = svm->vcpu.run;
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm_run *kvm_run = vcpu->run;
+ struct vcpu_svm *svm = to_svm(vcpu);
- if (!(svm->vcpu.guest_debug &
+ if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
!svm->nmi_singlestep) {
u32 payload = svm->vmcb->save.dr6 ^ DR6_ACTIVE_LOW;
- kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload);
+ kvm_queue_exception_p(vcpu, DB_VECTOR, payload);
return 1;
}
@@ -1938,7 +1912,7 @@ static int db_interception(struct vcpu_svm *svm)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
- if (svm->vcpu.guest_debug &
+ if (vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
kvm_run->exit_reason = KVM_EXIT_DEBUG;
kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
@@ -1952,9 +1926,10 @@ static int db_interception(struct vcpu_svm *svm)
return 1;
}
-static int bp_interception(struct vcpu_svm *svm)
+static int bp_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_run *kvm_run = svm->vcpu.run;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_run *kvm_run = vcpu->run;
kvm_run->exit_reason = KVM_EXIT_DEBUG;
kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
@@ -1962,14 +1937,14 @@ static int bp_interception(struct vcpu_svm *svm)
return 0;
}
-static int ud_interception(struct vcpu_svm *svm)
+static int ud_interception(struct kvm_vcpu *vcpu)
{
- return handle_ud(&svm->vcpu);
+ return handle_ud(vcpu);
}
-static int ac_interception(struct vcpu_svm *svm)
+static int ac_interception(struct kvm_vcpu *vcpu)
{
- kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
+ kvm_queue_exception_e(vcpu, AC_VECTOR, 0);
return 1;
}
@@ -2012,7 +1987,7 @@ static bool is_erratum_383(void)
return true;
}
-static void svm_handle_mce(struct vcpu_svm *svm)
+static void svm_handle_mce(struct kvm_vcpu *vcpu)
{
if (is_erratum_383()) {
/*
@@ -2021,7 +1996,7 @@ static void svm_handle_mce(struct vcpu_svm *svm)
*/
pr_err("KVM: Guest triggered AMD Erratum 383\n");
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -2033,20 +2008,21 @@ static void svm_handle_mce(struct vcpu_svm *svm)
kvm_machine_check();
}
-static int mc_interception(struct vcpu_svm *svm)
+static int mc_interception(struct kvm_vcpu *vcpu)
{
return 1;
}
-static int shutdown_interception(struct vcpu_svm *svm)
+static int shutdown_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_run *kvm_run = svm->vcpu.run;
+ struct kvm_run *kvm_run = vcpu->run;
+ struct vcpu_svm *svm = to_svm(vcpu);
/*
* The VM save area has already been encrypted so it
* cannot be reinitialized - just terminate.
*/
- if (sev_es_guest(svm->vcpu.kvm))
+ if (sev_es_guest(vcpu->kvm))
return -EINVAL;
/*
@@ -2054,20 +2030,20 @@ static int shutdown_interception(struct vcpu_svm *svm)
* so reinitialize it.
*/
clear_page(svm->vmcb);
- init_vmcb(svm);
+ init_vmcb(vcpu);
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
return 0;
}
-static int io_interception(struct vcpu_svm *svm)
+static int io_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vcpu_svm *svm = to_svm(vcpu);
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
int size, in, string;
unsigned port;
- ++svm->vcpu.stat.io_exits;
+ ++vcpu->stat.io_exits;
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
port = io_info >> 16;
@@ -2082,93 +2058,69 @@ static int io_interception(struct vcpu_svm *svm)
svm->next_rip = svm->vmcb->control.exit_info_2;
- return kvm_fast_pio(&svm->vcpu, size, port, in);
-}
-
-static int nmi_interception(struct vcpu_svm *svm)
-{
- return 1;
+ return kvm_fast_pio(vcpu, size, port, in);
}
-static int intr_interception(struct vcpu_svm *svm)
+static int nmi_interception(struct kvm_vcpu *vcpu)
{
- ++svm->vcpu.stat.irq_exits;
return 1;
}
-static int nop_on_interception(struct vcpu_svm *svm)
+static int intr_interception(struct kvm_vcpu *vcpu)
{
+ ++vcpu->stat.irq_exits;
return 1;
}
-static int halt_interception(struct vcpu_svm *svm)
+static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
{
- return kvm_emulate_halt(&svm->vcpu);
-}
-
-static int vmmcall_interception(struct vcpu_svm *svm)
-{
- return kvm_emulate_hypercall(&svm->vcpu);
-}
-
-static int vmload_interception(struct vcpu_svm *svm)
-{
- struct vmcb *nested_vmcb;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb12;
struct kvm_host_map map;
int ret;
- if (nested_svm_check_permissions(svm))
+ if (nested_svm_check_permissions(vcpu))
return 1;
- ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
+ ret = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
if (ret) {
if (ret == -EINVAL)
- kvm_inject_gp(&svm->vcpu, 0);
+ kvm_inject_gp(vcpu, 0);
return 1;
}
- nested_vmcb = map.hva;
+ vmcb12 = map.hva;
+
+ ret = kvm_skip_emulated_instruction(vcpu);
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
+ if (vmload) {
+ nested_svm_vmloadsave(vmcb12, svm->vmcb);
+ svm->sysenter_eip_hi = 0;
+ svm->sysenter_esp_hi = 0;
+ } else
+ nested_svm_vmloadsave(svm->vmcb, vmcb12);
- nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ kvm_vcpu_unmap(vcpu, &map, true);
return ret;
}
-static int vmsave_interception(struct vcpu_svm *svm)
+static int vmload_interception(struct kvm_vcpu *vcpu)
{
- struct vmcb *nested_vmcb;
- struct kvm_host_map map;
- int ret;
-
- if (nested_svm_check_permissions(svm))
- return 1;
-
- ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
- if (ret) {
- if (ret == -EINVAL)
- kvm_inject_gp(&svm->vcpu, 0);
- return 1;
- }
-
- nested_vmcb = map.hva;
-
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
-
- nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ return vmload_vmsave_interception(vcpu, true);
+}
- return ret;
+static int vmsave_interception(struct kvm_vcpu *vcpu)
+{
+ return vmload_vmsave_interception(vcpu, false);
}
-static int vmrun_interception(struct vcpu_svm *svm)
+static int vmrun_interception(struct kvm_vcpu *vcpu)
{
- if (nested_svm_check_permissions(svm))
+ if (nested_svm_check_permissions(vcpu))
return 1;
- return nested_svm_vmrun(svm);
+ return nested_svm_vmrun(vcpu);
}
enum {
@@ -2207,7 +2159,7 @@ static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
[SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
[SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
};
- int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
+ int (*const svm_instr_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_INSTR_VMRUN] = vmrun_interception,
[SVM_INSTR_VMLOAD] = vmload_interception,
[SVM_INSTR_VMSAVE] = vmsave_interception,
@@ -2216,17 +2168,13 @@ static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
int ret;
if (is_guest_mode(vcpu)) {
- svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
/* Returns '1' or -errno on failure, '0' on success. */
- ret = nested_svm_vmexit(svm);
+ ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
if (ret)
return ret;
return 1;
}
- return svm_instr_handlers[opcode](svm);
+ return svm_instr_handlers[opcode](vcpu);
}
/*
@@ -2237,9 +2185,9 @@ static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
* regions (e.g. SMM memory on host).
* 2) VMware backdoor
*/
-static int gp_interception(struct vcpu_svm *svm)
+static int gp_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vcpu_svm *svm = to_svm(vcpu);
u32 error_code = svm->vmcb->control.exit_info_1;
int opcode;
@@ -2304,73 +2252,58 @@ void svm_set_gif(struct vcpu_svm *svm, bool value)
}
}
-static int stgi_interception(struct vcpu_svm *svm)
+static int stgi_interception(struct kvm_vcpu *vcpu)
{
int ret;
- if (nested_svm_check_permissions(svm))
+ if (nested_svm_check_permissions(vcpu))
return 1;
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
- svm_set_gif(svm, true);
+ ret = kvm_skip_emulated_instruction(vcpu);
+ svm_set_gif(to_svm(vcpu), true);
return ret;
}
-static int clgi_interception(struct vcpu_svm *svm)
+static int clgi_interception(struct kvm_vcpu *vcpu)
{
int ret;
- if (nested_svm_check_permissions(svm))
+ if (nested_svm_check_permissions(vcpu))
return 1;
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
- svm_set_gif(svm, false);
+ ret = kvm_skip_emulated_instruction(vcpu);
+ svm_set_gif(to_svm(vcpu), false);
return ret;
}
-static int invlpga_interception(struct vcpu_svm *svm)
+static int invlpga_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
-
- trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
- kvm_rax_read(&svm->vcpu));
-
- /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
- kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
+ gva_t gva = kvm_rax_read(vcpu);
+ u32 asid = kvm_rcx_read(vcpu);
- return kvm_skip_emulated_instruction(&svm->vcpu);
-}
+ /* FIXME: Handle an address size prefix. */
+ if (!is_long_mode(vcpu))
+ gva = (u32)gva;
-static int skinit_interception(struct vcpu_svm *svm)
-{
- trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
+ trace_kvm_invlpga(to_svm(vcpu)->vmcb->save.rip, asid, gva);
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
- return 1;
-}
+ /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
+ kvm_mmu_invlpg(vcpu, gva);
-static int wbinvd_interception(struct vcpu_svm *svm)
-{
- return kvm_emulate_wbinvd(&svm->vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
}
-static int xsetbv_interception(struct vcpu_svm *svm)
+static int skinit_interception(struct kvm_vcpu *vcpu)
{
- u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
- u32 index = kvm_rcx_read(&svm->vcpu);
+ trace_kvm_skinit(to_svm(vcpu)->vmcb->save.rip, kvm_rax_read(vcpu));
- int err = kvm_set_xcr(&svm->vcpu, index, new_bv);
- return kvm_complete_insn_gp(&svm->vcpu, err);
-}
-
-static int rdpru_interception(struct vcpu_svm *svm)
-{
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
-static int task_switch_interception(struct vcpu_svm *svm)
+static int task_switch_interception(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
u16 tss_selector;
int reason;
int int_type = svm->vmcb->control.exit_int_info &
@@ -2399,7 +2332,7 @@ static int task_switch_interception(struct vcpu_svm *svm)
if (reason == TASK_SWITCH_GATE) {
switch (type) {
case SVM_EXITINTINFO_TYPE_NMI:
- svm->vcpu.arch.nmi_injected = false;
+ vcpu->arch.nmi_injected = false;
break;
case SVM_EXITINTINFO_TYPE_EXEPT:
if (svm->vmcb->control.exit_info_2 &
@@ -2408,10 +2341,10 @@ static int task_switch_interception(struct vcpu_svm *svm)
error_code =
(u32)svm->vmcb->control.exit_info_2;
}
- kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_exception_queue(vcpu);
break;
case SVM_EXITINTINFO_TYPE_INTR:
- kvm_clear_interrupt_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(vcpu);
break;
default:
break;
@@ -2422,77 +2355,58 @@ static int task_switch_interception(struct vcpu_svm *svm)
int_type == SVM_EXITINTINFO_TYPE_SOFT ||
(int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
(int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
- if (!skip_emulated_instruction(&svm->vcpu))
+ if (!skip_emulated_instruction(vcpu))
return 0;
}
if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
int_vec = -1;
- return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
+ return kvm_task_switch(vcpu, tss_selector, int_vec, reason,
has_error_code, error_code);
}
-static int cpuid_interception(struct vcpu_svm *svm)
+static int iret_interception(struct kvm_vcpu *vcpu)
{
- return kvm_emulate_cpuid(&svm->vcpu);
-}
+ struct vcpu_svm *svm = to_svm(vcpu);
-static int iret_interception(struct vcpu_svm *svm)
-{
- ++svm->vcpu.stat.nmi_window_exits;
- svm->vcpu.arch.hflags |= HF_IRET_MASK;
- if (!sev_es_guest(svm->vcpu.kvm)) {
+ ++vcpu->stat.nmi_window_exits;
+ vcpu->arch.hflags |= HF_IRET_MASK;
+ if (!sev_es_guest(vcpu->kvm)) {
svm_clr_intercept(svm, INTERCEPT_IRET);
- svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+ svm->nmi_iret_rip = kvm_rip_read(vcpu);
}
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
-static int invd_interception(struct vcpu_svm *svm)
-{
- /* Treat an INVD instruction as a NOP and just skip it. */
- return kvm_skip_emulated_instruction(&svm->vcpu);
-}
-
-static int invlpg_interception(struct vcpu_svm *svm)
+static int invlpg_interception(struct kvm_vcpu *vcpu)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
- return kvm_emulate_instruction(&svm->vcpu, 0);
+ return kvm_emulate_instruction(vcpu, 0);
- kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
- return kvm_skip_emulated_instruction(&svm->vcpu);
+ kvm_mmu_invlpg(vcpu, to_svm(vcpu)->vmcb->control.exit_info_1);
+ return kvm_skip_emulated_instruction(vcpu);
}
-static int emulate_on_interception(struct vcpu_svm *svm)
+static int emulate_on_interception(struct kvm_vcpu *vcpu)
{
- return kvm_emulate_instruction(&svm->vcpu, 0);
+ return kvm_emulate_instruction(vcpu, 0);
}
-static int rsm_interception(struct vcpu_svm *svm)
+static int rsm_interception(struct kvm_vcpu *vcpu)
{
- return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2);
+ return kvm_emulate_instruction_from_buffer(vcpu, rsm_ins_bytes, 2);
}
-static int rdpmc_interception(struct vcpu_svm *svm)
-{
- int err;
-
- if (!nrips)
- return emulate_on_interception(svm);
-
- err = kvm_rdpmc(&svm->vcpu);
- return kvm_complete_insn_gp(&svm->vcpu, err);
-}
-
-static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
+static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
unsigned long val)
{
- unsigned long cr0 = svm->vcpu.arch.cr0;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long cr0 = vcpu->arch.cr0;
bool ret = false;
- if (!is_guest_mode(&svm->vcpu) ||
+ if (!is_guest_mode(vcpu) ||
(!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
return false;
@@ -2509,17 +2423,18 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
#define CR_VALID (1ULL << 63)
-static int cr_interception(struct vcpu_svm *svm)
+static int cr_interception(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
int reg, cr;
unsigned long val;
int err;
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
- return emulate_on_interception(svm);
+ return emulate_on_interception(vcpu);
if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
- return emulate_on_interception(svm);
+ return emulate_on_interception(vcpu);
reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
@@ -2530,61 +2445,61 @@ static int cr_interception(struct vcpu_svm *svm)
err = 0;
if (cr >= 16) { /* mov to cr */
cr -= 16;
- val = kvm_register_read(&svm->vcpu, reg);
+ val = kvm_register_read(vcpu, reg);
trace_kvm_cr_write(cr, val);
switch (cr) {
case 0:
- if (!check_selective_cr0_intercepted(svm, val))
- err = kvm_set_cr0(&svm->vcpu, val);
+ if (!check_selective_cr0_intercepted(vcpu, val))
+ err = kvm_set_cr0(vcpu, val);
else
return 1;
break;
case 3:
- err = kvm_set_cr3(&svm->vcpu, val);
+ err = kvm_set_cr3(vcpu, val);
break;
case 4:
- err = kvm_set_cr4(&svm->vcpu, val);
+ err = kvm_set_cr4(vcpu, val);
break;
case 8:
- err = kvm_set_cr8(&svm->vcpu, val);
+ err = kvm_set_cr8(vcpu, val);
break;
default:
WARN(1, "unhandled write to CR%d", cr);
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
} else { /* mov from cr */
switch (cr) {
case 0:
- val = kvm_read_cr0(&svm->vcpu);
+ val = kvm_read_cr0(vcpu);
break;
case 2:
- val = svm->vcpu.arch.cr2;
+ val = vcpu->arch.cr2;
break;
case 3:
- val = kvm_read_cr3(&svm->vcpu);
+ val = kvm_read_cr3(vcpu);
break;
case 4:
- val = kvm_read_cr4(&svm->vcpu);
+ val = kvm_read_cr4(vcpu);
break;
case 8:
- val = kvm_get_cr8(&svm->vcpu);
+ val = kvm_get_cr8(vcpu);
break;
default:
WARN(1, "unhandled read from CR%d", cr);
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- kvm_register_write(&svm->vcpu, reg, val);
+ kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
}
- return kvm_complete_insn_gp(&svm->vcpu, err);
+ return kvm_complete_insn_gp(vcpu, err);
}
-static int cr_trap(struct vcpu_svm *svm)
+static int cr_trap(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vcpu_svm *svm = to_svm(vcpu);
unsigned long old_value, new_value;
unsigned int cr;
int ret = 0;
@@ -2606,7 +2521,7 @@ static int cr_trap(struct vcpu_svm *svm)
kvm_post_set_cr4(vcpu, old_value, new_value);
break;
case 8:
- ret = kvm_set_cr8(&svm->vcpu, new_value);
+ ret = kvm_set_cr8(vcpu, new_value);
break;
default:
WARN(1, "unhandled CR%d write trap", cr);
@@ -2617,57 +2532,57 @@ static int cr_trap(struct vcpu_svm *svm)
return kvm_complete_insn_gp(vcpu, ret);
}
-static int dr_interception(struct vcpu_svm *svm)
+static int dr_interception(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
int reg, dr;
unsigned long val;
int err = 0;
- if (svm->vcpu.guest_debug == 0) {
+ if (vcpu->guest_debug == 0) {
/*
* No more DR vmexits; force a reload of the debug registers
* and reenter on this instruction. The next vmexit will
* retrieve the full state of the debug registers.
*/
clr_dr_intercepts(svm);
- svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
return 1;
}
if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
- return emulate_on_interception(svm);
+ return emulate_on_interception(vcpu);
reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
if (dr >= 16) { /* mov to DRn */
dr -= 16;
- val = kvm_register_read(&svm->vcpu, reg);
- err = kvm_set_dr(&svm->vcpu, dr, val);
+ val = kvm_register_read(vcpu, reg);
+ err = kvm_set_dr(vcpu, dr, val);
} else {
- kvm_get_dr(&svm->vcpu, dr, &val);
- kvm_register_write(&svm->vcpu, reg, val);
+ kvm_get_dr(vcpu, dr, &val);
+ kvm_register_write(vcpu, reg, val);
}
- return kvm_complete_insn_gp(&svm->vcpu, err);
+ return kvm_complete_insn_gp(vcpu, err);
}
-static int cr8_write_interception(struct vcpu_svm *svm)
+static int cr8_write_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_run *kvm_run = svm->vcpu.run;
int r;
- u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
+ u8 cr8_prev = kvm_get_cr8(vcpu);
/* instruction emulation calls kvm_set_cr8() */
- r = cr_interception(svm);
- if (lapic_in_kernel(&svm->vcpu))
+ r = cr_interception(vcpu);
+ if (lapic_in_kernel(vcpu))
return r;
- if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
+ if (cr8_prev <= kvm_get_cr8(vcpu))
return r;
- kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+ vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
return 0;
}
-static int efer_trap(struct vcpu_svm *svm)
+static int efer_trap(struct kvm_vcpu *vcpu)
{
struct msr_data msr_info;
int ret;
@@ -2680,10 +2595,10 @@ static int efer_trap(struct vcpu_svm *svm)
*/
msr_info.host_initiated = false;
msr_info.index = MSR_EFER;
- msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
- ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+ msr_info.data = to_svm(vcpu)->vmcb->control.exit_info_1 & ~EFER_SVME;
+ ret = kvm_set_msr_common(vcpu, &msr_info);
- return kvm_complete_insn_gp(&svm->vcpu, ret);
+ return kvm_complete_insn_gp(vcpu, ret);
}
static int svm_get_msr_feature(struct kvm_msr_entry *msr)
@@ -2710,34 +2625,41 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_STAR:
- msr_info->data = svm->vmcb->save.star;
+ msr_info->data = svm->vmcb01.ptr->save.star;
break;
#ifdef CONFIG_X86_64
case MSR_LSTAR:
- msr_info->data = svm->vmcb->save.lstar;
+ msr_info->data = svm->vmcb01.ptr->save.lstar;
break;
case MSR_CSTAR:
- msr_info->data = svm->vmcb->save.cstar;
+ msr_info->data = svm->vmcb01.ptr->save.cstar;
break;
case MSR_KERNEL_GS_BASE:
- msr_info->data = svm->vmcb->save.kernel_gs_base;
+ msr_info->data = svm->vmcb01.ptr->save.kernel_gs_base;
break;
case MSR_SYSCALL_MASK:
- msr_info->data = svm->vmcb->save.sfmask;
+ msr_info->data = svm->vmcb01.ptr->save.sfmask;
break;
#endif
case MSR_IA32_SYSENTER_CS:
- msr_info->data = svm->vmcb->save.sysenter_cs;
+ msr_info->data = svm->vmcb01.ptr->save.sysenter_cs;
break;
case MSR_IA32_SYSENTER_EIP:
- msr_info->data = svm->sysenter_eip;
+ msr_info->data = (u32)svm->vmcb01.ptr->save.sysenter_eip;
+ if (guest_cpuid_is_intel(vcpu))
+ msr_info->data |= (u64)svm->sysenter_eip_hi << 32;
break;
case MSR_IA32_SYSENTER_ESP:
- msr_info->data = svm->sysenter_esp;
+ msr_info->data = svm->vmcb01.ptr->save.sysenter_esp;
+ if (guest_cpuid_is_intel(vcpu))
+ msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
break;
case MSR_TSC_AUX:
if (!boot_cpu_has(X86_FEATURE_RDTSCP))
return 1;
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+ return 1;
msr_info->data = svm->tsc_aux;
break;
/*
@@ -2771,7 +2693,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_has_spec_ctrl_msr(vcpu))
return 1;
- msr_info->data = svm->spec_ctrl;
+ if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
+ msr_info->data = svm->vmcb->save.spec_ctrl;
+ else
+ msr_info->data = svm->spec_ctrl;
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr_info->host_initiated &&
@@ -2809,8 +2734,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (!sev_es_guest(svm->vcpu.kvm) || !err)
- return kvm_complete_insn_gp(&svm->vcpu, err);
+ if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
+ return kvm_complete_insn_gp(vcpu, err);
ghcb_set_sw_exit_info_1(svm->ghcb, 1);
ghcb_set_sw_exit_info_2(svm->ghcb,
@@ -2820,11 +2745,6 @@ static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
return 1;
}
-static int rdmsr_interception(struct vcpu_svm *svm)
-{
- return kvm_emulate_rdmsr(&svm->vcpu);
-}
-
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2853,6 +2773,7 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ int r;
u32 ecx = msr->index;
u64 data = msr->data;
@@ -2861,7 +2782,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
vcpu->arch.pat = data;
- svm->vmcb->save.g_pat = data;
+ svm->vmcb01.ptr->save.g_pat = data;
+ if (is_guest_mode(vcpu))
+ nested_vmcb02_compute_g_pat(svm);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
break;
case MSR_IA32_SPEC_CTRL:
@@ -2872,7 +2795,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (kvm_spec_ctrl_test_value(data))
return 1;
- svm->spec_ctrl = data;
+ if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
+ svm->vmcb->save.spec_ctrl = data;
+ else
+ svm->spec_ctrl = data;
if (!data)
break;
@@ -2915,44 +2841,70 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->virt_spec_ctrl = data;
break;
case MSR_STAR:
- svm->vmcb->save.star = data;
+ svm->vmcb01.ptr->save.star = data;
break;
#ifdef CONFIG_X86_64
case MSR_LSTAR:
- svm->vmcb->save.lstar = data;
+ svm->vmcb01.ptr->save.lstar = data;
break;
case MSR_CSTAR:
- svm->vmcb->save.cstar = data;
+ svm->vmcb01.ptr->save.cstar = data;
break;
case MSR_KERNEL_GS_BASE:
- svm->vmcb->save.kernel_gs_base = data;
+ svm->vmcb01.ptr->save.kernel_gs_base = data;
break;
case MSR_SYSCALL_MASK:
- svm->vmcb->save.sfmask = data;
+ svm->vmcb01.ptr->save.sfmask = data;
break;
#endif
case MSR_IA32_SYSENTER_CS:
- svm->vmcb->save.sysenter_cs = data;
+ svm->vmcb01.ptr->save.sysenter_cs = data;
break;
case MSR_IA32_SYSENTER_EIP:
- svm->sysenter_eip = data;
- svm->vmcb->save.sysenter_eip = data;
+ svm->vmcb01.ptr->save.sysenter_eip = (u32)data;
+ /*
+ * We only intercept the MSR_IA32_SYSENTER_{EIP|ESP} MSRs
+ * when we spoof an Intel vendor ID (for cross-vendor migration).
+ * In this case we use this intercept to track the high
+ * 32-bit part of these MSRs to support Intel's
+ * implementation of SYSENTER/SYSEXIT.
+ */
+ svm->sysenter_eip_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
break;
case MSR_IA32_SYSENTER_ESP:
- svm->sysenter_esp = data;
- svm->vmcb->save.sysenter_esp = data;
+ svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
+ svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
break;
case MSR_TSC_AUX:
if (!boot_cpu_has(X86_FEATURE_RDTSCP))
return 1;
+ if (!msr->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+ return 1;
+
+ /*
+ * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+ * incomplete and conflicting architectural behavior. Current
+ * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+ * reserved and always read as zeros. Emulate AMD CPU behavior
+ * to avoid explosions if the vCPU is migrated from an AMD host
+ * to an Intel host.
+ */
+ data = (u32)data;
+
/*
- * This is rare, so we update the MSR here instead of using
- * direct_access_msrs. Doing that would require a rdmsr in
- * svm_vcpu_put.
+ * TSC_AUX is usually changed only during boot and never read
+ * directly. Intercept TSC_AUX instead of exposing it to the
+ * guest via direct_access_msrs, and switch it via user return.
*/
+ preempt_disable();
+ r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+ preempt_enable();
+ if (r)
+ return 1;
+
svm->tsc_aux = data;
- wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
break;
case MSR_IA32_DEBUGCTLMSR:
if (!boot_cpu_has(X86_FEATURE_LBRV)) {
@@ -3006,38 +2958,32 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
return 0;
}
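
The SYSENTER handling above keeps only the low 32 bits in vmcb01 and shadows the high halves in sysenter_eip_hi/sysenter_esp_hi when the guest advertises an Intel vendor ID. A minimal user-space model of that split-on-write / recombine-on-read scheme; the types and names are simplified stand-ins, not the kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_vmcb_save { uint32_t sysenter_eip; };	/* hardware keeps 32 bits */
struct demo_vcpu {
	struct demo_vmcb_save save;
	uint32_t sysenter_eip_hi;	/* software shadow of bits 63:32 */
	bool guest_is_intel;
};

static void demo_write_sysenter_eip(struct demo_vcpu *v, uint64_t data)
{
	v->save.sysenter_eip = (uint32_t)data;
	v->sysenter_eip_hi = v->guest_is_intel ? (uint32_t)(data >> 32) : 0;
}

static uint64_t demo_read_sysenter_eip(const struct demo_vcpu *v)
{
	uint64_t data = v->save.sysenter_eip;

	if (v->guest_is_intel)
		data |= (uint64_t)v->sysenter_eip_hi << 32;
	return data;
}

int main(void)
{
	struct demo_vcpu v = { .guest_is_intel = true };

	demo_write_sysenter_eip(&v, 0xffffffff81000000ULL);
	printf("read back: %#llx\n", (unsigned long long)demo_read_sysenter_eip(&v));
	return 0;
}
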
-static int wrmsr_interception(struct vcpu_svm *svm)
-{
- return kvm_emulate_wrmsr(&svm->vcpu);
-}
-
-static int msr_interception(struct vcpu_svm *svm)
+static int msr_interception(struct kvm_vcpu *vcpu)
{
- if (svm->vmcb->control.exit_info_1)
- return wrmsr_interception(svm);
+ if (to_svm(vcpu)->vmcb->control.exit_info_1)
+ return kvm_emulate_wrmsr(vcpu);
else
- return rdmsr_interception(svm);
+ return kvm_emulate_rdmsr(vcpu);
}
-static int interrupt_window_interception(struct vcpu_svm *svm)
+static int interrupt_window_interception(struct kvm_vcpu *vcpu)
{
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
- svm_clear_vintr(svm);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ svm_clear_vintr(to_svm(vcpu));
/*
* For AVIC, the only reason to end up here is ExtINTs.
* In this case AVIC was temporarily disabled for
* requesting the IRQ window and we have to re-enable it.
*/
- svm_toggle_avic_for_irq_window(&svm->vcpu, true);
+ svm_toggle_avic_for_irq_window(vcpu, true);
- ++svm->vcpu.stat.irq_window_exits;
+ ++vcpu->stat.irq_window_exits;
return 1;
}
-static int pause_interception(struct vcpu_svm *svm)
+static int pause_interception(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
bool in_kernel;
/*
@@ -3045,35 +2991,18 @@ static int pause_interception(struct vcpu_svm *svm)
* vcpu->arch.preempted_in_kernel can never be true. Just
* set in_kernel to false as well.
*/
- in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;
+ in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
if (!kvm_pause_in_guest(vcpu->kvm))
grow_ple_window(vcpu);
kvm_vcpu_on_spin(vcpu, in_kernel);
- return 1;
-}
-
-static int nop_interception(struct vcpu_svm *svm)
-{
- return kvm_skip_emulated_instruction(&(svm->vcpu));
+ return kvm_skip_emulated_instruction(vcpu);
}
-static int monitor_interception(struct vcpu_svm *svm)
+static int invpcid_interception(struct kvm_vcpu *vcpu)
{
- printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
- return nop_interception(svm);
-}
-
-static int mwait_interception(struct vcpu_svm *svm)
-{
- printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
- return nop_interception(svm);
-}
-
-static int invpcid_interception(struct vcpu_svm *svm)
-{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vcpu_svm *svm = to_svm(vcpu);
unsigned long type;
gva_t gva;
@@ -3098,7 +3027,7 @@ static int invpcid_interception(struct vcpu_svm *svm)
return kvm_handle_invpcid(vcpu, type, gva);
}
-static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
+static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
[SVM_EXIT_READ_CR4] = cr_interception,
@@ -3133,15 +3062,15 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
- [SVM_EXIT_SMI] = nop_on_interception,
- [SVM_EXIT_INIT] = nop_on_interception,
+ [SVM_EXIT_SMI] = kvm_emulate_as_nop,
+ [SVM_EXIT_INIT] = kvm_emulate_as_nop,
[SVM_EXIT_VINTR] = interrupt_window_interception,
- [SVM_EXIT_RDPMC] = rdpmc_interception,
- [SVM_EXIT_CPUID] = cpuid_interception,
+ [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
+ [SVM_EXIT_CPUID] = kvm_emulate_cpuid,
[SVM_EXIT_IRET] = iret_interception,
- [SVM_EXIT_INVD] = invd_interception,
+ [SVM_EXIT_INVD] = kvm_emulate_invd,
[SVM_EXIT_PAUSE] = pause_interception,
- [SVM_EXIT_HLT] = halt_interception,
+ [SVM_EXIT_HLT] = kvm_emulate_halt,
[SVM_EXIT_INVLPG] = invlpg_interception,
[SVM_EXIT_INVLPGA] = invlpga_interception,
[SVM_EXIT_IOIO] = io_interception,
@@ -3149,17 +3078,17 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
[SVM_EXIT_VMRUN] = vmrun_interception,
- [SVM_EXIT_VMMCALL] = vmmcall_interception,
+ [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall,
[SVM_EXIT_VMLOAD] = vmload_interception,
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = skinit_interception,
- [SVM_EXIT_WBINVD] = wbinvd_interception,
- [SVM_EXIT_MONITOR] = monitor_interception,
- [SVM_EXIT_MWAIT] = mwait_interception,
- [SVM_EXIT_XSETBV] = xsetbv_interception,
- [SVM_EXIT_RDPRU] = rdpru_interception,
+ [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,
+ [SVM_EXIT_MONITOR] = kvm_emulate_monitor,
+ [SVM_EXIT_MWAIT] = kvm_emulate_mwait,
+ [SVM_EXIT_XSETBV] = kvm_emulate_xsetbv,
+ [SVM_EXIT_RDPRU] = kvm_handle_invalid_op,
[SVM_EXIT_EFER_WRITE_TRAP] = efer_trap,
[SVM_EXIT_CR0_WRITE_TRAP] = cr_trap,
[SVM_EXIT_CR4_WRITE_TRAP] = cr_trap,
@@ -3177,6 +3106,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
+ struct vmcb_save_area *save01 = &svm->vmcb01.ptr->save;
if (!dump_invalid_vmcb) {
pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
@@ -3239,28 +3169,28 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
save->ds.limit, save->ds.base);
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"fs:",
- save->fs.selector, save->fs.attrib,
- save->fs.limit, save->fs.base);
+ save01->fs.selector, save01->fs.attrib,
+ save01->fs.limit, save01->fs.base);
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"gs:",
- save->gs.selector, save->gs.attrib,
- save->gs.limit, save->gs.base);
+ save01->gs.selector, save01->gs.attrib,
+ save01->gs.limit, save01->gs.base);
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"gdtr:",
save->gdtr.selector, save->gdtr.attrib,
save->gdtr.limit, save->gdtr.base);
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"ldtr:",
- save->ldtr.selector, save->ldtr.attrib,
- save->ldtr.limit, save->ldtr.base);
+ save01->ldtr.selector, save01->ldtr.attrib,
+ save01->ldtr.limit, save01->ldtr.base);
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"idtr:",
save->idtr.selector, save->idtr.attrib,
save->idtr.limit, save->idtr.base);
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"tr:",
- save->tr.selector, save->tr.attrib,
- save->tr.limit, save->tr.base);
+ save01->tr.selector, save01->tr.attrib,
+ save01->tr.limit, save01->tr.base);
pr_err("cpl: %d efer: %016llx\n",
save->cpl, save->efer);
pr_err("%-15s %016llx %-13s %016llx\n",
@@ -3274,15 +3204,15 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-15s %016llx %-13s %016llx\n",
"rsp:", save->rsp, "rax:", save->rax);
pr_err("%-15s %016llx %-13s %016llx\n",
- "star:", save->star, "lstar:", save->lstar);
+ "star:", save01->star, "lstar:", save01->lstar);
pr_err("%-15s %016llx %-13s %016llx\n",
- "cstar:", save->cstar, "sfmask:", save->sfmask);
+ "cstar:", save01->cstar, "sfmask:", save01->sfmask);
pr_err("%-15s %016llx %-13s %016llx\n",
- "kernel_gs_base:", save->kernel_gs_base,
- "sysenter_cs:", save->sysenter_cs);
+ "kernel_gs_base:", save01->kernel_gs_base,
+ "sysenter_cs:", save01->sysenter_cs);
pr_err("%-15s %016llx %-13s %016llx\n",
- "sysenter_esp:", save->sysenter_esp,
- "sysenter_eip:", save->sysenter_eip);
+ "sysenter_esp:", save01->sysenter_esp,
+ "sysenter_eip:", save01->sysenter_eip);
pr_err("%-15s %016llx %-13s %016llx\n",
"gpat:", save->g_pat, "dbgctl:", save->dbgctl);
pr_err("%-15s %016llx %-13s %016llx\n",
@@ -3309,24 +3239,24 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
return -EINVAL;
}
-int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
{
- if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+ if (svm_handle_invalid_exit(vcpu, exit_code))
return 0;
#ifdef CONFIG_RETPOLINE
if (exit_code == SVM_EXIT_MSR)
- return msr_interception(svm);
+ return msr_interception(vcpu);
else if (exit_code == SVM_EXIT_VINTR)
- return interrupt_window_interception(svm);
+ return interrupt_window_interception(vcpu);
else if (exit_code == SVM_EXIT_INTR)
- return intr_interception(svm);
+ return intr_interception(vcpu);
else if (exit_code == SVM_EXIT_HLT)
- return halt_interception(svm);
+ return kvm_emulate_halt(vcpu);
else if (exit_code == SVM_EXIT_NPF)
- return npf_interception(svm);
+ return npf_interception(vcpu);
#endif
- return svm_exit_handlers[exit_code](svm);
+ return svm_exit_handlers[exit_code](vcpu);
}
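
The CONFIG_RETPOLINE branch above trades the indirect call through svm_exit_handlers[] for direct calls on the hottest exit reasons, since retpolines make indirect calls comparatively expensive. A small self-contained model of that dispatch pattern; the exit numbers and demo_* names are made up for illustration:

#include <stdio.h>

enum demo_exit { DEMO_EXIT_MSR, DEMO_EXIT_HLT, DEMO_EXIT_OTHER, DEMO_EXIT_MAX };

static int demo_handle_msr(void)   { puts("msr");   return 1; }
static int demo_handle_hlt(void)   { puts("hlt");   return 1; }
static int demo_handle_other(void) { puts("other"); return 1; }

static int (*const demo_handlers[DEMO_EXIT_MAX])(void) = {
	[DEMO_EXIT_MSR]   = demo_handle_msr,
	[DEMO_EXIT_HLT]   = demo_handle_hlt,
	[DEMO_EXIT_OTHER] = demo_handle_other,
};

static int demo_invoke(enum demo_exit code)
{
#ifdef DEMO_RETPOLINE
	/* direct calls for the hot exits, mirroring the #ifdef above */
	if (code == DEMO_EXIT_MSR)
		return demo_handle_msr();
	if (code == DEMO_EXIT_HLT)
		return demo_handle_hlt();
#endif
	return demo_handlers[code]();	/* indirect (retpoline-protected) call */
}

int main(void)
{
	return demo_invoke(DEMO_EXIT_HLT) == 1 ? 0 : 1;
}
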
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
@@ -3395,7 +3325,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
- return svm_invoke_exit_handler(svm, exit_code);
+ return svm_invoke_exit_handler(vcpu, exit_code);
}
static void reload_tss(struct kvm_vcpu *vcpu)
@@ -3406,15 +3336,27 @@ static void reload_tss(struct kvm_vcpu *vcpu)
load_TR_desc();
}
-static void pre_svm_run(struct vcpu_svm *svm)
+static void pre_svm_run(struct kvm_vcpu *vcpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, svm->vcpu.cpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ struct vcpu_svm *svm = to_svm(vcpu);
- if (sev_guest(svm->vcpu.kvm))
- return pre_sev_run(svm, svm->vcpu.cpu);
+ /*
+ * If the previous vmrun of the vmcb occurred on a different physical
+ * cpu, then mark the vmcb dirty and assign a new asid. Hardware's
+ * vmcb clean bits are per logical CPU, as are KVM's asid assignments.
+ */
+ if (unlikely(svm->current_vmcb->cpu != vcpu->cpu)) {
+ svm->current_vmcb->asid_generation = 0;
+ vmcb_mark_all_dirty(svm->vmcb);
+ svm->current_vmcb->cpu = vcpu->cpu;
+ }
+
+ if (sev_guest(vcpu->kvm))
+ return pre_sev_run(svm, vcpu->cpu);
/* FIXME: handle wraparound of asid_generation */
- if (svm->asid_generation != sd->asid_generation)
+ if (svm->current_vmcb->asid_generation != sd->asid_generation)
new_asid(svm, sd);
}
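
A stand-alone sketch of the per-CPU bookkeeping pre_svm_run() gains above: once a VMCB last ran on a different logical CPU, its clean bits and ASID can no longer be trusted, so the generation is zeroed (forcing a new ASID) and all clean bits are dropped. The struct and names below are simplified stand-ins, not the kernel structures:

#include <stdint.h>
#include <stdio.h>

struct demo_vmcb_info {
	int cpu;			/* CPU this VMCB last ran on */
	uint64_t asid_generation;	/* 0 forces a new ASID on the next run */
	uint32_t clean;			/* model of the VMCB clean-bits field */
};

static void demo_pre_run(struct demo_vmcb_info *vmcb, int this_cpu)
{
	if (vmcb->cpu != this_cpu) {
		vmcb->asid_generation = 0;	/* stands in for the new_asid() path */
		vmcb->clean = 0;		/* stands in for vmcb_mark_all_dirty() */
		vmcb->cpu = this_cpu;
	}
}

int main(void)
{
	struct demo_vmcb_info vmcb = { .cpu = 0, .asid_generation = 5, .clean = ~0u };

	demo_pre_run(&vmcb, 1);		/* vCPU migrated to CPU 1 */
	printf("gen=%llu clean=%#x cpu=%d\n",
	       (unsigned long long)vmcb.asid_generation,
	       (unsigned)vmcb.clean, vmcb.cpu);
	return 0;
}
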
@@ -3424,7 +3366,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
vcpu->arch.hflags |= HF_NMI_MASK;
- if (!sev_es_guest(svm->vcpu.kvm))
+ if (!sev_es_guest(vcpu->kvm))
svm_set_intercept(svm, INTERCEPT_IRET);
++vcpu->stat.nmi_injections;
}
@@ -3478,7 +3420,7 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
return false;
ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
- (svm->vcpu.arch.hflags & HF_NMI_MASK);
+ (vcpu->arch.hflags & HF_NMI_MASK);
return ret;
}
@@ -3498,9 +3440,7 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+ return !!(vcpu->arch.hflags & HF_NMI_MASK);
}
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
@@ -3508,12 +3448,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
struct vcpu_svm *svm = to_svm(vcpu);
if (masked) {
- svm->vcpu.arch.hflags |= HF_NMI_MASK;
- if (!sev_es_guest(svm->vcpu.kvm))
+ vcpu->arch.hflags |= HF_NMI_MASK;
+ if (!sev_es_guest(vcpu->kvm))
svm_set_intercept(svm, INTERCEPT_IRET);
} else {
- svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
- if (!sev_es_guest(svm->vcpu.kvm))
+ vcpu->arch.hflags &= ~HF_NMI_MASK;
+ if (!sev_es_guest(vcpu->kvm))
svm_clr_intercept(svm, INTERCEPT_IRET);
}
}
@@ -3526,7 +3466,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
if (!gif_set(svm))
return true;
- if (sev_es_guest(svm->vcpu.kvm)) {
+ if (sev_es_guest(vcpu->kvm)) {
/*
* SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
* bit to determine the state of the IF flag.
@@ -3536,7 +3476,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
} else if (is_guest_mode(vcpu)) {
/* As long as interrupts are being delivered... */
if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
- ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
+ ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
: !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
return true;
@@ -3595,8 +3535,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
- == HF_NMI_MASK)
+ if ((vcpu->arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) == HF_NMI_MASK)
return; /* IRET will cause a vm exit */
if (!gif_set(svm)) {
@@ -3638,7 +3577,7 @@ void svm_flush_tlb(struct kvm_vcpu *vcpu)
if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
else
- svm->asid_generation--;
+ svm->current_vmcb->asid_generation--;
}
static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
@@ -3675,8 +3614,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
}
-static void svm_complete_interrupts(struct vcpu_svm *svm)
+static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
u8 vector;
int type;
u32 exitintinfo = svm->vmcb->control.exit_int_info;
@@ -3688,28 +3628,28 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
* If we've made progress since setting HF_IRET_MASK, we've
* executed an IRET and can allow NMI injection.
*/
- if ((svm->vcpu.arch.hflags & HF_IRET_MASK) &&
- (sev_es_guest(svm->vcpu.kvm) ||
- kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) {
- svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ if ((vcpu->arch.hflags & HF_IRET_MASK) &&
+ (sev_es_guest(vcpu->kvm) ||
+ kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
+ vcpu->arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
- svm->vcpu.arch.nmi_injected = false;
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
+ vcpu->arch.nmi_injected = false;
+ kvm_clear_exception_queue(vcpu);
+ kvm_clear_interrupt_queue(vcpu);
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
return;
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
switch (type) {
case SVM_EXITINTINFO_TYPE_NMI:
- svm->vcpu.arch.nmi_injected = true;
+ vcpu->arch.nmi_injected = true;
break;
case SVM_EXITINTINFO_TYPE_EXEPT:
/*
@@ -3725,21 +3665,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
*/
if (kvm_exception_is_soft(vector)) {
if (vector == BP_VECTOR && int3_injected &&
- kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
- kvm_rip_write(&svm->vcpu,
- kvm_rip_read(&svm->vcpu) -
- int3_injected);
+ kvm_is_linear_rip(vcpu, svm->int3_rip))
+ kvm_rip_write(vcpu,
+ kvm_rip_read(vcpu) - int3_injected);
break;
}
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
u32 err = svm->vmcb->control.exit_int_info_err;
- kvm_requeue_exception_e(&svm->vcpu, vector, err);
+ kvm_requeue_exception_e(vcpu, vector, err);
} else
- kvm_requeue_exception(&svm->vcpu, vector);
+ kvm_requeue_exception(vcpu, vector);
break;
case SVM_EXITINTINFO_TYPE_INTR:
- kvm_queue_interrupt(&svm->vcpu, vector, false);
+ kvm_queue_interrupt(vcpu, vector, false);
break;
default:
break;
@@ -3754,7 +3693,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
control->exit_int_info = control->event_inj;
control->exit_int_info_err = control->event_inj_err;
control->event_inj = 0;
- svm_complete_interrupts(svm);
+ svm_complete_interrupts(vcpu);
}
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
@@ -3766,9 +3705,11 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
- struct vcpu_svm *svm)
+static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long vmcb_pa = svm->current_vmcb->pa;
+
/*
* VMENTER enables interrupts (host state), but the kernel state is
* interrupts disabled when this is invoked. Also tell RCU about
@@ -3789,12 +3730,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
- if (sev_es_guest(svm->vcpu.kvm)) {
- __svm_sev_es_vcpu_run(svm->vmcb_pa);
+ if (sev_es_guest(vcpu->kvm)) {
+ __svm_sev_es_vcpu_run(vmcb_pa);
} else {
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
- __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
+ /*
+ * Use a single vmcb (vmcb01 because it's always valid) for
+ * context switching guest state via VMLOAD/VMSAVE, that way
+ * the state doesn't need to be copied between vmcb01 and
+ * vmcb02 when switching vmcbs for nested virtualization.
+ */
+ vmload(svm->vmcb01.pa);
+ __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs);
+ vmsave(svm->vmcb01.pa);
vmload(__sme_page_pa(sd->save_area));
}
@@ -3845,7 +3794,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
smp_send_reschedule(vcpu->cpu);
}
- pre_svm_run(svm);
+ pre_svm_run(vcpu);
sync_lapic_to_cr8(vcpu);
@@ -3859,7 +3808,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
* Run with all-zero DR6 unless needed, so that we can get the exact cause
* of a #DB.
*/
- if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
+ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
svm_set_dr6(svm, vcpu->arch.dr6);
else
svm_set_dr6(svm, DR6_ACTIVE_LOW);
@@ -3875,9 +3824,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+ if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
+ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
- svm_vcpu_enter_exit(vcpu, svm);
+ svm_vcpu_enter_exit(vcpu);
/*
* We do not use IBRS in the kernel. If this vCPU has used the
@@ -3894,15 +3844,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
+ unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (!sev_es_guest(svm->vcpu.kvm))
+ if (!sev_es_guest(vcpu->kvm))
reload_tss(vcpu);
- x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+ if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
- if (!sev_es_guest(svm->vcpu.kvm)) {
+ if (!sev_es_guest(vcpu->kvm)) {
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
@@ -3910,7 +3862,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
}
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
- kvm_before_interrupt(&svm->vcpu);
+ kvm_before_interrupt(vcpu);
kvm_load_host_xsave_state(vcpu);
stgi();
@@ -3918,13 +3870,13 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
/* Any pending NMI will happen here */
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
- kvm_after_interrupt(&svm->vcpu);
+ kvm_after_interrupt(vcpu);
sync_cr8_to_lapic(vcpu);
svm->next_rip = 0;
- if (is_guest_mode(&svm->vcpu)) {
- sync_nested_vmcb_control(svm);
+ if (is_guest_mode(vcpu)) {
+ nested_sync_control_from_vmcb02(svm);
svm->nested.nested_run_pending = 0;
}
@@ -3933,7 +3885,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
/* if exit due to PF check for async PF */
if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
- svm->vcpu.arch.apf.host_apf_flags =
+ vcpu->arch.apf.host_apf_flags =
kvm_read_and_reset_apf_flags();
if (npt_enabled) {
@@ -3947,9 +3899,9 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
if (unlikely(svm->vmcb->control.exit_code ==
SVM_EXIT_EXCP_BASE + MC_VECTOR))
- svm_handle_mce(svm);
+ svm_handle_mce(vcpu);
- svm_complete_interrupts(svm);
+ svm_complete_interrupts(vcpu);
if (is_guest_mode(vcpu))
return EXIT_FASTPATH_NONE;
@@ -3957,21 +3909,26 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
return svm_exit_handlers_fastpath(vcpu);
}
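
svm_vcpu_run() above only saves and restores SPEC_CTRL in software when the CPU lacks X86_FEATURE_V_SPEC_CTRL; with the feature, the guest value lives in the VMCB save area and hardware switches it, which is also why svm_get_msr()/svm_set_msr() pick between vmcb->save.spec_ctrl and svm->spec_ctrl. A compact model of that choice, with hypothetical names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_vcpu {
	bool has_v_spec_ctrl;		/* models boot_cpu_has(X86_FEATURE_V_SPEC_CTRL) */
	uint64_t vmcb_spec_ctrl;	/* models svm->vmcb->save.spec_ctrl */
	uint64_t sw_spec_ctrl;		/* models svm->spec_ctrl */
};

static void demo_set_spec_ctrl(struct demo_vcpu *v, uint64_t data)
{
	if (v->has_v_spec_ctrl)
		v->vmcb_spec_ctrl = data;	/* hardware context-switches it */
	else
		v->sw_spec_ctrl = data;		/* software wrmsr around VMRUN */
}

static uint64_t demo_get_spec_ctrl(const struct demo_vcpu *v)
{
	return v->has_v_spec_ctrl ? v->vmcb_spec_ctrl : v->sw_spec_ctrl;
}

int main(void)
{
	struct demo_vcpu legacy = { .has_v_spec_ctrl = false };
	struct demo_vcpu assisted = { .has_v_spec_ctrl = true };

	demo_set_spec_ctrl(&legacy, 0x1);
	demo_set_spec_ctrl(&assisted, 0x1);
	printf("legacy=%#llx assisted=%#llx\n",
	       (unsigned long long)demo_get_spec_ctrl(&legacy),
	       (unsigned long long)demo_get_spec_ctrl(&assisted));
	return 0;
}
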
-static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,
+static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
int root_level)
{
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long cr3;
- cr3 = __sme_set(root);
if (npt_enabled) {
- svm->vmcb->control.nested_cr3 = cr3;
+ svm->vmcb->control.nested_cr3 = __sme_set(root_hpa);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
/* Loading L2's CR3 is handled by enter_svm_guest_mode. */
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
return;
cr3 = vcpu->arch.cr3;
+ } else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
+ cr3 = __sme_set(root_hpa) | kvm_get_active_pcid(vcpu);
+ } else {
+ /* PCID in the guest should be impossible with a 32-bit MMU. */
+ WARN_ON_ONCE(kvm_get_active_pcid(vcpu));
+ cr3 = root_hpa;
}
svm->vmcb->save.cr3 = cr3;
@@ -4048,7 +4005,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
/* Update nrips enabled cache */
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
- guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
+ guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
/* Check again if INVPCID interception is required */
svm_check_invpcid(svm);
@@ -4060,24 +4017,50 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
}
- if (!kvm_vcpu_apicv_active(vcpu))
- return;
+ if (kvm_vcpu_apicv_active(vcpu)) {
+ /*
+ * AVIC does not work with an x2APIC mode guest. If the X2APIC feature
+ * is exposed to the guest, disable AVIC.
+ */
+ if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
+ kvm_request_apicv_update(vcpu->kvm, false,
+ APICV_INHIBIT_REASON_X2APIC);
- /*
- * AVIC does not work with an x2APIC mode guest. If the X2APIC feature
- * is exposed to the guest, disable AVIC.
- */
- if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
- kvm_request_apicv_update(vcpu->kvm, false,
- APICV_INHIBIT_REASON_X2APIC);
+ /*
+ * Currently, AVIC does not work with nested virtualization.
+ * So, we disable AVIC when cpuid for SVM is set in the L1 guest.
+ */
+ if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+ kvm_request_apicv_update(vcpu->kvm, false,
+ APICV_INHIBIT_REASON_NESTED);
+ }
- /*
- * Currently, AVIC does not work with nested virtualization.
- * So, we disable AVIC when cpuid for SVM is set in the L1 guest.
- */
- if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM))
- kvm_request_apicv_update(vcpu->kvm, false,
- APICV_INHIBIT_REASON_NESTED);
+ if (guest_cpuid_is_intel(vcpu)) {
+ /*
+ * We must intercept SYSENTER_EIP and SYSENTER_ESP
+ * accesses because the processor only stores 32 bits.
+ * For the same reason we cannot use virtual VMLOAD/VMSAVE.
+ */
+ svm_set_intercept(svm, INTERCEPT_VMLOAD);
+ svm_set_intercept(svm, INTERCEPT_VMSAVE);
+ svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
+ } else {
+ /*
+ * If hardware supports Virtual VMLOAD VMSAVE then enable it
+ * in VMCB and clear intercepts to avoid #VMEXIT.
+ */
+ if (vls) {
+ svm_clr_intercept(svm, INTERCEPT_VMLOAD);
+ svm_clr_intercept(svm, INTERCEPT_VMSAVE);
+ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ }
+ /* No need to intercept these MSRs */
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
+ }
}
static bool svm_has_wbinvd_exit(void)
@@ -4349,15 +4332,15 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (!(saved_efer & EFER_SVME))
return 1;
- if (kvm_vcpu_map(&svm->vcpu,
+ if (kvm_vcpu_map(vcpu,
gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
return 1;
if (svm_allocate_nested(svm))
return 1;
- ret = enter_svm_guest_mode(svm, vmcb12_gpa, map.hva);
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
+ kvm_vcpu_unmap(vcpu, &map, true);
}
}
@@ -4612,6 +4595,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
+ .vm_copy_enc_context_from = svm_vm_copy_asid_from,
+
.can_emulate_instruction = svm_can_emulate_instruction,
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9806aaebc37f..84b3133c2251 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -23,12 +23,10 @@
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
-static const u32 host_save_user_msrs[] = {
- MSR_TSC_AUX,
-};
-#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+#define IOPM_SIZE (PAGE_SIZE * 3)
+#define MSRPM_SIZE (PAGE_SIZE * 2)
-#define MAX_DIRECT_ACCESS_MSRS 18
+#define MAX_DIRECT_ACCESS_MSRS 20
#define MSRPM_OFFSETS 16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -65,6 +63,7 @@ struct kvm_sev_info {
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
+ struct kvm *enc_context_owner; /* Owner of copied encryption context */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
};
@@ -82,11 +81,19 @@ struct kvm_svm {
struct kvm_vcpu;
+struct kvm_vmcb_info {
+ struct vmcb *ptr;
+ unsigned long pa;
+ int cpu;
+ uint64_t asid_generation;
+};
+
struct svm_nested_state {
- struct vmcb *hsave;
+ struct kvm_vmcb_info vmcb02;
u64 hsave_msr;
u64 vm_cr_msr;
u64 vmcb12_gpa;
+ u64 last_vmcb12_gpa;
/* These are the merged vectors */
u32 *msrpm;
@@ -103,21 +110,20 @@ struct svm_nested_state {
struct vcpu_svm {
struct kvm_vcpu vcpu;
+ /* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
struct vmcb *vmcb;
- unsigned long vmcb_pa;
+ struct kvm_vmcb_info vmcb01;
+ struct kvm_vmcb_info *current_vmcb;
struct svm_cpu_data *svm_data;
u32 asid;
- uint64_t asid_generation;
- uint64_t sysenter_esp;
- uint64_t sysenter_eip;
+ u32 sysenter_esp_hi;
+ u32 sysenter_eip_hi;
uint64_t tsc_aux;
u64 msr_decfg;
u64 next_rip;
- u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
-
u64 spec_ctrl;
/*
* Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
@@ -240,17 +246,14 @@ static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
vmcb->control.clean &= ~(1 << bit);
}
-static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
+static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
{
- return container_of(vcpu, struct vcpu_svm, vcpu);
+ return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
}
-static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
+static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(&svm->vcpu))
- return svm->nested.hsave;
- else
- return svm->vmcb;
+ return container_of(vcpu, struct vcpu_svm, vcpu);
}
static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
@@ -273,7 +276,7 @@ static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
if (!sev_es_guest(svm->vcpu.kvm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
@@ -300,7 +303,7 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb->control.intercepts[INTERCEPT_DR] = 0;
@@ -315,7 +318,7 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
WARN_ON_ONCE(bit >= 32);
vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
@@ -325,7 +328,7 @@ static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
WARN_ON_ONCE(bit >= 32);
vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
@@ -335,7 +338,7 @@ static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb_set_intercept(&vmcb->control, bit);
@@ -344,7 +347,7 @@ static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb_clr_intercept(&vmcb->control, bit);
@@ -388,8 +391,6 @@ static inline bool gif_set(struct vcpu_svm *svm)
/* svm.c */
#define MSR_INVALID 0xffffffffU
-extern int sev;
-extern int sev_es;
extern bool dump_invalid_vmcb;
u32 svm_msrpm_offset(u32 msr);
@@ -406,7 +407,7 @@ bool svm_smi_blocked(struct kvm_vcpu *vcpu);
bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
void svm_set_gif(struct vcpu_svm *svm, bool value);
-int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code);
+int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
int read, int write);
@@ -438,20 +439,30 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
}
-int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb);
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);
void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
-int nested_svm_vmrun(struct vcpu_svm *svm);
+int nested_svm_vmrun(struct kvm_vcpu *vcpu);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
+
+static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
+{
+ svm->vmcb->control.exit_code = exit_code;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+ return nested_svm_vmexit(svm);
+}
+
int nested_svm_exit_handled(struct vcpu_svm *svm);
-int nested_svm_check_permissions(struct vcpu_svm *svm);
+int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
-void sync_nested_vmcb_control(struct vcpu_svm *svm);
+void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
+void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
+void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
extern struct kvm_x86_nested_ops svm_nested_ops;
@@ -492,8 +503,8 @@ void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
void avic_init_vmcb(struct vcpu_svm *svm);
void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
-int avic_incomplete_ipi_interception(struct vcpu_svm *svm);
-int avic_unaccelerated_access_interception(struct vcpu_svm *svm);
+int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
+int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void avic_vcpu_put(struct kvm_vcpu *vcpu);
@@ -551,22 +562,20 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
extern unsigned int max_sev_asid;
-static inline bool svm_sev_enabled(void)
-{
- return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
-}
-
void sev_vm_destroy(struct kvm *kvm);
int svm_mem_enc_op(struct kvm *kvm, void __user *argp);
int svm_register_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
int svm_unregister_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
+int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
+void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
void sev_hardware_teardown(void);
+int sev_cpu_init(struct svm_cpu_data *sd);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
-int sev_handle_vmgexit(struct vcpu_svm *svm);
+int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
void sev_es_init_vmcb(struct vcpu_svm *svm);
void sev_es_create_vcpu(struct vcpu_svm *svm);
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 6feb8c08f45a..4fa17df123cd 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -79,28 +79,10 @@ SYM_FUNC_START(__svm_vcpu_run)
/* Enter guest mode */
sti
-1: vmload %_ASM_AX
- jmp 3f
-2: cmpb $0, kvm_rebooting
- jne 3f
- ud2
- _ASM_EXTABLE(1b, 2b)
-3: vmrun %_ASM_AX
- jmp 5f
-4: cmpb $0, kvm_rebooting
- jne 5f
- ud2
- _ASM_EXTABLE(3b, 4b)
+1: vmrun %_ASM_AX
-5: vmsave %_ASM_AX
- jmp 7f
-6: cmpb $0, kvm_rebooting
- jne 7f
- ud2
- _ASM_EXTABLE(5b, 6b)
-7:
- cli
+2: cli
#ifdef CONFIG_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
@@ -167,6 +149,13 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
pop %_ASM_BP
ret
+
+3: cmpb $0, kvm_rebooting
+ jne 2b
+ ud2
+
+ _ASM_EXTABLE(1b, 3b)
+
SYM_FUNC_END(__svm_vcpu_run)
/**
@@ -186,18 +175,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
#endif
push %_ASM_BX
- /* Enter guest mode */
+ /* Move @vmcb to RAX. */
mov %_ASM_ARG1, %_ASM_AX
+
+ /* Enter guest mode */
sti
1: vmrun %_ASM_AX
- jmp 3f
-2: cmpb $0, kvm_rebooting
- jne 3f
- ud2
- _ASM_EXTABLE(1b, 2b)
-3: cli
+2: cli
#ifdef CONFIG_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
@@ -217,4 +203,11 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
#endif
pop %_ASM_BP
ret
+
+3: cmpb $0, kvm_rebooting
+ jne 2b
+ ud2
+
+ _ASM_EXTABLE(1b, 3b)
+
SYM_FUNC_END(__svm_sev_es_vcpu_run)