diff options
Diffstat (limited to 'arch/x86/kvm/svm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 404 |
1 files changed, 268 insertions, 136 deletions
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9709c98d0d6c..2f32fd09e259 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -91,7 +91,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ bool always; /* True if intercept is always on */ -} direct_access_msrs[] = { +} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = { { .index = MSR_STAR, .always = true }, { .index = MSR_IA32_SYSENTER_CS, .always = true }, #ifdef CONFIG_X86_64 @@ -263,9 +263,10 @@ static int get_max_npt_level(void) #endif } -void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) +int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { struct vcpu_svm *svm = to_svm(vcpu); + u64 old_efer = vcpu->arch.efer; vcpu->arch.efer = efer; if (!npt_enabled) { @@ -276,13 +277,32 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) efer &= ~EFER_LME; } - if (!(efer & EFER_SVME)) { - svm_leave_nested(svm); - svm_set_gif(svm, true); + if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { + if (!(efer & EFER_SVME)) { + svm_leave_nested(svm); + svm_set_gif(svm, true); + + /* + * Free the nested guest state, unless we are in SMM. + * In this case we will return to the nested guest + * as soon as we leave SMM. + */ + if (!is_smm(&svm->vcpu)) + svm_free_nested(svm); + + } else { + int ret = svm_allocate_nested(svm); + + if (ret) { + vcpu->arch.efer = old_efer; + return ret; + } + } } svm->vmcb->save.efer = efer | EFER_SVME; vmcb_mark_dirty(svm->vmcb, VMCB_CR); + return 0; } static int is_external_interrupt(u32 info) @@ -553,18 +573,44 @@ free_cpu_data: } -static bool valid_msr_intercept(u32 index) +static int direct_access_msr_slot(u32 msr) { - int i; + u32 i; for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) - if (direct_access_msrs[i].index == index) - return true; + if (direct_access_msrs[i].index == msr) + return i; - return false; + return -ENOENT; +} + +static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read, + int write) +{ + struct vcpu_svm *svm = to_svm(vcpu); + int slot = direct_access_msr_slot(msr); + + if (slot == -ENOENT) + return; + + /* Set the shadow bitmaps to the desired intercept states */ + if (read) + set_bit(slot, svm->shadow_msr_intercept.read); + else + clear_bit(slot, svm->shadow_msr_intercept.read); + + if (write) + set_bit(slot, svm->shadow_msr_intercept.write); + else + clear_bit(slot, svm->shadow_msr_intercept.write); } -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +static bool valid_msr_intercept(u32 index) +{ + return direct_access_msr_slot(index) != -ENOENT; +} + +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) { u8 bit_write; unsigned long tmp; @@ -583,8 +629,8 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) return !!test_bit(bit_write, &tmp); } -static void set_msr_interception(u32 *msrpm, unsigned msr, - int read, int write) +static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, + u32 msr, int read, int write) { u8 bit_read, bit_write; unsigned long tmp; @@ -596,6 +642,13 @@ static void set_msr_interception(u32 *msrpm, unsigned msr, */ WARN_ON(!valid_msr_intercept(msr)); + /* Enforce non allowed MSRs to trap */ + if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) + read = 0; + + if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) + write = 0; + offset = svm_msrpm_offset(msr); bit_read = 2 * (msr & 0x0f); bit_write = 2 * (msr & 0x0f) + 1; @@ -609,17 +662,60 @@ static void set_msr_interception(u32 *msrpm, unsigned msr, msrpm[offset] = tmp; } -static void svm_vcpu_init_msrpm(u32 *msrpm) +static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write) { - int i; + set_shadow_msr_intercept(vcpu, msr, read, write); + set_msr_interception_bitmap(vcpu, msrpm, msr, read, write); +} + +u32 *svm_vcpu_alloc_msrpm(void) +{ + struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); + u32 *msrpm; + + if (!pages) + return NULL; + msrpm = page_address(pages); memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); + return msrpm; +} + +void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm) +{ + int i; + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { if (!direct_access_msrs[i].always) continue; + set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1); + } +} - set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); + +void svm_vcpu_free_msrpm(u32 *msrpm) +{ + __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER); +} + +static void svm_msr_filter_changed(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u32 i; + + /* + * Set intercept permissions for all direct access MSRs again. They + * will automatically get filtered through the MSR filter, so we are + * back in sync after this. + */ + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { + u32 msr = direct_access_msrs[i].index; + u32 read = test_bit(i, svm->shadow_msr_intercept.read); + u32 write = test_bit(i, svm->shadow_msr_intercept.write); + + set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write); } } @@ -666,26 +762,26 @@ static void init_msrpm_offsets(void) } } -static void svm_enable_lbrv(struct vcpu_svm *svm) +static void svm_enable_lbrv(struct kvm_vcpu *vcpu) { - u32 *msrpm = svm->msrpm; + struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; - set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); - set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); - set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); - set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); } -static void svm_disable_lbrv(struct vcpu_svm *svm) +static void svm_disable_lbrv(struct kvm_vcpu *vcpu) { - u32 *msrpm = svm->msrpm; + struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; - set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); - set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); - set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); - set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } void disable_nmi_singlestep(struct vcpu_svm *svm) @@ -813,6 +909,9 @@ static __init void svm_set_cpu_caps(void) if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); + + /* Enable INVPCID feature */ + kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID); } static __init int svm_hardware_setup(void) @@ -985,6 +1084,21 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) return svm->vmcb->control.tsc_offset; } +static void svm_check_invpcid(struct vcpu_svm *svm) +{ + /* + * Intercept INVPCID instruction only if shadow page table is + * enabled. Interception is not required with nested page table + * enabled. + */ + if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) { + if (!npt_enabled) + svm_set_intercept(svm, INTERCEPT_INVPCID); + else + svm_clr_intercept(svm, INTERCEPT_INVPCID); + } +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -992,14 +1106,14 @@ static void init_vmcb(struct vcpu_svm *svm) svm->vcpu.arch.hflags = 0; - set_cr_intercept(svm, INTERCEPT_CR0_READ); - set_cr_intercept(svm, INTERCEPT_CR3_READ); - set_cr_intercept(svm, INTERCEPT_CR4_READ); - set_cr_intercept(svm, INTERCEPT_CR0_WRITE); - set_cr_intercept(svm, INTERCEPT_CR3_WRITE); - set_cr_intercept(svm, INTERCEPT_CR4_WRITE); + svm_set_intercept(svm, INTERCEPT_CR0_READ); + svm_set_intercept(svm, INTERCEPT_CR3_READ); + svm_set_intercept(svm, INTERCEPT_CR4_READ); + svm_set_intercept(svm, INTERCEPT_CR0_WRITE); + svm_set_intercept(svm, INTERCEPT_CR3_WRITE); + svm_set_intercept(svm, INTERCEPT_CR4_WRITE); if (!kvm_vcpu_apicv_active(&svm->vcpu)) - set_cr_intercept(svm, INTERCEPT_CR8_WRITE); + svm_set_intercept(svm, INTERCEPT_CR8_WRITE); set_dr_intercepts(svm); @@ -1094,15 +1208,15 @@ static void init_vmcb(struct vcpu_svm *svm) control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; svm_clr_intercept(svm, INTERCEPT_INVLPG); clr_exception_intercept(svm, PF_VECTOR); - clr_cr_intercept(svm, INTERCEPT_CR3_READ); - clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR3_READ); + svm_clr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; save->cr3 = 0; save->cr4 = 0; } svm->asid_generation = 0; - svm->nested.vmcb = 0; + svm->nested.vmcb12_gpa = 0; svm->vcpu.arch.hflags = 0; if (!kvm_pause_in_guest(svm->vcpu.kvm)) { @@ -1114,6 +1228,8 @@ static void init_vmcb(struct vcpu_svm *svm) svm_clr_intercept(svm, INTERCEPT_PAUSE); } + svm_check_invpcid(svm); + if (kvm_vcpu_apicv_active(&svm->vcpu)) avic_init_vmcb(svm); @@ -1171,35 +1287,20 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static int svm_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; - struct page *page; - struct page *msrpm_pages; - struct page *hsave_page; - struct page *nested_msrpm_pages; + struct page *vmcb_page; int err; BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); svm = to_svm(vcpu); err = -ENOMEM; - page = alloc_page(GFP_KERNEL_ACCOUNT); - if (!page) + vmcb_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!vmcb_page) goto out; - msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); - if (!msrpm_pages) - goto free_page1; - - nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); - if (!nested_msrpm_pages) - goto free_page2; - - hsave_page = alloc_page(GFP_KERNEL_ACCOUNT); - if (!hsave_page) - goto free_page3; - err = avic_init_vcpu(svm); if (err) - goto free_page4; + goto error_free_vmcb_page; /* We initialize this flag to true to make sure that the is_running * bit would be set the first time the vcpu is loaded. @@ -1207,18 +1308,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm)) svm->avic_is_running = true; - svm->nested.hsave = page_address(hsave_page); - clear_page(svm->nested.hsave); - - svm->msrpm = page_address(msrpm_pages); - svm_vcpu_init_msrpm(svm->msrpm); + svm->msrpm = svm_vcpu_alloc_msrpm(); + if (!svm->msrpm) + goto error_free_vmcb_page; - svm->nested.msrpm = page_address(nested_msrpm_pages); - svm_vcpu_init_msrpm(svm->nested.msrpm); + svm_vcpu_init_msrpm(vcpu, svm->msrpm); - svm->vmcb = page_address(page); - clear_page(svm->vmcb); - svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT); + svm->vmcb = page_address(vmcb_page); + svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT); svm->asid_generation = 0; init_vmcb(svm); @@ -1227,14 +1324,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) return 0; -free_page4: - __free_page(hsave_page); -free_page3: - __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); -free_page2: - __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); -free_page1: - __free_page(page); +error_free_vmcb_page: + __free_page(vmcb_page); out: return err; } @@ -1258,10 +1349,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) */ svm_clear_current_vmcb(svm->vmcb); + svm_free_nested(svm); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); - __free_page(virt_to_page(svm->nested.hsave)); - __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1549,11 +1640,11 @@ static void update_cr0_intercept(struct vcpu_svm *svm) vmcb_mark_dirty(svm->vmcb, VMCB_CR); if (gcr0 == *hcr0) { - clr_cr_intercept(svm, INTERCEPT_CR0_READ); - clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR0_READ); + svm_clr_intercept(svm, INTERCEPT_CR0_WRITE); } else { - set_cr_intercept(svm, INTERCEPT_CR0_READ); - set_cr_intercept(svm, INTERCEPT_CR0_WRITE); + svm_set_intercept(svm, INTERCEPT_CR0_READ); + svm_set_intercept(svm, INTERCEPT_CR0_WRITE); } } @@ -2224,12 +2315,9 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, { unsigned long cr0 = svm->vcpu.arch.cr0; bool ret = false; - u64 intercept; - - intercept = svm->nested.ctl.intercept; if (!is_guest_mode(&svm->vcpu) || - (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) + (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0)))) return false; cr0 &= ~SVM_CR0_SELECTIVE_MASK; @@ -2267,6 +2355,7 @@ static int cr_interception(struct vcpu_svm *svm) if (cr >= 16) { /* mov to cr */ cr -= 16; val = kvm_register_read(&svm->vcpu, reg); + trace_kvm_cr_write(cr, val); switch (cr) { case 0: if (!check_selective_cr0_intercepted(svm, val)) @@ -2312,6 +2401,7 @@ static int cr_interception(struct vcpu_svm *svm) return 1; } kvm_register_write(&svm->vcpu, reg, val); + trace_kvm_cr_read(cr, val); } return kvm_complete_insn_gp(&svm->vcpu, err); } @@ -2562,7 +2652,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * We update the L1 MSR bit as well since it will end up * touching the MSR anyway now. */ - set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && @@ -2577,7 +2667,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); - set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; case MSR_AMD64_VIRT_SPEC_CTRL: if (!msr->host_initiated && @@ -2641,9 +2731,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->vmcb->save.dbgctl = data; vmcb_mark_dirty(svm->vmcb, VMCB_LBR); if (data & (1ULL<<0)) - svm_enable_lbrv(svm); + svm_enable_lbrv(vcpu); else - svm_disable_lbrv(svm); + svm_disable_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: svm->nested.hsave_msr = data; @@ -2739,6 +2829,33 @@ static int mwait_interception(struct vcpu_svm *svm) return nop_interception(svm); } +static int invpcid_interception(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + unsigned long type; + gva_t gva; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + /* + * For an INVPCID intercept: + * EXITINFO1 provides the linear address of the memory operand. + * EXITINFO2 provides the contents of the register operand. + */ + type = svm->vmcb->control.exit_info_2; + gva = svm->vmcb->control.exit_info_1; + + if (type > 3) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + return kvm_handle_invpcid(vcpu, type, gva); +} + static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, @@ -2801,6 +2918,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_RDPRU] = rdpru_interception, + [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, @@ -2819,12 +2937,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) } pr_err("VMCB Control Area:\n"); - pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); - pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); - pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); - pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); - pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); - pr_err("%-20s%016llx\n", "intercepts:", control->intercept); + pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff); + pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16); + pr_err("%-20s%04x\n", "dr_read:", control->intercepts[INTERCEPT_DR] & 0xffff); + pr_err("%-20s%04x\n", "dr_write:", control->intercepts[INTERCEPT_DR] >> 16); + pr_err("%-20s%08x\n", "exceptions:", control->intercepts[INTERCEPT_EXCEPTION]); + pr_err("%-20s%08x %08x\n", "intercepts:", + control->intercepts[INTERCEPT_WORD3], + control->intercepts[INTERCEPT_WORD4]); pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); pr_err("%-20s%d\n", "pause filter threshold:", control->pause_filter_thresh); @@ -2923,12 +3043,19 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "excp_to:", save->last_excp_to); } -static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) +static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, + u32 *intr_info, u32 *error_code) { struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; *info1 = control->exit_info_1; *info2 = control->exit_info_2; + *intr_info = control->exit_int_info; + if ((*intr_info & SVM_EXITINTINFO_VALID) && + (*intr_info & SVM_EXITINTINFO_VALID_ERR)) + *error_code = control->exit_int_info_err; + else + *error_code = 0; } static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) @@ -2939,7 +3066,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) + if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) vcpu->arch.cr3 = svm->vmcb->save.cr3; @@ -2947,12 +3074,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (is_guest_mode(vcpu)) { int vmexit; - trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, - svm->vmcb->control.exit_info_1, - svm->vmcb->control.exit_info_2, - svm->vmcb->control.exit_int_info, - svm->vmcb->control.exit_int_info_err, - KVM_ISA_SVM); + trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM); vmexit = nested_svm_exit_special(svm); @@ -3062,13 +3184,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) if (nested_svm_virtualize_tpr(vcpu)) return; - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); if (irr == -1) return; if (tpr >= irr) - set_cr_intercept(svm, INTERCEPT_CR8_WRITE); + svm_set_intercept(svm, INTERCEPT_CR8_WRITE); } bool svm_nmi_blocked(struct kvm_vcpu *vcpu) @@ -3256,7 +3378,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) if (nested_svm_virtualize_tpr(vcpu)) return; - if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { + if (!svm_is_intercept(svm, INTERCEPT_CR8_WRITE)) { int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; kvm_set_cr8(vcpu, cr8); } @@ -3353,8 +3475,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(vcpu) && - to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && + if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && to_svm(vcpu)->vmcb->control.exit_info_1) return handle_fastpath_set_msr_irqoff(vcpu); @@ -3419,7 +3540,6 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { - fastpath_t exit_fastpath; struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; @@ -3460,9 +3580,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); kvm_load_guest_xsave_state(vcpu); - if (lapic_in_kernel(vcpu) && - vcpu->arch.apic->lapic_timer.timer_advance_ns) - kvm_wait_lapic_expire(vcpu); + kvm_wait_lapic_expire(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -3542,8 +3660,11 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) svm_handle_mce(svm); svm_complete_interrupts(svm); - exit_fastpath = svm_exit_handlers_fastpath(vcpu); - return exit_fastpath; + + if (is_guest_mode(vcpu)) + return EXIT_FASTPATH_NONE; + + return svm_exit_handlers_fastpath(vcpu); } static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root, @@ -3629,6 +3750,9 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); + /* Check again if INVPCID interception if required */ + svm_check_invpcid(svm); + if (!kvm_vcpu_apicv_active(vcpu)) return; @@ -3743,7 +3867,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, break; case SVM_EXIT_WRITE_CR0: { unsigned long cr0, val; - u64 intercept; if (info->intercept == x86_intercept_cr_write) icpt_info.exit_code += info->modrm_reg; @@ -3752,9 +3875,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, info->intercept == x86_intercept_clts) break; - intercept = svm->nested.ctl.intercept; - - if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) + if (!(vmcb_is_intercept(&svm->nested.ctl, + INTERCEPT_SELECTIVE_CR0))) break; cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; @@ -3889,7 +4011,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) /* FED8h - SVM Guest */ put_smstate(u64, smstate, 0x7ed8, 1); /* FEE0h - SVM Guest VMCB Physical Address */ - put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb); + put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; @@ -3911,7 +4033,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8); - u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); + u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); if (guest) { if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) @@ -3921,10 +4043,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) return 1; if (kvm_vcpu_map(&svm->vcpu, - gpa_to_gfn(vmcb), &map) == -EINVAL) + gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) + return 1; + + if (svm_allocate_nested(svm)) return 1; - ret = enter_svm_guest_mode(svm, vmcb, map.hva); + ret = enter_svm_guest_mode(svm, vmcb12_gpa, map.hva); kvm_vcpu_unmap(&svm->vcpu, &map, true); } } @@ -3945,19 +4070,10 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) } } -static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) { - unsigned long cr4 = kvm_read_cr4(vcpu); - bool smep = cr4 & X86_CR4_SMEP; - bool smap = cr4 & X86_CR4_SMAP; - bool is_user = svm_get_cpl(vcpu) == 3; - - /* - * If RIP is invalid, go ahead with emulation which will cause an - * internal error exit. - */ - if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) - return true; + bool smep, smap, is_user; + unsigned long cr4; /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. @@ -3999,6 +4115,20 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) * instruction pointer so we will not able to workaround it. Lets * print the error and request to kill the guest. */ + if (likely(!insn || insn_len)) + return true; + + /* + * If RIP is invalid, go ahead with emulation which will cause an + * internal error exit. + */ + if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) + return true; + + cr4 = kvm_read_cr4(vcpu); + smep = cr4 & X86_CR4_SMEP; + smap = cr4 & X86_CR4_SMAP; + is_user = svm_get_cpl(vcpu) == 3; if (smap && (!smep || is_user)) { if (!sev_guest(vcpu->kvm)) return true; @@ -4022,7 +4152,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) * if an INIT signal is pending. */ return !gif_set(svm) || - (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT)); + (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT)); } static void svm_vm_destroy(struct kvm *kvm) @@ -4160,9 +4290,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, - .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, + .can_emulate_instruction = svm_can_emulate_instruction, .apic_init_signal_blocked = svm_apic_init_signal_blocked, + + .msr_filter_changed = svm_msr_filter_changed, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { |