diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/mmu.c | 25 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 129 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 6 |
4 files changed, 44 insertions, 127 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 69088a1ba509..ff606f507913 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3322,7 +3322,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) break; reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, - leaf); + iterator.level); } walk_shadow_page_lockless_end(vcpu); @@ -3614,7 +3614,7 @@ static void __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct rsvd_bits_validate *rsvd_check, int maxphyaddr, int level, bool nx, bool gbpages, - bool pse) + bool pse, bool amd) { u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; @@ -3631,7 +3631,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for * leaf entries) on AMD CPUs only. */ - if (guest_cpuid_is_amd(vcpu)) + if (amd) nonleaf_bit8_rsvd = rsvd_bits(8, 8); switch (level) { @@ -3699,7 +3699,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, cpuid_maxphyaddr(vcpu), context->root_level, context->nx, guest_cpuid_has_gbpages(vcpu), - is_pse(vcpu)); + is_pse(vcpu), guest_cpuid_is_amd(vcpu)); } static void @@ -3749,13 +3749,24 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + /* + * Passing "true" to the last argument is okay; it adds a check + * on bit 8 of the SPTEs which KVM doesn't use anyway. + */ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, context->nx, - guest_cpuid_has_gbpages(vcpu), is_pse(vcpu)); + guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), + true); } EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); +static inline bool boot_cpu_is_amd(void) +{ + WARN_ON_ONCE(!tdp_enabled); + return shadow_x_mask == 0; +} + /* * the direct page table on host, use as much mmu features as * possible, however, kvm currently does not do execution-protection. @@ -3764,11 +3775,11 @@ static void reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - if (guest_cpuid_is_amd(vcpu)) + if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, false, - cpu_has_gbpages, true); + cpu_has_gbpages, true, true); else __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, boot_cpu_data.x86_phys_bits, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fdb8cb63a6c0..2f9ed1ff0632 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -202,6 +202,7 @@ module_param(npt, int, S_IRUGO); static int nested = true; module_param(nested, int, S_IRUGO); +static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -513,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } @@ -865,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } -#define MTRR_TYPE_UC_MINUS 7 -#define MTRR2PROTVAL_INVALID 0xff - -static u8 mtrr2protval[8]; - -static u8 fallback_mtrr_type(int mtrr) -{ - /* - * WT and WP aren't always available in the host PAT. Treat - * them as UC and UC- respectively. Everything else should be - * there. - */ - switch (mtrr) - { - case MTRR_TYPE_WRTHROUGH: - return MTRR_TYPE_UNCACHABLE; - case MTRR_TYPE_WRPROT: - return MTRR_TYPE_UC_MINUS; - default: - BUG(); - } -} - -static void build_mtrr2protval(void) -{ - int i; - u64 pat; - - for (i = 0; i < 8; i++) - mtrr2protval[i] = MTRR2PROTVAL_INVALID; - - /* Ignore the invalid MTRR types. */ - mtrr2protval[2] = 0; - mtrr2protval[3] = 0; - - /* - * Use host PAT value to figure out the mapping from guest MTRR - * values to nested page table PAT/PCD/PWT values. We do not - * want to change the host PAT value every time we enter the - * guest. - */ - rdmsrl(MSR_IA32_CR_PAT, pat); - for (i = 0; i < 8; i++) { - u8 mtrr = pat >> (8 * i); - - if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) - mtrr2protval[mtrr] = __cm_idx2pte(i); - } - - for (i = 0; i < 8; i++) { - if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { - u8 fallback = fallback_mtrr_type(i); - mtrr2protval[i] = mtrr2protval[fallback]; - BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); - } - } -} - static __init int svm_hardware_setup(void) { int cpu; @@ -989,7 +932,6 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); - build_mtrr2protval(); return 0; err: @@ -1144,43 +1086,6 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } -static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - - /* Unlike Intel, AMD takes the guest's CR0.CD into account. - * - * AMD does not have IPAT. To emulate it for the case of guests - * with no assigned devices, just set everything to WB. If guests - * have assigned devices, however, we cannot force WB for RAM - * pages only, so use the guest PAT directly. - */ - if (!kvm_arch_has_assigned_device(vcpu->kvm)) - *g_pat = 0x0606060606060606; - else - *g_pat = vcpu->arch.pat; -} - -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - u8 mtrr; - - /* - * 1. MMIO: trust guest MTRR, so same as item 3. - * 2. No passthrough: always map as WB, and force guest PAT to WB as well - * 3. Passthrough: can't guarantee the result, try to trust guest. - */ - if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) - return 0; - - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) && - kvm_read_cr0(vcpu) & X86_CR0_CD) - return _PAGE_NOCACHE; - - mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); - return mtrr2protval[mtrr]; -} - static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1263,7 +1168,8 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. * It also updates the guest-visible cr0 value. */ - (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + kvm_mmu_reset_context(&svm->vcpu); save->cr4 = X86_CR4_PAE; /* rdx = ?? */ @@ -1276,7 +1182,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; - svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -1671,10 +1576,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!vcpu->fpu_active) cr0 |= X86_CR0_TS; - - /* These are emulated via page tables. */ - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); - + /* + * re-enable caching here because the QEMU bios + * does not do it - this results in some delay at + * reboot + */ + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); @@ -3349,16 +3257,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_IA32_CR_PAT: - if (npt_enabled) { - if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) - return 1; - vcpu->arch.pat = data; - svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); - mark_dirty(svm->vmcb, VMCB_NPT); - break; - } - /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } @@ -4193,6 +4091,11 @@ static bool svm_has_high_real_mode_segbase(void) return true; } +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + return 0; +} + static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64076740251e..06ef4908ba61 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8617,17 +8617,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: guest may want to apply WC, trust it. + * 1. MMIO: always map as UC * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. So the same as item 1. + * result, try to trust guest. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (is_mmio) { + cache = MTRR_TYPE_UNCACHABLE; + goto exit; + } + + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6bbb0dfb99d0..92511d4b7236 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1708,8 +1708,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->pvclock_set_guest_stopped_request = false; } - pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO; - /* If the host uses TSC clocksource, then it is stable */ if (use_master_clock) pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; @@ -2007,8 +2005,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) &vcpu->requests); ka->boot_vcpu_runs_old_kvmclock = tmp; - - ka->kvmclock_offset = -get_kernel_ns(); } vcpu->arch.time = data; @@ -2190,6 +2186,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: case MSR_K8_SYSCFG: + case MSR_K8_TSEG_ADDR: + case MSR_K8_TSEG_MASK: case MSR_K7_HWCR: case MSR_VM_HSAVE_PA: case MSR_K8_INT_PENDING_MSG: |