author     Paolo Bonzini <pbonzini@redhat.com>    2023-04-26 21:53:36 +0200
committer  Paolo Bonzini <pbonzini@redhat.com>    2023-04-26 21:53:36 +0200
commit     48b1893ae38bd6d46a9dcfc7b85c70a143fb8cab
tree       d69a46c054bf8a8558b17cdb9510ae5285296ba6 /arch/x86
parent     Merge tag 'kvm-x86-mmu-6.4' of https://github.com/kvm-x86/linux into HEAD
parent     KVM: selftests: Test the PMU event "Instructions retired"
Merge tag 'kvm-x86-pmu-6.4' of https://github.com/kvm-x86/linux into HEAD
KVM x86 PMU changes for 6.4:
- Disallow virtualizing legacy LBRs if architectural LBRs are available,
  as the two are mutually exclusive in hardware
- Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES)
  after KVM_RUN (illustrated in the sketch after this list), and overhaul
  the vmx_pmu_caps selftest to better validate PERF_CAPABILITIES
- Apply PMU filters to emulated events and add test coverage to the
pmu_event_filter selftest
- Misc cleanups and fixes
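To make the KVM_RUN restriction above concrete, here is a minimal sketch, not kernel code: the struct, field, and function names (demo_vcpu, has_run, perf_caps, demo_set_feature_msr) are invented for illustration. It mirrors the policy added to do_set_msr() in the diff below: once a vCPU has run, a write to an immutable feature MSR succeeds only if it does not change the current value.

/* Illustrative sketch only -- types and names are hypothetical, not KVM's. */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

struct demo_vcpu {
	bool has_run;		/* becomes true after the first KVM_RUN */
	uint64_t perf_caps;	/* stands in for an immutable feature MSR */
};

/*
 * After the vCPU has run, writes to an immutable feature MSR are rejected
 * unless the written value matches the current one, so blind RESET-style
 * writes of the existing value still succeed.
 */
static int demo_set_feature_msr(struct demo_vcpu *vcpu, uint64_t data)
{
	if (vcpu->has_run)
		return data == vcpu->perf_caps ? 0 : -EINVAL;

	vcpu->perf_caps = data;
	return 0;
}

Userspace that blindly re-writes the value it last read (e.g. when emulating RESET) keeps working; only writes that would actually change the vCPU model after KVM_RUN fail with -EINVAL.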
Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/include/asm/kvm_host.h   |   2
-rw-r--r--   arch/x86/kvm/cpuid.c              |   2
-rw-r--r--   arch/x86/kvm/mmu/mmu.c            |   2
-rw-r--r--   arch/x86/kvm/pmu.c                |  21
-rw-r--r--   arch/x86/kvm/pmu.h                |   2
-rw-r--r--   arch/x86/kvm/svm/pmu.c            |   2
-rw-r--r--   arch/x86/kvm/svm/svm.c            |   2
-rw-r--r--   arch/x86/kvm/vmx/pmu_intel.c      | 135
-rw-r--r--   arch/x86/kvm/vmx/vmx.c            |  16
-rw-r--r--   arch/x86/kvm/x86.c                | 102
-rw-r--r--   arch/x86/kvm/x86.h                |  13
11 files changed, 185 insertions, 114 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ffdb0c3010e7..d197c54dcaa6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -513,6 +513,7 @@ struct kvm_pmc {
 #define MSR_ARCH_PERFMON_FIXED_CTR_MAX	(MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
 #define KVM_AMD_PMC_MAX_GENERIC	6
 struct kvm_pmu {
+	u8 version;
 	unsigned nr_arch_gp_counters;
 	unsigned nr_arch_fixed_counters;
 	unsigned available_event_types;
@@ -525,7 +526,6 @@ struct kvm_pmu {
 	u64 global_ovf_ctrl_mask;
 	u64 reserved_bits;
 	u64 raw_event_mask;
-	u8 version;
 	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
 	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
 	struct irq_work irq_work;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6972e0be60fa..b944492faefa 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -414,7 +414,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	 * KVM_SET_CPUID{,2} again. To support this legacy behavior, check
 	 * whether the supplied CPUID data is equal to what's already set.
 	 */
-	if (vcpu->arch.last_vmentry_cpu != -1) {
+	if (kvm_vcpu_has_run(vcpu)) {
 		r = kvm_cpuid_check_equal(vcpu, e2, nent);
 		if (r)
 			return r;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e3d02f059437..c8961f45e3b1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5476,7 +5476,7 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
 	 * kvm_arch_vcpu_ioctl().
 	 */
-	KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
+	KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm);
 }
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index bb1733bb5bf9..1690d41c1830 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,7 +93,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
 #undef __KVM_X86_PMU_OP
 }
 
-static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
+static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
 {
 	return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
 }
@@ -400,6 +400,12 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 	return is_fixed_event_allowed(filter, pmc->idx);
 }
 
+static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
+{
+	return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+	       check_pmu_event_filter(pmc);
+}
+
 static void reprogram_counter(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -409,10 +415,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 	pmc_pause_counter(pmc);
 
-	if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
-		goto reprogram_complete;
-
-	if (!check_pmu_event_filter(pmc))
+	if (!pmc_event_is_allowed(pmc))
 		goto reprogram_complete;
 
 	if (pmc->counter < pmc->prev_counter)
@@ -589,6 +592,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
+	if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
+		return;
+
+	bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
@@ -646,7 +653,7 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
 	pmc->prev_counter = pmc->counter;
 	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
-	kvm_pmu_request_counter_reprogam(pmc);
+	kvm_pmu_request_counter_reprogram(pmc);
 }
 
 static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
@@ -684,7 +691,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
 	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
 		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
 
-		if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
+		if (!pmc || !pmc_event_is_allowed(pmc))
 			continue;
 
 		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index be62c16f2265..5c7bbf03b599 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -195,7 +195,7 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
 					     KVM_PMC_MAX_FIXED);
 }
 
-static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
+static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
 {
 	set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cc77a0681800..5fa939e411d8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -161,7 +161,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		data &= ~pmu->reserved_bits;
 		if (data != pmc->eventsel) {
 			pmc->eventsel = data;
-			kvm_pmu_request_counter_reprogam(pmc);
+			kvm_pmu_request_counter_reprogram(pmc);
 		}
 		return 0;
 	}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0827e0c25309..a64ede4f1d8a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4093,7 +4093,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 {
 	switch (index) {
 	case MSR_IA32_MCG_EXT_CTL:
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		return false;
 	case MSR_IA32_SMBASE:
 		if (!IS_ENABLED(CONFIG_KVM_SMM))
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index e8a3be0b9df9..741efe2c497b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
 
 		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
-		kvm_pmu_request_counter_reprogam(pmc);
+		kvm_pmu_request_counter_reprogram(pmc);
 	}
 }
 
@@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
 {
 	int bit;
-	struct kvm_pmc *pmc;
 
-	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
-		pmc = intel_pmc_idx_to_pmc(pmu, bit);
-		if (pmc)
-			kvm_pmu_request_counter_reprogam(pmc);
-	}
+	if (!diff)
+		return;
+
+	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+		set_bit(bit, pmu->reprogram_pmi);
+
+	kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
 }
 
 static bool intel_hw_event_available(struct kvm_pmc *pmc)
@@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
 		msr_info->data = pmu->fixed_ctr_ctrl;
-		return 0;
+		break;
 	case MSR_CORE_PERF_GLOBAL_STATUS:
 		msr_info->data = pmu->global_status;
-		return 0;
+		break;
 	case MSR_CORE_PERF_GLOBAL_CTRL:
 		msr_info->data = pmu->global_ctrl;
-		return 0;
+		break;
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		msr_info->data = 0;
-		return 0;
+		break;
 	case MSR_IA32_PEBS_ENABLE:
 		msr_info->data = pmu->pebs_enable;
-		return 0;
+		break;
 	case MSR_IA32_DS_AREA:
 		msr_info->data = pmu->ds_area;
-		return 0;
+		break;
 	case MSR_PEBS_DATA_CFG:
 		msr_info->data = pmu->pebs_data_cfg;
-		return 0;
+		break;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
 			u64 val = pmc_read_counter(pmc);
 			msr_info->data = val & pmu->counter_bitmask[KVM_PMC_GP];
-			return 0;
+			break;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			u64 val = pmc_read_counter(pmc);
 			msr_info->data = val & pmu->counter_bitmask[KVM_PMC_FIXED];
-			return 0;
+			break;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			msr_info->data = pmc->eventsel;
-			return 0;
-		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
-			return 0;
+			break;
+		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
+			break;
+		}
+		return 1;
 	}
 
-	return 1;
+	return 0;
 }
 
 static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
-		if (pmu->fixed_ctr_ctrl == data)
-			return 0;
-		if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+		if (data & pmu->fixed_ctr_ctrl_mask)
+			return 1;
+
+		if (pmu->fixed_ctr_ctrl != data)
 			reprogram_fixed_counters(pmu, data);
-			return 0;
-		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_STATUS:
-		if (msr_info->host_initiated) {
-			pmu->global_status = data;
-			return 0;
-		}
-		break; /* RO MSR */
+		if (!msr_info->host_initiated)
+			return 1; /* RO MSR */
+
+		pmu->global_status = data;
+		break;
 	case MSR_CORE_PERF_GLOBAL_CTRL:
-		if (pmu->global_ctrl == data)
-			return 0;
-		if (kvm_valid_perf_global_ctrl(pmu, data)) {
+		if (!kvm_valid_perf_global_ctrl(pmu, data))
+			return 1;
+
+		if (pmu->global_ctrl != data) {
 			diff = pmu->global_ctrl ^ data;
 			pmu->global_ctrl = data;
 			reprogram_counters(pmu, diff);
-			return 0;
 		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-		if (!(data & pmu->global_ovf_ctrl_mask)) {
-			if (!msr_info->host_initiated)
-				pmu->global_status &= ~data;
-			return 0;
-		}
+		if (data & pmu->global_ovf_ctrl_mask)
+			return 1;
+
+		if (!msr_info->host_initiated)
+			pmu->global_status &= ~data;
 		break;
 	case MSR_IA32_PEBS_ENABLE:
-		if (pmu->pebs_enable == data)
-			return 0;
-		if (!(data & pmu->pebs_enable_mask)) {
+		if (data & pmu->pebs_enable_mask)
+			return 1;
+
+		if (pmu->pebs_enable != data) {
			diff = pmu->pebs_enable ^ data;
 			pmu->pebs_enable = data;
 			reprogram_counters(pmu, diff);
-			return 0;
 		}
 		break;
 	case MSR_IA32_DS_AREA:
@@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		if (is_noncanonical_address(data, vcpu))
 			return 1;
+
 		pmu->ds_area = data;
-		return 0;
+		break;
 	case MSR_PEBS_DATA_CFG:
-		if (pmu->pebs_data_cfg == data)
-			return 0;
-		if (!(data & pmu->pebs_data_cfg_mask)) {
-			pmu->pebs_data_cfg = data;
-			return 0;
-		}
+		if (data & pmu->pebs_data_cfg_mask)
+			return 1;
+
+		pmu->pebs_data_cfg = data;
 		break;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
@@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
 			    (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
 				return 1;
+
 			if (!msr_info->host_initiated &&
 			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
 				data = (s64)(s32)data;
 			pmc->counter += data - pmc_read_counter(pmc);
 			pmc_update_sample_period(pmc);
-			return 0;
+			break;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			pmc->counter += data - pmc_read_counter(pmc);
 			pmc_update_sample_period(pmc);
-			return 0;
+			break;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
-			if (data == pmc->eventsel)
-				return 0;
 			reserved_bits = pmu->reserved_bits;
 			if ((pmc->idx == 2) &&
 			    (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
 				reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
-			if (!(data & reserved_bits)) {
+			if (data & reserved_bits)
+				return 1;
+
+			if (data != pmc->eventsel) {
 				pmc->eventsel = data;
-				kvm_pmu_request_counter_reprogam(pmc);
-				return 0;
+				kvm_pmu_request_counter_reprogram(pmc);
 			}
-		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
-			return 0;
+			break;
+		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
+			break;
+		}
+		/* Not a known PMU MSR. */
+		return 1;
 	}
 
-	return 1;
+	return 0;
 }
 
 static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
@@ -531,6 +536,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->pebs_enable_mask = ~0ull;
 	pmu->pebs_data_cfg_mask = ~0ull;
 
+	memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
+
+	/*
+	 * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
+	 * and PMU refresh is disallowed after the vCPU has run, i.e. this code
+	 * should never be reached while KVM is passing through MSRs.
+	 */
+	if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
+		return;
+
 	entry = kvm_find_cpuid_entry(vcpu, 0xa);
 	if (!entry || !vcpu->kvm->arch.enable_pmu)
 		return;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 536b7d688851..599dd62eeb0e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1946,7 +1946,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 {
 	switch (msr->index) {
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		if (!nested)
 			return 1;
 		return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
@@ -2031,7 +2031,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
 			[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
 		break;
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
@@ -2340,7 +2340,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmx->msr_ia32_sgxlepubkeyhash
 			[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
 		break;
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		if (!msr_info->host_initiated)
 			return 1; /* they are read-only */
 		if (!nested_vmx_allowed(vcpu))
@@ -6930,7 +6930,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
 		 * real mode.
 		 */
 		return enable_unrestricted_guest || emulate_invalid_guest_state;
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
 		return nested;
 	case MSR_AMD64_VIRT_SPEC_CTRL:
 	case MSR_AMD64_TSC_RATIO:
@@ -7756,9 +7756,11 @@ static u64 vmx_get_perf_capabilities(void)
 	if (boot_cpu_has(X86_FEATURE_PDCM))
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
 
-	x86_perf_get_lbr(&lbr);
-	if (lbr.nr)
-		perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+	if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+		x86_perf_get_lbr(&lbr);
+		if (lbr.nr)
+			perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+	}
 
 	if (vmx_pebs_supported()) {
 		perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ad4a45d8975b..095a41c6f346 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1556,39 +1556,41 @@ static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
 static unsigned num_emulated_msrs;
 
 /*
- * List of msr numbers which are used to expose MSR-based features that
- * can be used by a hypervisor to validate requested CPU features.
+ * List of MSRs that control the existence of MSR-based features, i.e. MSRs
+ * that are effectively CPUID leafs.  VMX MSRs are also included in the set of
+ * feature MSRs, but are handled separately to allow expedited lookups.
  */
-static const u32 msr_based_features_all[] = {
-	MSR_IA32_VMX_BASIC,
-	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
-	MSR_IA32_VMX_PINBASED_CTLS,
-	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
-	MSR_IA32_VMX_PROCBASED_CTLS,
-	MSR_IA32_VMX_TRUE_EXIT_CTLS,
-	MSR_IA32_VMX_EXIT_CTLS,
-	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
-	MSR_IA32_VMX_ENTRY_CTLS,
-	MSR_IA32_VMX_MISC,
-	MSR_IA32_VMX_CR0_FIXED0,
-	MSR_IA32_VMX_CR0_FIXED1,
-	MSR_IA32_VMX_CR4_FIXED0,
-	MSR_IA32_VMX_CR4_FIXED1,
-	MSR_IA32_VMX_VMCS_ENUM,
-	MSR_IA32_VMX_PROCBASED_CTLS2,
-	MSR_IA32_VMX_EPT_VPID_CAP,
-	MSR_IA32_VMX_VMFUNC,
-
+static const u32 msr_based_features_all_except_vmx[] = {
 	MSR_AMD64_DE_CFG,
 	MSR_IA32_UCODE_REV,
 	MSR_IA32_ARCH_CAPABILITIES,
 	MSR_IA32_PERF_CAPABILITIES,
 };
 
-static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
+			      (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
 static unsigned int num_msr_based_features;
 
 /*
+ * All feature MSRs except uCode revID, which tracks the currently loaded uCode
+ * patch, are immutable once the vCPU model is defined.
+ */
+static bool kvm_is_immutable_feature_msr(u32 msr)
+{
+	int i;
+
+	if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
+		return true;
+
+	for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
+		if (msr == msr_based_features_all_except_vmx[i])
+			return msr != MSR_IA32_UCODE_REV;
+	}
+
+	return false;
+}
+
+/*
 * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
 * does not yet virtualize. These include:
 *   10 - MISC_PACKAGE_CTRLS
@@ -2205,6 +2207,22 @@ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 
 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 {
+	u64 val;
+
+	/*
+	 * Disallow writes to immutable feature MSRs after KVM_RUN.  KVM does
+	 * not support modifying the guest vCPU model on the fly, e.g. changing
+	 * the nVMX capabilities while L2 is running is nonsensical.  Ignore
+	 * writes of the same value, e.g. to allow userspace to blindly stuff
+	 * all MSRs when emulating RESET.
+	 */
+	if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
+		if (do_get_msr(vcpu, index, &val) || *data != val)
+			return -EINVAL;
+
+		return 0;
+	}
+
 	return kvm_set_msr_ignored_check(vcpu, index, *data, true);
 }
 
@@ -3627,9 +3645,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (data & ~kvm_caps.supported_perf_cap)
 			return 1;
 
+		/*
+		 * Note, this is not just a performance optimization!  KVM
+		 * disallows changing feature MSRs after the vCPU has run; PMU
+		 * refresh will bug the VM if called after the vCPU has run.
+		 */
+		if (vcpu->arch.perf_capabilities == data)
+			break;
+
 		vcpu->arch.perf_capabilities = data;
 		kvm_pmu_refresh(vcpu);
-		return 0;
+		break;
 	case MSR_IA32_PRED_CMD:
 		if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
 			return 1;
@@ -7045,6 +7071,18 @@ out:
 	return r;
 }
 
+static void kvm_probe_feature_msr(u32 msr_index)
+{
+	struct kvm_msr_entry msr = {
+		.index = msr_index,
+	};
+
+	if (kvm_get_msr_feature(&msr))
+		return;
+
+	msr_based_features[num_msr_based_features++] = msr_index;
+}
+
 static void kvm_probe_msr_to_save(u32 msr_index)
 {
 	u32 dummy[2];
@@ -7120,7 +7158,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
 	msrs_to_save[num_msrs_to_save++] = msr_index;
 }
 
-static void kvm_init_msr_list(void)
+static void kvm_init_msr_lists(void)
 {
 	unsigned i;
 
@@ -7146,15 +7184,11 @@ static void kvm_init_msr_list(void)
 		emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
 	}
 
-	for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
-		struct kvm_msr_entry msr;
+	for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++)
+		kvm_probe_feature_msr(i);
 
-		msr.index = msr_based_features_all[i];
-		if (kvm_get_msr_feature(&msr))
-			continue;
-
-		msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
-	}
+	for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++)
+		kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]);
 }
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -9488,7 +9522,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 		kvm_caps.max_guest_tsc_khz = max;
 	}
 	kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
-	kvm_init_msr_list();
+	kvm_init_msr_lists();
 
 	return 0;
 
 out_unwind_ops:
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 203fb6640b5b..fbef05c0bdeb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -40,6 +40,14 @@ void kvm_spurious_fault(void);
 	failed;								\
 })
 
+/*
+ * The first...last VMX feature MSRs that are emulated by KVM.  This may or may
+ * not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an
+ * associated feature that KVM supports for nested virtualization.
+ */
+#define KVM_FIRST_EMULATED_VMX_MSR	MSR_IA32_VMX_BASIC
+#define KVM_LAST_EMULATED_VMX_MSR	MSR_IA32_VMX_VMFUNC
+
 #define KVM_DEFAULT_PLE_GAP		128
 #define KVM_VMX_DEFAULT_PLE_WINDOW	4096
 #define KVM_DEFAULT_PLE_WINDOW_GROW	2
@@ -83,6 +91,11 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
 void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
 int kvm_check_nested_events(struct kvm_vcpu *vcpu);
 
+static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.last_vmentry_cpu != -1;
+}
+
 static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.exception.pending ||