diff options
author | akpm <akpm@linux-foundation.org> | 2022-06-27 19:31:34 +0200 |
---|---|---|
committer | akpm <akpm@linux-foundation.org> | 2022-06-27 19:31:34 +0200 |
commit | 46a3b1125308f8f90a065eeecfafd2a96b01a36c (patch) | |
tree | 6e080118fbad5aa217d160cedf02f10108ce3bf2 /arch/x86/kvm | |
parent | mm/kmemleak: prevent soft lockup in first object iteration loop of kmemleak_s... (diff) | |
parent | Linux 5.19-rc4 (diff) | |
download | linux-46a3b1125308f8f90a065eeecfafd2a96b01a36c.tar.xz linux-46a3b1125308f8f90a065eeecfafd2a96b01a36c.zip |
Merge branch 'master' into mm-stable
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/lapic.c | 27 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/avic.c | 171 | ||||
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 39 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 72 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 19 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.h | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 76 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 5 |
10 files changed, 259 insertions, 160 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f1bdac3f5aa8..0e68b4c937fc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) } } +static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic) +{ + struct kvm *kvm = apic->vcpu->kvm; + + if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm)) + return; + + if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id) + return; + + kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); +} + static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) { int ret = 0; @@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ - if (!apic_x2apic_mode(apic)) + if (!apic_x2apic_mode(apic)) { kvm_apic_set_xapic_id(apic, val >> 24); - else + kvm_lapic_xapic_id_updated(apic); + } else { ret = 1; + } break; case APIC_TASKPRI: @@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) MSR_IA32_APICBASE_BASE; if ((value & MSR_IA32_APICBASE_ENABLE) && - apic->base_address != APIC_DEFAULT_PHYS_BASE) - pr_warn_once("APIC base relocation is unsupported by KVM"); + apic->base_address != APIC_DEFAULT_PHYS_BASE) { + kvm_set_apicv_inhibit(apic->vcpu->kvm, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); + } } void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) @@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); } + } else { + kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e826ee9138fa..17252f39bd7c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, true); mmu->pae_root[i] = root | PT_PRESENT_MASK | - shadow_me_mask; + shadow_me_value; } mmu->root.hpa = __pa(mmu->pae_root); } else { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 54fe03714f8a..d1bc5820ea46 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu) static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source, u32 icrl, u32 icrh, u32 index) { - u32 dest, apic_id; - struct kvm_vcpu *vcpu; + u32 l1_physical_id, dest; + struct kvm_vcpu *target_vcpu; int dest_mode = icrl & APIC_DEST_MASK; int shorthand = icrl & APIC_SHORT_MASK; struct kvm_svm *kvm_svm = to_kvm_svm(kvm); - u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page); if (shorthand != APIC_DEST_NOSHORT) return -EINVAL; - /* - * The AVIC incomplete IPI #vmexit info provides index into - * the physical APIC ID table, which can be used to derive - * guest physical APIC ID. - */ + if (apic_x2apic_mode(source)) + dest = icrh; + else + dest = GET_APIC_DEST_FIELD(icrh); + if (dest_mode == APIC_DEST_PHYSICAL) { - apic_id = index; + /* broadcast destination, use slow path */ + if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST) + return -EINVAL; + if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST) + return -EINVAL; + + l1_physical_id = dest; + + if (WARN_ON_ONCE(l1_physical_id != index)) + return -EINVAL; + } else { - if (!apic_x2apic_mode(source)) { - /* For xAPIC logical mode, the index is for logical APIC table. */ - apic_id = avic_logical_id_table[index] & 0x1ff; + u32 bitmap, cluster; + int logid_index; + + if (apic_x2apic_mode(source)) { + /* 16 bit dest mask, 16 bit cluster id */ + bitmap = dest & 0xFFFF0000; + cluster = (dest >> 16) << 4; + } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) { + /* 8 bit dest mask*/ + bitmap = dest; + cluster = 0; } else { - return -EINVAL; + /* 4 bit desk mask, 4 bit cluster id */ + bitmap = dest & 0xF; + cluster = (dest >> 4) << 2; } - } - /* - * Assuming vcpu ID is the same as physical apic ID, - * and use it to retrieve the target vCPU. - */ - vcpu = kvm_get_vcpu_by_id(kvm, apic_id); - if (!vcpu) - return -EINVAL; + if (unlikely(!bitmap)) + /* guest bug: nobody to send the logical interrupt to */ + return 0; - if (apic_x2apic_mode(vcpu->arch.apic)) - dest = icrh; - else - dest = GET_APIC_DEST_FIELD(icrh); + if (!is_power_of_2(bitmap)) + /* multiple logical destinations, use slow path */ + return -EINVAL; - /* - * Try matching the destination APIC ID with the vCPU. - */ - if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) { - vcpu->arch.apic->irr_pending = true; - svm_complete_interrupt_delivery(vcpu, - icrl & APIC_MODE_MASK, - icrl & APIC_INT_LEVELTRIG, - icrl & APIC_VECTOR_MASK); - return 0; + logid_index = cluster + __ffs(bitmap); + + if (apic_x2apic_mode(source)) { + l1_physical_id = logid_index; + } else { + u32 *avic_logical_id_table = + page_address(kvm_svm->avic_logical_id_table_page); + + u32 logid_entry = avic_logical_id_table[logid_index]; + + if (WARN_ON_ONCE(index != logid_index)) + return -EINVAL; + + /* guest bug: non existing/reserved logical destination */ + if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK))) + return 0; + + l1_physical_id = logid_entry & + AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; + } } - return -EINVAL; + target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id); + if (unlikely(!target_vcpu)) + /* guest bug: non existing vCPU is a target of this IPI*/ + return 0; + + target_vcpu->arch.apic->irr_pending = true; + svm_complete_interrupt_delivery(target_vcpu, + icrl & APIC_MODE_MASK, + icrl & APIC_INT_LEVELTRIG, + icrl & APIC_VECTOR_MASK); + return 0; } static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, @@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) return ret; } -static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) -{ - u64 *old, *new; - struct vcpu_svm *svm = to_svm(vcpu); - u32 id = kvm_xapic_id(vcpu->arch.apic); - - if (vcpu->vcpu_id == id) - return 0; - - old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); - new = avic_get_physical_id_entry(vcpu, id); - if (!new || !old) - return 1; - - /* We need to move physical_id_entry to new offset */ - *new = *old; - *old = 0ULL; - to_svm(vcpu)->avic_physical_id_cache = new; - - /* - * Also update the guest physical APIC ID in the logical - * APIC ID table entry if already setup the LDR. - */ - if (svm->ldr_reg) - avic_handle_ldr_update(vcpu); - - return 0; -} - static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu) AVIC_UNACCEL_ACCESS_OFFSET_MASK; switch (offset) { - case APIC_ID: - if (avic_handle_apic_id_update(vcpu)) - return 0; - break; case APIC_LDR: if (avic_handle_ldr_update(vcpu)) return 0; @@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm) void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) { - if (avic_handle_apic_id_update(vcpu) != 0) - return; avic_handle_dfr_update(vcpu); avic_handle_ldr_update(vcpu); } @@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) BIT(APICV_INHIBIT_REASON_PIT_REINJ) | BIT(APICV_INHIBIT_REASON_X2APIC) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | - BIT(APICV_INHIBIT_REASON_SEV); + BIT(APICV_INHIBIT_REASON_SEV) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } @@ -946,7 +946,7 @@ out: return ret; } -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; int h_physical_id = kvm_cpu_get_apicid(cpu); @@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); } -void __avic_vcpu_put(struct kvm_vcpu *vcpu) +void avic_vcpu_put(struct kvm_vcpu *vcpu) { u64 entry; struct vcpu_svm *svm = to_svm(vcpu); @@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -static void avic_vcpu_load(struct kvm_vcpu *vcpu) -{ - int cpu = get_cpu(); - - WARN_ON(cpu != vcpu->cpu); - - __avic_vcpu_load(vcpu, cpu); - - put_cpu(); -} - -static void avic_vcpu_put(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - - __avic_vcpu_put(vcpu); - - preempt_enable(); -} void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { @@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) vmcb_mark_dirty(vmcb, VMCB_AVIC); if (activated) - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); else avic_vcpu_put(vcpu); @@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3361258640a2..ba7cd26f438f 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; struct vmcb *vmcb01 = svm->vmcb01.ptr; struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + u32 pause_count12; + u32 pause_thresh12; /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, @@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0; + pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; if (kvm_pause_in_guest(svm->vcpu.kvm)) { - /* use guest values since host doesn't use them */ - vmcb02->control.pause_filter_count = - svm->pause_filter_enabled ? - svm->nested.ctl.pause_filter_count : 0; + /* use guest values since host doesn't intercept PAUSE */ + vmcb02->control.pause_filter_count = pause_count12; + vmcb02->control.pause_filter_thresh = pause_thresh12; - vmcb02->control.pause_filter_thresh = - svm->pause_threshold_enabled ? - svm->nested.ctl.pause_filter_thresh : 0; - - } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { - /* use host values when guest doesn't use them */ + } else { + /* start from host values otherwise */ vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count; vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh; - } else { - /* - * Intercept every PAUSE otherwise and - * ignore both host and guest values - */ - vmcb02->control.pause_filter_count = 0; - vmcb02->control.pause_filter_thresh = 0; + + /* ... but ensure filtering is disabled if so requested. */ + if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { + if (!pause_count12) + vmcb02->control.pause_filter_count = 0; + if (!pause_thresh12) + vmcb02->control.pause_filter_thresh = 0; + } } nested_svm_transition_tlb_flush(vcpu); @@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; - if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count) + if (!kvm_pause_in_guest(vcpu->kvm)) { vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); + + } nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 51fd985cf21d..0c240ed04f96 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, /* If source buffer is not aligned then use an intermediate buffer */ if (!IS_ALIGNED((unsigned long)vaddr, 16)) { - src_tpage = alloc_page(GFP_KERNEL); + src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!src_tpage) return -ENOMEM; @@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; - dst_tpage = alloc_page(GFP_KERNEL); + dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!dst_tpage) { ret = -ENOMEM; goto e_free; @@ -1665,19 +1665,24 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info; struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info; + struct kvm_vcpu *dst_vcpu, *src_vcpu; + struct vcpu_svm *dst_svm, *src_svm; struct kvm_sev_info *mirror; + unsigned long i; dst->active = true; dst->asid = src->asid; dst->handle = src->handle; dst->pages_locked = src->pages_locked; dst->enc_context_owner = src->enc_context_owner; + dst->es_active = src->es_active; src->asid = 0; src->active = false; src->handle = 0; src->pages_locked = 0; src->enc_context_owner = NULL; + src->es_active = false; list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); @@ -1704,26 +1709,21 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) list_del(&src->mirror_entry); list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms); } -} -static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) -{ - unsigned long i; - struct kvm_vcpu *dst_vcpu, *src_vcpu; - struct vcpu_svm *dst_svm, *src_svm; + kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { + dst_svm = to_svm(dst_vcpu); - if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) - return -EINVAL; + sev_init_vmcb(dst_svm); - kvm_for_each_vcpu(i, src_vcpu, src) { - if (!src_vcpu->arch.guest_state_protected) - return -EINVAL; - } + if (!dst->es_active) + continue; - kvm_for_each_vcpu(i, src_vcpu, src) { + /* + * Note, the source is not required to have the same number of + * vCPUs as the destination when migrating a vanilla SEV VM. + */ + src_vcpu = kvm_get_vcpu(dst_kvm, i); src_svm = to_svm(src_vcpu); - dst_vcpu = kvm_get_vcpu(dst, i); - dst_svm = to_svm(dst_vcpu); /* * Transfer VMSA and GHCB state to the destination. Nullify and @@ -1740,8 +1740,23 @@ static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; src_vcpu->arch.guest_state_protected = false; } - to_kvm_svm(src)->sev_info.es_active = false; - to_kvm_svm(dst)->sev_info.es_active = true; +} + +static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) +{ + struct kvm_vcpu *src_vcpu; + unsigned long i; + + if (!sev_es_guest(src)) + return 0; + + if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) + return -EINVAL; + + kvm_for_each_vcpu(i, src_vcpu, src) { + if (!src_vcpu->arch.guest_state_protected) + return -EINVAL; + } return 0; } @@ -1789,11 +1804,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_dst_vcpu; - if (sev_es_guest(source_kvm)) { - ret = sev_es_migrate_from(kvm, source_kvm); - if (ret) - goto out_source_vcpu; - } + ret = sev_check_source_vcpus(kvm, source_kvm); + if (ret) + goto out_source_vcpu; sev_migrate_from(kvm, source_kvm); kvm_vm_dead(source_kvm); @@ -2914,7 +2927,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) count, in); } -void sev_es_init_vmcb(struct vcpu_svm *svm) +static void sev_es_init_vmcb(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; @@ -2967,6 +2980,15 @@ void sev_es_init_vmcb(struct vcpu_svm *svm) } } +void sev_init_vmcb(struct vcpu_svm *svm) +{ + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; + clr_exception_intercept(svm, UD_VECTOR); + + if (sev_es_guest(svm->vcpu.kvm)) + sev_es_init_vmcb(svm); +} + void sev_es_vcpu_reset(struct vcpu_svm *svm) { /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1dc02cdf6960..44bbf25dfeb9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = __grow_ple_window(old, @@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = @@ -1212,15 +1212,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; } - if (sev_guest(vcpu->kvm)) { - svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; - clr_exception_intercept(svm, UD_VECTOR); - - if (sev_es_guest(vcpu->kvm)) { - /* Perform SEV-ES specific VMCB updates */ - sev_es_init_vmcb(svm); - } - } + if (sev_guest(vcpu->kvm)) + sev_init_vmcb(svm); svm_hv_init_vmcb(vmcb); init_vmcb_after_set_cpuid(vcpu); @@ -1400,13 +1393,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) indirect_branch_prediction_barrier(); } if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_load(vcpu, cpu); + avic_vcpu_load(vcpu, cpu); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_put(vcpu); + avic_vcpu_put(vcpu); svm_prepare_host_switch(vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 500348c1cb35..9223ac100ef5 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); int avic_init_vcpu(struct vcpu_svm *svm); -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -void __avic_vcpu_put(struct kvm_vcpu *vcpu); +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void avic_vcpu_put(struct kvm_vcpu *vcpu); void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); @@ -649,10 +649,10 @@ void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); +void sev_init_vmcb(struct vcpu_svm *svm); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9bd86ecccdab..3a919e49129b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -229,6 +229,9 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; +/* Control for disabling CPU Fill buffer clear */ +static bool __read_mostly vmx_fb_clear_ctrl_available; + static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -360,6 +363,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } +static void vmx_setup_fb_clear_ctrl(void) +{ + u64 msr; + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA)) { + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_FB_CLEAR_CTRL) + vmx_fb_clear_ctrl_available = true; + } +} + +static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) +{ + u64 msr; + + if (!vmx->disable_fb_clear) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + msr |= FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; +} + +static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) +{ + if (!vmx->disable_fb_clear) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); +} + +static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +{ + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS + * at VMEntry. Skip the MSR read/write when a guest has no use case to + * execute VERW. + */ + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) + vmx->disable_fb_clear = false; +} + static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, @@ -2252,6 +2309,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); } + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) + vmx_update_fb_clear_dis(vcpu, vmx); + return ret; } @@ -4553,6 +4614,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); + + vmx_update_fb_clear_dis(vcpu, vmx); } static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) @@ -6772,6 +6835,11 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); + + vmx_disable_fb_clear(vmx); if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); @@ -6781,6 +6849,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = native_read_cr2(); + vmx_enable_fb_clear(vmx); + guest_state_exit_irqoff(); } @@ -7709,7 +7779,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } @@ -8212,6 +8284,8 @@ static int __init vmx_init(void) return r; } + vmx_setup_fb_clear_ctrl(); + for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b98c7e96697a..8d2342ede0c5 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -348,6 +348,8 @@ struct vcpu_vmx { u64 msr_ia32_feature_control_valid_bits; /* SGX Launch Control public key hash */ u64 msr_ia32_sgxlepubkeyhash[4]; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; struct lbr_desc lbr_desc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 03fbfbbec460..1910e1e78b15 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1617,6 +1617,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + /* Guests don't need to know "Fill buffer clear control" exists */ + data &= ~ARCH_CAP_FB_CLEAR_CTRL; + return data; } @@ -9850,6 +9853,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) return; down_read(&vcpu->kvm->arch.apicv_update_lock); + preempt_disable(); activate = kvm_vcpu_apicv_activated(vcpu); @@ -9870,6 +9874,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); out: + preempt_enable(); up_read(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); |