path: root/arch/x86/kvm/mmu/mmu.c
author	Sean Christopherson <seanjc@google.com>	2024-08-31 02:15:29 +0200
committer	Sean Christopherson <seanjc@google.com>	2024-09-10 05:16:29 +0200
commit	b299c273c06f005976cdc1b9e9299d492527607e (patch)
tree	a30a6c3b5eb8f31d70cc476b7db4134d001de196 /arch/x86/kvm/mmu/mmu.c
parent	KVM: x86/mmu: Always walk guest PTEs with WRITE access when unprotecting (diff)
KVM: x86/mmu: Move event re-injection unprotect+retry into common path
Move the event re-injection unprotect+retry logic into kvm_mmu_write_protect_fault(), i.e. unprotect and retry if and only if the #PF actually hit a write-protected gfn. Note, there is a small possibility that the gfn was unprotected by a different task between hitting the #PF and acquiring mmu_lock, but in that case, KVM will resume the guest immediately anyways because KVM will treat the fault as spurious.

As a bonus, unprotecting _after_ handling the page fault also addresses the case where installing a SPTE to handle the fault encounters a shadowed PTE, i.e. *creates* a read-only SPTE.

Opportunistically add a comment explaining what on earth the intent of the code is, based on the changelog from commit 577bdc496614 ("KVM: Avoid instruction emulation when event delivery is pending").

Link: https://lore.kernel.org/r/20240831001538.336683-15-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
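For reference, a condensed sketch of what kvm_mmu_write_protect_fault() looks like after this patch, pieced together from the hunks below. The condition is taken from the final hunk; the parameter list, the "direct" local, and the RET_PF_EMULATE fallback are simplifications for illustration, not the literal upstream code:

/*
 * Sketch only: the real function takes different parameters and has more
 * logic ahead of and after this check.
 */
static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
				       u64 error_code, int *emulation_type)
{
	bool direct = vcpu->arch.mmu->root_role.direct;

	/*
	 * Unprotect and retry iff the #PF plausibly hit a write-protected
	 * gfn: a direct write to a guest page table, or, for a shadowed MMU,
	 * any fault while an event is awaiting reinjection.
	 */
	if (((direct && is_write_to_guest_page_table(error_code)) ||
	     (!direct && kvm_event_needs_reinjection(vcpu))) &&
	    kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa))
		return RET_PF_RETRY;

	/* Otherwise fall through to emulation (simplified). */
	return RET_PF_EMULATE;
}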
Diffstat (limited to 'arch/x86/kvm/mmu/mmu.c')
-rw-r--r--	arch/x86/kvm/mmu/mmu.c | 30 +++++++++---------------------
1 file changed, 9 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 937fa9a82a43..195ba7430720 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2743,23 +2743,6 @@ bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa)
 	return r;
 }
 
-static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
-{
-	gpa_t gpa;
-	int r;
-
-	if (vcpu->arch.mmu->root_role.direct)
-		return 0;
-
-	gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, NULL);
-	if (gpa == INVALID_GPA)
-		return 0;
-
-	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-
-	return r;
-}
-
 static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	trace_kvm_mmu_unsync_page(sp);
@@ -4630,8 +4613,6 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 	if (!flags) {
 		trace_kvm_page_fault(vcpu, fault_address, error_code);
 
-		if (kvm_event_needs_reinjection(vcpu))
-			kvm_mmu_unprotect_page_virt(vcpu, fault_address);
 		r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
 				       insn_len);
 	} else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
@@ -6037,8 +6018,15 @@ static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	 * Note, this code also applies to Intel CPUs, even though it is *very*
	 * unlikely that an L1 will share its page tables (IA32/PAE/paging64
	 * format) with L2's page tables (EPT format).
-	 */
-	if (direct && is_write_to_guest_page_table(error_code) &&
+	 *
+	 * For indirect MMUs, i.e. if KVM is shadowing the current MMU, try to
+	 * unprotect the gfn and retry if an event is awaiting reinjection. If
+	 * KVM emulates multiple instructions before completing event injection,
+	 * the event could be delayed beyond what is architecturally allowed,
+	 * e.g. KVM could inject an IRQ after the TPR has been raised.
+	 */
+	if (((direct && is_write_to_guest_page_table(error_code)) ||
+	     (!direct && kvm_event_needs_reinjection(vcpu))) &&
 	    kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa))
 		return RET_PF_RETRY;
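As for the helper itself: kvm_mmu_unprotect_gfn_and_retry(), whose tail is visible in the first hunk, subsumes the removed kvm_mmu_unprotect_page_virt(), with the gva-to-gpa translation done as a WRITE-access walk per the parent commit noted in the header above. A plausible sketch of its shape, reconstructed from the removed helper; everything beyond the identifiers that appear in this diff is an approximation:

/*
 * Approximation of kvm_mmu_unprotect_gfn_and_retry(), reconstructed from
 * the removed kvm_mmu_unprotect_page_virt(); not the literal upstream code.
 */
static bool unprotect_gfn_and_retry_sketch(struct kvm_vcpu *vcpu,
					   gpa_t cr2_or_gpa)
{
	gpa_t gpa = cr2_or_gpa;

	/* For an indirect MMU, the fault address is a gva; translate it. */
	if (!vcpu->arch.mmu->root_role.direct) {
		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
		if (gpa == INVALID_GPA)
			return false;
	}

	/* True if a shadow page for the gfn was zapped, i.e. if retrying
	 * the faulting instruction may now make forward progress. */
	return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
}

On success the vCPU simply re-enters the guest (RET_PF_RETRY), so any pending event is injected before the retried instruction executes, which is precisely the ordering the new comment is defending.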