diff options
author | Sean Christopherson <seanjc@google.com> | 2024-10-11 04:10:49 +0200 |
---|---|---|
committer | Sean Christopherson <seanjc@google.com> | 2024-10-30 23:25:40 +0100 |
commit | 2ebbe0308c29287914eeabe6ed83a6e21f9962bc (patch) | |
tree | afdbaf2f641a0ae230a1fee6b0b22ef327f02833 | |
parent | KVM: x86/mmu: Set Dirty bit for new SPTEs, even if _hardware_ A/D bits are di... (diff) | |
download | linux-2ebbe0308c29287914eeabe6ed83a6e21f9962bc.tar.xz linux-2ebbe0308c29287914eeabe6ed83a6e21f9962bc.zip |
KVM: Allow arch code to elide TLB flushes when aging a young page
Add a Kconfig to allow architectures to opt out of a TLB flush when a
young page is aged, as invalidating TLB entries is not functionally
required on most KVM-supported architectures. Stale TLB entries can
result in false negatives and theoretically lead to suboptimal reclaim,
but in practice all observations have been that the performance gained by
skipping TLB flushes outweighs any performance lost by reclaiming hot
pages.
E.g. the primary MMUs for x86, RISC-V, s390, and PPC Book3S elide the TLB
flush for ptep_clear_flush_young(), and arm64's MMU skips the trailing DSB
that's required for ordering (presumably because there are optimizations
related to eliding other TLB flushes when doing make-before-break).
Link: https://lore.kernel.org/r/20241011021051.1557902-18-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
-rw-r--r-- | virt/kvm/Kconfig | 4 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 20 |
2 files changed, 10 insertions, 14 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index fd6a3010afa8..54e959e7d68f 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -100,6 +100,10 @@ config KVM_GENERIC_MMU_NOTIFIER select MMU_NOTIFIER bool +config KVM_ELIDE_TLB_FLUSH_IF_YOUNG + depends on KVM_GENERIC_MMU_NOTIFIER + bool + config KVM_GENERIC_MEMORY_ATTRIBUTES depends on KVM_GENERIC_MMU_NOTIFIER bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b1b10dc408a0..83b525d16b61 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -630,7 +630,8 @@ mmu_unlock: static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn, unsigned long start, unsigned long end, - gfn_handler_t handler) + gfn_handler_t handler, + bool flush_on_ret) { struct kvm *kvm = mmu_notifier_to_kvm(mn); const struct kvm_mmu_notifier_range range = { @@ -638,7 +639,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn, .end = end, .handler = handler, .on_lock = (void *)kvm_null_fn, - .flush_on_ret = true, + .flush_on_ret = flush_on_ret, .may_block = false, }; @@ -650,17 +651,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn unsigned long end, gfn_handler_t handler) { - struct kvm *kvm = mmu_notifier_to_kvm(mn); - const struct kvm_mmu_notifier_range range = { - .start = start, - .end = end, - .handler = handler, - .on_lock = (void *)kvm_null_fn, - .flush_on_ret = false, - .may_block = false, - }; - - return __kvm_handle_hva_range(kvm, &range).ret; + return kvm_handle_hva_range(mn, start, end, handler, false); } void kvm_mmu_invalidate_begin(struct kvm *kvm) @@ -825,7 +816,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, { trace_kvm_age_hva(start, end); - return kvm_handle_hva_range(mn, start, end, kvm_age_gfn); + return kvm_handle_hva_range(mn, start, end, kvm_age_gfn, + !IS_ENABLED(CONFIG_KVM_ELIDE_TLB_FLUSH_IF_YOUNG)); } static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, |