From c4fd527f52ecb135018655c7f56f87800872c5bc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 11:58:57 +0100 Subject: powerpc/kvm: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Because of this cleanup, we get to remove a few fields in struct kvm_arch that are now unused. Signed-off-by: Greg Kroah-Hartman [mpe: Fix build error in kvm/timing.c, adapt kvmppc_remove_cpu_debugfs()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200209105901.1620958-2-gregkh@linuxfoundation.org --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 5 ++--- arch/powerpc/kvm/book3s_64_mmu_radix.c | 5 ++--- arch/powerpc/kvm/book3s_hv.c | 9 ++------- arch/powerpc/kvm/timing.c | 17 ++++------------- 4 files changed, 10 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6c372f5c61b6..8b4eac0c9dcd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2138,9 +2138,8 @@ static const struct file_operations debugfs_htab_fops = { void kvmppc_mmu_debugfs_init(struct kvm *kvm) { - kvm->arch.htab_dentry = debugfs_create_file("htab", 0400, - kvm->arch.debugfs_dir, kvm, - &debugfs_htab_fops); + debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm, + &debugfs_htab_fops); } void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 803940d79b73..1d75ed684b53 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1376,9 +1376,8 @@ static const struct file_operations debugfs_radix_fops = { void kvmhv_radix_debugfs_init(struct kvm *kvm) { - kvm->arch.radix_dentry = debugfs_create_file("radix", 0400, - kvm->arch.debugfs_dir, kvm, - &debugfs_radix_fops); + debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm, + &debugfs_radix_fops); } int kvmppc_radix_init(void) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2cefd071b848..33be4d93248a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2258,14 +2258,9 @@ static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) struct kvm *kvm = vcpu->kvm; snprintf(buf, sizeof(buf), "vcpu%u", id); - if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir)) - return; vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); - if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir)) - return; - vcpu->arch.debugfs_timings = - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, - vcpu, &debugfs_timings_ops); + debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu, + &debugfs_timings_ops); } #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index bfe4f106cffc..ba56a5cbba97 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -211,23 +211,14 @@ void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, - kvm_debugfs_dir, vcpu, - &kvmppc_exit_timing_fops); - - if (!debugfs_file) { - printk(KERN_ERR"%s: error creating debugfs file %s\n", - __func__, dbg_fname); - return; - } + debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir, + vcpu, &kvmppc_exit_timing_fops); vcpu->arch.debugfs_exit_timing = debugfs_file; } void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) { - if (vcpu->arch.debugfs_exit_timing) { - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; - } + debugfs_remove(vcpu->arch.debugfs_exit_timing); + vcpu->arch.debugfs_exit_timing = NULL; } -- cgit v1.2.3 From 82307e676f9d885871f121e4921b12905a53397d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:18 -0800 Subject: KVM: PPC: Move memslot memory allocation into prepare_memory_region() Allocate the rmap array during kvm_arch_prepare_memory_region() to pave the way for removing kvm_arch_create_memslot() altogether. Moving PPC's memory allocation only changes the order of kernel memory allocations between PPC and common KVM code. No functional change intended. Acked-by: Paul Mackerras Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/powerpc/include/asm/kvm_ppc.h | 11 ++++------- arch/powerpc/kvm/book3s.c | 12 ++++-------- arch/powerpc/kvm/book3s_hv.c | 23 +++++++++++------------ arch/powerpc/kvm/book3s_pr.c | 11 ++--------- arch/powerpc/kvm/booke.c | 9 ++------- arch/powerpc/kvm/powerpc.c | 4 ++-- 6 files changed, 25 insertions(+), 45 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bc2494e5710a..d162649430ba 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -202,12 +202,10 @@ extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -extern int kvmppc_core_create_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, - unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, @@ -280,7 +278,8 @@ struct kvmppc_ops { void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); int (*prepare_memory_region)(struct kvm *kvm, struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change); void (*commit_memory_region)(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, @@ -294,8 +293,6 @@ struct kvmppc_ops { void (*mmu_destroy)(struct kvm_vcpu *vcpu); void (*free_memslot)(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); - int (*create_memslot)(struct kvm_memory_slot *slot, - unsigned long npages); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d07a8e12fa15..e9149a815806 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -810,12 +810,6 @@ void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, kvm->arch.kvm_ops->free_memslot(free, dont); } -int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return kvm->arch.kvm_ops->create_memslot(slot, npages); -} - void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { kvm->arch.kvm_ops->flush_memslot(kvm, memslot); @@ -823,9 +817,11 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { - return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); + return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem, + change); } void kvmppc_core_commit_memory_region(struct kvm *kvm, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2cefd071b848..460f31f94337 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4456,20 +4456,20 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, } } -static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, - unsigned long npages) +static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, + struct kvm_memory_slot *slot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { - slot->arch.rmap = vzalloc(array_size(npages, sizeof(*slot->arch.rmap))); - if (!slot->arch.rmap) - return -ENOMEM; + unsigned long npages = mem->memory_size >> PAGE_SHIFT; - return 0; -} + if (change == KVM_MR_CREATE) { + slot->arch.rmap = vzalloc(array_size(npages, + sizeof(*slot->arch.rmap))); + if (!slot->arch.rmap) + return -ENOMEM; + } -static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, - struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem) -{ return 0; } @@ -5528,7 +5528,6 @@ static struct kvmppc_ops kvm_ops_hv = { .set_spte_hva = kvm_set_spte_hva_hv, .mmu_destroy = kvmppc_mmu_destroy_hv, .free_memslot = kvmppc_core_free_memslot_hv, - .create_memslot = kvmppc_core_create_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 729a0f12a752..3077a0cd0e5e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1927,7 +1927,8 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return 0; } @@ -1947,13 +1948,6 @@ static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free, return; } -static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - - #ifdef CONFIG_PPC64 static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, struct kvm_ppc_smmu_info *info) @@ -2099,7 +2093,6 @@ static struct kvmppc_ops kvm_ops_pr = { .set_spte_hva = kvm_set_spte_hva_pr, .mmu_destroy = kvmppc_mmu_destroy_pr, .free_memslot = kvmppc_core_free_memslot_pr, - .create_memslot = kvmppc_core_create_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7b27604adadf..44f50fc50976 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1776,15 +1776,10 @@ void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, { } -int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return 0; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1af96fb5dc6f..4e0f7a77920d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -694,7 +694,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { - return kvmppc_core_create_memslot(kvm, slot, npages); + return 0; } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -702,7 +702,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { - return kvmppc_core_prepare_memory_region(kvm, memslot, mem); + return kvmppc_core_prepare_memory_region(kvm, memslot, mem, change); } void kvm_arch_commit_memory_region(struct kvm *kvm, -- cgit v1.2.3 From 414de7abbf809f046511269797d9f2310b88e036 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:20 -0800 Subject: KVM: Drop kvm_arch_create_memslot() Remove kvm_arch_create_memslot() now that all arch implementations are effectively nops. Removing kvm_arch_create_memslot() eliminates the possibility for arch specific code to allocate memory prior to setting a memslot, which sets the stage for simplifying kvm_free_memslot(). Cc: Janosch Frank Acked-by: Christian Borntraeger Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 6 ------ arch/powerpc/kvm/powerpc.c | 6 ------ arch/s390/kvm/kvm-s390.c | 6 ------ arch/x86/kvm/x86.c | 6 ------ include/linux/kvm_host.h | 2 -- virt/kvm/arm/mmu.c | 6 ------ virt/kvm/kvm_main.c | 21 +++++++-------------- 7 files changed, 7 insertions(+), 46 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 71244bf87c3a..b3243d87097f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -188,12 +188,6 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, return -ENOIOCTLCMD; } -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - void kvm_arch_flush_shadow_all(struct kvm *kvm) { /* Flush whole GPA */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4e0f7a77920d..48abf1b9ad58 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -691,12 +691,6 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, kvmppc_core_free_memslot(kvm, free, dont); } -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d7ff30e45589..6638024e440d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4507,12 +4507,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 94ea7b914fc7..c23b08f48079 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9873,12 +9873,6 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, kvm_page_track_free_memslot(free, dont); } -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot, unsigned long npages) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7944ad6ac10b..8f47f6b48444 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -671,8 +671,6 @@ int __kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 19c961ac4e3c..1d97410b3470 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2354,12 +2354,6 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, { } -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index af9eb59e6769..c4e80ad63e09 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1040,12 +1040,13 @@ int __kvm_set_memory_region(struct kvm *kvm, new.base_gfn = base_gfn; new.npages = npages; new.flags = mem->flags; + new.userspace_addr = mem->userspace_addr; if (npages) { if (!old.npages) change = KVM_MR_CREATE; else { /* Modify an existing slot. */ - if ((mem->userspace_addr != old.userspace_addr) || + if ((new.userspace_addr != old.userspace_addr) || (npages != old.npages) || ((new.flags ^ old.flags) & KVM_MEM_READONLY)) goto out; @@ -1080,22 +1081,14 @@ int __kvm_set_memory_region(struct kvm *kvm, } } - /* Free page dirty bitmap if unneeded */ - if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) - new.dirty_bitmap = NULL; - r = -ENOMEM; - if (change == KVM_MR_CREATE) { - new.userspace_addr = mem->userspace_addr; - if (kvm_arch_create_memslot(kvm, &new, npages)) - goto out; - } - - /* Allocate page dirty bitmap if needed */ - if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { + /* Allocate/free page dirty bitmap as needed */ + if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) + new.dirty_bitmap = NULL; + else if (!new.dirty_bitmap) { if (kvm_create_dirty_bitmap(&new) < 0) - goto out_free; + goto out; } slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); -- cgit v1.2.3 From 9d4c197c0e94c372ceffd2ffc53a23518f301ed9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:24 -0800 Subject: KVM: Drop "const" attribute from old memslot in commit_memory_region() Drop the "const" attribute from @old in kvm_arch_commit_memory_region() to allow arch specific code to free arch specific resources in the old memslot without having to cast away the attribute. Freeing resources in kvm_arch_commit_memory_region() paves the way for simplifying kvm_free_memslot() by eliminating the last usage of its @dont param. Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/arm/mmu.c | 2 +- virt/kvm/kvm_main.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b3243d87097f..c7536aa341d2 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -224,7 +224,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 48abf1b9ad58..768c4a9269be 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -701,7 +701,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6638024e440d..78f92c005f93 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4532,7 +4532,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c23b08f48079..a3c92c5077d0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10024,7 +10024,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8f47f6b48444..7827156ec1c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -678,7 +678,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change); bool kvm_largepages_enabled(void); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 1d97410b3470..97b87037aff3 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2251,7 +2251,7 @@ out: void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 882ea3b70ec0..10e9456da6eb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -985,7 +985,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, static int kvm_set_memslot(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, struct kvm_memory_slot *new, int as_id, enum kvm_mr_change change) { -- cgit v1.2.3 From e96c81ee89d80e1a0fe50a0e9be40c1b77e14aaa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:27 -0800 Subject: KVM: Simplify kvm_free_memslot() and all its descendents Now that all callers of kvm_free_memslot() pass NULL for @dont, remove the param from the top-level routine and all arch's implementations. No functional change intended. Tested-by: Christoffer Dall Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 6 ++---- arch/powerpc/kvm/book3s.c | 5 ++--- arch/powerpc/kvm/book3s_hv.c | 9 +++------ arch/powerpc/kvm/book3s_pr.c | 3 +-- arch/powerpc/kvm/booke.c | 3 +-- arch/powerpc/kvm/powerpc.c | 5 ++--- arch/s390/include/asm/kvm_host.h | 2 +- arch/x86/include/asm/kvm_page_track.h | 3 +-- arch/x86/kvm/mmu/page_track.c | 15 ++++++--------- arch/x86/kvm/x86.c | 21 ++++++++------------- include/linux/kvm_host.h | 3 +-- virt/kvm/arm/mmu.c | 3 +-- virt/kvm/kvm_main.c | 18 +++++++----------- 14 files changed, 37 insertions(+), 61 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 41204a49cf95..2c343c346b79 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -1133,7 +1133,7 @@ extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, - struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} + struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index d162649430ba..406ec46304d5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -200,8 +200,7 @@ extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern void kvmppc_core_free_memslot(struct kvm *kvm, - struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); + struct kvm_memory_slot *slot); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, @@ -291,8 +290,7 @@ struct kvmppc_ops { int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); void (*mmu_destroy)(struct kvm_vcpu *vcpu); - void (*free_memslot)(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); + void (*free_memslot)(struct kvm_memory_slot *slot); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index e9149a815806..97ce6c4f7b48 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -804,10 +804,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return kvm->arch.kvm_ops->get_dirty_log(kvm, log); } -void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - kvm->arch.kvm_ops->free_memslot(free, dont); + kvm->arch.kvm_ops->free_memslot(slot); } void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 460f31f94337..a7353a5a0045 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4447,13 +4447,10 @@ out: return r; } -static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot) { - if (!dont || free->arch.rmap != dont->arch.rmap) { - vfree(free->arch.rmap); - free->arch.rmap = NULL; - } + vfree(slot->arch.rmap); + slot->arch.rmap = NULL; } static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3077a0cd0e5e..71ae332b91c9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1942,8 +1942,7 @@ static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, return; } -static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *slot) { return; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 44f50fc50976..b0519069892b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1771,8 +1771,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } -void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 768c4a9269be..838cdcd2db12 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -685,10 +685,9 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - kvmppc_core_free_memslot(kvm, free, dont); + kvmppc_core_free_memslot(kvm, slot); } int kvm_arch_prepare_memory_region(struct kvm *kvm, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1726224e7772..1060508f1a53 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -921,7 +921,7 @@ static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, - struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} + struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 172f9749dbb2..87bd6025d91d 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -49,8 +49,7 @@ struct kvm_page_track_notifier_node { void kvm_page_track_init(struct kvm *kvm); void kvm_page_track_cleanup(struct kvm *kvm); -void kvm_page_track_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); +void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 3521e2d176f2..d125ec379c79 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -19,17 +19,14 @@ #include "mmu.h" -void kvm_page_track_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) { int i; - for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) - if (!dont || free->arch.gfn_track[i] != - dont->arch.gfn_track[i]) { - kvfree(free->arch.gfn_track[i]); - free->arch.gfn_track[i] = NULL; - } + for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { + kvfree(slot->arch.gfn_track[i]); + slot->arch.gfn_track[i] = NULL; + } } int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, @@ -48,7 +45,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, return 0; track_free: - kvm_page_track_free_memslot(slot, NULL); + kvm_page_track_free_memslot(slot); return -ENOMEM; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6068208917dd..88885e3147c4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9850,27 +9850,22 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_hv_destroy_vm(kvm); } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { int i; for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { - if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { - kvfree(free->arch.rmap[i]); - free->arch.rmap[i] = NULL; - } + kvfree(slot->arch.rmap[i]); + slot->arch.rmap[i] = NULL; + if (i == 0) continue; - if (!dont || free->arch.lpage_info[i - 1] != - dont->arch.lpage_info[i - 1]) { - kvfree(free->arch.lpage_info[i - 1]); - free->arch.lpage_info[i - 1] = NULL; - } + kvfree(slot->arch.lpage_info[i - 1]); + slot->arch.lpage_info[i - 1] = NULL; } - kvm_page_track_free_memslot(free, dont); + kvm_page_track_free_memslot(slot); } static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot, @@ -10069,7 +10064,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, /* Free the arrays associated with the old memslot. */ if (change == KVM_MR_MOVE) - kvm_arch_free_memslot(kvm, old, NULL); + kvm_arch_free_memslot(kvm, old); } void kvm_arch_flush_shadow_all(struct kvm *kvm) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7827156ec1c9..5404ef8be291 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -669,8 +669,7 @@ int kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 97b87037aff3..8f22efa095f4 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2349,8 +2349,7 @@ out: return ret; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d15ed920f627..642215e2aeb2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -580,18 +580,14 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) memslot->dirty_bitmap = NULL; } -/* - * Free any memory in @free but not in @dont. - */ -static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - if (!dont || free->dirty_bitmap != dont->dirty_bitmap) - kvm_destroy_dirty_bitmap(free); + kvm_destroy_dirty_bitmap(slot); - kvm_arch_free_memslot(kvm, free, dont); + kvm_arch_free_memslot(kvm, slot); - free->npages = 0; + slot->flags = 0; + slot->npages = 0; } static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) @@ -602,7 +598,7 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) return; kvm_for_each_memslot(memslot, slots) - kvm_free_memslot(kvm, memslot, NULL); + kvm_free_memslot(kvm, memslot); kvfree(slots); } @@ -1060,7 +1056,7 @@ static int kvm_delete_memslot(struct kvm *kvm, if (r) return r; - kvm_free_memslot(kvm, old, NULL); + kvm_free_memslot(kvm, old); return 0; } -- cgit v1.2.3 From 0dff084607bd555d6f74db2af8406a9da9f0fc3a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:29 -0800 Subject: KVM: Provide common implementation for generic dirty log functions Move the implementations of KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG for CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT into common KVM code. The arch specific implemenations are extremely similar, differing only in whether the dirty log needs to be sync'd from hardware (x86) and how the TLBs are flushed. Add new arch hooks to handle sync and TLB flush; the sync will also be used for non-generic dirty log support in a future patch (s390). The ulterior motive for providing a common implementation is to eliminate the dependency between arch and common code with respect to the memslot referenced by the dirty log, i.e. to make it obvious in the code that the validity of the memslot is guaranteed, as a future patch will rework memslot handling such that id_to_memslot() can return NULL. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 63 +++----------------------------- arch/powerpc/kvm/book3s.c | 5 +++ arch/powerpc/kvm/booke.c | 5 +++ arch/s390/kvm/kvm-s390.c | 5 ++- arch/x86/kvm/x86.c | 61 +++---------------------------- include/linux/kvm_host.h | 23 ++++++------ virt/kvm/arm/arm.c | 48 +++---------------------- virt/kvm/kvm_main.c | 91 ++++++++++++++++++++++++++++++++++++----------- 8 files changed, 106 insertions(+), 195 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index c7536aa341d2..78507757ba9a 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -978,69 +978,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, return r; } -/** - * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot - * @kvm: kvm instance - * @log: slot id and address to which we copy the log - * - * Steps 1-4 below provide general overview of dirty page logging. See - * kvm_get_dirty_log_protect() function description for additional details. - * - * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we - * always flush the TLB (step 4) even if previous step failed and the dirty - * bitmap may be corrupt. Regardless of previous outcome the KVM logging API - * does not preclude user space subsequent dirty log read. Flushing TLB ensures - * writes will be marked dirty for next log read. - * - * 1. Take a snapshot of the bit and clear it if needed. - * 2. Write protect the corresponding page. - * 3. Copy the snapshot to the userspace. - * 4. Flush TLB's if needed. - */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - r = kvm_get_dirty_log_protect(kvm, log, &flush); - - if (flush) { - slots = kvm_memslots(kvm); - memslot = id_to_memslot(slots, log->slot); - - /* Let implementation handle TLB/GVA invalidation */ - kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot); - } - mutex_unlock(&kvm->slots_lock); - return r; } -int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) { - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - r = kvm_clear_dirty_log_protect(kvm, log, &flush); - - if (flush) { - slots = kvm_memslots(kvm); - memslot = id_to_memslot(slots, log->slot); - - /* Let implementation handle TLB/GVA invalidation */ - kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot); - } - - mutex_unlock(&kvm->slots_lock); - return r; + /* Let implementation handle TLB/GVA invalidation */ + kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot); } long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 97ce6c4f7b48..0adaf4791a6d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -799,6 +799,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) return vcpu->kvm->arch.kvm_ops->check_requests(vcpu); } +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + +} + int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { return kvm->arch.kvm_ops->get_dirty_log(kvm, log); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b0519069892b..c9f4b374dc56 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1766,6 +1766,11 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return r; } +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + +} + int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { return -ENOTSUPP; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 78f92c005f93..2adbc2fde382 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -570,8 +570,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } -static void kvm_s390_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { int i; gfn_t cur_gfn, last_gfn; @@ -631,7 +630,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - kvm_s390_sync_dirty_log(kvm, memslot); + kvm_arch_sync_dirty_log(kvm, memslot); r = kvm_get_dirty_log(kvm, log, &is_dirty); if (r) goto out; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 88885e3147c4..27b97e546980 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4759,77 +4759,24 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, return 0; } -/** - * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot - * @kvm: kvm instance - * @log: slot id and address to which we copy the log - * - * Steps 1-4 below provide general overview of dirty page logging. See - * kvm_get_dirty_log_protect() function description for additional details. - * - * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we - * always flush the TLB (step 4) even if previous step failed and the dirty - * bitmap may be corrupt. Regardless of previous outcome the KVM logging API - * does not preclude user space subsequent dirty log read. Flushing TLB ensures - * writes will be marked dirty for next log read. - * - * 1. Take a snapshot of the bit and clear it if needed. - * 2. Write protect the corresponding page. - * 3. Copy the snapshot to the userspace. - * 4. Flush TLB's if needed. - */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - /* * Flush potentially hardware-cached dirty pages to dirty_bitmap. */ if (kvm_x86_ops->flush_log_dirty) kvm_x86_ops->flush_log_dirty(kvm); - - r = kvm_get_dirty_log_protect(kvm, log, &flush); - - /* - * All the TLBs can be flushed out of mmu lock, see the comments in - * kvm_mmu_slot_remove_write_access(). - */ - lockdep_assert_held(&kvm->slots_lock); - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; } -int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - /* - * Flush potentially hardware-cached dirty pages to dirty_bitmap. - */ - if (kvm_x86_ops->flush_log_dirty) - kvm_x86_ops->flush_log_dirty(kvm); - - r = kvm_clear_dirty_log_protect(kvm, log, &flush); - /* * All the TLBs can be flushed out of mmu lock, see the comments in * kvm_mmu_slot_remove_write_access(). */ lockdep_assert_held(&kvm->slots_lock); - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; + kvm_flush_remote_tlbs(kvm); } int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5404ef8be291..35e6975d0a82 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -816,23 +816,20 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); -int kvm_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log, int *is_dirty); - -int kvm_get_dirty_log_protect(struct kvm *kvm, - struct kvm_dirty_log *log, bool *flush); -int kvm_clear_dirty_log_protect(struct kvm *kvm, - struct kvm_clear_dirty_log *log, bool *flush); - void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); - -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log); -int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, - struct kvm_clear_dirty_log *log); +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); + +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot); +#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); +int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, + int *is_dirty); +#endif int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index d65a0faa46d8..bfdba1caf59d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1183,55 +1183,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return r; } -/** - * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot - * @kvm: kvm instance - * @log: slot id and address to which we copy the log - * - * Steps 1-4 below provide general overview of dirty page logging. See - * kvm_get_dirty_log_protect() function description for additional details. - * - * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we - * always flush the TLB (step 4) even if previous step failed and the dirty - * bitmap may be corrupt. Regardless of previous outcome the KVM logging API - * does not preclude user space subsequent dirty log read. Flushing TLB ensures - * writes will be marked dirty for next log read. - * - * 1. Take a snapshot of the bit and clear it if needed. - * 2. Write protect the corresponding page. - * 3. Copy the snapshot to the userspace. - * 4. Flush TLB's if needed. - */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - r = kvm_get_dirty_log_protect(kvm, log, &flush); - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; } -int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - r = kvm_clear_dirty_log_protect(kvm, log, &flush); - - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; + kvm_flush_remote_tlbs(kvm); } static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1632e466ad6f..7420aa468b75 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -856,7 +856,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) /* * Allocation size is twice as large as the actual dirty bitmap size. - * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed. + * See kvm_vm_ioctl_get_dirty_log() why this is needed. */ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { @@ -1104,12 +1104,9 @@ int __kvm_set_memory_region(struct kvm *kvm, * Make a full copy of the old memslot, the pointer will become stale * when the memslots are re-sorted by update_memslots(), and the old * memslot needs to be referenced after calling update_memslots(), e.g. - * to free its resources and for arch specific behavior. Kill @tmp - * after making a copy to deter potentially dangerous usage. + * to free its resources and for arch specific behavior. */ - tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id); - old = *tmp; - tmp = NULL; + old = *id_to_memslot(__kvm_memslots(kvm, as_id), id); if (!mem->memory_size) return kvm_delete_memslot(kvm, mem, &old, as_id); @@ -1201,6 +1198,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, return kvm_set_memory_region(kvm, mem); } +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty) { @@ -1234,13 +1232,12 @@ int kvm_get_dirty_log(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_get_dirty_log); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +#else /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ /** * kvm_get_dirty_log_protect - get a snapshot of dirty pages * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address to which we copy the log - * @flush: true if TLB flush is needed by caller * * We need to keep it in mind that VCPU threads can write to the bitmap * concurrently. So, to avoid losing track of dirty pages we keep the @@ -1257,8 +1254,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); * exiting to userspace will be logged for the next call. * */ -int kvm_get_dirty_log_protect(struct kvm *kvm, - struct kvm_dirty_log *log, bool *flush) +static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -1266,6 +1262,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, unsigned long n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; + bool flush; as_id = log->slot >> 16; id = (u16)log->slot; @@ -1279,8 +1276,10 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, if (!dirty_bitmap) return -ENOENT; + kvm_arch_sync_dirty_log(kvm, memslot); + n = kvm_dirty_bitmap_bytes(memslot); - *flush = false; + flush = false; if (kvm->manual_dirty_log_protect) { /* * Unlike kvm_get_dirty_log, we always return false in *flush, @@ -1303,7 +1302,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, if (!dirty_bitmap[i]) continue; - *flush = true; + flush = true; mask = xchg(&dirty_bitmap[i], 0); dirty_bitmap_buffer[i] = mask; @@ -1314,21 +1313,55 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, spin_unlock(&kvm->mmu_lock); } + if (flush) + kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); + + +/** + * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log + * + * Steps 1-4 below provide general overview of dirty page logging. See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. + * + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Copy the snapshot to the userspace. + * 4. Flush TLB's if needed. + */ +static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + + mutex_lock(&kvm->slots_lock); + + r = kvm_get_dirty_log_protect(kvm, log); + + mutex_unlock(&kvm->slots_lock); + return r; +} /** * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address from which to fetch the bitmap of dirty pages - * @flush: true if TLB flush is needed by caller */ -int kvm_clear_dirty_log_protect(struct kvm *kvm, - struct kvm_clear_dirty_log *log, bool *flush) +static int kvm_clear_dirty_log_protect(struct kvm *kvm, + struct kvm_clear_dirty_log *log) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -1337,6 +1370,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, unsigned long i, n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; + bool flush; as_id = log->slot >> 16; id = (u16)log->slot; @@ -1360,7 +1394,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) return -EINVAL; - *flush = false; + kvm_arch_sync_dirty_log(kvm, memslot); + + flush = false; dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) return -EFAULT; @@ -1383,17 +1419,32 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, * a problem if userspace sets them in log->dirty_bitmap. */ if (mask) { - *flush = true; + flush = true; kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask); } } spin_unlock(&kvm->mmu_lock); + if (flush) + kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + return 0; } -EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect); -#endif + +static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, + struct kvm_clear_dirty_log *log) +{ + int r; + + mutex_lock(&kvm->slots_lock); + + r = kvm_clear_dirty_log_protect(kvm, log); + + mutex_unlock(&kvm->slots_lock); + return r; +} +#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ bool kvm_largepages_enabled(void) { -- cgit v1.2.3 From 2a49f61dfcdc25ec06b41f7466ccb94a7a9d2624 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:30 -0800 Subject: KVM: Ensure validity of memslot with respect to kvm_get_dirty_log() Rework kvm_get_dirty_log() so that it "returns" the associated memslot on success. A future patch will rework memslot handling such that id_to_memslot() can return NULL, returning the memslot makes it more obvious that the validity of the memslot has been verified, i.e. precludes the need to add validity checks in the arch code that are technically unnecessary. To maintain ordering in s390, move the call to kvm_arch_sync_dirty_log() from s390's kvm_vm_ioctl_get_dirty_log() to the new kvm_get_dirty_log(). This is a nop for PPC, the only other arch that doesn't select KVM_GENERIC_DIRTYLOG_READ_PROTECT, as its sync_dirty_log() is empty. Ideally, moving the sync_dirty_log() call would be done in a separate patch, but it can't be done in a follow-on patch because that would temporarily break s390's ordering. Making the move in a preparatory patch would be functionally correct, but would create an odd scenario where the moved sync_dirty_log() would operate on a "different" memslot due to consuming the result of a different id_to_memslot(). The memslot couldn't actually be different as slots_lock is held, but the code is confusing enough as it is, i.e. moving sync_dirty_log() in this patch is the lesser of all evils. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_pr.c | 6 +----- arch/s390/kvm/kvm-s390.c | 12 ++---------- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 27 +++++++++++++++++++-------- 4 files changed, 23 insertions(+), 24 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 71ae332b91c9..3bc2f5da8fa1 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1884,7 +1884,6 @@ out: static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, struct kvm_dirty_log *log) { - struct kvm_memslots *slots; struct kvm_memory_slot *memslot; struct kvm_vcpu *vcpu; ulong ga, ga_end; @@ -1894,15 +1893,12 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, mutex_lock(&kvm->slots_lock); - r = kvm_get_dirty_log(kvm, log, &is_dirty); + r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot); if (r) goto out; /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - slots = kvm_memslots(kvm); - memslot = id_to_memslot(slots, log->slot); - ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2adbc2fde382..fb081c5715b2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -611,9 +611,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, { int r; unsigned long n; - struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int is_dirty = 0; + int is_dirty; if (kvm_is_ucontrol(kvm)) return -EINVAL; @@ -624,14 +623,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - slots = kvm_memslots(kvm); - memslot = id_to_memslot(slots, log->slot); - r = -ENOENT; - if (!memslot->dirty_bitmap) - goto out; - - kvm_arch_sync_dirty_log(kvm, memslot); - r = kvm_get_dirty_log(kvm, log, &is_dirty); + r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot); if (r) goto out; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 35e6975d0a82..63ce6b21b107 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -828,7 +828,7 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, #else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, - int *is_dirty); + int *is_dirty, struct kvm_memory_slot **memslot); #endif int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7420aa468b75..c115b78e85c3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1199,31 +1199,42 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, } #ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -int kvm_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log, int *is_dirty) +/** + * kvm_get_dirty_log - get a snapshot of dirty pages + * @kvm: pointer to kvm instance + * @log: slot id and address to which we copy the log + * @is_dirty: set to '1' if any dirty pages were found + * @memslot: set to the associated memslot, always valid on success + */ +int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, + int *is_dirty, struct kvm_memory_slot **memslot) { struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; int i, as_id, id; unsigned long n; unsigned long any = 0; + *memslot = NULL; + *is_dirty = 0; + as_id = log->slot >> 16; id = (u16)log->slot; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) return -EINVAL; slots = __kvm_memslots(kvm, as_id); - memslot = id_to_memslot(slots, id); - if (!memslot->dirty_bitmap) + *memslot = id_to_memslot(slots, id); + if (!(*memslot)->dirty_bitmap) return -ENOENT; - n = kvm_dirty_bitmap_bytes(memslot); + kvm_arch_sync_dirty_log(kvm, *memslot); + + n = kvm_dirty_bitmap_bytes(*memslot); for (i = 0; !any && i < n/sizeof(long); ++i) - any = memslot->dirty_bitmap[i]; + any = (*memslot)->dirty_bitmap[i]; - if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) + if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) return -EFAULT; if (any) -- cgit v1.2.3 From 0577d1abe704c315bb5cdfc71f4ca7b9b5358f59 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:31 -0800 Subject: KVM: Terminate memslot walks via used_slots Refactor memslot handling to treat the number of used slots as the de facto size of the memslot array, e.g. return NULL from id_to_memslot() when an invalid index is provided instead of relying on npages==0 to detect an invalid memslot. Rework the sorting and walking of memslots in advance of dynamically sizing memslots to aid bisection and debug, e.g. with luck, a bug in the refactoring will bisect here and/or hit a WARN instead of randomly corrupting memory. Alternatively, a global null/invalid memslot could be returned, i.e. so callers of id_to_memslot() don't have to explicitly check for a NULL memslot, but that approach runs the risk of introducing difficult-to- debug issues, e.g. if the global null slot is modified. Constifying the return from id_to_memslot() to combat such issues is possible, but would require a massive refactoring of arch specific code and would still be susceptible to casting shenanigans. Add function comments to update_memslots() and search_memslots() to explicitly (and loudly) state how memslots are sorted. Opportunistically stuff @hva with a non-canonical value when deleting a private memslot on x86 to detect bogus usage of the freed slot. No functional change intended. Tested-by: Christoffer Dall Tested-by: Marc Zyngier Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/x86/kvm/x86.c | 15 ++-- include/linux/kvm_host.h | 18 ++-- virt/kvm/arm/mmu.c | 9 +- virt/kvm/kvm_main.c | 210 ++++++++++++++++++++++++++++++++----------- 5 files changed, 186 insertions(+), 68 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a7353a5a0045..6a4a30b9d750 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4400,7 +4400,7 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, slots = kvm_memslots(kvm); memslot = id_to_memslot(slots, log->slot); r = -ENOENT; - if (!memslot->dirty_bitmap) + if (!memslot || !memslot->dirty_bitmap) goto out; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 27b97e546980..13ac4d0f9794 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9715,9 +9715,9 @@ void kvm_arch_sync_events(struct kvm *kvm) int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) { int i, r; - unsigned long hva; + unsigned long hva, uninitialized_var(old_npages); struct kvm_memslots *slots = kvm_memslots(kvm); - struct kvm_memory_slot *slot, old; + struct kvm_memory_slot *slot; /* Called with kvm->slots_lock held. */ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) @@ -9725,7 +9725,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) slot = id_to_memslot(slots, id); if (size) { - if (slot->npages) + if (slot && slot->npages) return -EEXIST; /* @@ -9737,13 +9737,14 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) if (IS_ERR((void *)hva)) return PTR_ERR((void *)hva); } else { - if (!slot->npages) + if (!slot || !slot->npages) return 0; - hva = 0; + /* Stuff a non-canonical value to catch use-after-delete. */ + hva = 0xdeadull << 48; + old_npages = slot->npages; } - old = *slot; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { struct kvm_userspace_memory_region m; @@ -9758,7 +9759,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } if (!size) - vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); + vm_munmap(hva, old_npages * PAGE_SIZE); return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 63ce6b21b107..20763598b13b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -572,10 +572,11 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) return vcpu->vcpu_idx; } -#define kvm_for_each_memslot(memslot, slots) \ - for (memslot = &slots->memslots[0]; \ - memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ - memslot++) +#define kvm_for_each_memslot(memslot, slots) \ + for (memslot = &slots->memslots[0]; \ + memslot < slots->memslots + slots->used_slots; memslot++) \ + if (WARN_ON_ONCE(!memslot->npages)) { \ + } else void kvm_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -635,12 +636,15 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) return __kvm_memslots(vcpu->kvm, as_id); } -static inline struct kvm_memory_slot * -id_to_memslot(struct kvm_memslots *slots, int id) +static inline +struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) { int index = slots->id_to_index[id]; struct kvm_memory_slot *slot; + if (index < 0) + return NULL; + slot = &slots->memslots[index]; WARN_ON(slot->id != id); @@ -1012,6 +1016,8 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. * gfn_to_memslot() itself isn't here as an inline because that would * bloat other code too much. + * + * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! */ static inline struct kvm_memory_slot * search_memslots(struct kvm_memslots *slots, gfn_t gfn) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 8f22efa095f4..e3b9ee268823 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1534,8 +1534,13 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) { struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); - phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; - phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + phys_addr_t start, end; + + if (WARN_ON_ONCE(!memslot)) + return; + + start = memslot->base_gfn << PAGE_SHIFT; + end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); stage2_wp_range(kvm, start, end); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c115b78e85c3..62e45c64e443 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -566,7 +566,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) return NULL; for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - slots->id_to_index[i] = slots->memslots[i].id = i; + slots->id_to_index[i] = slots->memslots[i].id = -1; return slots; } @@ -870,63 +870,162 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) } /* - * Insert memslot and re-sort memslots based on their GFN, - * so binary search could be used to lookup GFN. - * Sorting algorithm takes advantage of having initially - * sorted array and known changed memslot position. + * Delete a memslot by decrementing the number of used slots and shifting all + * other entries in the array forward one spot. */ -static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new, - enum kvm_mr_change change) +static inline void kvm_memslot_delete(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot) { - int id = new->id; - int i = slots->id_to_index[id]; struct kvm_memory_slot *mslots = slots->memslots; + int i; - WARN_ON(mslots[i].id != id); - switch (change) { - case KVM_MR_CREATE: - slots->used_slots++; - WARN_ON(mslots[i].npages || !new->npages); - break; - case KVM_MR_DELETE: - slots->used_slots--; - WARN_ON(new->npages || !mslots[i].npages); - break; - default: - break; - } + if (WARN_ON(slots->id_to_index[memslot->id] == -1)) + return; - while (i < KVM_MEM_SLOTS_NUM - 1 && - new->base_gfn <= mslots[i + 1].base_gfn) { - if (!mslots[i + 1].npages) - break; + slots->used_slots--; + + for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) { mslots[i] = mslots[i + 1]; slots->id_to_index[mslots[i].id] = i; - i++; } + mslots[i] = *memslot; + slots->id_to_index[memslot->id] = -1; +} + +/* + * "Insert" a new memslot by incrementing the number of used slots. Returns + * the new slot's initial index into the memslots array. + */ +static inline int kvm_memslot_insert_back(struct kvm_memslots *slots) +{ + return slots->used_slots++; +} + +/* + * Move a changed memslot backwards in the array by shifting existing slots + * with a higher GFN toward the front of the array. Note, the changed memslot + * itself is not preserved in the array, i.e. not swapped at this time, only + * its new index into the array is tracked. Returns the changed memslot's + * current index into the memslots array. + */ +static inline int kvm_memslot_move_backward(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot) +{ + struct kvm_memory_slot *mslots = slots->memslots; + int i; + + if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) || + WARN_ON_ONCE(!slots->used_slots)) + return -1; /* - * The ">=" is needed when creating a slot with base_gfn == 0, - * so that it moves before all those with base_gfn == npages == 0. - * - * On the other hand, if new->npages is zero, the above loop has - * already left i pointing to the beginning of the empty part of - * mslots, and the ">=" would move the hole backwards in this - * case---which is wrong. So skip the loop when deleting a slot. + * Move the target memslot backward in the array by shifting existing + * memslots with a higher GFN (than the target memslot) towards the + * front of the array. */ - if (new->npages) { - while (i > 0 && - new->base_gfn >= mslots[i - 1].base_gfn) { - mslots[i] = mslots[i - 1]; - slots->id_to_index[mslots[i].id] = i; - i--; - } - } else - WARN_ON_ONCE(i != slots->used_slots); + for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) { + if (memslot->base_gfn > mslots[i + 1].base_gfn) + break; + + WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn); + + /* Shift the next memslot forward one and update its index. */ + mslots[i] = mslots[i + 1]; + slots->id_to_index[mslots[i].id] = i; + } + return i; +} + +/* + * Move a changed memslot forwards in the array by shifting existing slots with + * a lower GFN toward the back of the array. Note, the changed memslot itself + * is not preserved in the array, i.e. not swapped at this time, only its new + * index into the array is tracked. Returns the changed memslot's final index + * into the memslots array. + */ +static inline int kvm_memslot_move_forward(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot, + int start) +{ + struct kvm_memory_slot *mslots = slots->memslots; + int i; + + for (i = start; i > 0; i--) { + if (memslot->base_gfn < mslots[i - 1].base_gfn) + break; + + WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn); - mslots[i] = *new; - slots->id_to_index[mslots[i].id] = i; + /* Shift the next memslot back one and update its index. */ + mslots[i] = mslots[i - 1]; + slots->id_to_index[mslots[i].id] = i; + } + return i; +} + +/* + * Re-sort memslots based on their GFN to account for an added, deleted, or + * moved memslot. Sorting memslots by GFN allows using a binary search during + * memslot lookup. + * + * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! I.e. the entry + * at memslots[0] has the highest GFN. + * + * The sorting algorithm takes advantage of having initially sorted memslots + * and knowing the position of the changed memslot. Sorting is also optimized + * by not swapping the updated memslot and instead only shifting other memslots + * and tracking the new index for the update memslot. Only once its final + * index is known is the updated memslot copied into its position in the array. + * + * - When deleting a memslot, the deleted memslot simply needs to be moved to + * the end of the array. + * + * - When creating a memslot, the algorithm "inserts" the new memslot at the + * end of the array and then it forward to its correct location. + * + * - When moving a memslot, the algorithm first moves the updated memslot + * backward to handle the scenario where the memslot's GFN was changed to a + * lower value. update_memslots() then falls through and runs the same flow + * as creating a memslot to move the memslot forward to handle the scenario + * where its GFN was changed to a higher value. + * + * Note, slots are sorted from highest->lowest instead of lowest->highest for + * historical reasons. Originally, invalid memslots where denoted by having + * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots + * to the end of the array. The current algorithm uses dedicated logic to + * delete a memslot and thus does not rely on invalid memslots having GFN=0. + * + * The other historical motiviation for highest->lowest was to improve the + * performance of memslot lookup. KVM originally used a linear search starting + * at memslots[0]. On x86, the largest memslot usually has one of the highest, + * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a + * single memslot above the 4gb boundary. As the largest memslot is also the + * most likely to be referenced, sorting it to the front of the array was + * advantageous. The current binary search starts from the middle of the array + * and uses an LRU pointer to improve performance for all memslots and GFNs. + */ +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot, + enum kvm_mr_change change) +{ + int i; + + if (change == KVM_MR_DELETE) { + kvm_memslot_delete(slots, memslot); + } else { + if (change == KVM_MR_CREATE) + i = kvm_memslot_insert_back(slots); + else + i = kvm_memslot_move_backward(slots, memslot); + i = kvm_memslot_move_forward(slots, memslot, i); + + /* + * Copy the memslot to its new position in memslots and update + * its index accordingly. + */ + slots->memslots[i] = *memslot; + slots->id_to_index[memslot->id] = i; + } } static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) @@ -1106,7 +1205,14 @@ int __kvm_set_memory_region(struct kvm *kvm, * memslot needs to be referenced after calling update_memslots(), e.g. * to free its resources and for arch specific behavior. */ - old = *id_to_memslot(__kvm_memslots(kvm, as_id), id); + tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id); + if (tmp) { + old = *tmp; + tmp = NULL; + } else { + memset(&old, 0, sizeof(old)); + old.id = id; + } if (!mem->memory_size) return kvm_delete_memslot(kvm, mem, &old, as_id); @@ -1224,7 +1330,7 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, slots = __kvm_memslots(kvm, as_id); *memslot = id_to_memslot(slots, id); - if (!(*memslot)->dirty_bitmap) + if (!(*memslot) || !(*memslot)->dirty_bitmap) return -ENOENT; kvm_arch_sync_dirty_log(kvm, *memslot); @@ -1282,10 +1388,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); + if (!memslot || !memslot->dirty_bitmap) + return -ENOENT; dirty_bitmap = memslot->dirty_bitmap; - if (!dirty_bitmap) - return -ENOENT; kvm_arch_sync_dirty_log(kvm, memslot); @@ -1393,10 +1499,10 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); + if (!memslot || !memslot->dirty_bitmap) + return -ENOENT; dirty_bitmap = memslot->dirty_bitmap; - if (!dirty_bitmap) - return -ENOENT; n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; -- cgit v1.2.3 From 4d395762599dbab1eb29d9011d5b75ca3cc4f70a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 28 Feb 2020 13:30:20 -0500 Subject: KVM: Remove unnecessary asm/kvm_host.h includes Remove includes of asm/kvm_host.h from files that already include linux/kvm_host.h to make it more obvious that there is no ordering issue between the two headers. linux/kvm_host.h includes asm/kvm_host.h to pick up architecture specific settings, and this will never change, i.e. including asm/kvm_host.h after linux/kvm_host.h may seem problematic, but in practice is simply redundant. Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- arch/arm/kvm/coproc.c | 1 - arch/arm64/kvm/fpsimd.c | 1 - arch/arm64/kvm/guest.c | 1 - arch/arm64/kvm/hyp/switch.c | 1 - arch/arm64/kvm/sys_regs.c | 1 - arch/arm64/kvm/sys_regs_generic_v8.c | 1 - arch/powerpc/kvm/book3s_64_vio.c | 1 - arch/powerpc/kvm/book3s_64_vio_hv.c | 1 - arch/powerpc/kvm/book3s_hv.c | 1 - arch/powerpc/kvm/mpic.c | 1 - arch/powerpc/kvm/powerpc.c | 1 - arch/powerpc/kvm/timing.h | 1 - arch/s390/kvm/intercept.c | 1 - arch/x86/kvm/mmu/page_track.c | 1 - virt/kvm/arm/psci.c | 1 - 15 files changed, 15 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 07745ee022a1..f0c09049ee99 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 525010504f9d..e329a36b2bee 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2bd92301d32f..23ebe51410f0 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "trace.h" diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index dfe8dd172512..f3e0ab961565 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3e909b117f0c..b95f7b7743c8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 2b4a3e2d1b89..9cb6b4c8355a 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index ee6c103bb7d5..50555ad1db93 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ab6eeb8e753e..6fcaf1fa8e02 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6a4a30b9d750..fbc55a12b691 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -72,7 +72,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index fe312c160d97..23e9c2bd9f27 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 838cdcd2db12..62ee66d5eb6f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -32,7 +32,6 @@ #include #endif #include -#include #include "timing.h" #include "irq.h" diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index ace65f9fed30..feef7885ba82 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -10,7 +10,6 @@ #define __POWERPC_KVM_EXITTIMING_H__ #include -#include #ifdef CONFIG_KVM_EXIT_TIMING void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index a389fa85cca2..3655196f1c03 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index d125ec379c79..ddc1ec3bdacd 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -14,7 +14,6 @@ #include #include -#include #include #include "mmu.h" diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 17e2bdd4b76f..14a162e295a9 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -12,7 +12,6 @@ #include #include -#include #include #include -- cgit v1.2.3 From cd758a9b57ee85f0733c759e60f42b969c81f27b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 26 Aug 2019 16:20:47 +1000 Subject: KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot in HPT page fault handler This makes the same changes in the page fault handler for HPT guests that commits 31c8b0d0694a ("KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot() in page fault handler", 2018-03-01), 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size", 2018-09-11) and 6579804c4317 ("KVM: PPC: Book3S HV: Avoid crash from THP collapse during radix page fault", 2018-10-04) made for the page fault handler for radix guests. In summary, where we used to call get_user_pages_fast() and then do special handling for VM_PFNMAP vmas, we now call __get_user_pages_fast() and then __gfn_to_pfn_memslot() if that fails, followed by reading the Linux PTE to get the host PFN, host page size and mapping attributes. This also brings in the change from SetPageDirty() to set_page_dirty_lock() which was done for the radix page fault handler in commit c3856aeb2940 ("KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler", 2018-02-23). Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 119 +++++++++++++++++------------------- 1 file changed, 57 insertions(+), 62 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6c372f5c61b6..3aecec890d6f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; - unsigned long gpa, gfn, hva, pfn; + unsigned long gpa, gfn, hva, pfn, hpa; struct kvm_memory_slot *memslot; unsigned long *rmap; struct revmap_entry *rev; - struct page *page, *pages[1]; - long index, ret, npages; + struct page *page; + long index, ret; bool is_ci; - unsigned int writing, write_ok; - struct vm_area_struct *vma; + bool writing, write_ok; + unsigned int shift; unsigned long rcbits; long mmio_update; - struct mm_struct *mm; + pte_t pte, *ptep; if (kvm_is_radix(kvm)) return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); @@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, smp_rmb(); ret = -EFAULT; - is_ci = false; - pfn = 0; page = NULL; - mm = kvm->mm; - pte_size = PAGE_SIZE; writing = (dsisr & DSISR_ISSTORE) != 0; /* If writing != 0, then the HPTE must allow writing, if we get here */ write_ok = writing; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); - if (npages < 1) { - /* Check if it's an I/O mapping */ - down_read(&mm->mmap_sem); - vma = find_vma(mm, hva); - if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && - (vma->vm_flags & VM_PFNMAP)) { - pfn = vma->vm_pgoff + - ((hva - vma->vm_start) >> PAGE_SHIFT); - pte_size = psize; - is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot)))); - write_ok = vma->vm_flags & VM_WRITE; - } - up_read(&mm->mmap_sem); - if (!pfn) - goto out_put; + + /* + * Do a fast check first, since __gfn_to_pfn_memslot doesn't + * do it with !atomic && !async, which is how we call it. + * We always ask for write permission since the common case + * is that the page is writable. + */ + if (__get_user_pages_fast(hva, 1, 1, &page) == 1) { + write_ok = true; } else { - page = pages[0]; - pfn = page_to_pfn(page); - if (PageHuge(page)) { - page = compound_head(page); - pte_size <<= compound_order(page); - } - /* if the guest wants write access, see if that is OK */ - if (!writing && hpte_is_writable(r)) { - pte_t *ptep, pte; - unsigned long flags; - /* - * We need to protect against page table destruction - * hugepage split and collapse. - */ - local_irq_save(flags); - ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL); - if (ptep) { - pte = kvmppc_read_update_linux_pte(ptep, 1); - if (__pte_write(pte)) - write_ok = 1; - } - local_irq_restore(flags); + /* Call KVM generic code to do the slow-path check */ + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, + writing, &write_ok); + if (is_error_noslot_pfn(pfn)) + return -EFAULT; + page = NULL; + if (pfn_valid(pfn)) { + page = pfn_to_page(pfn); + if (PageReserved(page)) + page = NULL; } } + /* + * Read the PTE from the process' radix tree and use that + * so we get the shift and attribute bits. + */ + local_irq_disable(); + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + /* + * If the PTE disappeared temporarily due to a THP + * collapse, just return and let the guest try again. + */ + if (!ptep) { + local_irq_enable(); + if (page) + put_page(page); + return RESUME_GUEST; + } + pte = *ptep; + local_irq_enable(); + hpa = pte_pfn(pte) << PAGE_SHIFT; + pte_size = PAGE_SIZE; + if (shift) + pte_size = 1ul << shift; + is_ci = pte_ci(pte); + if (psize > pte_size) goto out_put; + if (pte_size > psize) + hpa |= hva & (pte_size - psize); /* Check WIMG vs. the actual page we're accessing */ if (!hpte_cache_flags_ok(r, is_ci)) { @@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } /* - * Set the HPTE to point to pfn. - * Since the pfn is at PAGE_SIZE granularity, make sure we + * Set the HPTE to point to hpa. + * Since the hpa is at PAGE_SIZE granularity, make sure we * don't mask out lower-order bits if psize < PAGE_SIZE. */ if (psize < PAGE_SIZE) psize = PAGE_SIZE; - r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | - ((pfn << PAGE_SHIFT) & ~(psize - 1)); + r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa; if (hpte_is_writable(r) && !write_ok) r = hpte_make_readonly(r); ret = RESUME_GUEST; @@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, asm volatile("ptesync" : : : "memory"); preempt_enable(); if (page && hpte_is_writable(r)) - SetPageDirty(page); + set_page_dirty_lock(page); out_put: trace_kvm_page_fault_exit(vcpu, hpte, ret); - if (page) { - /* - * We drop pages[0] here, not page because page might - * have been set to the head page of a compound, but - * we have to drop the reference on the correct tail - * page to match the get inside gup() - */ - put_page(pages[0]); - } + if (page) + put_page(page); return ret; out_unlock: -- cgit v1.2.3 From afd313564cf1904bbdb52052c80a2522b55782d3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 18 Feb 2020 15:36:50 +1100 Subject: KVM: PPC: Book3S HV: Use RADIX_PTE_INDEX_SIZE in Radix MMU code In kvmppc_unmap_free_pte() in book3s_64_mmu_radix.c, we use the non-constant value PTE_INDEX_SIZE to clear a PTE page. We can instead use the constant RADIX_PTE_INDEX_SIZE, because we know this code will only be running when the Radix MMU is active. Note that we already use RADIX_PTE_INDEX_SIZE for the allocation of kvm_pte_cache. Signed-off-by: Michael Ellerman Reviewed-by: Leonardo Bras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 803940d79b73..134fbc1f029f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -425,7 +425,7 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, unsigned int lpid) { if (full) { - memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); + memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE); } else { pte_t *p = pte; unsigned long it; -- cgit v1.2.3 From 1dff3064c764b5a51c367b949b341d2e38972bec Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Fri, 21 Feb 2020 11:29:50 -0500 Subject: KVM: PPC: Book3S HV: Treat TM-related invalid form instructions on P9 like the valid ones On P9 DD2.2 due to a CPU defect some TM instructions need to be emulated by KVM. This is handled at first by the hardware raising a softpatch interrupt when certain TM instructions that need KVM assistance are executed in the guest. Althought some TM instructions per Power ISA are invalid forms they can raise a softpatch interrupt too. For instance, 'tresume.' instruction as defined in the ISA must have bit 31 set (1), but an instruction that matches 'tresume.' PO and XO opcode fields but has bit 31 not set (0), like 0x7cfe9ddc, also raises a softpatch interrupt. Similarly for 'treclaim.' and 'trechkpt.' instructions with bit 31 = 0, i.e. 0x7c00075c and 0x7c0007dc, respectively. Hence, if a code like the following is executed in the guest it will raise a softpatch interrupt just like a 'tresume.' when the TM facility is enabled ('tabort. 0' in the example is used only to enable the TM facility): int main() { asm("tabort. 0; .long 0x7cfe9ddc;"); } Currently in such a case KVM throws a complete trace like: [345523.705984] WARNING: CPU: 24 PID: 64413 at arch/powerpc/kvm/book3s_hv_tm.c:211 kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv] [345523.705985] Modules linked in: kvm_hv(E) xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp ip6table_mangle ip6table_nat iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter bridge stp llc sch_fq_codel ipmi_powernv at24 vmx_crypto ipmi_devintf ipmi_msghandler ibmpowernv uio_pdrv_genirq kvm opal_prd uio leds_powernv ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c xor raid6_pq raid1 raid0 multipath linear tg3 crct10dif_vpmsum crc32c_vpmsum ipr [last unloaded: kvm_hv] [345523.706030] CPU: 24 PID: 64413 Comm: CPU 0/KVM Tainted: G W E 5.5.0+ #1 [345523.706031] NIP: c0080000072cb9c0 LR: c0080000072b5e80 CTR: c0080000085c7850 [345523.706034] REGS: c000000399467680 TRAP: 0700 Tainted: G W E (5.5.0+) [345523.706034] MSR: 900000010282b033 CR: 24022428 XER: 00000000 [345523.706042] CFAR: c0080000072b5e7c IRQMASK: 0 GPR00: c0080000072b5e80 c000000399467910 c0080000072db500 c000000375ccc720 GPR04: c000000375ccc720 00000003fbec0000 0000a10395dda5a6 0000000000000000 GPR08: 000000007cfe9ddc 7cfe9ddc000005dc 7cfe9ddc7c0005dc c0080000072cd530 GPR12: c0080000085c7850 c0000003fffeb800 0000000000000001 00007dfb737f0000 GPR16: c0002001edcca558 0000000000000000 0000000000000000 0000000000000001 GPR20: c000000001b21258 c0002001edcca558 0000000000000018 0000000000000000 GPR24: 0000000001000000 ffffffffffffffff 0000000000000001 0000000000001500 GPR28: c0002001edcc4278 c00000037dd80000 800000050280f033 c000000375ccc720 [345523.706062] NIP [c0080000072cb9c0] kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv] [345523.706065] LR [c0080000072b5e80] kvmppc_handle_exit_hv.isra.53+0x3e8/0x798 [kvm_hv] [345523.706066] Call Trace: [345523.706069] [c000000399467910] [c000000399467940] 0xc000000399467940 (unreliable) [345523.706071] [c000000399467950] [c000000399467980] 0xc000000399467980 [345523.706075] [c0000003994679f0] [c0080000072bd1c4] kvmhv_run_single_vcpu+0xa1c/0xb80 [kvm_hv] [345523.706079] [c000000399467ac0] [c0080000072bd8e0] kvmppc_vcpu_run_hv+0x5b8/0xb00 [kvm_hv] [345523.706087] [c000000399467b90] [c0080000085c93cc] kvmppc_vcpu_run+0x34/0x48 [kvm] [345523.706095] [c000000399467bb0] [c0080000085c582c] kvm_arch_vcpu_ioctl_run+0x244/0x420 [kvm] [345523.706101] [c000000399467c40] [c0080000085b7498] kvm_vcpu_ioctl+0x3d0/0x7b0 [kvm] [345523.706105] [c000000399467db0] [c0000000004adf9c] ksys_ioctl+0x13c/0x170 [345523.706107] [c000000399467e00] [c0000000004adff8] sys_ioctl+0x28/0x80 [345523.706111] [c000000399467e20] [c00000000000b278] system_call+0x5c/0x68 [345523.706112] Instruction dump: [345523.706114] 419e0390 7f8a4840 409d0048 6d497c00 2f89075d 419e021c 6d497c00 2f8907dd [345523.706119] 419e01c0 6d497c00 2f8905dd 419e00a4 <0fe00000> 38210040 38600000 ebc1fff0 and then treats the executed instruction as a 'nop'. However the POWER9 User's Manual, in section "4.6.10 Book II Invalid Forms", informs that for TM instructions bit 31 is in fact ignored, thus for the TM-related invalid forms ignoring bit 31 and handling them like the valid forms is an acceptable way to handle them. POWER8 behaves the same way too. This commit changes the handling of the cases here described by treating the TM-related invalid forms that can generate a softpatch interrupt just like their valid forms (w/ bit 31 = 1) instead of as a 'nop' and by gently reporting any other unrecognized case to the host and treating it as illegal instruction instead of throwing a trace and treating it as a 'nop'. Signed-off-by: Gustavo Romero Reviewed-by: Segher Boessenkool Acked-By: Michael Neuling Reviewed-by: Leonardo Bras Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_asm.h | 3 +++ arch/powerpc/kvm/book3s_hv_tm.c | 28 +++++++++++++++++++++++----- arch/powerpc/kvm/book3s_hv_tm_builtin.c | 16 ++++++++++++++-- 3 files changed, 40 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 635fb154b33f..a3633560493b 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -150,4 +150,7 @@ #define KVM_INST_FETCH_FAILED -1 +/* Extract PO and XOP opcode fields */ +#define PO_XOP_OPCODE_MASK 0xfc0007fe + #endif /* __POWERPC_KVM_ASM_H__ */ diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index 0db937497169..cc90b8b82329 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -3,6 +3,8 @@ * Copyright 2017 Paul Mackerras, IBM Corp. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -44,7 +46,18 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) u64 newmsr, bescr; int ra, rs; - switch (instr & 0xfc0007ff) { + /* + * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit + * in these instructions, so masking bit 31 out doesn't change these + * instructions. For treclaim., tsr., and trechkpt. instructions if bit + * 31 = 0 then they are per ISA invalid forms, however P9 UM, in section + * 4.6.10 Book II Invalid Forms, informs specifically that ignoring bit + * 31 is an acceptable way to handle these invalid forms that have + * bit 31 = 0. Moreover, for emulation purposes both forms (w/ and wo/ + * bit 31 set) can generate a softpatch interrupt. Hence both forms + * are handled below for these instructions so they behave the same way. + */ + switch (instr & PO_XOP_OPCODE_MASK) { case PPC_INST_RFID: /* XXX do we need to check for PR=0 here? */ newmsr = vcpu->arch.shregs.srr1; @@ -105,7 +118,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr = newmsr; return RESUME_GUEST; - case PPC_INST_TSR: + /* ignore bit 31, see comment above */ + case (PPC_INST_TSR & PO_XOP_OPCODE_MASK): /* check for PR=1 and arch 2.06 bit set in PCR */ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { /* generate an illegal instruction interrupt */ @@ -140,7 +154,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr = msr; return RESUME_GUEST; - case PPC_INST_TRECLAIM: + /* ignore bit 31, see comment above */ + case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK): /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { /* generate an illegal instruction interrupt */ @@ -176,7 +191,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr &= ~MSR_TS_MASK; return RESUME_GUEST; - case PPC_INST_TRECHKPT: + /* ignore bit 31, see comment above */ + case (PPC_INST_TRECHKPT & PO_XOP_OPCODE_MASK): /* XXX do we need to check for PR=0 here? */ /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { @@ -208,6 +224,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* What should we do here? We didn't recognize the instruction */ - WARN_ON_ONCE(1); + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + pr_warn_ratelimited("Unrecognized TM-related instruction %#x for emulation", instr); + return RESUME_GUEST; } diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c index 217246279dfa..fad931f224ef 100644 --- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -23,7 +23,18 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) u64 newmsr, msr, bescr; int rs; - switch (instr & 0xfc0007ff) { + /* + * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit + * in these instructions, so masking bit 31 out doesn't change these + * instructions. For the tsr. instruction if bit 31 = 0 then it is per + * ISA an invalid form, however P9 UM, in section 4.6.10 Book II Invalid + * Forms, informs specifically that ignoring bit 31 is an acceptable way + * to handle TM-related invalid forms that have bit 31 = 0. Moreover, + * for emulation purposes both forms (w/ and wo/ bit 31 set) can + * generate a softpatch interrupt. Hence both forms are handled below + * for tsr. to make them behave the same way. + */ + switch (instr & PO_XOP_OPCODE_MASK) { case PPC_INST_RFID: /* XXX do we need to check for PR=0 here? */ newmsr = vcpu->arch.shregs.srr1; @@ -73,7 +84,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr = newmsr; return 1; - case PPC_INST_TSR: + /* ignore bit 31, see comment above */ + case (PPC_INST_TSR & PO_XOP_OPCODE_MASK): /* we know the MSR has the TS field = S (0b01) here */ msr = vcpu->arch.shregs.msr; /* check for PR=1 and arch 2.06 bit set in PCR */ -- cgit v1.2.3 From 1f50cc1705350a4697923203fedd7d8fb1087fe2 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 10 Mar 2020 16:11:28 -0500 Subject: KVM: PPC: Book3S HV: Fix H_CEDE return code for nested guests The h_cede_tm kvm-unit-test currently fails when run inside an L1 guest via the guest/nested hypervisor. ./run-tests.sh -v ... TESTNAME=h_cede_tm TIMEOUT=90s ACCEL= ./powerpc/run powerpc/tm.elf -smp 2,threads=2 -machine cap-htm=on -append "h_cede_tm" FAIL h_cede_tm (2 tests, 1 unexpected failures) While the test relates to transactional memory instructions, the actual failure is due to the return code of the H_CEDE hypercall, which is reported as 224 instead of 0. This happens even when no TM instructions are issued. 224 is the value placed in r3 to execute a hypercall for H_CEDE, and r3 is where the caller expects the return code to be placed upon return. In the case of guest running under a nested hypervisor, issuing H_CEDE causes a return from H_ENTER_NESTED. In this case H_CEDE is specially-handled immediately rather than later in kvmppc_pseries_do_hcall() as with most other hcalls, but we forget to set the return code for the caller, hence why kvm-unit-test sees the 224 return code and reports an error. Guest kernels generally don't check the return value of H_CEDE, so that likely explains why this hasn't caused issues outside of kvm-unit-tests so far. Fix this by setting r3 to 0 after we finish processing the H_CEDE. RHBZ: 1778556 Fixes: 4bad77799fed ("KVM: PPC: Book3S HV: Handle hypercalls correctly when nested") Cc: linuxppc-dev@ozlabs.org Cc: David Gibson Cc: Paul Mackerras Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fbc55a12b691..18675bd48e90 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3615,6 +3615,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { kvmppc_nested_cede(vcpu); + kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } } else { -- cgit v1.2.3 From 8fc6ba0a205e9adfaf976077be976f1d4dcd45eb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 Mar 2020 21:51:30 -0700 Subject: KVM: PPC: Use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_32_mmu.c | 2 +- arch/powerpc/kvm/book3s_64_mmu.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 6 +++--- arch/powerpc/kvm/powerpc.c | 1 - 5 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index f21e73492ce3..3fbd570f9c1e 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -234,7 +234,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, case 2: case 6: pte->may_write = true; - /* fall through */ + fallthrough; case 3: case 5: case 7: diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 599133256a95..26b8b27a3755 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -311,7 +311,7 @@ do_second: case 2: case 6: gpte->may_write = true; - /* fall through */ + fallthrough; case 3: case 5: case 7: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3bc2f5da8fa1..1d9c554746a9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -740,7 +740,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) pte.raddr &= ~SPLIT_HACK_MASK; - /* fall through */ + fallthrough; case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c9f4b374dc56..10e2d76d8546 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -421,11 +421,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_DATA_STORAGE: case BOOKE_IRQPRIO_ALIGNMENT: update_dear = true; - /* fall through */ + fallthrough; case BOOKE_IRQPRIO_INST_STORAGE: case BOOKE_IRQPRIO_PROGRAM: update_esr = true; - /* fall through */ + fallthrough; case BOOKE_IRQPRIO_ITLB_MISS: case BOOKE_IRQPRIO_SYSCALL: case BOOKE_IRQPRIO_FP_UNAVAIL: @@ -459,7 +459,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: keep_irq = true; - /* fall through */ + fallthrough; case BOOKE_IRQPRIO_EXTERNAL: case BOOKE_IRQPRIO_DBELL: allowed = vcpu->arch.shared->msr & MSR_EE; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 62ee66d5eb6f..6729c13161f7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -524,7 +524,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_PPC_GUEST_DEBUG_SSTEP: - /* fall through */ case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: -- cgit v1.2.3 From b2fa4f9088db497fb1b4c6b71e64045b3597ed1f Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 18 Mar 2020 18:43:30 +0100 Subject: KVM: PPC: Book3S PR: Fix kernel crash with PR KVM With PR KVM, shutting down a VM causes the host kernel to crash: [ 314.219284] BUG: Unable to handle kernel data access on read at 0xc00800000176c638 [ 314.219299] Faulting instruction address: 0xc008000000d4ddb0 cpu 0x0: Vector: 300 (Data Access) at [c00000036da077a0] pc: c008000000d4ddb0: kvmppc_mmu_pte_flush_all+0x68/0xd0 [kvm_pr] lr: c008000000d4dd94: kvmppc_mmu_pte_flush_all+0x4c/0xd0 [kvm_pr] sp: c00000036da07a30 msr: 900000010280b033 dar: c00800000176c638 dsisr: 40000000 current = 0xc00000036d4c0000 paca = 0xc000000001a00000 irqmask: 0x03 irq_happened: 0x01 pid = 1992, comm = qemu-system-ppc Linux version 5.6.0-master-gku+ (greg@palmb) (gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)) #17 SMP Wed Mar 18 13:49:29 CET 2020 enter ? for help [c00000036da07ab0] c008000000d4fbe0 kvmppc_mmu_destroy_pr+0x28/0x60 [kvm_pr] [c00000036da07ae0] c0080000009eab8c kvmppc_mmu_destroy+0x34/0x50 [kvm] [c00000036da07b00] c0080000009e50c0 kvm_arch_vcpu_destroy+0x108/0x140 [kvm] [c00000036da07b30] c0080000009d1b50 kvm_vcpu_destroy+0x28/0x80 [kvm] [c00000036da07b60] c0080000009e4434 kvm_arch_destroy_vm+0xbc/0x190 [kvm] [c00000036da07ba0] c0080000009d9c2c kvm_put_kvm+0x1d4/0x3f0 [kvm] [c00000036da07c00] c0080000009da760 kvm_vm_release+0x38/0x60 [kvm] [c00000036da07c30] c000000000420be0 __fput+0xe0/0x310 [c00000036da07c90] c0000000001747a0 task_work_run+0x150/0x1c0 [c00000036da07cf0] c00000000014896c do_exit+0x44c/0xd00 [c00000036da07dc0] c0000000001492f4 do_group_exit+0x64/0xd0 [c00000036da07e00] c000000000149384 sys_exit_group+0x24/0x30 [c00000036da07e20] c00000000000b9d0 system_call+0x5c/0x68 This is caused by a use-after-free in kvmppc_mmu_pte_flush_all() which dereferences vcpu->arch.book3s which was previously freed by kvmppc_core_vcpu_free_pr(). This happens because kvmppc_mmu_destroy() is called after kvmppc_core_vcpu_free() since commit ff030fdf5573 ("KVM: PPC: Move kvm_vcpu_init() invocation to common code"). The kvmppc_mmu_destroy() helper calls one of the following depending on the KVM backend: - kvmppc_mmu_destroy_hv() which does nothing (Book3s HV) - kvmppc_mmu_destroy_pr() which undoes the effects of kvmppc_mmu_init() (Book3s PR 32-bit) - kvmppc_mmu_destroy_pr() which undoes the effects of kvmppc_mmu_init() (Book3s PR 64-bit) - kvmppc_mmu_destroy_e500() which does nothing (BookE e500/e500mc) It turns out that this is only relevant to PR KVM actually. And both 32 and 64 backends need vcpu->arch.book3s to be valid when calling kvmppc_mmu_destroy_pr(). So instead of calling kvmppc_mmu_destroy() from kvm_arch_vcpu_destroy(), call kvmppc_mmu_destroy_pr() at the beginning of kvmppc_core_vcpu_free_pr(). This is consistent with kvmppc_mmu_init() being the last call in kvmppc_core_vcpu_create_pr(). For the same reason, if kvmppc_core_vcpu_create_pr() returns an error then this means that kvmppc_mmu_init() was either not called or failed, in which case kvmppc_mmu_destroy() should not be called. Drop the line in the error path of kvm_arch_vcpu_create(). Fixes: ff030fdf5573 ("KVM: PPC: Move kvm_vcpu_init() invocation to common code") Signed-off-by: Greg Kurz Reviewed-by: Sean Christopherson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/kvm/powerpc.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 1d9c554746a9..9b112bd243d9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1817,6 +1817,7 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + kvmppc_mmu_destroy_pr(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6729c13161f7..e229a81016d0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -750,7 +750,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return 0; out_vcpu_uninit: - kvmppc_mmu_destroy(vcpu); kvmppc_subarch_vcpu_uninit(vcpu); return err; } @@ -783,7 +782,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvmppc_core_vcpu_free(vcpu); - kvmppc_mmu_destroy(vcpu); kvmppc_subarch_vcpu_uninit(vcpu); } -- cgit v1.2.3 From 3f1268dda8e47f808f4f50f24715b84d4b228bf3 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 18 Mar 2020 18:43:36 +0100 Subject: KVM: PPC: Book3S PR: Move kvmppc_mmu_init() into PR KVM This is only relevant to PR KVM. Make it obvious by moving the function declaration to the Book3s header and rename it with a _pr suffix. Signed-off-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 1 - arch/powerpc/kvm/book3s.h | 1 + arch/powerpc/kvm/book3s_32_mmu_host.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 406ec46304d5..0b80e3420fef 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -108,7 +108,6 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); -extern int kvmppc_mmu_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 3a4613985949..eae259ee49af 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -16,6 +16,7 @@ extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); +extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index d4cb3bcf41b6..e8e7b2c530d1 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -356,7 +356,7 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) /* From mm/mmu_context_hash32.c */ #define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) -int kvmppc_mmu_init(struct kvm_vcpu *vcpu) +int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int err; diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 044dd49eeb9d..e452158a18d7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -384,7 +384,7 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) __destroy_context(to_book3s(vcpu)->context_id[0]); } -int kvmppc_mmu_init(struct kvm_vcpu *vcpu) +int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int err; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9b112bd243d9..ec042e0327b9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1795,7 +1795,7 @@ static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu) vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE; - err = kvmppc_mmu_init(vcpu); + err = kvmppc_mmu_init_pr(vcpu); if (err < 0) goto free_shared_page; -- cgit v1.2.3 From 6fef0c6bbe4987fc94c14f52782b224ddaf3530b Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 18 Mar 2020 18:43:42 +0100 Subject: KVM: PPC: Kill kvmppc_ops::mmu_destroy() and kvmppc_mmu_destroy() These are only used by HV KVM and BookE, and in both cases they are nops. Signed-off-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 2 -- arch/powerpc/kvm/book3s.c | 5 ----- arch/powerpc/kvm/book3s_hv.c | 6 ------ arch/powerpc/kvm/book3s_pr.c | 1 - arch/powerpc/kvm/booke.c | 5 ----- arch/powerpc/kvm/booke.h | 2 -- arch/powerpc/kvm/e500.c | 1 - arch/powerpc/kvm/e500_mmu.c | 4 ---- arch/powerpc/kvm/e500mc.c | 1 - 9 files changed, 27 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0b80e3420fef..e716862d56b9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -107,7 +107,6 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); -extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, @@ -288,7 +287,6 @@ struct kvmppc_ops { int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); - void (*mmu_destroy)(struct kvm_vcpu *vcpu); void (*free_memslot)(struct kvm_memory_slot *slot); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 0adaf4791a6d..5690a1f9b976 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -858,11 +858,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) return 0; } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) -{ - vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); -} - int kvmppc_core_init_vm(struct kvm *kvm) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 18675bd48e90..85e75b12e513 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4555,11 +4555,6 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) } } -static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) -{ - return; -} - void kvmppc_setup_partition_table(struct kvm *kvm) { unsigned long dw0, dw1; @@ -5523,7 +5518,6 @@ static struct kvmppc_ops kvm_ops_hv = { .age_hva = kvm_age_hva_hv, .test_age_hva = kvm_test_age_hva_hv, .set_spte_hva = kvm_set_spte_hva_hv, - .mmu_destroy = kvmppc_mmu_destroy_hv, .free_memslot = kvmppc_core_free_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ec042e0327b9..a0f6813f4560 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -2087,7 +2087,6 @@ static struct kvmppc_ops kvm_ops_pr = { .age_hva = kvm_age_hva_pr, .test_age_hva = kvm_test_age_hva_pr, .set_spte_hva = kvm_set_spte_hva_pr, - .mmu_destroy = kvmppc_mmu_destroy_pr, .free_memslot = kvmppc_core_free_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 10e2d76d8546..6c18ea88fd25 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -2073,11 +2073,6 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_clear_dbsr(); } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) -{ - vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); -} - int kvmppc_core_init_vm(struct kvm *kvm) { return kvm->arch.kvm_ops->init_vm(kvm); diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 9d3169fbce55..65b4d337d337 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -94,7 +94,6 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); @@ -102,7 +101,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); -extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index f2b4feaff6d2..7e8b69015d20 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -490,7 +490,6 @@ static struct kvmppc_ops kvm_ops_e500 = { .vcpu_put = kvmppc_core_vcpu_put_e500, .vcpu_create = kvmppc_core_vcpu_create_e500, .vcpu_free = kvmppc_core_vcpu_free_e500, - .mmu_destroy = kvmppc_mmu_destroy_e500, .init_vm = kvmppc_core_init_vm_e500, .destroy_vm = kvmppc_core_destroy_vm_e500, .emulate_op = kvmppc_core_emulate_op_e500, diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 2d910b87e441..e131fbecdcc4 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -533,10 +533,6 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, return get_tlb_raddr(gtlbe) | (eaddr & pgmask); } -void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu) -{ -} - /*****************************************/ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index e6b06cb2b92c..1c189b5aadcc 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -376,7 +376,6 @@ static struct kvmppc_ops kvm_ops_e500mc = { .vcpu_put = kvmppc_core_vcpu_put_e500mc, .vcpu_create = kvmppc_core_vcpu_create_e500mc, .vcpu_free = kvmppc_core_vcpu_free_e500mc, - .mmu_destroy = kvmppc_mmu_destroy_e500, .init_vm = kvmppc_core_init_vm_e500mc, .destroy_vm = kvmppc_core_destroy_vm_e500mc, .emulate_op = kvmppc_core_emulate_op_e500, -- cgit v1.2.3 From 1d0c32ec3b860a32df593a22bad0d1dbc5546a59 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 18 Mar 2020 18:43:30 +0100 Subject: KVM: PPC: Fix kernel crash with PR KVM With PR KVM, shutting down a VM causes the host kernel to crash: [ 314.219284] BUG: Unable to handle kernel data access on read at 0xc00800000176c638 [ 314.219299] Faulting instruction address: 0xc008000000d4ddb0 cpu 0x0: Vector: 300 (Data Access) at [c00000036da077a0] pc: c008000000d4ddb0: kvmppc_mmu_pte_flush_all+0x68/0xd0 [kvm_pr] lr: c008000000d4dd94: kvmppc_mmu_pte_flush_all+0x4c/0xd0 [kvm_pr] sp: c00000036da07a30 msr: 900000010280b033 dar: c00800000176c638 dsisr: 40000000 current = 0xc00000036d4c0000 paca = 0xc000000001a00000 irqmask: 0x03 irq_happened: 0x01 pid = 1992, comm = qemu-system-ppc Linux version 5.6.0-master-gku+ (greg@palmb) (gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)) #17 SMP Wed Mar 18 13:49:29 CET 2020 enter ? for help [c00000036da07ab0] c008000000d4fbe0 kvmppc_mmu_destroy_pr+0x28/0x60 [kvm_pr] [c00000036da07ae0] c0080000009eab8c kvmppc_mmu_destroy+0x34/0x50 [kvm] [c00000036da07b00] c0080000009e50c0 kvm_arch_vcpu_destroy+0x108/0x140 [kvm] [c00000036da07b30] c0080000009d1b50 kvm_vcpu_destroy+0x28/0x80 [kvm] [c00000036da07b60] c0080000009e4434 kvm_arch_destroy_vm+0xbc/0x190 [kvm] [c00000036da07ba0] c0080000009d9c2c kvm_put_kvm+0x1d4/0x3f0 [kvm] [c00000036da07c00] c0080000009da760 kvm_vm_release+0x38/0x60 [kvm] [c00000036da07c30] c000000000420be0 __fput+0xe0/0x310 [c00000036da07c90] c0000000001747a0 task_work_run+0x150/0x1c0 [c00000036da07cf0] c00000000014896c do_exit+0x44c/0xd00 [c00000036da07dc0] c0000000001492f4 do_group_exit+0x64/0xd0 [c00000036da07e00] c000000000149384 sys_exit_group+0x24/0x30 [c00000036da07e20] c00000000000b9d0 system_call+0x5c/0x68 This is caused by a use-after-free in kvmppc_mmu_pte_flush_all() which dereferences vcpu->arch.book3s which was previously freed by kvmppc_core_vcpu_free_pr(). This happens because kvmppc_mmu_destroy() is called after kvmppc_core_vcpu_free() since commit ff030fdf5573 ("KVM: PPC: Move kvm_vcpu_init() invocation to common code"). The kvmppc_mmu_destroy() helper calls one of the following depending on the KVM backend: - kvmppc_mmu_destroy_hv() which does nothing (Book3s HV) - kvmppc_mmu_destroy_pr() which undoes the effects of kvmppc_mmu_init() (Book3s PR 32-bit) - kvmppc_mmu_destroy_pr() which undoes the effects of kvmppc_mmu_init() (Book3s PR 64-bit) - kvmppc_mmu_destroy_e500() which does nothing (BookE e500/e500mc) It turns out that this is only relevant to PR KVM actually. And both 32 and 64 backends need vcpu->arch.book3s to be valid when calling kvmppc_mmu_destroy_pr(). So instead of calling kvmppc_mmu_destroy() from kvm_arch_vcpu_destroy(), call kvmppc_mmu_destroy_pr() at the beginning of kvmppc_core_vcpu_free_pr(). This is consistent with kvmppc_mmu_init() being the last call in kvmppc_core_vcpu_create_pr(). For the same reason, if kvmppc_core_vcpu_create_pr() returns an error then this means that kvmppc_mmu_init() was either not called or failed, in which case kvmppc_mmu_destroy() should not be called. Drop the line in the error path of kvm_arch_vcpu_create(). Fixes: ff030fdf5573 ("KVM: PPC: Move kvm_vcpu_init() invocation to common code") Signed-off-by: Greg Kurz Reviewed-by: Sean Christopherson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/158455341029.178873.15248663726399374882.stgit@bahia.lan --- arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/kvm/powerpc.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 729a0f12a752..db3a87319642 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1817,6 +1817,7 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + kvmppc_mmu_destroy_pr(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1af96fb5dc6f..302e9dccdd6d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -759,7 +759,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return 0; out_vcpu_uninit: - kvmppc_mmu_destroy(vcpu); kvmppc_subarch_vcpu_uninit(vcpu); return err; } @@ -792,7 +791,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvmppc_core_vcpu_free(vcpu); - kvmppc_mmu_destroy(vcpu); kvmppc_subarch_vcpu_uninit(vcpu); } -- cgit v1.2.3 From 9bee484b280a059c1faa10ae174af4f4af02c805 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 19 Mar 2020 19:55:10 -0300 Subject: KVM: PPC: Book3S HV: Skip kvmppc_uvmem_free if Ultravisor is not supported kvmppc_uvmem_init checks for Ultravisor support and returns early if it is not present. Calling kvmppc_uvmem_free at module exit will cause an Oops: $ modprobe -r kvm-hv Oops: Kernel access of bad area, sig: 11 [#1] NIP: c000000000789e90 LR: c000000000789e8c CTR: c000000000401030 REGS: c000003fa7bab9a0 TRAP: 0300 Not tainted (5.6.0-rc6-00033-g6c90b86a745a-dirty) MSR: 9000000000009033 CR: 24002282 XER: 00000000 CFAR: c000000000dae880 DAR: 0000000000000008 DSISR: 40000000 IRQMASK: 1 GPR00: c000000000789e8c c000003fa7babc30 c0000000016fe500 0000000000000000 GPR04: 0000000000000000 0000000000000006 0000000000000000 c000003faf205c00 GPR08: 0000000000000000 0000000000000001 000000008000002d c00800000ddde140 GPR12: c000000000401030 c000003ffffd9080 0000000000000001 0000000000000000 GPR16: 0000000000000000 0000000000000000 000000013aad0074 000000013aaac978 GPR20: 000000013aad0070 0000000000000000 00007fffd1b37158 0000000000000000 GPR24: 000000014fef0d58 0000000000000000 000000014fef0cf0 0000000000000001 GPR28: 0000000000000000 0000000000000000 c0000000018b2a60 0000000000000000 NIP [c000000000789e90] percpu_ref_kill_and_confirm+0x40/0x170 LR [c000000000789e8c] percpu_ref_kill_and_confirm+0x3c/0x170 Call Trace: [c000003fa7babc30] [c000003faf2064d4] 0xc000003faf2064d4 (unreliable) [c000003fa7babcb0] [c000000000400e8c] dev_pagemap_kill+0x6c/0x80 [c000003fa7babcd0] [c000000000401064] memunmap_pages+0x34/0x2f0 [c000003fa7babd50] [c00800000dddd548] kvmppc_uvmem_free+0x30/0x80 [kvm_hv] [c000003fa7babd80] [c00800000ddcef18] kvmppc_book3s_exit_hv+0x20/0x78 [kvm_hv] [c000003fa7babda0] [c0000000002084d0] sys_delete_module+0x1d0/0x2c0 [c000003fa7babe20] [c00000000000b9d0] system_call+0x5c/0x68 Instruction dump: 3fc2001b fb81ffe0 fba1ffe8 fbe1fff8 7c7f1b78 7c9c2378 3bde4560 7fc3f378 f8010010 f821ff81 486249a1 60000000 7c7d1b78 712a0002 40820084 ---[ end trace 5774ef4dc2c98279 ]--- So this patch checks if kvmppc_uvmem_init actually allocated anything before running kvmppc_uvmem_free. Fixes: ca9f4942670c ("KVM: PPC: Book3S HV: Support for running secure guests") Cc: stable@vger.kernel.org # v5.5+ Reported-by: Greg Kurz Signed-off-by: Fabiano Rosas Tested-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_uvmem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 79b1202b1c62..9d26614b2a77 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -806,6 +806,9 @@ out: void kvmppc_uvmem_free(void) { + if (!kvmppc_uvmem_bitmap) + return; + memunmap_pages(&kvmppc_uvmem_pgmap); release_mem_region(kvmppc_uvmem_pgmap.res.start, resource_size(&kvmppc_uvmem_pgmap.res)); -- cgit v1.2.3 From 8c47b6ff29e3d88484fe59d02f9db6de7e44e310 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 20 Mar 2020 11:26:42 +0100 Subject: KVM: PPC: Book3S HV: Check caller of H_SVM_* Hcalls The Hcall named H_SVM_* are reserved to the Ultravisor. However, nothing prevent a malicious VM or SVM to call them. This could lead to weird result and should be filtered out. Checking the Secure bit of the calling MSR ensure that the call is coming from either the Ultravisor or a SVM. But any system call made from a SVM are going through the Ultravisor, and the Ultravisor should filter out these malicious call. This way, only the Ultravisor is able to make such a Hcall. Cc: Bharata B Rao Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Laurent Dufour Reviewed-by: Ram Pai Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 85e75b12e513..a308de610cdf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1073,25 +1073,35 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_get_gpr(vcpu, 6)); break; case H_SVM_PAGE_IN: - ret = kvmppc_h_svm_page_in(vcpu->kvm, - kvmppc_get_gpr(vcpu, 4), - kvmppc_get_gpr(vcpu, 5), - kvmppc_get_gpr(vcpu, 6)); + ret = H_UNSUPPORTED; + if (kvmppc_get_srr1(vcpu) & MSR_S) + ret = kvmppc_h_svm_page_in(vcpu->kvm, + kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); break; case H_SVM_PAGE_OUT: - ret = kvmppc_h_svm_page_out(vcpu->kvm, - kvmppc_get_gpr(vcpu, 4), - kvmppc_get_gpr(vcpu, 5), - kvmppc_get_gpr(vcpu, 6)); + ret = H_UNSUPPORTED; + if (kvmppc_get_srr1(vcpu) & MSR_S) + ret = kvmppc_h_svm_page_out(vcpu->kvm, + kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); break; case H_SVM_INIT_START: - ret = kvmppc_h_svm_init_start(vcpu->kvm); + ret = H_UNSUPPORTED; + if (kvmppc_get_srr1(vcpu) & MSR_S) + ret = kvmppc_h_svm_init_start(vcpu->kvm); break; case H_SVM_INIT_DONE: - ret = kvmppc_h_svm_init_done(vcpu->kvm); + ret = H_UNSUPPORTED; + if (kvmppc_get_srr1(vcpu) & MSR_S) + ret = kvmppc_h_svm_init_done(vcpu->kvm); break; case H_SVM_INIT_ABORT: - ret = kvmppc_h_svm_init_abort(vcpu->kvm); + ret = H_UNSUPPORTED; + if (kvmppc_get_srr1(vcpu) & MSR_S) + ret = kvmppc_h_svm_init_abort(vcpu->kvm); break; default: -- cgit v1.2.3 From 377f02d487b5f74a2411fa01316ba4aff1819629 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 20 Mar 2020 11:26:43 +0100 Subject: KVM: PPC: Book3S HV: H_SVM_INIT_START must call UV_RETURN When the call to UV_REGISTER_MEM_SLOT is failing, for instance because there is not enough free secured memory, the Hypervisor (HV) has to call UV_RETURN to report the error to the Ultravisor (UV). Then the UV will call H_SVM_INIT_ABORT to abort the securing phase and go back to the calling VM. If the kvm->arch.secure_guest is not set, in the return path rfid is called but there is no valid context to get back to the SVM since the Hcall has been routed by the Ultravisor. Move the setting of kvm->arch.secure_guest earlier in kvmppc_h_svm_init_start() so in the return path, UV_RETURN will be called instead of rfid. Cc: Bharata B Rao Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Laurent Dufour Reviewed-by: Ram Pai Tested-by: Fabiano Rosas Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_uvmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 9d26614b2a77..53b88cae3e73 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -209,6 +209,8 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) int ret = H_SUCCESS; int srcu_idx; + kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START; + if (!kvmppc_uvmem_bitmap) return H_UNSUPPORTED; @@ -233,7 +235,6 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) goto out; } } - kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START; out: srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; -- cgit v1.2.3 From 9a5788c615f52f6d7bf0b61986a632d4ec86791d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 19 Mar 2020 15:29:55 +1100 Subject: KVM: PPC: Book3S HV: Add a capability for enabling secure guests At present, on Power systems with Protected Execution Facility hardware and an ultravisor, a KVM guest can transition to being a secure guest at will. Userspace (QEMU) has no way of knowing whether a host system is capable of running secure guests. This will present a problem in future when the ultravisor is capable of migrating secure guests from one host to another, because virtualization management software will have no way to ensure that secure guests only run in domains where all of the hosts can support secure guests. This adds a VM capability which has two functions: (a) userspace can query it to find out whether the host can support secure guests, and (b) userspace can enable it for a guest, which allows that guest to become a secure guest. If userspace does not enable it, KVM will return an error when the ultravisor does the hypercall that indicates that the guest is starting to transition to a secure guest. The ultravisor will then abort the transition and the guest will terminate. Signed-off-by: Paul Mackerras Reviewed-by: David Gibson Reviewed-by: Ram Pai --- Documentation/virt/kvm/api.rst | 17 +++++++++++++++++ arch/powerpc/include/asm/kvm_book3s_uvmem.h | 6 ++++++ arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_hv.c | 16 ++++++++++++++++ arch/powerpc/kvm/book3s_hv_uvmem.c | 13 +++++++++++++ arch/powerpc/kvm/powerpc.c | 14 ++++++++++++++ include/uapi/linux/kvm.h | 1 + 8 files changed, 69 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 158d1186d103..a9255002aa1c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5779,6 +5779,23 @@ it hard or impossible to use it correctly. The availability of KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed. Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT. +7.19 KVM_CAP_PPC_SECURE_GUEST +------------------------------ + +:Architectures: ppc + +This capability indicates that KVM is running on a host that has +ultravisor firmware and thus can support a secure guest. On such a +system, a guest can ask the ultravisor to make it a secure guest, +one whose memory is inaccessible to the host except for pages which +are explicitly requested to be shared with the host. The ultravisor +notifies KVM when a guest requests to become a secure guest, and KVM +has the opportunity to veto the transition. + +If present, this capability can be enabled for a VM, meaning that KVM +will allow the transition to secure guest mode. Otherwise KVM will +veto the transition. + 8. Other capabilities. ====================== diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h index 5a9834e0e2d1..9cb7d8be2366 100644 --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h @@ -5,6 +5,7 @@ #ifdef CONFIG_PPC_UV int kvmppc_uvmem_init(void); void kvmppc_uvmem_free(void); +bool kvmppc_uvmem_available(void); int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot); void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot); @@ -30,6 +31,11 @@ static inline int kvmppc_uvmem_init(void) static inline void kvmppc_uvmem_free(void) { } +static inline bool kvmppc_uvmem_available(void) +{ + return false; +} + static inline int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6e8b8ffd06ad..f99b4333dfba 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -303,6 +303,7 @@ struct kvm_arch { u8 radix; u8 fwnmi_enabled; u8 secure_guest; + u8 svm_enabled; bool threads_indep; bool nested_enable; pgd_t *pgtable; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e716862d56b9..94f5a32acaf1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -313,6 +313,7 @@ struct kvmppc_ops { int size); int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, int size); + int (*enable_svm)(struct kvm *kvm); int (*svm_off)(struct kvm *kvm); }; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a308de610cdf..fa6e4fc7d0e4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5428,6 +5428,21 @@ static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa) vpa->update_pending = 0; } +/* + * Enable a guest to become a secure VM, or test whether + * that could be enabled. + * Called when the KVM_CAP_PPC_SECURE_GUEST capability is + * tested (kvm == NULL) or enabled (kvm != NULL). + */ +static int kvmhv_enable_svm(struct kvm *kvm) +{ + if (!kvmppc_uvmem_available()) + return -EINVAL; + if (kvm) + kvm->arch.svm_enabled = 1; + return 0; +} + /* * IOCTL handler to turn off secure mode of guest * @@ -5548,6 +5563,7 @@ static struct kvmppc_ops kvm_ops_hv = { .enable_nested = kvmhv_enable_nested, .load_from_eaddr = kvmhv_load_from_eaddr, .store_to_eaddr = kvmhv_store_to_eaddr, + .enable_svm = kvmhv_enable_svm, .svm_off = kvmhv_svm_off, }; diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 53b88cae3e73..6ed98e70097d 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -113,6 +113,15 @@ struct kvmppc_uvmem_page_pvt { bool skip_page_out; }; +bool kvmppc_uvmem_available(void) +{ + /* + * If kvmppc_uvmem_bitmap != NULL, then there is an ultravisor + * and our data structures have been initialized successfully. + */ + return !!kvmppc_uvmem_bitmap; +} + int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) { struct kvmppc_uvmem_slot *p; @@ -218,6 +227,10 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) if (!kvm_is_radix(kvm)) return H_UNSUPPORTED; + /* NAK the transition to secure if not enabled */ + if (!kvm->arch.svm_enabled) + return H_AUTHORITY; + srcu_idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e229a81016d0..c48862d86adc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -668,6 +668,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || (hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); break; +#endif +#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) + case KVM_CAP_PPC_SECURE_GUEST: + r = hv_enabled && kvmppc_hv_ops->enable_svm && + !kvmppc_hv_ops->enable_svm(NULL); + break; #endif default: r = 0; @@ -2166,6 +2172,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; r = kvm->arch.kvm_ops->enable_nested(kvm); break; +#endif +#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) + case KVM_CAP_PPC_SECURE_GUEST: + r = -EINVAL; + if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_svm) + break; + r = kvm->arch.kvm_ops->enable_svm(kvm); + break; #endif default: r = -EINVAL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5e6234cb25a6..428c7dde6b4b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1016,6 +1016,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_INJECT_EXT_DABT 178 #define KVM_CAP_S390_VCPU_RESETS 179 #define KVM_CAP_S390_PROTECTED 180 +#define KVM_CAP_PPC_SECURE_GUEST 181 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From f894ddd5ff01d3bb48faf4dc533536bf5269c17a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:13 +0100 Subject: memremap: add an owner field to struct dev_pagemap Add a new opaque owner field to struct dev_pagemap, which will allow the hmm and migrate_vma code to identify who owns ZONE_DEVICE memory, and refuse to work on mappings not owned by the calling entity. Link: https://lore.kernel.org/r/20200316193216.920734-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Bharata B Rao Signed-off-by: Jason Gunthorpe --- arch/powerpc/kvm/book3s_hv_uvmem.c | 2 ++ drivers/gpu/drm/nouveau/nouveau_dmem.c | 1 + include/linux/memremap.h | 4 ++++ mm/memremap.c | 4 ++++ 4 files changed, 11 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 79b1202b1c62..67fefb03b9b7 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -779,6 +779,8 @@ int kvmppc_uvmem_init(void) kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; kvmppc_uvmem_pgmap.res = *res; kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; + /* just one global instance: */ + kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap; addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE); if (IS_ERR(addr)) { ret = PTR_ERR(addr); diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 0ad5d87b5a8e..a4682272586e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -526,6 +526,7 @@ nouveau_dmem_init(struct nouveau_drm *drm) drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE; drm->dmem->pagemap.res = *res; drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops; + drm->dmem->pagemap.owner = drm->dev; if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap))) goto out_free; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 6fefb09af7c3..60d97e8fd3c0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -103,6 +103,9 @@ struct dev_pagemap_ops { * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table + * @owner: an opaque pointer identifying the entity that manages this + * instance. Used by various helpers to make sure that no + * foreign ZONE_DEVICE memory is accessed. */ struct dev_pagemap { struct vmem_altmap altmap; @@ -113,6 +116,7 @@ struct dev_pagemap { enum memory_type type; unsigned int flags; const struct dev_pagemap_ops *ops; + void *owner; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) diff --git a/mm/memremap.c b/mm/memremap.c index 09b5b7adc773..9b2c97ceb775 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -181,6 +181,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) WARN(1, "Missing migrate_to_ram method\n"); return ERR_PTR(-EINVAL); } + if (!pgmap->owner) { + WARN(1, "Missing owner\n"); + return ERR_PTR(-EINVAL); + } break; case MEMORY_DEVICE_FS_DAX: if (!IS_ENABLED(CONFIG_ZONE_DEVICE) || -- cgit v1.2.3 From 800bb1c8dc80bb4121446b56813067f3ea44edee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:14 +0100 Subject: mm: handle multiple owners of device private pages in migrate_vma Add a new src_owner field to struct migrate_vma. If the field is set, only device private pages with page->pgmap->owner equal to that field are migrated. If the field is not set only "normal" pages are migrated. Fixes: df6ad69838fc ("mm/device-public-memory: device memory cache coherent with CPU") Link: https://lore.kernel.org/r/20200316193216.920734-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Bharata B Rao Signed-off-by: Jason Gunthorpe --- arch/powerpc/kvm/book3s_hv_uvmem.c | 1 + drivers/gpu/drm/nouveau/nouveau_dmem.c | 1 + include/linux/migrate.h | 8 ++++++++ mm/migrate.c | 9 ++++++--- 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 67fefb03b9b7..f44f6b27950f 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -563,6 +563,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, mig.end = end; mig.src = &src_pfn; mig.dst = &dst_pfn; + mig.src_owner = &kvmppc_uvmem_pgmap; mutex_lock(&kvm->arch.uvmem_lock); /* The requested page is already paged-out, nothing to do */ diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index a4682272586e..0e36345d395c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -176,6 +176,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) .end = vmf->address + PAGE_SIZE, .src = &src, .dst = &dst, + .src_owner = drm->dev, }; /* diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 72120061b7d4..3e546cbf03dd 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -196,6 +196,14 @@ struct migrate_vma { unsigned long npages; unsigned long start; unsigned long end; + + /* + * Set to the owner value also stored in page->pgmap->owner for + * migrating out of device private memory. If set only device + * private pages with this owner are migrated. If not set + * device private pages are not migrated at all. + */ + void *src_owner; }; int migrate_vma_setup(struct migrate_vma *args); diff --git a/mm/migrate.c b/mm/migrate.c index b1092876e537..7605d2c23433 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2241,7 +2241,7 @@ again: arch_enter_lazy_mmu_mode(); for (; addr < end; addr += PAGE_SIZE, ptep++) { - unsigned long mpfn, pfn; + unsigned long mpfn = 0, pfn; struct page *page; swp_entry_t entry; pte_t pte; @@ -2255,8 +2255,6 @@ again: } if (!pte_present(pte)) { - mpfn = 0; - /* * Only care about unaddressable device page special * page table entry. Other special swap entries are not @@ -2267,11 +2265,16 @@ again: goto next; page = device_private_entry_to_page(entry); + if (page->pgmap->owner != migrate->src_owner) + goto next; + mpfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; if (is_write_device_private_entry(entry)) mpfn |= MIGRATE_PFN_WRITE; } else { + if (migrate->src_owner) + goto next; pfn = pte_pfn(pte); if (is_zero_pfn(pfn)) { mpfn = MIGRATE_PFN_MIGRATE; -- cgit v1.2.3 From b990408537388e9174b642ad36cdef6c47c64d3a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 21 Mar 2020 13:25:55 -0700 Subject: KVM: Pass kvm_init()'s opaque param to additional arch funcs Pass @opaque to kvm_arch_hardware_setup() and kvm_arch_check_processor_compat() to allow architecture specific code to reference @opaque without having to stash it away in a temporary global variable. This will enable x86 to separate its vendor specific callback ops, which are passed via @opaque, into "init" and "runtime" ops without having to stash away the "init" ops. No functional change intended. Reviewed-by: Cornelia Huck Tested-by: Cornelia Huck #s390 Acked-by: Marc Zyngier Signed-off-by: Sean Christopherson Message-Id: <20200321202603.19355-2-sean.j.christopherson@intel.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 4 ++-- virt/kvm/arm/arm.c | 4 ++-- virt/kvm/kvm_main.c | 18 ++++++++++++++---- 7 files changed, 26 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 78507757ba9a..8f05dd0a0f4e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -118,12 +118,12 @@ void kvm_arch_hardware_disable(void) kvm_mips_callbacks->hardware_disable(); } -int kvm_arch_hardware_setup(void) +int kvm_arch_hardware_setup(void *opaque) { return 0; } -int kvm_arch_check_processor_compat(void) +int kvm_arch_check_processor_compat(void *opaque) { return 0; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c48862d86adc..e15166b0a16d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -415,12 +415,12 @@ int kvm_arch_hardware_enable(void) return 0; } -int kvm_arch_hardware_setup(void) +int kvm_arch_hardware_setup(void *opaque) { return 0; } -int kvm_arch_check_processor_compat(void) +int kvm_arch_check_processor_compat(void *opaque) { return kvmppc_core_check_processor_compat(); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b2649b3d4f3..f6268dfb8362 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -235,7 +235,7 @@ int kvm_arch_hardware_enable(void) return 0; } -int kvm_arch_check_processor_compat(void) +int kvm_arch_check_processor_compat(void *opaque) { return 0; } @@ -302,7 +302,7 @@ static struct notifier_block kvm_clock_notifier = { .notifier_call = kvm_clock_sync, }; -int kvm_arch_hardware_setup(void) +int kvm_arch_hardware_setup(void *opaque) { gmap_notifier.notifier_call = kvm_gmap_notifier; gmap_register_pte_notifier(&gmap_notifier); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1b6d9ac9533c..a656fba5bd60 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9626,7 +9626,7 @@ void kvm_arch_hardware_disable(void) drop_user_return_notifiers(); } -int kvm_arch_hardware_setup(void) +int kvm_arch_hardware_setup(void *opaque) { int r; @@ -9667,7 +9667,7 @@ void kvm_arch_hardware_unsetup(void) kvm_x86_ops->hardware_unsetup(); } -int kvm_arch_check_processor_compat(void) +int kvm_arch_check_processor_compat(void *opaque) { struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6a1905da9bf..6d58beb65454 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -886,9 +886,9 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); -int kvm_arch_hardware_setup(void); +int kvm_arch_hardware_setup(void *opaque); void kvm_arch_hardware_unsetup(void); -int kvm_arch_check_processor_compat(void); +int kvm_arch_check_processor_compat(void *opaque); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 376c6a74166d..48d0ec44ad77 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -64,12 +64,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -int kvm_arch_hardware_setup(void) +int kvm_arch_hardware_setup(void *opaque) { return 0; } -int kvm_arch_check_processor_compat(void) +int kvm_arch_check_processor_compat(void *opaque) { return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f744bc603c53..74bdb7bf3295 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4648,14 +4648,22 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) return &kvm_running_vcpu; } -static void check_processor_compat(void *rtn) +struct kvm_cpu_compat_check { + void *opaque; + int *ret; +}; + +static void check_processor_compat(void *data) { - *(int *)rtn = kvm_arch_check_processor_compat(); + struct kvm_cpu_compat_check *c = data; + + *c->ret = kvm_arch_check_processor_compat(c->opaque); } int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module) { + struct kvm_cpu_compat_check c; int r; int cpu; @@ -4679,12 +4687,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_free_0; } - r = kvm_arch_hardware_setup(); + r = kvm_arch_hardware_setup(opaque); if (r < 0) goto out_free_1; + c.ret = &r; + c.opaque = opaque; for_each_online_cpu(cpu) { - smp_call_function_single(cpu, check_processor_compat, &r, 1); + smp_call_function_single(cpu, check_processor_compat, &c, 1); if (r < 0) goto out_free_2; } -- cgit v1.2.3 From 9600f261acaaabd476d7833cec2dd20f2919f1a0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:21 +1000 Subject: powerpc/64s/exception: Move KVM test to common code This allows more code to be moved out of unrelocated regions. The system call KVMTEST is changed to be open-coded and remain in the tramp area to avoid having to move it to entry_64.S. The custom nature of the system call entry code means the hcall case can be made more streamlined than regular interrupt handlers. mpe: Incorporate fix from Nick: Moving KVM test to the common entry code missed the case of HMI and MCE, which do not do __GEN_COMMON_ENTRY (because they don't want to switch to virt mode). This means a MCE or HMI exception that is taken while KVM is running a guest context will not be switched out of that context, and KVM won't be notified. Found by running sigfuz in guest with patched host on POWER9 DD2.3, which causes some TM related HMI interrupts (which are expected and supposed to be handled by KVM). This fix adds a __GEN_REALMODE_COMMON_ENTRY for those handlers to add the KVM test. This makes them look a little more like other handlers that all use __GEN_COMMON_ENTRY. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-13-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 260 +++++++++++++++++--------------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 -- arch/powerpc/kvm/book3s_segment.S | 7 - 3 files changed, 139 insertions(+), 139 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a47f2e5922d8..1bc73acceb9a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -44,7 +44,6 @@ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these) * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use) - * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated * EXC_COMMON - After switching to virtual, relocated mode. */ @@ -74,13 +73,6 @@ name: #define TRAMP_VIRT_BEGIN(name) \ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name) -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#define TRAMP_KVM_BEGIN(name) \ - TRAMP_VIRT_BEGIN(name) -#else -#define TRAMP_KVM_BEGIN(name) -#endif - #define EXC_REAL_NONE(start, size) \ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size) @@ -271,6 +263,9 @@ do_define_int n .endm .macro GEN_KVM name + .balign IFETCH_ALIGN_BYTES +\name\()_kvm: + .if IKVM_SKIP cmpwi r10,KVM_GUEST_MODE_SKIP beq 89f @@ -281,13 +276,18 @@ BEGIN_FTR_SECTION_NESTED(947) END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) .endif + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 BEGIN_FTR_SECTION_NESTED(948) ld r10,IAREA+EX_PPR(r13) std r10,HSTATE_PPR(r13) END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) - ld r10,IAREA+EX_R10(r13) + ld r11,IAREA+EX_R11(r13) + ld r12,IAREA+EX_R12(r13) std r12,HSTATE_SCRATCH0(r13) sldi r12,r9,32 + ld r9,IAREA+EX_R9(r13) + ld r10,IAREA+EX_R10(r13) /* HSRR variants have the 0x2 bit added to their trap number */ .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION @@ -300,29 +300,16 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .else ori r12,r12,(IVEC) .endif - -#ifdef CONFIG_RELOCATABLE - /* - * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives - * outside the head section. CONFIG_RELOCATABLE KVM expects CTR - * to be saved in HSTATE_SCRATCH1. - */ - ld r9,IAREA+EX_CTR(r13) - std r9,HSTATE_SCRATCH1(r13) - __LOAD_FAR_HANDLER(r9, kvmppc_interrupt) - mtctr r9 - ld r9,IAREA+EX_R9(r13) - bctr -#else - ld r9,IAREA+EX_R9(r13) b kvmppc_interrupt -#endif - .if IKVM_SKIP 89: mtocrf 0x80,r9 + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) + ld r11,IAREA+EX_R11(r13) + ld r12,IAREA+EX_R12(r13) .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION b kvmppc_skip_Hinterrupt @@ -407,11 +394,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) mfctr r10 std r10,IAREA+EX_CTR(r13) mfcr r9 - - .if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT) - KVMTEST \name IHSRR IVEC - .endif - std r11,IAREA+EX_R11(r13) std r12,IAREA+EX_R12(r13) @@ -469,12 +451,18 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) /* * __GEN_COMMON_ENTRY is required to receive the branch from interrupt - * entry, except in the case of the IEARLY handlers. + * entry, except in the case of the real-mode handlers which require + * __GEN_REALMODE_COMMON_ENTRY. + * * This switches to virtual mode and sets MSR[RI]. */ .macro __GEN_COMMON_ENTRY name DEFINE_FIXED_SYMBOL(\name\()_common_real) \name\()_common_real: + .if IKVM_REAL + KVMTEST \name IHSRR IVEC + .endif + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ /* MSR[RI] is clear iff using SRR regs */ .if IHSRR == EXC_HV_OR_STD @@ -487,12 +475,32 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) mtmsrd r10 .if IVIRT + .if IKVM_VIRT + b 1f /* skip the virt test coming from real */ + .endif + .balign IFETCH_ALIGN_BYTES DEFINE_FIXED_SYMBOL(\name\()_common_virt) \name\()_common_virt: + .if IKVM_VIRT + KVMTEST \name IHSRR IVEC +1: + .endif .endif /* IVIRT */ .endm +/* + * Don't switch to virt mode. Used for early MCE and HMI handlers that + * want to run in real mode. + */ +.macro __GEN_REALMODE_COMMON_ENTRY name +DEFINE_FIXED_SYMBOL(\name\()_common_real) +\name\()_common_real: + .if IKVM_REAL + KVMTEST \name IHSRR IVEC + .endif +.endm + .macro __GEN_COMMON_BODY name .if IMASK lbz r10,PACAIRQSOFTMASK(r13) @@ -848,8 +856,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) */ EXC_REAL_END(system_reset, 0x100, 0x100) EXC_VIRT_NONE(0x4100, 0x100) -TRAMP_KVM_BEGIN(system_reset_kvm) - GEN_KVM system_reset #ifdef CONFIG_PPC_P7_NAP TRAMP_REAL_BEGIN(system_reset_idle_wake) @@ -927,6 +933,8 @@ EXC_COMMON_BEGIN(system_reset_common) EXCEPTION_RESTORE_REGS EXC_STD RFI_TO_USER_OR_KERNEL + GEN_KVM system_reset + INT_DEFINE_BEGIN(machine_check_early) IVEC=0x200 @@ -968,9 +976,6 @@ TRAMP_REAL_BEGIN(machine_check_fwnmi) GEN_INT_ENTRY machine_check_early, virt=0 #endif -TRAMP_KVM_BEGIN(machine_check_kvm) - GEN_KVM machine_check - #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ li r9,0; \ @@ -985,6 +990,8 @@ EXC_COMMON_BEGIN(machine_check_early_common) mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 + __GEN_REALMODE_COMMON_ENTRY machine_check_early + /* * Switch to mc_emergency stack and handle re-entrancy (we limit * the nested MCE upto level 4 to avoid stack overflow). @@ -1126,6 +1133,9 @@ EXC_COMMON_BEGIN(machine_check_common) bl machine_check_exception b ret_from_except + GEN_KVM machine_check + + #ifdef CONFIG_PPC_P7_NAP /* * This is an idle wakeup. Low level machine check has already been @@ -1218,8 +1228,6 @@ EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) GEN_INT_ENTRY data_access, virt=1 EXC_VIRT_END(data_access, 0x4300, 0x80) -TRAMP_KVM_BEGIN(data_access_kvm) - GEN_KVM data_access EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DAR(r1) @@ -1232,6 +1240,8 @@ MMU_FTR_SECTION_ELSE b handle_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + GEN_KVM data_access + INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 @@ -1248,8 +1258,6 @@ EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) GEN_INT_ENTRY data_access_slb, virt=1 EXC_VIRT_END(data_access_slb, 0x4380, 0x80) -TRAMP_KVM_BEGIN(data_access_slb_kvm) - GEN_KVM data_access_slb EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb ld r4,_DAR(r1) @@ -1274,6 +1282,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) bl do_bad_slb_fault b ret_from_except + GEN_KVM data_access_slb + INT_DEFINE_BEGIN(instruction_access) IVEC=0x400 @@ -1289,8 +1299,6 @@ EXC_REAL_END(instruction_access, 0x400, 0x80) EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) GEN_INT_ENTRY instruction_access, virt=1 EXC_VIRT_END(instruction_access, 0x4400, 0x80) -TRAMP_KVM_BEGIN(instruction_access_kvm) - GEN_KVM instruction_access EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access ld r4,_DAR(r1) @@ -1303,6 +1311,8 @@ MMU_FTR_SECTION_ELSE b handle_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + GEN_KVM instruction_access + INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 @@ -1319,8 +1329,6 @@ EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) GEN_INT_ENTRY instruction_access_slb, virt=1 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) -TRAMP_KVM_BEGIN(instruction_access_slb_kvm) - GEN_KVM instruction_access_slb EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb ld r4,_DAR(r1) @@ -1345,6 +1353,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) bl do_bad_slb_fault b ret_from_except + GEN_KVM instruction_access_slb + + INT_DEFINE_BEGIN(hardware_interrupt) IVEC=0x500 IHSRR=EXC_HV_OR_STD @@ -1359,8 +1370,6 @@ EXC_REAL_END(hardware_interrupt, 0x500, 0x100) EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) GEN_INT_ENTRY hardware_interrupt, virt=1 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) -TRAMP_KVM_BEGIN(hardware_interrupt_kvm) - GEN_KVM hardware_interrupt EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt FINISH_NAP @@ -1369,6 +1378,8 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) bl do_IRQ b ret_from_except_lite + GEN_KVM hardware_interrupt + INT_DEFINE_BEGIN(alignment) IVEC=0x600 @@ -1383,8 +1394,6 @@ EXC_REAL_END(alignment, 0x600, 0x100) EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) GEN_INT_ENTRY alignment, virt=1 EXC_VIRT_END(alignment, 0x4600, 0x100) -TRAMP_KVM_BEGIN(alignment_kvm) - GEN_KVM alignment EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment bl save_nvgprs @@ -1392,6 +1401,8 @@ EXC_COMMON_BEGIN(alignment_common) bl alignment_exception b ret_from_except + GEN_KVM alignment + INT_DEFINE_BEGIN(program_check) IVEC=0x700 @@ -1404,8 +1415,6 @@ EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) GEN_INT_ENTRY program_check, virt=1 EXC_VIRT_END(program_check, 0x4700, 0x100) -TRAMP_KVM_BEGIN(program_check_kvm) - GEN_KVM program_check EXC_COMMON_BEGIN(program_check_common) __GEN_COMMON_ENTRY program_check @@ -1445,6 +1454,8 @@ EXC_COMMON_BEGIN(program_check_common) bl program_check_exception b ret_from_except + GEN_KVM program_check + INT_DEFINE_BEGIN(fp_unavailable) IVEC=0x800 @@ -1458,8 +1469,6 @@ EXC_REAL_END(fp_unavailable, 0x800, 0x100) EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100) GEN_INT_ENTRY fp_unavailable, virt=1 EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) -TRAMP_KVM_BEGIN(fp_unavailable_kvm) - GEN_KVM fp_unavailable EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ @@ -1490,6 +1499,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b ret_from_except #endif + GEN_KVM fp_unavailable + INT_DEFINE_BEGIN(decrementer) IVEC=0x900 @@ -1503,8 +1514,6 @@ EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) GEN_INT_ENTRY decrementer, virt=1 EXC_VIRT_END(decrementer, 0x4900, 0x80) -TRAMP_KVM_BEGIN(decrementer_kvm) - GEN_KVM decrementer EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer FINISH_NAP @@ -1513,6 +1522,8 @@ EXC_COMMON_BEGIN(decrementer_common) bl timer_interrupt b ret_from_except_lite + GEN_KVM decrementer + INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 @@ -1527,8 +1538,6 @@ EXC_REAL_END(hdecrementer, 0x980, 0x80) EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) GEN_INT_ENTRY hdecrementer, virt=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) -TRAMP_KVM_BEGIN(hdecrementer_kvm) - GEN_KVM hdecrementer EXC_COMMON_BEGIN(hdecrementer_common) GEN_COMMON hdecrementer bl save_nvgprs @@ -1536,6 +1545,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) bl hdec_interrupt b ret_from_except + GEN_KVM hdecrementer + INT_DEFINE_BEGIN(doorbell_super) IVEC=0xa00 @@ -1549,8 +1560,6 @@ EXC_REAL_END(doorbell_super, 0xa00, 0x100) EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) GEN_INT_ENTRY doorbell_super, virt=1 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) -TRAMP_KVM_BEGIN(doorbell_super_kvm) - GEN_KVM doorbell_super EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super FINISH_NAP @@ -1563,6 +1572,8 @@ EXC_COMMON_BEGIN(doorbell_super_common) #endif b ret_from_except_lite + GEN_KVM doorbell_super + EXC_REAL_NONE(0xb00, 0x100) EXC_VIRT_NONE(0x4b00, 0x100) @@ -1667,6 +1678,7 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) EXC_VIRT_END(system_call, 0x4c00, 0x100) #ifdef CONFIG_KVM_BOOK3S_64_HANDLER +TRAMP_REAL_BEGIN(system_call_kvm) /* * This is a hcall, so register convention is as above, with these * differences: @@ -1674,20 +1686,35 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100) * ctr = orig r13 * orig r10 saved in PACA */ -TRAMP_KVM_BEGIN(system_call_kvm) /* * Save the PPR (on systems that support it) before changing to * HMT_MEDIUM. That allows the KVM code to save that value into the * guest state (it is the guest's PPR value). */ - OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR) +BEGIN_FTR_SECTION_NESTED(948) + mfspr r10,SPRN_PPR + std r10,HSTATE_PPR(r13) +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) HMT_MEDIUM - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR) mfctr r10 SET_SCRATCH0(r10) - std r9,PACA_EXGEN+EX_R9(r13) - mfcr r9 - GEN_KVM system_call + mfcr r10 + std r12,HSTATE_SCRATCH0(r13) + sldi r12,r10,32 + ori r12,r12,0xc00 +#ifdef CONFIG_RELOCATABLE + /* + * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives + * outside the head section. + */ + __LOAD_FAR_HANDLER(r10, kvmppc_interrupt) + mtctr r10 + ld r10,PACA_EXGEN+EX_R10(r13) + bctr +#else + ld r10,PACA_EXGEN+EX_R10(r13) + b kvmppc_interrupt +#endif #endif @@ -1702,8 +1729,6 @@ EXC_REAL_END(single_step, 0xd00, 0x100) EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) GEN_INT_ENTRY single_step, virt=1 EXC_VIRT_END(single_step, 0x4d00, 0x100) -TRAMP_KVM_BEGIN(single_step_kvm) - GEN_KVM single_step EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step bl save_nvgprs @@ -1711,6 +1736,8 @@ EXC_COMMON_BEGIN(single_step_common) bl single_step_exception b ret_from_except + GEN_KVM single_step + INT_DEFINE_BEGIN(h_data_storage) IVEC=0xe00 @@ -1728,8 +1755,6 @@ EXC_REAL_END(h_data_storage, 0xe00, 0x20) EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) GEN_INT_ENTRY h_data_storage, virt=1, ool=1 EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) -TRAMP_KVM_BEGIN(h_data_storage_kvm) - GEN_KVM h_data_storage EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage bl save_nvgprs @@ -1743,6 +1768,8 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b ret_from_except + GEN_KVM h_data_storage + INT_DEFINE_BEGIN(h_instr_storage) IVEC=0xe20 @@ -1757,8 +1784,6 @@ EXC_REAL_END(h_instr_storage, 0xe20, 0x20) EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) GEN_INT_ENTRY h_instr_storage, virt=1, ool=1 EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) -TRAMP_KVM_BEGIN(h_instr_storage_kvm) - GEN_KVM h_instr_storage EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage bl save_nvgprs @@ -1766,6 +1791,8 @@ EXC_COMMON_BEGIN(h_instr_storage_common) bl unknown_exception b ret_from_except + GEN_KVM h_instr_storage + INT_DEFINE_BEGIN(emulation_assist) IVEC=0xe40 @@ -1780,8 +1807,6 @@ EXC_REAL_END(emulation_assist, 0xe40, 0x20) EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) GEN_INT_ENTRY emulation_assist, virt=1, ool=1 EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) -TRAMP_KVM_BEGIN(emulation_assist_kvm) - GEN_KVM emulation_assist EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist bl save_nvgprs @@ -1789,6 +1814,8 @@ EXC_COMMON_BEGIN(emulation_assist_common) bl emulation_assist_interrupt b ret_from_except + GEN_KVM emulation_assist + /* * hmi_exception trampoline is a special case. It jumps to hmi_exception_early @@ -1816,14 +1843,13 @@ EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20) GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1 EXC_REAL_END(hmi_exception, 0xe60, 0x20) EXC_VIRT_NONE(0x4e60, 0x20) -TRAMP_KVM_BEGIN(hmi_exception_early_kvm) - GEN_KVM hmi_exception_early -TRAMP_KVM_BEGIN(hmi_exception_kvm) - GEN_KVM hmi_exception EXC_COMMON_BEGIN(hmi_exception_early_common) mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ + + __GEN_REALMODE_COMMON_ENTRY hmi_exception_early + mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ @@ -1846,6 +1872,8 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXCEPTION_RESTORE_REGS EXC_HV GEN_INT_ENTRY hmi_exception, virt=0 + GEN_KVM hmi_exception_early + EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception FINISH_NAP @@ -1855,6 +1883,8 @@ EXC_COMMON_BEGIN(hmi_exception_common) bl handle_hmi_exception b ret_from_except + GEN_KVM hmi_exception + INT_DEFINE_BEGIN(h_doorbell) IVEC=0xe80 @@ -1870,8 +1900,6 @@ EXC_REAL_END(h_doorbell, 0xe80, 0x20) EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) GEN_INT_ENTRY h_doorbell, virt=1, ool=1 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) -TRAMP_KVM_BEGIN(h_doorbell_kvm) - GEN_KVM h_doorbell EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell FINISH_NAP @@ -1884,6 +1912,8 @@ EXC_COMMON_BEGIN(h_doorbell_common) #endif b ret_from_except_lite + GEN_KVM h_doorbell + INT_DEFINE_BEGIN(h_virt_irq) IVEC=0xea0 @@ -1899,8 +1929,6 @@ EXC_REAL_END(h_virt_irq, 0xea0, 0x20) EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) GEN_INT_ENTRY h_virt_irq, virt=1, ool=1 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) -TRAMP_KVM_BEGIN(h_virt_irq_kvm) - GEN_KVM h_virt_irq EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq FINISH_NAP @@ -1909,6 +1937,8 @@ EXC_COMMON_BEGIN(h_virt_irq_common) bl do_IRQ b ret_from_except_lite + GEN_KVM h_virt_irq + EXC_REAL_NONE(0xec0, 0x20) EXC_VIRT_NONE(0x4ec0, 0x20) @@ -1928,8 +1958,6 @@ EXC_REAL_END(performance_monitor, 0xf00, 0x20) EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) GEN_INT_ENTRY performance_monitor, virt=1, ool=1 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) -TRAMP_KVM_BEGIN(performance_monitor_kvm) - GEN_KVM performance_monitor EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor FINISH_NAP @@ -1938,6 +1966,8 @@ EXC_COMMON_BEGIN(performance_monitor_common) bl performance_monitor_exception b ret_from_except_lite + GEN_KVM performance_monitor + INT_DEFINE_BEGIN(altivec_unavailable) IVEC=0xf20 @@ -1951,8 +1981,6 @@ EXC_REAL_END(altivec_unavailable, 0xf20, 0x20) EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20) GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1 EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20) -TRAMP_KVM_BEGIN(altivec_unavailable_kvm) - GEN_KVM altivec_unavailable EXC_COMMON_BEGIN(altivec_unavailable_common) GEN_COMMON altivec_unavailable #ifdef CONFIG_ALTIVEC @@ -1986,6 +2014,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl altivec_unavailable_exception b ret_from_except + GEN_KVM altivec_unavailable + INT_DEFINE_BEGIN(vsx_unavailable) IVEC=0xf40 @@ -1999,8 +2029,6 @@ EXC_REAL_END(vsx_unavailable, 0xf40, 0x20) EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20) GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1 EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20) -TRAMP_KVM_BEGIN(vsx_unavailable_kvm) - GEN_KVM vsx_unavailable EXC_COMMON_BEGIN(vsx_unavailable_common) GEN_COMMON vsx_unavailable #ifdef CONFIG_VSX @@ -2033,6 +2061,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b ret_from_except + GEN_KVM vsx_unavailable + INT_DEFINE_BEGIN(facility_unavailable) IVEC=0xf60 @@ -2045,8 +2075,6 @@ EXC_REAL_END(facility_unavailable, 0xf60, 0x20) EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) GEN_INT_ENTRY facility_unavailable, virt=1, ool=1 EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) -TRAMP_KVM_BEGIN(facility_unavailable_kvm) - GEN_KVM facility_unavailable EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable bl save_nvgprs @@ -2054,6 +2082,8 @@ EXC_COMMON_BEGIN(facility_unavailable_common) bl facility_unavailable_exception b ret_from_except + GEN_KVM facility_unavailable + INT_DEFINE_BEGIN(h_facility_unavailable) IVEC=0xf80 @@ -2068,8 +2098,6 @@ EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20) EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1 EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) -TRAMP_KVM_BEGIN(h_facility_unavailable_kvm) - GEN_KVM h_facility_unavailable EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable bl save_nvgprs @@ -2077,6 +2105,8 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) bl facility_unavailable_exception b ret_from_except + GEN_KVM h_facility_unavailable + EXC_REAL_NONE(0xfa0, 0x20) EXC_VIRT_NONE(0x4fa0, 0x20) @@ -2102,14 +2132,15 @@ EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) GEN_INT_ENTRY cbe_system_error, virt=0 EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) -TRAMP_KVM_BEGIN(cbe_system_error_kvm) - GEN_KVM cbe_system_error EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_system_error_exception b ret_from_except + + GEN_KVM cbe_system_error + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) @@ -2128,8 +2159,6 @@ EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100) EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) GEN_INT_ENTRY instruction_breakpoint, virt=1 EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) -TRAMP_KVM_BEGIN(instruction_breakpoint_kvm) - GEN_KVM instruction_breakpoint EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint bl save_nvgprs @@ -2137,6 +2166,8 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) bl instruction_breakpoint_exception b ret_from_except + GEN_KVM instruction_breakpoint + EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) @@ -2145,6 +2176,7 @@ INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=EXC_HV IEARLY=2 + IKVM_REAL=1 INT_DEFINE_END(denorm_exception) EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) @@ -2154,7 +2186,6 @@ EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ bne+ denorm_assist #endif - KVMTEST denorm_exception, EXC_HV, 0x1500 mfspr r11,SPRN_HSRR0 mfspr r12,SPRN_HSRR1 GEN_BRANCH_TO_COMMON denorm_exception, virt=0 @@ -2172,8 +2203,6 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else EXC_VIRT_NONE(0x5500, 0x100) #endif -TRAMP_KVM_BEGIN(denorm_exception_kvm) - GEN_KVM denorm_exception #ifdef CONFIG_PPC_DENORMALISATION TRAMP_REAL_BEGIN(denorm_assist) @@ -2251,6 +2280,8 @@ EXC_COMMON_BEGIN(denorm_exception_common) bl unknown_exception b ret_from_except + GEN_KVM denorm_exception + #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_maintenance) @@ -2264,14 +2295,15 @@ EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) GEN_INT_ENTRY cbe_maintenance, virt=0 EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) -TRAMP_KVM_BEGIN(cbe_maintenance_kvm) - GEN_KVM cbe_maintenance EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_maintenance_exception b ret_from_except + + GEN_KVM cbe_maintenance + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) @@ -2289,8 +2321,6 @@ EXC_REAL_END(altivec_assist, 0x1700, 0x100) EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) GEN_INT_ENTRY altivec_assist, virt=1 EXC_VIRT_END(altivec_assist, 0x5700, 0x100) -TRAMP_KVM_BEGIN(altivec_assist_kvm) - GEN_KVM altivec_assist EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist bl save_nvgprs @@ -2302,6 +2332,8 @@ EXC_COMMON_BEGIN(altivec_assist_common) #endif b ret_from_except + GEN_KVM altivec_assist + #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_thermal) @@ -2315,14 +2347,15 @@ EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) GEN_INT_ENTRY cbe_thermal, virt=0 EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -TRAMP_KVM_BEGIN(cbe_thermal_kvm) - GEN_KVM cbe_thermal EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_thermal_exception b ret_from_except + + GEN_KVM cbe_thermal + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) @@ -2514,17 +2547,12 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid -/* - * Real mode exceptions actually use this too, but alternate - * instruction code patches (which end up in the common .text area) - * cannot reach these if they are put there. - */ USE_TEXT_SECTION() MASKED_INTERRUPT EXC_STD MASKED_INTERRUPT EXC_HV #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) +kvmppc_skip_interrupt: /* * Here all GPRs are unchanged from when the interrupt happened * except for r13, which is saved in SPRG_SCRATCH0. @@ -2536,7 +2564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) RFI_TO_KERNEL b . -TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) +kvmppc_skip_Hinterrupt: /* * Here all GPRs are unchanged from when the interrupt happened * except for r13, which is saved in SPRG_SCRATCH0. @@ -2549,16 +2577,6 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) b . #endif -/* - * Ensure that any handlers that get invoked from the exception prologs - * above are below the first 64KB (0x10000) of the kernel image because - * the prologs assemble the addresses of these handlers using the - * LOAD_HANDLER macro, which uses an ori instruction. - */ - -/*** Common interrupt handlers ***/ - - /* * Relocation-on interrupts: A subset of the interrupts can be delivered * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index dbc2fecc37f0..780a499c7114 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1266,7 +1266,6 @@ kvmppc_interrupt_hv: * R12 = (guest CR << 32) | interrupt vector * R13 = PACA * guest R12 saved in shadow VCPU SCRATCH0 - * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE * guest R13 saved in SPRN_SCRATCH0 */ std r9, HSTATE_SCRATCH2(r13) @@ -1367,12 +1366,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ -#ifdef CONFIG_RELOCATABLE - ld r3, HSTATE_SCRATCH1(r13) - mtctr r3 -#else mfctr r3 -#endif mfxer r4 std r3, VCPU_CTR(r9) std r4, VCPU_XER(r9) @@ -3258,7 +3252,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) * r12 is (CR << 32) | vector * r13 points to our PACA * r12 is saved in HSTATE_SCRATCH0(r13) - * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE * r9 is saved in HSTATE_SCRATCH2(r13) * r13 is saved in HSPRG1 * cfar is saved in HSTATE_CFAR(r13) @@ -3307,11 +3300,7 @@ kvmppc_bad_host_intr: ld r5, HSTATE_CFAR(r13) std r5, ORIG_GPR3(r1) mflr r3 -#ifdef CONFIG_RELOCATABLE - ld r4, HSTATE_SCRATCH1(r13) -#else mfctr r4 -#endif mfxer r5 lbz r6, PACAIRQSOFTMASK(r13) std r3, _LINK(r1) diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 0169bab544dd..1f492aa4c8d6 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -167,16 +167,9 @@ kvmppc_interrupt_pr: * R12 = (guest CR << 32) | exit handler id * R13 = PACA * HSTATE.SCRATCH0 = guest R12 - * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE */ #ifdef CONFIG_PPC64 /* Match 32-bit entry */ -#ifdef CONFIG_RELOCATABLE - std r9, HSTATE_SCRATCH2(r13) - ld r9, HSTATE_SCRATCH1(r13) - mtctr r9 - ld r9, HSTATE_SCRATCH2(r13) -#endif rotldi r12, r12, 32 /* Flip R12 halves for stw */ stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ srdi r12, r12, 32 /* shift trap into low half */ -- cgit v1.2.3 From 20c384f1ea1a0bc7320bc445c72dd02d2970d594 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 26 Mar 2020 22:01:17 +0800 Subject: vhost: refine vhost and vringh kconfig Currently, CONFIG_VHOST depends on CONFIG_VIRTUALIZATION. But vhost is not necessarily for VM since it's a generic userspace and kernel communication protocol. Such dependency may prevent archs without virtualization support from using vhost. To solve this, a dedicated vhost menu is created under drivers so CONIFG_VHOST can be decoupled out of CONFIG_VIRTUALIZATION. While at it, also squash Kconfig.vringh into vhost Kconfig file. This avoids the trick of conditional inclusion from VOP or CAIF. Then it will be easier to introduce new vringh users and common dependency for both vringh and vhost. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- arch/arm/kvm/Kconfig | 2 -- arch/arm64/kvm/Kconfig | 2 -- arch/mips/kvm/Kconfig | 2 -- arch/powerpc/kvm/Kconfig | 2 -- arch/s390/kvm/Kconfig | 4 ---- arch/x86/kvm/Kconfig | 4 ---- drivers/Kconfig | 2 ++ drivers/misc/mic/Kconfig | 4 ---- drivers/net/caif/Kconfig | 4 ---- drivers/vhost/Kconfig | 28 +++++++++++++++++++++------- drivers/vhost/Kconfig.vringh | 6 ------ 11 files changed, 23 insertions(+), 37 deletions(-) delete mode 100644 drivers/vhost/Kconfig.vringh (limited to 'arch/powerpc/kvm') diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index f591026347a5..be97393761bf 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -54,6 +54,4 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. -source "drivers/vhost/Kconfig" - endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a475c68cbfec..449386d76441 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -64,6 +64,4 @@ config KVM_ARM_PMU config KVM_INDIRECT_VECTORS def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS) -source "drivers/vhost/Kconfig" - endif # VIRTUALIZATION diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index eac25aef21e0..b91d145aa2d5 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -72,6 +72,4 @@ config KVM_MIPS_DEBUG_COP0_COUNTERS If unsure, say N. -source "drivers/vhost/Kconfig" - endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 711fca9bc6f0..12885eda324e 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -204,6 +204,4 @@ config KVM_XIVE default y depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE -source "drivers/vhost/Kconfig" - endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index d3db3d7ed077..def3b60f1fe8 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -55,8 +55,4 @@ config KVM_S390_UCONTROL If unsure, say N. -# OK, it's a little counter-intuitive to do this, but it puts it neatly under -# the virtualization menu. -source "drivers/vhost/Kconfig" - endif # VIRTUALIZATION diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 9fea0757db92..d8154e0684b6 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -107,8 +107,4 @@ config KVM_MMU_AUDIT This option adds a R/W kVM module parameter 'mmu_audit', which allows auditing of KVM MMU events at runtime. -# OK, it's a little counter-intuitive to do this, but it puts it neatly under -# the virtualization menu. -source "drivers/vhost/Kconfig" - endif # VIRTUALIZATION diff --git a/drivers/Kconfig b/drivers/Kconfig index 8befa53f43be..7a6d8b2b68b4 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -138,6 +138,8 @@ source "drivers/virt/Kconfig" source "drivers/virtio/Kconfig" +source "drivers/vhost/Kconfig" + source "drivers/hv/Kconfig" source "drivers/xen/Kconfig" diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index b6841ba6d922..8f201d019f5a 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -133,8 +133,4 @@ config VOP OS and tools for MIC to use with this driver are available from . -if VOP -source "drivers/vhost/Kconfig.vringh" -endif - endmenu diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index e74e2bb61236..9db0570c5beb 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -58,8 +58,4 @@ config CAIF_VIRTIO ---help--- The CAIF driver for CAIF over Virtio. -if CAIF_VIRTIO -source "drivers/vhost/Kconfig.vringh" -endif - endif # CAIF_DRIVERS diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 3d03ccbd1adc..e775beddc36a 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,4 +1,23 @@ # SPDX-License-Identifier: GPL-2.0-only +config VHOST_RING + tristate + help + This option is selected by any driver which needs to access + the host side of a virtio ring. + +config VHOST + tristate + select VHOST_IOTLB + help + This option is selected by any driver which needs to access + the core of vhost. + +menuconfig VHOST_MENU + bool "VHOST drivers" + default y + +if VHOST_MENU + config VHOST_NET tristate "Host kernel accelerator for virtio net" depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) @@ -23,8 +42,8 @@ config VHOST_SCSI config VHOST_VSOCK tristate "vhost virtio-vsock driver" depends on VSOCKETS && EVENTFD - select VIRTIO_VSOCKETS_COMMON select VHOST + select VIRTIO_VSOCKETS_COMMON default n ---help--- This kernel module can be loaded in the host kernel to provide AF_VSOCK @@ -34,12 +53,6 @@ config VHOST_VSOCK To compile this driver as a module, choose M here: the module will be called vhost_vsock. -config VHOST - tristate - ---help--- - This option is selected by any driver which needs to access - the core of vhost. - config VHOST_CROSS_ENDIAN_LEGACY bool "Cross-endian support for vhost" default n @@ -54,3 +67,4 @@ config VHOST_CROSS_ENDIAN_LEGACY adds some overhead, it is disabled by default. If unsure, say "N". +endif diff --git a/drivers/vhost/Kconfig.vringh b/drivers/vhost/Kconfig.vringh deleted file mode 100644 index c1fe36a9b8d4..000000000000 --- a/drivers/vhost/Kconfig.vringh +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config VHOST_RING - tristate - ---help--- - This option is selected by any driver which needs to access - the host side of a virtio ring. -- cgit v1.2.3 From 03911132aafd6727e59408e497c049402a5a11fa Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 6 Apr 2020 20:03:51 -0700 Subject: mm/vma: replace all remaining open encodings with is_vm_hugetlb_page() This replaces all remaining open encodings with is_vm_hugetlb_page(). Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Alexander Viro Cc: Will Deacon Cc: "Aneesh Kumar K.V" Cc: Nick Piggin Cc: Peter Zijlstra Cc: Arnd Bergmann Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Andy Lutomirski Cc: Dave Hansen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Mel Gorman Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/1582520593-30704-4-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/powerpc/kvm/e500_mmu_host.c | 2 +- fs/binfmt_elf.c | 3 ++- include/asm-generic/tlb.h | 3 ++- kernel/events/core.c | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 425d13806645..df9989cf7ba3 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -422,7 +422,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, break; } } else if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_HUGETLB)) { + is_vm_hugetlb_page(vma)) { unsigned long psize = vma_kernel_pagesize(vma); tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f4713ea76e82..1eb63867e266 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1317,7 +1318,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, } /* Hugetlb memory check */ - if (vma->vm_flags & VM_HUGETLB) { + if (is_vm_hugetlb_page(vma)) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) goto whole; if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f391f6b500b4..3f1649a8cf55 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -398,7 +399,7 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) * We rely on tlb_end_vma() to issue a flush, such that when we reset * these values the batch is empty. */ - tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB); + tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); } diff --git a/kernel/events/core.c b/kernel/events/core.c index 81e6d80cb219..55e44417f66d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -7973,7 +7974,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) flags |= MAP_EXECUTABLE; if (vma->vm_flags & VM_LOCKED) flags |= MAP_LOCKED; - if (vma->vm_flags & VM_HUGETLB) + if (is_vm_hugetlb_page(vma)) flags |= MAP_HUGETLB; if (file) { -- cgit v1.2.3