diff options
41 files changed, 981 insertions, 540 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 9fa2bf8c3f6f..695544420ff2 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -959,7 +959,8 @@ documentation when it pops into existence). 4.37 KVM_ENABLE_CAP Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM -Architectures: ppc, s390 +Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM), + mips (only KVM_CAP_ENABLE_CAP), ppc, s390 Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM) Parameters: struct kvm_enable_cap (in) Returns: 0 on success; -1 on error diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 53838d9c6295..c59bd9bc41ef 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -169,6 +169,10 @@ Shadow pages contain the following information: Contains the value of cr4.smep && !cr0.wp for which the page is valid (pages for which this is true are different from other pages; see the treatment of cr0.wp=0 below). + role.smap_andnot_wp: + Contains the value of cr4.smap && !cr0.wp for which the page is valid + (pages for which this is true are different from other pages; see the + treatment of cr0.wp=0 below). gfn: Either the guest page table containing the translations shadowed by this page, or the base page frame for linear translations. See role.direct. @@ -344,10 +348,16 @@ on fault type: (user write faults generate a #PF) -In the first case there is an additional complication if CR4.SMEP is -enabled: since we've turned the page into a kernel page, the kernel may now -execute it. We handle this by also setting spte.nx. If we get a user -fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back. +In the first case there are two additional complications: +- if CR4.SMEP is enabled: since we've turned the page into a kernel page, + the kernel may now execute it. We handle this by also setting spte.nx. + If we get a user fetch or read fault, we'll change spte.u=1 and + spte.nx=gpte.nx back. +- if CR4.SMAP is disabled: since the page has been changed to a kernel + page, it can not be reused when CR4.SMAP is enabled. We set + CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note, + here we do not care the case that CR4.SMAP is enabled since KVM will + directly inject #PF to guest due to failed permission check. To prevent an spte that was converted into a kernel page with cr0.wp=0 from being written by the kernel after cr0.wp has changed to 1, we make diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d9631ecddd56..e41cb11f71b2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -553,13 +553,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Enter the guest */ trace_kvm_entry(*vcpu_pc(vcpu)); - kvm_guest_enter(); + __kvm_guest_enter(); vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; - kvm_guest_exit(); + __kvm_guest_exit(); trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* * We may have taken a host interrupt in HYP mode (ie diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1d5accbd3dcf..7f473e6d3bf5 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1155,7 +1155,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) */ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) { - struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; @@ -1718,8 +1719,9 @@ out: } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { /* @@ -1733,7 +1735,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { hva_t hva = mem->userspace_addr; @@ -1838,7 +1840,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) { } diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 4c25823563fe..e8c8d9d0c45f 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -839,7 +839,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index bb68e8d520e8..cd4c129ce743 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -198,15 +198,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { unsigned long npages = 0; @@ -393,7 +394,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_mips_deliver_interrupts(vcpu, kvm_read_c0_guest_cause(vcpu->arch.cop0)); - kvm_guest_enter(); + __kvm_guest_enter(); /* Disable hardware page table walking while in guest */ htw_stop(); @@ -403,7 +404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Re-enable HTW before enabling interrupts */ htw_start(); - kvm_guest_exit(); + __kvm_guest_exit(); local_irq_enable(); if (vcpu->sigset_active) @@ -968,6 +969,7 @@ out: /* Get (and clear) the dirty memory log for a memory slot. */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; unsigned long ga, ga_end; int is_dirty = 0; @@ -982,7 +984,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - memslot = &kvm->memslots->memslots[log->slot]; + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a193a13cf08b..d91f65b28e32 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -698,7 +698,7 @@ struct kvm_vcpu_arch { static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index b8475daad884..c6ef05bd0765 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -182,10 +182,11 @@ extern int kvmppc_core_create_memslot(struct kvm *kvm, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old); + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, @@ -243,10 +244,11 @@ struct kvmppc_ops { void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); int (*prepare_memory_region)(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); void (*commit_memory_region)(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old); + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new); int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 453a8a47a467..05ea8fc7f829 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -757,16 +757,17 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { - kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 1a4acf8bf4f4..dab68b7af3f2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -650,7 +650,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm) int srcu_idx; srcu_idx = srcu_read_lock(&kvm->srcu); - slots = kvm->memslots; + slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { /* * This assumes it is acceptable to lose reference and diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48d3c5d2ecc9..68d067ad4222 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vnext; int i; int srcu_idx; @@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); @@ -2320,6 +2321,7 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, struct kvm_dirty_log *log) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r; unsigned long n; @@ -2330,7 +2332,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -2373,16 +2376,18 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return 0; } static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; if (npages && old->npages) { @@ -2392,7 +2397,8 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, * since the rmap array starts out as all zeroes, * i.e. no pages are dirty. */ - memslot = id_to_memslot(kvm->memslots, mem->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, mem->slot); kvmppc_hv_get_dirty_log(kvm, memslot, NULL); } } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index f57383941d03..64891b081ad5 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1530,6 +1530,7 @@ out: static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, struct kvm_dirty_log *log) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; struct kvm_vcpu *vcpu; ulong ga, ga_end; @@ -1545,7 +1546,8 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); @@ -1571,14 +1573,15 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return 0; } static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { return; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6c1316a15a27..cc5842657161 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1004,10 +1004,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - local_irq_enable(); - trace_kvm_exit(exit_nr, vcpu); - kvm_guest_exit(); + __kvm_guest_exit(); + + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; @@ -1784,14 +1784,15 @@ int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return 0; } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ac3ddf115f3d..e5dde32fe71f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -115,7 +115,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } - kvm_guest_enter(); + __kvm_guest_enter(); return 1; } @@ -595,18 +595,19 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { - kvmppc_core_commit_memory_region(kvm, mem, old); + kvmppc_core_commit_memory_region(kvm, mem, old, new); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 444c412307cb..3024acbe1f9d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -636,7 +636,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9cb6cfaac986..71530a43a728 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -240,6 +240,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, { int r; unsigned long n; + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int is_dirty = 0; @@ -249,7 +250,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -2015,12 +2017,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) * As PF_VCPU will be used in fault handler, between * guest_enter and guest_exit should be no uaccess. */ - preempt_disable(); - kvm_guest_enter(); - preempt_enable(); + local_irq_disable(); + __kvm_guest_enter(); + local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - kvm_guest_exit(); + local_irq_disable(); + __kvm_guest_exit(); + local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = vcpu_post_run(vcpu, exit_reason); @@ -2583,7 +2587,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { /* A few sanity checks. We can have memory slots which have to be @@ -2601,8 +2605,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { int rc; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dea2e7e962e3..7276107b35df 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -207,6 +207,7 @@ union kvm_mmu_page_role { unsigned nxe:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; + unsigned smap_andnot_wp:1; }; }; @@ -400,6 +401,7 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_header_cache; struct fpu guest_fpu; + bool eager_fpu; u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; @@ -635,6 +637,8 @@ struct kvm_arch { #endif bool boot_vcpu_runs_old_kvmclock; + + u64 disabled_quirks; }; struct kvm_vm_stat { @@ -687,12 +691,13 @@ struct msr_data { struct kvm_lapic_irq { u32 vector; - u32 delivery_mode; - u32 dest_mode; - u32 level; - u32 trig_mode; + u16 delivery_mode; + u16 dest_mode; + bool level; + u16 trig_mode; u32 shorthand; u32 dest_id; + bool msi_redir_hint; }; struct kvm_x86_ops { @@ -709,7 +714,7 @@ struct kvm_x86_ops { /* Create, but do not attach this VCPU */ struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); - void (*vcpu_reset)(struct kvm_vcpu *vcpu); + void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); @@ -743,6 +748,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); @@ -868,7 +874,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, struct kvm_memory_slot *memslot); void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot); void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, @@ -999,7 +1005,7 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); -int fx_init(struct kvm_vcpu *vcpu); +int fx_init(struct kvm_vcpu *vcpu, bool init_event); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); @@ -1143,7 +1149,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); -void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address); diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 6167fd798188..655e07a48f6c 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -41,5 +41,6 @@ struct pvclock_wall_clock { #define PVCLOCK_TSC_STABLE_BIT (1 << 0) #define PVCLOCK_GUEST_STOPPED (1 << 1) +#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d6b078e9fa28..628954ceede1 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -86,7 +86,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, offset = pvclock_get_nsec_offset(src); ret = src->system_time + offset; ret_flags = src->flags; - rdtsc_barrier(); *cycles = ret; *flags = ret_flags; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d7dcef58aefa..2fec75e4b1e1 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -345,4 +345,7 @@ struct kvm_xcrs { struct kvm_sync_regs { }; +#define KVM_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_QUIRK_CD_NW_CLEARED (1 << 1) + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9435620062df..cc34cece853e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,7 +331,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val) apic_write(APIC_EOI, APIC_EOI_ACK); } -void kvm_guest_cpu_init(void) +static void kvm_guest_cpu_init(void) { if (!kvm_para_available()) return; @@ -655,7 +655,7 @@ static inline void spin_time_accum_blocked(u64 start) static struct dentry *d_spin_debug; static struct dentry *d_kvm_debug; -struct dentry *kvm_init_debugfs(void) +static struct dentry *kvm_init_debugfs(void) { d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); if (!d_kvm_debug) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 42caaef897c8..49487b488061 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -24,6 +24,7 @@ #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/memblock.h> +#include <linux/sched.h> #include <asm/x86_init.h> #include <asm/reboot.h> @@ -217,8 +218,10 @@ static void kvm_shutdown(void) void __init kvmclock_init(void) { + struct pvclock_vcpu_time_info *vcpu_time; unsigned long mem; - int size; + int size, cpu; + u8 flags; size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); @@ -264,7 +267,14 @@ void __init kvmclock_init(void) pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) - pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + pvclock_set_flags(~0); + + cpu = get_cpu(); + vcpu_time = &hv_clock[cpu].pvti; + flags = pvclock_read_flags(vcpu_time); + if (flags & PVCLOCK_COUNTS_FROM_ZERO) + set_sched_clock_stable(); + put_cpu(); } int __init kvm_setup_vsyscall_timeinfo(void) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59b69f6a2844..9dadf8d67873 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,6 +16,8 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> +#include <asm/i387.h> /* For use_eager_fpu. Ugh! */ +#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */ #include <asm/user.h> #include <asm/xsave.h> #include "cpuid.h" @@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. @@ -411,6 +415,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } break; } + case 6: /* Thermal management */ + entry->eax = 0x4; /* allow ARAT */ + entry->ebx = 0; + entry->ecx = 0; + entry->edx = 0; + break; case 7: { entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* Mask ebx against host capability word 9 */ @@ -587,7 +597,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; case 3: /* Processor serial number */ case 5: /* MONITOR/MWAIT */ - case 6: /* Thermal management */ case 0xC0000002: case 0xC0000003: case 0xC0000004: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c3b1ad9fca81..496b3695d3d3 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->ebx & bit(X86_FEATURE_RTM)); } + +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_MPX)); +} #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 630bcb0d7a04..9b655d113fc6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <asm/kvm_emulate.h> #include <linux/stringify.h> +#include <asm/debugreg.h> #include "x86.h" #include "tss.h" @@ -523,13 +524,9 @@ static void masked_increment(ulong *reg, ulong mask, int inc) static inline void register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc) { - ulong mask; + ulong *preg = reg_rmw(ctxt, reg); - if (ctxt->ad_bytes == sizeof(unsigned long)) - mask = ~0UL; - else - mask = ad_mask(ctxt); - masked_increment(reg_rmw(ctxt, reg), mask, inc); + assign_register(preg, *preg + inc, ctxt->ad_bytes); } static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) @@ -2573,6 +2570,30 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, return true; } +static void string_registers_quirk(struct x86_emulate_ctxt *ctxt) +{ + /* + * Intel CPUs mask the counter and pointers in quite strange + * manner when ECX is zero due to REP-string optimizations. + */ +#ifdef CONFIG_X86_64 + if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt)) + return; + + *reg_write(ctxt, VCPU_REGS_RCX) = 0; + + switch (ctxt->b) { + case 0xa4: /* movsb */ + case 0xa5: /* movsd/w */ + *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1; + /* fall through */ + case 0xaa: /* stosb */ + case 0xab: /* stosd/w */ + *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1; + } +#endif +} + static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, struct tss_segment_16 *tss) { @@ -2849,7 +2870,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, ulong old_tss_base = ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; - ulong desc_addr; + ulong desc_addr, dr7; /* FIXME: old_tss_base == ~0 ? */ @@ -2934,6 +2955,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, ret = em_push(ctxt); } + ops->get_dr(ctxt, 7, &dr7); + ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN)); + return ret; } @@ -3840,7 +3864,7 @@ static const struct opcode group5[] = { F(DstMem | SrcNone | Lock, em_inc), F(DstMem | SrcNone | Lock, em_dec), I(SrcMem | NearBranch, em_call_near_abs), - I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), + I(SrcMemFAddr | ImplicitOps, em_call_far), I(SrcMem | NearBranch, em_jmp_abs), I(SrcMemFAddr | ImplicitOps, em_jmp_far), I(SrcMem | Stack, em_push), D(Undefined), @@ -4910,6 +4934,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if (ctxt->rep_prefix && (ctxt->d & String)) { /* All REP prefixes have the same first termination condition */ if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { + string_registers_quirk(ctxt); ctxt->eip = ctxt->_eip; ctxt->eflags &= ~X86_EFLAGS_RF; goto done; diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 28146f03c514..856f79105bb5 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -349,6 +349,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.delivery_mode = entry->fields.delivery_mode << 8; irqe.level = 1; irqe.shorthand = 0; + irqe.msi_redir_hint = false; if (irqe.trig_mode == IOAPIC_EDGE_TRIG) ioapic->irr_delivered |= 1 << irq; @@ -637,11 +638,9 @@ void kvm_ioapic_destroy(struct kvm *kvm) struct kvm_ioapic *ioapic = kvm->arch.vioapic; cancel_delayed_work_sync(&ioapic->eoi_inject); - if (ioapic) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); - kvm->arch.vioapic = NULL; - kfree(ioapic); - } + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + kvm->arch.vioapic = NULL; + kfree(ioapic); } int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 72298b3ac025..9efff9e5b58c 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -31,6 +31,8 @@ #include "ioapic.h" +#include "lapic.h" + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -48,11 +50,6 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, line_status); } -inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) -{ - return irq->delivery_mode == APIC_DM_LOWEST; -} - int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map) { @@ -60,7 +57,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_vcpu *vcpu, *lowest = NULL; if (irq->dest_mode == 0 && irq->dest_id == 0xff && - kvm_is_dm_lowest_prio(irq)) { + kvm_lowest_prio_delivery(irq)) { printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); irq->delivery_mode = APIC_DM_FIXED; } @@ -76,7 +73,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, irq->dest_id, irq->dest_mode)) continue; - if (!kvm_is_dm_lowest_prio(irq)) { + if (!kvm_lowest_prio_delivery(irq)) { if (r < 0) r = 0; r += kvm_apic_set_irq(vcpu, irq, dest_map); @@ -106,9 +103,10 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; irq->delivery_mode = e->msi.data & 0x700; + irq->msi_redir_hint = ((e->msi.address_lo + & MSI_ADDR_REDIRECTION_LOWPRI) > 0); irq->level = 1; irq->shorthand = 0; - /* TODO Deal with RH bit of MSI message address */ } int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 629af0f1c5c4..c789e00dfa8b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -240,6 +240,15 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } +static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id) +{ + u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + + apic_set_reg(apic, APIC_ID, id << 24); + apic_set_reg(apic, APIC_LDR, ldr); + recalculate_apic_map(apic->vcpu->kvm); +} + static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) { return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); @@ -728,7 +737,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, dst = map->logical_map[cid]; - if (irq->delivery_mode == APIC_DM_LOWEST) { + if (kvm_lowest_prio_delivery(irq)) { int l = -1; for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) @@ -914,9 +923,10 @@ static void apic_send_ipi(struct kvm_lapic *apic) irq.vector = icr_low & APIC_VECTOR_MASK; irq.delivery_mode = icr_low & APIC_MODE_MASK; irq.dest_mode = icr_low & APIC_DEST_MASK; - irq.level = icr_low & APIC_INT_ASSERT; + irq.level = (icr_low & APIC_INT_ASSERT) != 0; irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; irq.shorthand = icr_low & APIC_SHORT_MASK; + irq.msi_redir_hint = false; if (apic_x2apic_mode(apic)) irq.dest_id = icr_high; else @@ -926,10 +936,11 @@ static void apic_send_ipi(struct kvm_lapic *apic) apic_debug("icr_high 0x%x, icr_low 0x%x, " "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " - "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", + "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, " + "msi_redir_hint 0x%x\n", icr_high, icr_low, irq.shorthand, irq.dest_id, irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, - irq.vector); + irq.vector, irq.msi_redir_hint); kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); } @@ -1536,9 +1547,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) if ((old_value ^ value) & X2APIC_ENABLE) { if (value & X2APIC_ENABLE) { - u32 id = kvm_apic_id(apic); - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); - kvm_apic_set_ldr(apic, ldr); + kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); } else kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); @@ -1557,7 +1566,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) } -void kvm_lapic_reset(struct kvm_vcpu *vcpu) +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic; int i; @@ -1571,19 +1580,22 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - kvm_apic_set_id(apic, vcpu->vcpu_id); + if (!init_event) + kvm_apic_set_id(apic, vcpu->vcpu_id); kvm_apic_set_version(apic->vcpu); for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); apic->lapic_timer.timer_mode = 0; - apic_set_reg(apic, APIC_LVT0, - SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); + if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED)) + apic_set_reg(apic, APIC_LVT0, + SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_set_reg(apic, APIC_DFR, 0xffffffffU); apic_set_spiv(apic, 0xff); apic_set_reg(apic, APIC_TASKPRI, 0); - kvm_apic_set_ldr(apic, 0); + if (!apic_x2apic_mode(apic)) + kvm_apic_set_ldr(apic, 0); apic_set_reg(apic, APIC_ESR, 0); apic_set_reg(apic, APIC_ICR, 0); apic_set_reg(apic, APIC_ICR2, 0); @@ -1712,7 +1724,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ - kvm_lapic_reset(vcpu); + kvm_lapic_reset(vcpu, false); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); return 0; @@ -2046,8 +2058,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) pe = xchg(&apic->pending_events, 0); if (test_bit(KVM_APIC_INIT, &pe)) { - kvm_lapic_reset(vcpu); - kvm_vcpu_reset(vcpu); + kvm_lapic_reset(vcpu, true); + kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9d28383fc1e7..71b150cae5f9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -48,7 +48,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); void kvm_apic_accept_events(struct kvm_vcpu *vcpu); -void kvm_lapic_reset(struct kvm_vcpu *vcpu); +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); @@ -153,6 +153,12 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) return vcpu->arch.apic->pending_events; } +static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) +{ + return (irq->delivery_mode == APIC_DM_LOWEST || + irq->msi_redir_hint); +} + bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d43867c33bc4..a65ce12470f8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -804,30 +804,32 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, return &slot->arch.lpage_info[level - 2][idx]; } -static void account_shadowed(struct kvm *kvm, gfn_t gfn) +static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memory_slot *slot; struct kvm_lpage_info *linfo; + gfn_t gfn; int i; + gfn = sp->gfn; slot = gfn_to_memslot(kvm, gfn); - for (i = PT_DIRECTORY_LEVEL; - i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { + for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { linfo = lpage_info_slot(gfn, slot, i); linfo->write_count += 1; } kvm->arch.indirect_shadow_pages++; } -static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) +static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memory_slot *slot; struct kvm_lpage_info *linfo; + gfn_t gfn; int i; + gfn = sp->gfn; slot = gfn_to_memslot(kvm, gfn); - for (i = PT_DIRECTORY_LEVEL; - i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { + for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { linfo = lpage_info_slot(gfn, slot, i); linfo->write_count -= 1; WARN_ON(linfo->write_count < 0); @@ -858,8 +860,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) page_size = kvm_host_page_size(kvm, gfn); - for (i = PT_PAGE_TABLE_LEVEL; - i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { + for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { if (page_size >= KVM_HPAGE_SIZE(i)) ret = i; else @@ -1142,6 +1143,11 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) return NULL; } +#define for_each_rmap_spte(_rmap_, _iter_, _spte_) \ + for (_spte_ = rmap_get_first(*_rmap_, _iter_); \ + _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ + _spte_ = rmap_get_next(_iter_)) + static void drop_spte(struct kvm *kvm, u64 *sptep) { if (mmu_spte_clear_track_bits(sptep)) @@ -1205,12 +1211,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, struct rmap_iterator iter; bool flush = false; - for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); - + for_each_rmap_spte(rmapp, &iter, sptep) flush |= spte_write_protect(kvm, sptep, pt_protect); - sptep = rmap_get_next(&iter); - } return flush; } @@ -1232,12 +1234,8 @@ static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp) struct rmap_iterator iter; bool flush = false; - for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); - + for_each_rmap_spte(rmapp, &iter, sptep) flush |= spte_clear_dirty(kvm, sptep); - sptep = rmap_get_next(&iter); - } return flush; } @@ -1259,12 +1257,8 @@ static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) struct rmap_iterator iter; bool flush = false; - for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); - + for_each_rmap_spte(rmapp, &iter, sptep) flush |= spte_set_dirty(kvm, sptep); - sptep = rmap_get_next(&iter); - } return flush; } @@ -1351,8 +1345,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) slot = gfn_to_memslot(kvm, gfn); - for (i = PT_PAGE_TABLE_LEVEL; - i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { + for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { rmapp = __gfn_to_rmap(gfn, i, slot); write_protected |= __rmap_write_protect(kvm, rmapp, true); } @@ -1360,24 +1353,28 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) return write_protected; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - unsigned long data) +static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) { u64 *sptep; struct rmap_iterator iter; - int need_tlb_flush = 0; + bool flush = false; while ((sptep = rmap_get_first(*rmapp, &iter))) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); - rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n", - sptep, *sptep, gfn, level); + rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); drop_spte(kvm, sptep); - need_tlb_flush = 1; + flush = true; } - return need_tlb_flush; + return flush; +} + +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) +{ + return kvm_zap_rmapp(kvm, rmapp); } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, @@ -1394,8 +1391,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, WARN_ON(pte_huge(*ptep)); new_pfn = pte_pfn(*ptep); - for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { - BUG_ON(!is_shadow_present_pte(*sptep)); +restart: + for_each_rmap_spte(rmapp, &iter, sptep) { rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", sptep, *sptep, gfn, level); @@ -1403,7 +1400,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, if (pte_write(*ptep)) { drop_spte(kvm, sptep); - sptep = rmap_get_first(*rmapp, &iter); + goto restart; } else { new_spte = *sptep & ~PT64_BASE_ADDR_MASK; new_spte |= (u64)new_pfn << PAGE_SHIFT; @@ -1414,7 +1411,6 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, mmu_spte_clear_track_bits(sptep); mmu_spte_set(sptep, new_spte); - sptep = rmap_get_next(&iter); } } @@ -1424,6 +1420,74 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, return 0; } +struct slot_rmap_walk_iterator { + /* input fields. */ + struct kvm_memory_slot *slot; + gfn_t start_gfn; + gfn_t end_gfn; + int start_level; + int end_level; + + /* output fields. */ + gfn_t gfn; + unsigned long *rmap; + int level; + + /* private field. */ + unsigned long *end_rmap; +}; + +static void +rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) +{ + iterator->level = level; + iterator->gfn = iterator->start_gfn; + iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot); + iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level, + iterator->slot); +} + +static void +slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, + struct kvm_memory_slot *slot, int start_level, + int end_level, gfn_t start_gfn, gfn_t end_gfn) +{ + iterator->slot = slot; + iterator->start_level = start_level; + iterator->end_level = end_level; + iterator->start_gfn = start_gfn; + iterator->end_gfn = end_gfn; + + rmap_walk_init_level(iterator, iterator->start_level); +} + +static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) +{ + return !!iterator->rmap; +} + +static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) +{ + if (++iterator->rmap <= iterator->end_rmap) { + iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); + return; + } + + if (++iterator->level > iterator->end_level) { + iterator->rmap = NULL; + return; + } + + rmap_walk_init_level(iterator, iterator->level); +} + +#define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, \ + _start_gfn, _end_gfn, _iter_) \ + for (slot_rmap_walk_init(_iter_, _slot_, _start_level_, \ + _end_level_, _start_gfn, _end_gfn); \ + slot_rmap_walk_okay(_iter_); \ + slot_rmap_walk_next(_iter_)) + static int kvm_handle_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, @@ -1435,10 +1499,10 @@ static int kvm_handle_hva_range(struct kvm *kvm, int level, unsigned long data)) { - int j; - int ret = 0; struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + struct slot_rmap_walk_iterator iterator; + int ret = 0; slots = kvm_memslots(kvm); @@ -1458,26 +1522,11 @@ static int kvm_handle_hva_range(struct kvm *kvm, gfn_start = hva_to_gfn_memslot(hva_start, memslot); gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - for (j = PT_PAGE_TABLE_LEVEL; - j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { - unsigned long idx, idx_end; - unsigned long *rmapp; - gfn_t gfn = gfn_start; - - /* - * {idx(page_j) | page_j intersects with - * [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}. - */ - idx = gfn_to_index(gfn_start, memslot->base_gfn, j); - idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j); - - rmapp = __gfn_to_rmap(gfn_start, j, memslot); - - for (; idx <= idx_end; - ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j))) - ret |= handler(kvm, rmapp++, memslot, - gfn, j, data); - } + for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL, + PT_MAX_HUGEPAGE_LEVEL, gfn_start, gfn_end - 1, + &iterator) + ret |= handler(kvm, iterator.rmap, memslot, + iterator.gfn, iterator.level, data); } return ret; @@ -1518,16 +1567,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, BUG_ON(!shadow_accessed_mask); - for (sptep = rmap_get_first(*rmapp, &iter); sptep; - sptep = rmap_get_next(&iter)) { - BUG_ON(!is_shadow_present_pte(*sptep)); - + for_each_rmap_spte(rmapp, &iter, sptep) if (*sptep & shadow_accessed_mask) { young = 1; clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); } - } + trace_kvm_age_page(gfn, level, slot, young); return young; } @@ -1548,15 +1594,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (!shadow_accessed_mask) goto out; - for (sptep = rmap_get_first(*rmapp, &iter); sptep; - sptep = rmap_get_next(&iter)) { - BUG_ON(!is_shadow_present_pte(*sptep)); - + for_each_rmap_spte(rmapp, &iter, sptep) if (*sptep & shadow_accessed_mask) { young = 1; break; } - } out: return young; } @@ -2093,7 +2135,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (level > PT_PAGE_TABLE_LEVEL && need_sync) kvm_sync_pages(vcpu, gfn); - account_shadowed(vcpu->kvm, gfn); + account_shadowed(vcpu->kvm, sp); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; init_shadow_page_table(sp); @@ -2274,7 +2316,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, kvm_mmu_unlink_parents(kvm, sp); if (!sp->role.invalid && !sp->role.direct) - unaccount_shadowed(kvm, sp->gfn); + unaccount_shadowed(kvm, sp); if (sp->unsync) kvm_unlink_unsync_page(kvm, sp); @@ -2393,19 +2435,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); static int get_mtrr_type(struct mtrr_state_type *mtrr_state, u64 start, u64 end) { - int i; u64 base, mask; u8 prev_match, curr_match; - int num_var_ranges = KVM_NR_VAR_MTRR; + int i, num_var_ranges = KVM_NR_VAR_MTRR; - if (!mtrr_state->enabled) - return 0xFF; + /* MTRR is completely disabled, use UC for all of physical memory. */ + if (!(mtrr_state->enabled & 0x2)) + return MTRR_TYPE_UNCACHABLE; /* Make end inclusive end, instead of exclusive */ end--; /* Look in fixed ranges. Just return the type as per start */ - if (mtrr_state->have_fixed && (start < 0x100000)) { + if (mtrr_state->have_fixed && (mtrr_state->enabled & 0x1) && + (start < 0x100000)) { int idx; if (start < 0x80000) { @@ -2428,9 +2471,6 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state, * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - if (!(mtrr_state->enabled & 2)) - return mtrr_state->def_type; - prev_match = 0xFF; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state; @@ -2692,15 +2732,17 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, u64 *start, u64 *end) { struct page *pages[PTE_PREFETCH_NUM]; + struct kvm_memory_slot *slot; unsigned access = sp->role.access; int i, ret; gfn_t gfn; gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); - if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK)) + slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK); + if (!slot) return -1; - ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); + ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start); if (ret <= 0) return -1; @@ -3475,10 +3517,12 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu) static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, pfn_t *pfn, bool write, bool *writable) { + struct kvm_memory_slot *slot; bool async; - *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable); - + slot = gfn_to_memslot(vcpu->kvm, gfn); + async = false; + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable); if (!async) return false; /* *pfn has correct page already */ @@ -3492,8 +3536,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return true; } - *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable); - + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); return false; } @@ -3736,8 +3779,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, } } -void update_permission_bitmask(struct kvm_vcpu *vcpu, - struct kvm_mmu *mmu, bool ept) +static void update_permission_bitmask(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, bool ept) { unsigned bit, byte, pfec; u8 map; @@ -3918,6 +3961,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) { bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); struct kvm_mmu *context = &vcpu->arch.mmu; MMU_WARN_ON(VALID_PAGE(context->root_hpa)); @@ -3936,6 +3980,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) context->base_role.cr0_wp = is_write_protection(vcpu); context->base_role.smep_andnot_wp = smep && !is_write_protection(vcpu); + context->base_role.smap_andnot_wp + = smap && !is_write_protection(vcpu); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); @@ -4207,12 +4253,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; - union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page; + union kvm_mmu_page_role mask = { }; + + mask.cr0_wp = 1; + mask.cr4_pae = 1; + mask.nxe = 1; + mask.smep_andnot_wp = 1; + mask.smap_andnot_wp = 1; /* * If we don't have indirect shadow pages, it means no page is @@ -4238,7 +4290,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); - mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { @@ -4412,36 +4463,113 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) init_kvm_mmu(vcpu); } -void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot) +/* The return value indicates if tlb flush on all vcpus is needed. */ +typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap); + +/* The caller should hold mmu-lock before calling this function. */ +static bool +slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, int start_level, int end_level, + gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) { - gfn_t last_gfn; - int i; + struct slot_rmap_walk_iterator iterator; bool flush = false; - last_gfn = memslot->base_gfn + memslot->npages - 1; + for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, + end_gfn, &iterator) { + if (iterator.rmap) + flush |= fn(kvm, iterator.rmap); - spin_lock(&kvm->mmu_lock); + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { + if (flush && lock_flush_tlb) { + kvm_flush_remote_tlbs(kvm); + flush = false; + } + cond_resched_lock(&kvm->mmu_lock); + } + } - for (i = PT_PAGE_TABLE_LEVEL; - i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { - unsigned long *rmapp; - unsigned long last_index, index; + if (flush && lock_flush_tlb) { + kvm_flush_remote_tlbs(kvm); + flush = false; + } + + return flush; +} - rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; - last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); +static bool +slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, int start_level, int end_level, + bool lock_flush_tlb) +{ + return slot_handle_level_range(kvm, memslot, fn, start_level, + end_level, memslot->base_gfn, + memslot->base_gfn + memslot->npages - 1, + lock_flush_tlb); +} - for (index = 0; index <= last_index; ++index, ++rmapp) { - if (*rmapp) - flush |= __rmap_write_protect(kvm, rmapp, - false); +static bool +slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, bool lock_flush_tlb) +{ + return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, + PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); +} - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) - cond_resched_lock(&kvm->mmu_lock); - } +static bool +slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, bool lock_flush_tlb) +{ + return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, + PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); +} + +static bool +slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, bool lock_flush_tlb) +{ + return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, + PT_PAGE_TABLE_LEVEL, lock_flush_tlb); +} + +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + + slots = kvm_memslots(kvm); + + spin_lock(&kvm->mmu_lock); + kvm_for_each_memslot(memslot, slots) { + gfn_t start, end; + + start = max(gfn_start, memslot->base_gfn); + end = min(gfn_end, memslot->base_gfn + memslot->npages); + if (start >= end) + continue; + + slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, + PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, + start, end - 1, true); } spin_unlock(&kvm->mmu_lock); +} + +static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp) +{ + return __rmap_write_protect(kvm, rmapp, false); +} + +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + bool flush; + + spin_lock(&kvm->mmu_lock); + flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect, + false); + spin_unlock(&kvm->mmu_lock); /* * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log() @@ -4474,9 +4602,8 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, pfn_t pfn; struct kvm_mmu_page *sp; - for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); - +restart: + for_each_rmap_spte(rmapp, &iter, sptep) { sp = page_header(__pa(sptep)); pfn = spte_to_pfn(*sptep); @@ -4491,71 +4618,31 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, !kvm_is_reserved_pfn(pfn) && PageTransCompound(pfn_to_page(pfn))) { drop_spte(kvm, sptep); - sptep = rmap_get_first(*rmapp, &iter); need_tlb_flush = 1; - } else - sptep = rmap_get_next(&iter); + goto restart; + } } return need_tlb_flush; } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - struct kvm_memory_slot *memslot) + const struct kvm_memory_slot *memslot) { - bool flush = false; - unsigned long *rmapp; - unsigned long last_index, index; - + /* FIXME: const-ify all uses of struct kvm_memory_slot. */ spin_lock(&kvm->mmu_lock); - - rmapp = memslot->arch.rmap[0]; - last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1, - memslot->base_gfn, PT_PAGE_TABLE_LEVEL); - - for (index = 0; index <= last_index; ++index, ++rmapp) { - if (*rmapp) - flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp); - - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { - if (flush) { - kvm_flush_remote_tlbs(kvm); - flush = false; - } - cond_resched_lock(&kvm->mmu_lock); - } - } - - if (flush) - kvm_flush_remote_tlbs(kvm); - + slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot, + kvm_mmu_zap_collapsible_spte, true); spin_unlock(&kvm->mmu_lock); } void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot) { - gfn_t last_gfn; - unsigned long *rmapp; - unsigned long last_index, index; - bool flush = false; - - last_gfn = memslot->base_gfn + memslot->npages - 1; + bool flush; spin_lock(&kvm->mmu_lock); - - rmapp = memslot->arch.rmap[PT_PAGE_TABLE_LEVEL - 1]; - last_index = gfn_to_index(last_gfn, memslot->base_gfn, - PT_PAGE_TABLE_LEVEL); - - for (index = 0; index <= last_index; ++index, ++rmapp) { - if (*rmapp) - flush |= __rmap_clear_dirty(kvm, rmapp); - - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) - cond_resched_lock(&kvm->mmu_lock); - } - + flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false); spin_unlock(&kvm->mmu_lock); lockdep_assert_held(&kvm->slots_lock); @@ -4574,31 +4661,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty); void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, struct kvm_memory_slot *memslot) { - gfn_t last_gfn; - int i; - bool flush = false; - - last_gfn = memslot->base_gfn + memslot->npages - 1; + bool flush; spin_lock(&kvm->mmu_lock); - - for (i = PT_PAGE_TABLE_LEVEL + 1; /* skip rmap for 4K page */ - i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { - unsigned long *rmapp; - unsigned long last_index, index; - - rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; - last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); - - for (index = 0; index <= last_index; ++index, ++rmapp) { - if (*rmapp) - flush |= __rmap_write_protect(kvm, rmapp, - false); - - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) - cond_resched_lock(&kvm->mmu_lock); - } - } + flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect, + false); spin_unlock(&kvm->mmu_lock); /* see kvm_mmu_slot_remove_write_access */ @@ -4612,31 +4679,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access); void kvm_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot) { - gfn_t last_gfn; - int i; - bool flush = false; - - last_gfn = memslot->base_gfn + memslot->npages - 1; + bool flush; spin_lock(&kvm->mmu_lock); - - for (i = PT_PAGE_TABLE_LEVEL; - i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { - unsigned long *rmapp; - unsigned long last_index, index; - - rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; - last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); - - for (index = 0; index <= last_index; ++index, ++rmapp) { - if (*rmapp) - flush |= __rmap_set_dirty(kvm, rmapp); - - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) - cond_resched_lock(&kvm->mmu_lock); - } - } - + flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false); spin_unlock(&kvm->mmu_lock); lockdep_assert_held(&kvm->slots_lock); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7d65637c851..398d21c0f6dd 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -43,6 +43,7 @@ #define PT_PDPE_LEVEL 3 #define PT_DIRECTORY_LEVEL 2 #define PT_PAGE_TABLE_LEVEL 1 +#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1) static inline u64 rsvd_bits(int s, int e) { @@ -71,8 +72,6 @@ enum { int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); -void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - bool ept); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { @@ -166,8 +165,11 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index = (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); + WARN_ON(pfec & PFERR_RSVD_MASK); + return (mmu->permissions[index] >> pte_access) & 1; } void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); #endif diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 9ade5cfb5a4c..368d53497314 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -197,13 +197,11 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL); - for (sptep = rmap_get_first(*rmapp, &iter); sptep; - sptep = rmap_get_next(&iter)) { + for_each_rmap_spte(rmapp, &iter, sptep) if (is_writable_pte(*sptep)) audit_printk(kvm, "shadow page has writable " "mappings: gfn %llx role %x\n", sp->gfn, sp->role.word); - } } static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index fd49c867b25a..6e6d115fe9b5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, mmu_is_nested(vcpu)); if (likely(r != RET_MMIO_PF_INVALID)) return r; + + /* + * page fault with PFEC.RSVD = 1 is caused by shadow + * page fault, should not be used to walk guest page + * table. + */ + error_code &= ~PFERR_RSVD_MASK; }; r = mmu_topup_memory_caches(vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce741b8650f6..b9f9e1073e50 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1082,7 +1082,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } -static void init_vmcb(struct vcpu_svm *svm) +static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; @@ -1153,17 +1153,17 @@ static void init_vmcb(struct vcpu_svm *svm) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); - svm_set_efer(&svm->vcpu, 0); + if (!init_event) + svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; /* - * This is the guest-visible cr0 value. * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. + * It also updates the guest-visible cr0 value. */ - svm->vcpu.arch.cr0 = 0; (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); save->cr4 = X86_CR4_PAE; @@ -1176,7 +1176,7 @@ static void init_vmcb(struct vcpu_svm *svm) clr_exception_intercept(svm, PF_VECTOR); clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); - save->g_pat = 0x0007040600070406ULL; + save->g_pat = svm->vcpu.arch.pat; save->cr3 = 0; save->cr4 = 0; } @@ -1195,13 +1195,19 @@ static void init_vmcb(struct vcpu_svm *svm) enable_gif(svm); } -static void svm_vcpu_reset(struct kvm_vcpu *vcpu) +static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_svm *svm = to_svm(vcpu); u32 dummy; u32 eax = 1; - init_vmcb(svm); + if (!init_event) { + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) + svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; + } + init_vmcb(svm, init_event); kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); @@ -1257,12 +1263,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; - init_vmcb(svm); - - svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) - svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; + init_vmcb(svm, false); svm_init_osvw(&svm->vcpu); @@ -1575,7 +1576,8 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) * does not do it - this results in some delay at * reboot */ - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED)) + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); @@ -1883,7 +1885,7 @@ static int shutdown_interception(struct vcpu_svm *svm) * so reinitialize it. */ clear_page(svm->vmcb); - init_vmcb(svm); + init_vmcb(svm, false); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; @@ -4381,6 +4383,7 @@ static struct kvm_x86_ops svm_x86_ops = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, + .fpu_activate = svm_fpu_activate, .fpu_deactivate = svm_fpu_deactivate, .tlb_flush = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b61687bd79..9cf5030927d5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2170,8 +2170,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) if (is_guest_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_nested; - else if (irqchip_in_kernel(vcpu->kvm) && - apic_x2apic_mode(vcpu->arch.apic)) { + else if (vcpu->arch.apic_base & X2APIC_ENABLE) { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic; else @@ -4667,16 +4666,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - u32 msr_low, msr_high; - u64 host_pat; - rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); - host_pat = msr_low | ((u64) msr_high << 32); - /* Write the default value follow host pat */ - vmcs_write64(GUEST_IA32_PAT, host_pat); - /* Keep arch.pat sync with GUEST_IA32_PAT */ - vmx->vcpu.arch.pat = host_pat; - } + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) + vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { u32 index = vmx_msr_index[i]; @@ -4708,22 +4699,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct msr_data apic_base_msr; + u64 cr0; vmx->rmode.vm86_active = 0; vmx->soft_vnmi_blocked = 0; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - kvm_set_cr8(&vmx->vcpu, 0); - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(&vmx->vcpu)) - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); + kvm_set_cr8(vcpu, 0); + + if (!init_event) { + apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + apic_base_msr.data |= MSR_IA32_APICBASE_BSP; + apic_base_msr.host_initiated = true; + kvm_set_apic_base(vcpu, &apic_base_msr); + } vmx_segment_cache_clear(vmx); @@ -4747,9 +4743,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); - vmcs_write32(GUEST_SYSENTER_CS, 0); - vmcs_writel(GUEST_SYSENTER_ESP, 0); - vmcs_writel(GUEST_SYSENTER_EIP, 0); + if (!init_event) { + vmcs_write32(GUEST_SYSENTER_CS, 0); + vmcs_writel(GUEST_SYSENTER_ESP, 0); + vmcs_writel(GUEST_SYSENTER_EIP, 0); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + } vmcs_writel(GUEST_RFLAGS, 0x02); kvm_rip_write(vcpu, 0xfff0); @@ -4764,18 +4763,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); - /* Special registers */ - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - setup_msrs(vmx); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ - if (cpu_has_vmx_tpr_shadow()) { + if (cpu_has_vmx_tpr_shadow() && !init_event) { vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (vm_need_tpr_shadow(vmx->vcpu.kvm)) + if (vm_need_tpr_shadow(vcpu->kvm)) vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - __pa(vmx->vcpu.arch.apic->regs)); + __pa(vcpu->arch.apic->regs)); vmcs_write32(TPR_THRESHOLD, 0); } @@ -4787,12 +4783,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ - vmx_set_cr4(&vmx->vcpu, 0); - vmx_set_efer(&vmx->vcpu, 0); - vmx_fpu_activate(&vmx->vcpu); - update_exception_bitmap(&vmx->vcpu); + cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + vmx_set_cr0(vcpu, cr0); /* enter rmode */ + vmx->vcpu.arch.cr0 = cr0; + vmx_set_cr4(vcpu, 0); + if (!init_event) + vmx_set_efer(vcpu, 0); + vmx_fpu_activate(vcpu); + update_exception_bitmap(vcpu); vpid_sync_context(vmx); } @@ -5710,9 +5708,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) return 0; } - /* clear all local breakpoint enable flags */ - vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155); - /* * TODO: What about debug traps on tss switch? * Are we supposed to inject them and update dr6? @@ -7691,6 +7686,158 @@ static void kvm_flush_pml_buffers(struct kvm *kvm) kvm_vcpu_kick(vcpu); } +static void vmx_dump_sel(char *name, uint32_t sel) +{ + pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", + name, vmcs_read32(sel), + vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), + vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), + vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); +} + +static void vmx_dump_dtsel(char *name, uint32_t limit) +{ + pr_err("%s limit=0x%08x, base=0x%016lx\n", + name, vmcs_read32(limit), + vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); +} + +static void dump_vmcs(void) +{ + u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); + u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); + u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); + u32 secondary_exec_control = 0; + unsigned long cr4 = vmcs_readl(GUEST_CR4); + u64 efer = vmcs_readl(GUEST_IA32_EFER); + int i, n; + + if (cpu_has_secondary_exec_ctrls()) + secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + + pr_err("*** Guest State ***\n"); + pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", + vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), + vmcs_readl(CR0_GUEST_HOST_MASK)); + pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", + cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); + pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); + if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && + (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) + { + pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", + vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); + pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", + vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); + } + pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", + vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); + pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", + vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); + pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", + vmcs_readl(GUEST_SYSENTER_ESP), + vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); + vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); + vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); + vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); + vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); + vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); + vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); + vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); + vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); + vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); + vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); + if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || + (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) + pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", + efer, vmcs_readl(GUEST_IA32_PAT)); + pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", + vmcs_readl(GUEST_IA32_DEBUGCTL), + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); + if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) + pr_err("PerfGlobCtl = 0x%016lx\n", + vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); + if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) + pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); + pr_err("Interruptibility = %08x ActivityState = %08x\n", + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), + vmcs_read32(GUEST_ACTIVITY_STATE)); + if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) + pr_err("InterruptStatus = %04x\n", + vmcs_read16(GUEST_INTR_STATUS)); + + pr_err("*** Host State ***\n"); + pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", + vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); + pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", + vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), + vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), + vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), + vmcs_read16(HOST_TR_SELECTOR)); + pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", + vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), + vmcs_readl(HOST_TR_BASE)); + pr_err("GDTBase=%016lx IDTBase=%016lx\n", + vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); + pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", + vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), + vmcs_readl(HOST_CR4)); + pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", + vmcs_readl(HOST_IA32_SYSENTER_ESP), + vmcs_read32(HOST_IA32_SYSENTER_CS), + vmcs_readl(HOST_IA32_SYSENTER_EIP)); + if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) + pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", + vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); + if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + pr_err("PerfGlobCtl = 0x%016lx\n", + vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); + + pr_err("*** Control State ***\n"); + pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", + pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); + pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); + pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", + vmcs_read32(EXCEPTION_BITMAP), + vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), + vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); + pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", + vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), + vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), + vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); + pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); + pr_err(" reason=%08x qualification=%016lx\n", + vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); + pr_err("IDTVectoring: info=%08x errcode=%08x\n", + vmcs_read32(IDT_VECTORING_INFO_FIELD), + vmcs_read32(IDT_VECTORING_ERROR_CODE)); + pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) + pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); + if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) + pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); + if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) + pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER)); + n = vmcs_read32(CR3_TARGET_COUNT); + for (i = 0; i + 1 < n; i += 4) + pr_err("CR3 target%u=%016lx target%u=%016lx\n", + i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), + i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); + if (i < n) + pr_err("CR3 target%u=%016lx\n", + i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); + if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) + pr_err("PLE Gap=%08x Window=%08x\n", + vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); + if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) + pr_err("Virtual processor ID = 0x%04x\n", + vmcs_read16(VIRTUAL_PROCESSOR_ID)); +} + /* * The guest has exited. See if we can fix it or if we need userspace * assistance. @@ -7723,6 +7870,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { + dump_vmcs(); vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason = exit_reason; @@ -8924,7 +9072,7 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { /* x2APIC MSR accesses are not allowed */ - if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8) + if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8) return -EINVAL; if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ e->index == MSR_IA32_UCODE_REV) @@ -10185,6 +10333,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, + .fpu_activate = vmx_fpu_activate, .fpu_deactivate = vmx_fpu_deactivate, .tlb_flush = vmx_flush_tlb, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c73efcd03e29..79dde1656db6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -99,6 +99,9 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); unsigned int min_timer_period_us = 500; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); +static bool __read_mostly kvmclock_periodic_sync = true; +module_param(kvmclock_periodic_sync, bool, S_IRUGO); + bool kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); u32 kvm_max_guest_tsc_khz; @@ -572,8 +575,7 @@ out: int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { unsigned long old_cr0 = kvm_read_cr0(vcpu); - unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | - X86_CR0_CD | X86_CR0_NW; + unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; cr0 |= X86_CR0_ET; @@ -702,8 +704,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | - X86_CR4_PAE | X86_CR4_SMEP; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP; + if (cr4 & CR4_RESERVED_BITS) return 1; @@ -744,9 +747,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); - if ((cr4 ^ old_cr4) & X86_CR4_SMAP) - update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false); - if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) kvm_update_cpuid(vcpu); @@ -1700,6 +1700,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->pvclock_set_guest_stopped_request = false; } + pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO; + /* If the host uses TSC clocksource, then it is stable */ if (use_master_clock) pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; @@ -1770,6 +1772,9 @@ static void kvmclock_sync_fn(struct work_struct *work) kvmclock_sync_work); struct kvm *kvm = container_of(ka, struct kvm, arch); + if (!kvmclock_periodic_sync) + return; + schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0); schedule_delayed_work(&kvm->arch.kvmclock_sync_work, KVMCLOCK_SYNC_PERIOD); @@ -1854,6 +1859,63 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) } EXPORT_SYMBOL_GPL(kvm_mtrr_valid); +static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr) +{ + struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state; + unsigned char mtrr_enabled = mtrr_state->enabled; + gfn_t start, end, mask; + int index; + bool is_fixed = true; + + if (msr == MSR_IA32_CR_PAT || !tdp_enabled || + !kvm_arch_has_noncoherent_dma(vcpu->kvm)) + return; + + if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType) + return; + + switch (msr) { + case MSR_MTRRfix64K_00000: + start = 0x0; + end = 0x80000; + break; + case MSR_MTRRfix16K_80000: + start = 0x80000; + end = 0xa0000; + break; + case MSR_MTRRfix16K_A0000: + start = 0xa0000; + end = 0xc0000; + break; + case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: + index = msr - MSR_MTRRfix4K_C0000; + start = 0xc0000 + index * (32 << 10); + end = start + (32 << 10); + break; + case MSR_MTRRdefType: + is_fixed = false; + start = 0x0; + end = ~0ULL; + break; + default: + /* variable range MTRRs. */ + is_fixed = false; + index = (msr - 0x200) / 2; + start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) + + (mtrr_state->var_ranges[index].base_lo & PAGE_MASK); + mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) + + (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK); + mask |= ~0ULL << cpuid_maxphyaddr(vcpu); + + end = ((start & mask) | ~mask) + 1; + } + + if (is_fixed && !(mtrr_enabled & 0x1)) + return; + + kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end)); +} + static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; @@ -1887,7 +1949,7 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) *pt = data; } - kvm_mmu_reset_context(vcpu); + update_mtrr(vcpu, msr); return 0; } @@ -2222,6 +2284,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) &vcpu->requests); ka->boot_vcpu_runs_old_kvmclock = tmp; + + ka->kvmclock_offset = -get_kernel_ns(); } vcpu->arch.time = data; @@ -2800,6 +2864,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_TIME: case KVM_CAP_IOAPIC_POLARITY_IGNORED: case KVM_CAP_TSC_DEADLINE_TIMER: + case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_DISABLE_QUIRKS: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3847,6 +3913,26 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_DISABLE_QUIRKS: + kvm->arch.disabled_quirks = cap->args[0]; + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4099,7 +4185,15 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } default: r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } @@ -5954,6 +6048,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) lapic_irq.shorthand = 0; lapic_irq.dest_mode = 0; lapic_irq.dest_id = apicid; + lapic_irq.msi_redir_hint = false; lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); @@ -6197,6 +6292,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) return; page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (is_error_page(page)) + return; kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); /* @@ -6347,7 +6444,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); - kvm_guest_enter(); + __kvm_guest_enter(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -7003,7 +7100,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } -int fx_init(struct kvm_vcpu *vcpu) +int fx_init(struct kvm_vcpu *vcpu, bool init_event) { int err; @@ -7011,7 +7108,9 @@ int fx_init(struct kvm_vcpu *vcpu) if (err) return err; - fpu_finit(&vcpu->arch.guest_fpu); + if (!init_event) + fpu_finit(&vcpu->arch.guest_fpu); + if (cpu_has_xsaves) vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; @@ -7053,14 +7152,25 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { kvm_put_guest_xcr0(vcpu); - if (!vcpu->guest_fpu_loaded) + if (!vcpu->guest_fpu_loaded) { + vcpu->fpu_counter = 0; return; + } vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); __kernel_fpu_end(); ++vcpu->stat.fpu_reload; - kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + /* + * If using eager FPU mode, or if the guest is a frequent user + * of the FPU, just leave the FPU active for next time. + * Every 255 times fpu_counter rolls over to 0; a guest that uses + * the FPU in bursts will revert to loading it on demand. + */ + if (!vcpu->arch.eager_fpu) { + if (++vcpu->fpu_counter < 5) + kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + } trace_kvm_fpu(0); } @@ -7076,11 +7186,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { + struct kvm_vcpu *vcpu; + if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) printk_once(KERN_WARNING "kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); - return kvm_x86_ops->vcpu_create(kvm, id); + + vcpu = kvm_x86_ops->vcpu_create(kvm, id); + + /* + * Activate fpu unconditionally in case the guest needs eager FPU. It will be + * deactivated soon if it doesn't. + */ + kvm_x86_ops->fpu_activate(vcpu); + return vcpu; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) @@ -7091,7 +7211,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); if (r) return r; - kvm_vcpu_reset(vcpu); + kvm_vcpu_reset(vcpu, false); kvm_mmu_setup(vcpu); vcpu_put(vcpu); @@ -7111,6 +7231,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) kvm_write_tsc(vcpu, &msr); vcpu_put(vcpu); + if (!kvmclock_periodic_sync) + return; + schedule_delayed_work(&kvm->arch.kvmclock_sync_work, KVMCLOCK_SYNC_PERIOD); } @@ -7129,7 +7252,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -void kvm_vcpu_reset(struct kvm_vcpu *vcpu) +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; @@ -7156,13 +7279,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; - kvm_pmu_reset(vcpu); + if (!init_event) + kvm_pmu_reset(vcpu); memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; - kvm_x86_ops->vcpu_reset(vcpu); + kvm_x86_ops->vcpu_reset(vcpu, init_event); } void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) @@ -7351,7 +7475,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_mce_banks; } - r = fx_init(vcpu); + r = fx_init(vcpu, false); if (r) goto fail_free_wbinvd_dirty_mask; @@ -7363,6 +7487,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; + kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); @@ -7576,7 +7702,7 @@ out_free: return -ENOMEM; } -void kvm_arch_memslots_updated(struct kvm *kvm) +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) { /* * memslots->generation has been incremented. @@ -7587,7 +7713,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { /* @@ -7665,14 +7791,14 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { - struct kvm_memory_slot *new; int nr_mmu_pages = 0; - if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { + if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) { int ret; ret = vm_munmap(old->userspace_addr, @@ -7689,9 +7815,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (nr_mmu_pages) kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); - /* It's OK to get 'new' slot here as it has already been installed */ - new = id_to_memslot(kvm->memslots, mem->slot); - /* * Dirty logging tracks sptes in 4k granularity, meaning that large * sptes have to be split. If live migration is successful, the guest @@ -7716,9 +7839,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * been zapped so no dirty logging staff is needed for old slot. For * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the * new and it's also covered when dealing with the new slot. + * + * FIXME: const-ify all uses of struct kvm_memory_slot. */ if (change != KVM_MR_DELETE) - kvm_mmu_slot_apply_flags(kvm, new); + kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new); } void kvm_arch_flush_shadow_all(struct kvm *kvm) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f5fef1868096..01a1d011e073 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -4,6 +4,8 @@ #include <linux/kvm_host.h> #include "kvm_cache_regs.h" +#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL + static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) { vcpu->arch.exception.pending = false; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ad45054309a0..a8bcbc9c6078 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -230,6 +230,7 @@ struct kvm_vcpu { int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; + unsigned char fpu_counter; wait_queue_head_t wq; struct pid *pid; int sigset_active; @@ -500,21 +501,22 @@ enum kvm_mr_change { }; int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages); -void kvm_arch_memslots_updated(struct kvm *kvm); +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); @@ -524,8 +526,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm); void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); -int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, - int nr_pages); +int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, + struct page **pages, int nr_pages); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); @@ -538,13 +540,13 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); -pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, - bool write_fault, bool *writable); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); +pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, + bool *async, bool write_fault, bool *writable); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); @@ -762,16 +764,10 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, } #endif -static inline void kvm_guest_enter(void) +/* must be called with irqs disabled */ +static inline void __kvm_guest_enter(void) { - unsigned long flags; - - BUG_ON(preemptible()); - - local_irq_save(flags); guest_enter(); - local_irq_restore(flags); - /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspace from rcu point of view. In @@ -783,12 +779,27 @@ static inline void kvm_guest_enter(void) rcu_virt_note_context_switch(smp_processor_id()); } +/* must be called with irqs disabled */ +static inline void __kvm_guest_exit(void) +{ + guest_exit(); +} + +static inline void kvm_guest_enter(void) +{ + unsigned long flags; + + local_irq_save(flags); + __kvm_guest_enter(); + local_irq_restore(flags); +} + static inline void kvm_guest_exit(void) { unsigned long flags; local_irq_save(flags); - guest_exit(); + __kvm_guest_exit(); local_irq_restore(flags); } diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 931da7e917cf..1b47a185c2f0 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -28,6 +28,7 @@ struct kvm_run; struct kvm_userspace_memory_region; struct kvm_vcpu; struct kvm_vcpu_init; +struct kvm_memslots; enum kvm_mr_change; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4b60056776d1..75bd9f7fd846 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -814,6 +814,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_INJECT_IRQ 113 #define KVM_CAP_S390_IRQ_STATE 114 #define KVM_CAP_PPC_HWRNG 115 +#define KVM_CAP_DISABLE_QUIRKS 116 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 90977418aeb6..6c8e124006ad 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -103,8 +103,7 @@ static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); static void kvm_release_pfn_dirty(pfn_t pfn); -static void mark_page_dirty_in_slot(struct kvm *kvm, - struct kvm_memory_slot *memslot, gfn_t gfn); +static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); __visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); @@ -440,13 +439,60 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ -static void kvm_init_memslots_id(struct kvm *kvm) +static struct kvm_memslots *kvm_alloc_memslots(void) { int i; - struct kvm_memslots *slots = kvm->memslots; + struct kvm_memslots *slots; + slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + if (!slots) + return NULL; + + /* + * Init kvm generation close to the maximum to easily test the + * code of handling generation number wrap-around. + */ + slots->generation = -150; for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) slots->id_to_index[i] = slots->memslots[i].id = i; + + return slots; +} + +static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) +{ + if (!memslot->dirty_bitmap) + return; + + kvfree(memslot->dirty_bitmap); + memslot->dirty_bitmap = NULL; +} + +/* + * Free any memory in @free but not in @dont. + */ +static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + if (!dont || free->dirty_bitmap != dont->dirty_bitmap) + kvm_destroy_dirty_bitmap(free); + + kvm_arch_free_memslot(kvm, free, dont); + + free->npages = 0; +} + +static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) +{ + struct kvm_memory_slot *memslot; + + if (!slots) + return; + + kvm_for_each_memslot(memslot, slots) + kvm_free_memslot(kvm, memslot, NULL); + + kvfree(slots); } static struct kvm *kvm_create_vm(unsigned long type) @@ -472,17 +518,10 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); r = -ENOMEM; - kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + kvm->memslots = kvm_alloc_memslots(); if (!kvm->memslots) goto out_err_no_srcu; - /* - * Init kvm generation close to the maximum to easily test the - * code of handling generation number wrap-around. - */ - kvm->memslots->generation = -150; - - kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) @@ -523,7 +562,7 @@ out_err_no_srcu: out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); - kvfree(kvm->memslots); + kvm_free_memslots(kvm, kvm->memslots); kvm_arch_free_vm(kvm); return ERR_PTR(r); } @@ -540,40 +579,6 @@ void *kvm_kvzalloc(unsigned long size) return kzalloc(size, GFP_KERNEL); } -static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) -{ - if (!memslot->dirty_bitmap) - return; - - kvfree(memslot->dirty_bitmap); - memslot->dirty_bitmap = NULL; -} - -/* - * Free any memory in @free but not in @dont. - */ -static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ - if (!dont || free->dirty_bitmap != dont->dirty_bitmap) - kvm_destroy_dirty_bitmap(free); - - kvm_arch_free_memslot(kvm, free, dont); - - free->npages = 0; -} - -static void kvm_free_physmem(struct kvm *kvm) -{ - struct kvm_memslots *slots = kvm->memslots; - struct kvm_memory_slot *memslot; - - kvm_for_each_memslot(memslot, slots) - kvm_free_physmem_slot(kvm, memslot, NULL); - - kvfree(kvm->memslots); -} - static void kvm_destroy_devices(struct kvm *kvm) { struct list_head *node, *tmp; @@ -607,7 +612,7 @@ static void kvm_destroy_vm(struct kvm *kvm) #endif kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); - kvm_free_physmem(kvm); + kvm_free_memslots(kvm, kvm->memslots); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -670,8 +675,6 @@ static void update_memslots(struct kvm_memslots *slots, WARN_ON(mslots[i].id != id); if (!new->npages) { WARN_ON(!mslots[i].npages); - new->base_gfn = 0; - new->flags = 0; if (mslots[i].npages) slots->used_slots--; } else { @@ -711,7 +714,7 @@ static void update_memslots(struct kvm_memslots *slots, slots->id_to_index[mslots[i].id] = i; } -static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) +static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; @@ -728,7 +731,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) static struct kvm_memslots *install_new_memslots(struct kvm *kvm, struct kvm_memslots *slots) { - struct kvm_memslots *old_memslots = kvm->memslots; + struct kvm_memslots *old_memslots = kvm_memslots(kvm); /* * Set the low bit in the generation, which disables SPTE caching @@ -747,7 +750,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, */ slots->generation++; - kvm_arch_memslots_updated(kvm); + kvm_arch_memslots_updated(kvm, slots); return old_memslots; } @@ -761,7 +764,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, * Must be called holding kvm->slots_lock for write. */ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -793,16 +796,13 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - slot = id_to_memslot(kvm->memslots, mem->slot); + slot = id_to_memslot(kvm_memslots(kvm), mem->slot); base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; if (npages > KVM_MEM_MAX_NR_PAGES) goto out; - if (!npages) - mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; - new = old = *slot; new.id = mem->slot; @@ -828,15 +828,19 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; } } - } else if (old.npages) { + } else { + if (!old.npages) + goto out; + change = KVM_MR_DELETE; - } else /* Modify a non-existent slot: disallowed. */ - goto out; + new.base_gfn = 0; + new.flags = 0; + } if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { /* Check for overlaps */ r = -EEXIST; - kvm_for_each_memslot(slot, kvm->memslots) { + kvm_for_each_memslot(slot, kvm_memslots(kvm)) { if ((slot->id >= KVM_USER_MEM_SLOTS) || (slot->id == mem->slot)) continue; @@ -867,7 +871,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); + memcpy(slots, kvm_memslots(kvm), sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { slot = id_to_memslot(slots, mem->slot); @@ -898,7 +902,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (r) goto out_slots; - /* actual memory is freed via old in kvm_free_physmem_slot below */ + /* actual memory is freed via old in kvm_free_memslot below */ if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; memset(&new.arch, 0, sizeof(new.arch)); @@ -907,9 +911,9 @@ int __kvm_set_memory_region(struct kvm *kvm, update_memslots(slots, &new); old_memslots = install_new_memslots(kvm, slots); - kvm_arch_commit_memory_region(kvm, mem, &old, change); + kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); - kvm_free_physmem_slot(kvm, &old, &new); + kvm_free_memslot(kvm, &old, &new); kvfree(old_memslots); /* @@ -931,14 +935,14 @@ int __kvm_set_memory_region(struct kvm *kvm, out_slots: kvfree(slots); out_free: - kvm_free_physmem_slot(kvm, &new, &old); + kvm_free_memslot(kvm, &new, &old); out: return r; } EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { int r; @@ -954,12 +958,14 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; + return kvm_set_memory_region(kvm, mem); } int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r, i; unsigned long n; @@ -969,7 +975,8 @@ int kvm_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -1018,6 +1025,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log, bool *is_dirty) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r, i; unsigned long n; @@ -1028,7 +1036,8 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); dirty_bitmap = memslot->dirty_bitmap; r = -ENOENT; @@ -1355,9 +1364,8 @@ exit: return pfn; } -static pfn_t -__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, - bool *async, bool write_fault, bool *writable) +pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, + bool *async, bool write_fault, bool *writable) { unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); @@ -1376,44 +1384,13 @@ __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, return hva_to_pfn(addr, atomic, async, write_fault, writable); } - -static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, - bool write_fault, bool *writable) -{ - struct kvm_memory_slot *slot; - - if (async) - *async = false; - - slot = gfn_to_memslot(kvm, gfn); - - return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault, - writable); -} - -pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) -{ - return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL); -} -EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); - -pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, - bool write_fault, bool *writable) -{ - return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable); -} -EXPORT_SYMBOL_GPL(gfn_to_pfn_async); - -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) -{ - return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL); -} -EXPORT_SYMBOL_GPL(gfn_to_pfn); +EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable) { - return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable); + return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL, + write_fault, writable); } EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); @@ -1421,6 +1398,7 @@ pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); } +EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) { @@ -1428,13 +1406,25 @@ pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); -int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, - int nr_pages) +pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) +{ + return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn); +} +EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); + +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn); +} +EXPORT_SYMBOL_GPL(gfn_to_pfn); + +int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, + struct page **pages, int nr_pages) { unsigned long addr; gfn_t entry; - addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry); + addr = gfn_to_hva_many(slot, gfn, &entry); if (kvm_is_error_hva(addr)) return -1; @@ -1590,15 +1580,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len) { int r; + struct kvm_memory_slot *memslot; unsigned long addr; - addr = gfn_to_hva(kvm, gfn); + memslot = gfn_to_memslot(kvm, gfn); + addr = gfn_to_hva_memslot(memslot, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; r = __copy_to_user((void __user *)addr + offset, data, len); if (r) return -EFAULT; - mark_page_dirty(kvm, gfn); + mark_page_dirty_in_slot(memslot, gfn); return 0; } EXPORT_SYMBOL_GPL(kvm_write_guest_page); @@ -1681,7 +1673,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, r = __copy_to_user((void __user *)ghc->hva, data, len); if (r) return -EFAULT; - mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT); + mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT); return 0; } @@ -1739,8 +1731,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) } EXPORT_SYMBOL_GPL(kvm_clear_guest); -static void mark_page_dirty_in_slot(struct kvm *kvm, - struct kvm_memory_slot *memslot, +static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn) { if (memslot && memslot->dirty_bitmap) { @@ -1755,7 +1746,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) struct kvm_memory_slot *memslot; memslot = gfn_to_memslot(kvm, gfn); - mark_page_dirty_in_slot(kvm, memslot, gfn); + mark_page_dirty_in_slot(memslot, gfn); } EXPORT_SYMBOL_GPL(mark_page_dirty); @@ -2882,18 +2873,12 @@ static int hardware_enable_all(void) static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, void *v) { - int cpu = (long)v; - val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: - pr_info("kvm: disabling virtualization on CPU%d\n", - cpu); hardware_disable(); break; case CPU_STARTING: - pr_info("kvm: enabling virtualization on CPU%d\n", - cpu); hardware_enable(); break; } |