diff options
Diffstat (limited to 'drivers/kvm')
-rw-r--r-- | drivers/kvm/Kconfig | 8 | ||||
-rw-r--r-- | drivers/kvm/kvm.h | 20 | ||||
-rw-r--r-- | drivers/kvm/kvm_main.c | 124 | ||||
-rw-r--r-- | drivers/kvm/mmu.c | 151 | ||||
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 2 | ||||
-rw-r--r-- | drivers/kvm/x86_emulate.c | 30 |
6 files changed, 171 insertions, 164 deletions
diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 33fa28a8c199..0a419a0de603 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig @@ -5,13 +5,19 @@ menuconfig VIRTUALIZATION bool "Virtualization" depends on X86 default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on X86 && EXPERIMENTAL - depends on X86_CMPXCHG64 || 64BIT + select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index a7c5e6bee034..336be86c6f5a 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -121,7 +121,7 @@ struct kvm_pte_chain { * bits 4:7 - page table level for this shadow (1-4) * bits 8:9 - page table quadrant for 2-level guests * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) - * bits 17:18 - "access" - the user and writable bits of a huge page pde + * bits 17:19 - "access" - the user, writable, and nx bits of a huge page pde */ union kvm_mmu_page_role { unsigned word; @@ -131,7 +131,7 @@ union kvm_mmu_page_role { unsigned quadrant : 2; unsigned pad_for_nice_hex_output : 6; unsigned metaphysical : 1; - unsigned hugepage_access : 2; + unsigned hugepage_access : 3; }; }; @@ -535,8 +535,8 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); -void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); -void kvm_mmu_zap_all(struct kvm_vcpu *vcpu); +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); +void kvm_mmu_zap_all(struct kvm *kvm); hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) @@ -569,6 +569,8 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, unsigned long *rflags); +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; @@ -617,7 +619,7 @@ unsigned long segment_base(u16 selector); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *old, const u8 *new, int bytes); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); -void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); +void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); @@ -626,11 +628,15 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code) { - if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) - kvm_mmu_free_some_pages(vcpu); return vcpu->mmu.page_fault(vcpu, gva, error_code); } +static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) +{ + if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) + __kvm_mmu_free_some_pages(vcpu); +} + static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { if (likely(vcpu->mmu.root_hpa != INVALID_PAGE)) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1b206f197c6b..cd0557954e50 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -238,23 +238,6 @@ static void vcpu_load(struct kvm_vcpu *vcpu) kvm_arch_ops->vcpu_load(vcpu); } -/* - * Switches to specified vcpu, until a matching vcpu_put(). Will return NULL - * if the slot is not populated. - */ -static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot) -{ - struct kvm_vcpu *vcpu = &kvm->vcpus[slot]; - - mutex_lock(&vcpu->mutex); - if (!vcpu->vmcs) { - mutex_unlock(&vcpu->mutex); - return NULL; - } - kvm_arch_ops->vcpu_load(vcpu); - return vcpu; -} - static void vcpu_put(struct kvm_vcpu *vcpu) { kvm_arch_ops->vcpu_put(vcpu); @@ -314,9 +297,6 @@ static struct kvm *kvm_create_vm(void) kvm_io_bus_init(&kvm->pio_bus); spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); - spin_lock(&kvm_lock); - list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); kvm_io_bus_init(&kvm->mmio_bus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { struct kvm_vcpu *vcpu = &kvm->vcpus[i]; @@ -326,6 +306,9 @@ static struct kvm *kvm_create_vm(void) vcpu->kvm = kvm; vcpu->mmu.root_hpa = INVALID_PAGE; } + spin_lock(&kvm_lock); + list_add(&kvm->vm_list, &vm_list); + spin_unlock(&kvm_lock); return kvm; } @@ -663,13 +646,6 @@ void fx_init(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(fx_init); -static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot) -{ - spin_lock(&vcpu->kvm->lock); - kvm_mmu_slot_remove_write_access(vcpu, slot); - spin_unlock(&vcpu->kvm->lock); -} - /* * Allocate some memory and give it an address in the guest physical address * space. @@ -792,19 +768,10 @@ raced: *memslot = new; ++kvm->memory_config_version; - spin_unlock(&kvm->lock); - - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu; + kvm_mmu_slot_remove_write_access(kvm, mem->slot); + kvm_flush_remote_tlbs(kvm); - vcpu = vcpu_load_slot(kvm, i); - if (!vcpu) - continue; - if (new.flags & KVM_MEM_LOG_DIRTY_PAGES) - do_remove_write_access(vcpu, mem->slot); - kvm_mmu_reset_context(vcpu); - vcpu_put(vcpu); - } + spin_unlock(&kvm->lock); kvm_free_physmem_slot(&old, &new); return 0; @@ -826,7 +793,6 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int r, i; int n; - int cleared; unsigned long any = 0; spin_lock(&kvm->lock); @@ -855,23 +821,11 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) goto out; - if (any) { - cleared = 0; - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu; - - vcpu = vcpu_load_slot(kvm, i); - if (!vcpu) - continue; - if (!cleared) { - do_remove_write_access(vcpu, log->slot); - memset(memslot->dirty_bitmap, 0, n); - cleared = 1; - } - kvm_arch_ops->tlb_flush(vcpu); - vcpu_put(vcpu); - } - } + spin_lock(&kvm->lock); + kvm_mmu_slot_remove_write_access(kvm, log->slot); + kvm_flush_remote_tlbs(kvm); + memset(memslot->dirty_bitmap, 0, n); + spin_unlock(&kvm->lock); r = 0; @@ -920,13 +874,9 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, break; kvm->naliases = n; - spin_unlock(&kvm->lock); + kvm_mmu_zap_all(kvm); - vcpu_load(&kvm->vcpus[0]); - spin_lock(&kvm->lock); - kvm_mmu_zap_all(&kvm->vcpus[0]); spin_unlock(&kvm->lock); - vcpu_put(&kvm->vcpus[0]); return 0; @@ -1120,18 +1070,16 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, return 0; mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); - if (memcmp(virt + offset_in_page(gpa), val, bytes)) { - kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); - memcpy(virt + offset_in_page(gpa), val, bytes); - } + kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); + memcpy(virt + offset_in_page(gpa), val, bytes); kunmap_atomic(virt, KM_USER0); return 1; } -static int emulator_write_emulated(unsigned long addr, - const void *val, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) +static int emulator_write_emulated_onepage(unsigned long addr, + const void *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_io_device *mmio_dev; @@ -1163,6 +1111,26 @@ static int emulator_write_emulated(unsigned long addr, return X86EMUL_CONTINUE; } +static int emulator_write_emulated(unsigned long addr, + const void *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + /* Crossing a page boundary? */ + if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { + int rc, now; + + now = -addr & ~PAGE_MASK; + rc = emulator_write_emulated_onepage(addr, val, now, ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + addr += now; + val += now; + bytes -= now; + } + return emulator_write_emulated_onepage(addr, val, bytes, ctxt); +} + static int emulator_cmpxchg_emulated(unsigned long addr, const void *old, const void *new, @@ -1567,7 +1535,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common); * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); } @@ -1645,7 +1613,7 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common); * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { return kvm_arch_ops->set_msr(vcpu, msr_index, data); } @@ -2183,7 +2151,7 @@ static __init void kvm_init_msr_list(void) */ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { - return set_msr(vcpu, index, *data); + return kvm_set_msr(vcpu, index, *data); } /* @@ -2464,9 +2432,9 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { entry->edx &= ~(1 << 20); - printk(KERN_INFO ": guest NX capability removed\n"); + printk(KERN_INFO "kvm: guest NX capability removed\n"); } } @@ -2667,7 +2635,7 @@ static long kvm_vcpu_ioctl(struct file *filp, break; } case KVM_GET_MSRS: - r = msr_io(vcpu, argp, get_msr, 1); + r = msr_io(vcpu, argp, kvm_get_msr, 1); break; case KVM_SET_MSRS: r = msr_io(vcpu, argp, do_set_msr, 0); @@ -3006,6 +2974,10 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, switch (val) { case CPU_DYING: case CPU_DYING_FROZEN: + printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", + cpu); + hardware_disable(NULL); + break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index b297a6b111ac..23965aa5ee78 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -154,7 +154,6 @@ struct kvm_rmap_desc { static struct kmem_cache *pte_chain_cache; static struct kmem_cache *rmap_desc_cache; -static struct kmem_cache *mmu_page_cache; static struct kmem_cache *mmu_page_header_cache; static int is_write_protection(struct kvm_vcpu *vcpu) @@ -225,6 +224,29 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) kfree(mc->objects[--mc->nobjs]); } +static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, + int min, gfp_t gfp_flags) +{ + struct page *page; + + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < ARRAY_SIZE(cache->objects)) { + page = alloc_page(gfp_flags); + if (!page) + return -ENOMEM; + set_page_private(page, 0); + cache->objects[cache->nobjs++] = page_address(page); + } + return 0; +} + +static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + free_page((unsigned long)mc->objects[--mc->nobjs]); +} + static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) { int r; @@ -237,8 +259,7 @@ static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) rmap_desc_cache, 1, gfp_flags); if (r) goto out; - r = mmu_topup_memory_cache(&vcpu->mmu_page_cache, - mmu_page_cache, 4, gfp_flags); + r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4, gfp_flags); if (r) goto out; r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, @@ -252,12 +273,14 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) int r; r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT); + kvm_mmu_free_some_pages(vcpu); if (r < 0) { spin_unlock(&vcpu->kvm->lock); kvm_arch_ops->vcpu_put(vcpu); r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); kvm_arch_ops->vcpu_load(vcpu); spin_lock(&vcpu->kvm->lock); + kvm_mmu_free_some_pages(vcpu); } return r; } @@ -266,7 +289,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache); - mmu_free_memory_cache(&vcpu->mmu_page_cache); + mmu_free_memory_cache_page(&vcpu->mmu_page_cache); mmu_free_memory_cache(&vcpu->mmu_page_header_cache); } @@ -281,24 +304,15 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, return p; } -static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj) -{ - if (mc->nobjs < KVM_NR_MEM_OBJS) - mc->objects[mc->nobjs++] = obj; - else - kfree(obj); -} - static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) { return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache, sizeof(struct kvm_pte_chain)); } -static void mmu_free_pte_chain(struct kvm_vcpu *vcpu, - struct kvm_pte_chain *pc) +static void mmu_free_pte_chain(struct kvm_pte_chain *pc) { - mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc); + kfree(pc); } static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) @@ -307,10 +321,9 @@ static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) sizeof(struct kvm_rmap_desc)); } -static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu, - struct kvm_rmap_desc *rd) +static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) { - mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd); + kfree(rd); } /* @@ -355,8 +368,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte) } } -static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu, - struct page *page, +static void rmap_desc_remove_entry(struct page *page, struct kvm_rmap_desc *desc, int i, struct kvm_rmap_desc *prev_desc) @@ -376,10 +388,10 @@ static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu, prev_desc->more = desc->more; else set_page_private(page,(unsigned long)desc->more | 1); - mmu_free_rmap_desc(vcpu, desc); + mmu_free_rmap_desc(desc); } -static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte) +static void rmap_remove(u64 *spte) { struct page *page; struct kvm_rmap_desc *desc; @@ -407,7 +419,7 @@ static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte) while (desc) { for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) if (desc->shadow_ptes[i] == spte) { - rmap_desc_remove_entry(vcpu, page, + rmap_desc_remove_entry(page, desc, i, prev_desc); return; @@ -442,7 +454,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) BUG_ON(!(*spte & PT_PRESENT_MASK)); BUG_ON(!(*spte & PT_WRITABLE_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); - rmap_remove(vcpu, spte); + rmap_remove(spte); set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); kvm_flush_remote_tlbs(vcpu->kvm); } @@ -464,14 +476,14 @@ static int is_empty_shadow_page(u64 *spt) } #endif -static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, +static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *page_head) { ASSERT(is_empty_shadow_page(page_head->spt)); list_del(&page_head->link); - mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt); - mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head); - ++vcpu->kvm->n_free_mmu_pages; + __free_page(virt_to_page(page_head->spt)); + kfree(page_head); + ++kvm->n_free_mmu_pages; } static unsigned kvm_page_table_hashfn(gfn_t gfn) @@ -537,8 +549,7 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, pte_chain->parent_ptes[0] = parent_pte; } -static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *page, +static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page, u64 *parent_pte) { struct kvm_pte_chain *pte_chain; @@ -565,7 +576,7 @@ static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu, pte_chain->parent_ptes[i] = NULL; if (i == 0) { hlist_del(&pte_chain->link); - mmu_free_pte_chain(vcpu, pte_chain); + mmu_free_pte_chain(pte_chain); if (hlist_empty(&page->parent_ptes)) { page->multimapped = 0; page->parent_pte = NULL; @@ -643,7 +654,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, return page; } -static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, +static void kvm_mmu_page_unlink_children(struct kvm *kvm, struct kvm_mmu_page *page) { unsigned i; @@ -655,10 +666,10 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, if (page->role.level == PT_PAGE_TABLE_LEVEL) { for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { if (pt[i] & PT_PRESENT_MASK) - rmap_remove(vcpu, &pt[i]); + rmap_remove(&pt[i]); pt[i] = 0; } - kvm_flush_remote_tlbs(vcpu->kvm); + kvm_flush_remote_tlbs(kvm); return; } @@ -669,19 +680,18 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, if (!(ent & PT_PRESENT_MASK)) continue; ent &= PT64_BASE_ADDR_MASK; - mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]); + mmu_page_remove_parent_pte(page_header(ent), &pt[i]); } - kvm_flush_remote_tlbs(vcpu->kvm); + kvm_flush_remote_tlbs(kvm); } -static void kvm_mmu_put_page(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *page, +static void kvm_mmu_put_page(struct kvm_mmu_page *page, u64 *parent_pte) { - mmu_page_remove_parent_pte(vcpu, page, parent_pte); + mmu_page_remove_parent_pte(page, parent_pte); } -static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, +static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *page) { u64 *parent_pte; @@ -697,15 +707,15 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, parent_pte = chain->parent_ptes[0]; } BUG_ON(!parent_pte); - kvm_mmu_put_page(vcpu, page, parent_pte); + kvm_mmu_put_page(page, parent_pte); set_shadow_pte(parent_pte, 0); } - kvm_mmu_page_unlink_children(vcpu, page); + kvm_mmu_page_unlink_children(kvm, page); if (!page->root_count) { hlist_del(&page->hash_link); - kvm_mmu_free_page(vcpu, page); + kvm_mmu_free_page(kvm, page); } else - list_move(&page->link, &vcpu->kvm->active_mmu_pages); + list_move(&page->link, &kvm->active_mmu_pages); } static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -724,7 +734,7 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) if (page->gfn == gfn && !page->role.metaphysical) { pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, page->role.word); - kvm_mmu_zap_page(vcpu, page); + kvm_mmu_zap_page(vcpu->kvm, page); r = 1; } return r; @@ -737,7 +747,7 @@ static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn) while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, page->role.word); - kvm_mmu_zap_page(vcpu, page); + kvm_mmu_zap_page(vcpu->kvm, page); } } @@ -1089,10 +1099,10 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, pte = *spte; if (is_present_pte(pte)) { if (page->role.level == PT_PAGE_TABLE_LEVEL) - rmap_remove(vcpu, spte); + rmap_remove(spte); else { child = page_header(pte & PT64_BASE_ADDR_MASK); - mmu_page_remove_parent_pte(vcpu, child, spte); + mmu_page_remove_parent_pte(child, spte); } } *spte = 0; @@ -1161,7 +1171,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, */ pgprintk("misaligned: gpa %llx bytes %d role %x\n", gpa, bytes, page->role.word); - kvm_mmu_zap_page(vcpu, page); + kvm_mmu_zap_page(vcpu->kvm, page); continue; } page_offset = offset; @@ -1200,17 +1210,16 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT); } -void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) +void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) { while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) { struct kvm_mmu_page *page; page = container_of(vcpu->kvm->active_mmu_pages.prev, struct kvm_mmu_page, link); - kvm_mmu_zap_page(vcpu, page); + kvm_mmu_zap_page(vcpu->kvm, page); } } -EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages); static void free_mmu_pages(struct kvm_vcpu *vcpu) { @@ -1219,7 +1228,7 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) while (!list_empty(&vcpu->kvm->active_mmu_pages)) { page = container_of(vcpu->kvm->active_mmu_pages.next, struct kvm_mmu_page, link); - kvm_mmu_zap_page(vcpu, page); + kvm_mmu_zap_page(vcpu->kvm, page); } free_page((unsigned long)vcpu->mmu.pae_root); } @@ -1277,9 +1286,8 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) mmu_free_memory_caches(vcpu); } -void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) { - struct kvm *kvm = vcpu->kvm; struct kvm_mmu_page *page; list_for_each_entry(page, &kvm->active_mmu_pages, link) { @@ -1293,27 +1301,20 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) for (i = 0; i < PT64_ENT_PER_PAGE; ++i) /* avoid RMW */ if (pt[i] & PT_WRITABLE_MASK) { - rmap_remove(vcpu, &pt[i]); + rmap_remove(&pt[i]); pt[i] &= ~PT_WRITABLE_MASK; } } } -void kvm_mmu_zap_all(struct kvm_vcpu *vcpu) +void kvm_mmu_zap_all(struct kvm *kvm) { - destroy_kvm_mmu(vcpu); - - while (!list_empty(&vcpu->kvm->active_mmu_pages)) { - struct kvm_mmu_page *page; + struct kvm_mmu_page *page, *node; - page = container_of(vcpu->kvm->active_mmu_pages.next, - struct kvm_mmu_page, link); - kvm_mmu_zap_page(vcpu, page); - } + list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link) + kvm_mmu_zap_page(kvm, page); - mmu_free_memory_caches(vcpu); - kvm_flush_remote_tlbs(vcpu->kvm); - init_kvm_mmu(vcpu); + kvm_flush_remote_tlbs(kvm); } void kvm_mmu_module_exit(void) @@ -1322,8 +1323,6 @@ void kvm_mmu_module_exit(void) kmem_cache_destroy(pte_chain_cache); if (rmap_desc_cache) kmem_cache_destroy(rmap_desc_cache); - if (mmu_page_cache) - kmem_cache_destroy(mmu_page_cache); if (mmu_page_header_cache) kmem_cache_destroy(mmu_page_header_cache); } @@ -1332,24 +1331,18 @@ int kvm_mmu_module_init(void) { pte_chain_cache = kmem_cache_create("kvm_pte_chain", sizeof(struct kvm_pte_chain), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!pte_chain_cache) goto nomem; rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", sizeof(struct kvm_rmap_desc), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!rmap_desc_cache) goto nomem; - mmu_page_cache = kmem_cache_create("kvm_mmu_page", - PAGE_SIZE, - PAGE_SIZE, 0, NULL, NULL); - if (!mmu_page_cache) - goto nomem; - mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!mmu_page_header_cache) goto nomem; diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index a7c5cb0319ea..4b5391c717f8 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -366,6 +366,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, metaphysical = 1; hugepage_access = *guest_ent; hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; + if (*guest_ent & PT64_NX_MASK) + hugepage_access |= (1 << 2); hugepage_access >>= PT_WRITABLE_SHIFT; table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index f60012d62610..4b8a0cc9665e 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -163,7 +163,7 @@ static u16 twobyte_table[256] = { ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x3F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x47 */ DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, @@ -486,6 +486,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) unsigned long modrm_ea; int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0; int no_wb = 0; + u64 msr_data; /* Shadow copy of register state. Committed on successful emulation. */ unsigned long _regs[NR_VCPU_REGS]; @@ -1177,6 +1178,8 @@ pop_instruction: twobyte_insn: switch (b) { case 0x01: /* lgdt, lidt, lmsw */ + /* Disable writeback. */ + no_wb = 1; switch (modrm_reg) { u16 size; unsigned long address; @@ -1214,11 +1217,13 @@ twobyte_insn: } break; case 0x21: /* mov from dr to reg */ + no_wb = 1; if (modrm_mod != 3) goto cannot_emulate; rc = emulator_get_dr(ctxt, modrm_reg, &_regs[modrm_rm]); break; case 0x23: /* mov from reg to dr */ + no_wb = 1; if (modrm_mod != 3) goto cannot_emulate; rc = emulator_set_dr(ctxt, modrm_reg, _regs[modrm_rm]); @@ -1344,6 +1349,29 @@ twobyte_special_insn: goto cannot_emulate; realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags); break; + case 0x30: + /* wrmsr */ + msr_data = (u32)_regs[VCPU_REGS_RAX] + | ((u64)_regs[VCPU_REGS_RDX] << 32); + rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data); + if (rc) { + kvm_arch_ops->inject_gp(ctxt->vcpu, 0); + _eip = ctxt->vcpu->rip; + } + rc = X86EMUL_CONTINUE; + break; + case 0x32: + /* rdmsr */ + rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data); + if (rc) { + kvm_arch_ops->inject_gp(ctxt->vcpu, 0); + _eip = ctxt->vcpu->rip; + } else { + _regs[VCPU_REGS_RAX] = (u32)msr_data; + _regs[VCPU_REGS_RDX] = msr_data >> 32; + } + rc = X86EMUL_CONTINUE; + break; case 0xc7: /* Grp9 (cmpxchg8b) */ { u64 old, new; |