From 2ed152afc7ed61830b848b32936e1541a1a57799 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 10 Mar 2010 19:00:43 +0800 Subject: KVM: cleanup kvm trace This patch does: - no need call tracepoint_synchronize_unregister() when kvm module is unloaded since ftrace can handle it - cleanup ftrace's macro Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 19a8906bcaa2..3af2dfd8778e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); #include -#undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS #include "mmutrace.h" -- cgit v1.2.3 From d4f64b6cad0fc0fb4cec868c6ca6b1325949d08b Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 10 Mar 2010 23:31:22 +0900 Subject: KVM: remove redundant initialization of page->private The prep_new_page() in page allocator calls set_page_private(page, 0). So we don't need to reinitialize private of page. Signed-off-by: Minchan Kim Cc: Avi Kivity Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3af2dfd8778e..4455ddbe36f4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -326,7 +326,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; - set_page_private(page, 0); cache->objects[cache->nobjs++] = page_address(page); } return 0; -- cgit v1.2.3 From 72016f3a4221799a0b1fdf443ef6e29db572a9bb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 15 Mar 2010 13:59:53 +0200 Subject: KVM: MMU: Consolidate two guest pte reads in kvm_mmu_pte_write() kvm_mmu_pte_write() reads guest ptes in two different occasions, both to allow a 32-bit pae guest to update a pte with 4-byte writes. Consolidate these into a single read, which also allows us to consolidate another read from an invlpg speculating a gpte into the shadow page table. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 69 ++++++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 38 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4455ddbe36f4..91f8b171c825 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2560,36 +2560,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) } static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes) + u64 gpte) { gfn_t gfn; - int r; - u64 gpte = 0; pfn_t pfn; - if (bytes != 4 && bytes != 8) - return; - - /* - * Assume that the pte write on a page table of the same type - * as the current vcpu paging mode. This is nearly always true - * (might be false while changing modes). Note it is verified later - * by update_pte(). - */ - if (is_pae(vcpu)) { - /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ - if ((bytes == 4) && (gpa % 4 == 0)) { - r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); - if (r) - return; - memcpy((void *)&gpte + (gpa % 8), new, 4); - } else if ((bytes == 8) && (gpa % 8 == 0)) { - memcpy((void *)&gpte, new, 8); - } - } else { - if ((bytes == 4) && (gpa % 4 == 0)) - memcpy((void *)&gpte, new, 4); - } if (!is_present_gpte(gpte)) return; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -2640,7 +2615,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int r; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); + + switch (bytes) { + case 4: + gentry = *(const u32 *)new; + break; + case 8: + gentry = *(const u64 *)new; + break; + default: + gentry = 0; + break; + } + + /* + * Assume that the pte write on a page table of the same type + * as the current vcpu paging mode. This is nearly always true + * (might be false while changing modes). Note it is verified later + * by update_pte(). + */ + if (is_pae(vcpu) && bytes == 4) { + /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ + gpa &= ~(gpa_t)7; + r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8); + if (r) + gentry = 0; + } + + mmu_guess_page_from_pte_write(vcpu, gpa, gentry); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_access_page(vcpu, gfn); kvm_mmu_free_some_pages(vcpu); @@ -2705,20 +2707,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, continue; } spte = &sp->spt[page_offset / sizeof(*spte)]; - if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { - gentry = 0; - r = kvm_read_guest_atomic(vcpu->kvm, - gpa & ~(u64)(pte_size - 1), - &gentry, pte_size); - new = (const void *)&gentry; - if (r < 0) - new = NULL; - } while (npte--) { entry = *spte; mmu_pte_write_zap_pte(vcpu, sp, spte); - if (new) - mmu_pte_write_new_pte(vcpu, sp, spte, new); + if (gentry) + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); mmu_pte_write_flush_tlb(vcpu, entry, *spte); ++spte; } -- cgit v1.2.3 From 08e850c6536db302050c0287649e68e3bbdfe2c7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 15 Mar 2010 13:59:57 +0200 Subject: KVM: MMU: Reinstate pte prefetch on invlpg Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest races. However, the SDM is adamant that prefetch is allowed: "The processor may create entries in paging-structure caches for translations required for prefetches and for accesses that are a result of speculative execution that would never actually occur in the executed code path." And, in fact, there was a race in the prefetch code: we picked up the pte without the mmu lock held, so an older invlpg could install the pte over a newer invlpg. Reinstate the prefetch logic, but this time note whether another invlpg has executed using a counter. If a race occured, do not install the pte. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 37 +++++++++++++++++++++++-------------- arch/x86/kvm/paging_tmpl.h | 15 +++++++++++++++ 3 files changed, 39 insertions(+), 14 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ea1b6c615f9f..28826c82d1e2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -389,6 +389,7 @@ struct kvm_arch { unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_alloc_mmu_pages; + atomic_t invlpg_counter; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 91f8b171c825..064c3efb49dc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2613,20 +2613,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int flooded = 0; int npte; int r; + int invlpg_counter; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - switch (bytes) { - case 4: - gentry = *(const u32 *)new; - break; - case 8: - gentry = *(const u64 *)new; - break; - default: - gentry = 0; - break; - } + invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); /* * Assume that the pte write on a page table of the same type @@ -2634,16 +2625,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, * (might be false while changing modes). Note it is verified later * by update_pte(). */ - if (is_pae(vcpu) && bytes == 4) { + if ((is_pae(vcpu) && bytes == 4) || !new) { /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ - gpa &= ~(gpa_t)7; - r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8); + if (is_pae(vcpu)) { + gpa &= ~(gpa_t)7; + bytes = 8; + } + r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); if (r) gentry = 0; + new = (const u8 *)&gentry; + } + + switch (bytes) { + case 4: + gentry = *(const u32 *)new; + break; + case 8: + gentry = *(const u64 *)new; + break; + default: + gentry = 0; + break; } mmu_guess_page_from_pte_write(vcpu, gpa, gentry); spin_lock(&vcpu->kvm->mmu_lock); + if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) + gentry = 0; kvm_mmu_access_page(vcpu, gfn); kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4b37e1acd375..067797a72768 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -463,6 +463,7 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; + gpa_t pte_gpa = -1; int level; u64 *sptep; int need_flush = 0; @@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + pte_gpa = (sp->gfn << PAGE_SHIFT); + pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); @@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); + + atomic_inc(&vcpu->kvm->arch.invlpg_counter); + spin_unlock(&vcpu->kvm->mmu_lock); + + if (pte_gpa == -1) + return; + + if (mmu_topup_memory_caches(vcpu)) + return; + kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, -- cgit v1.2.3 From f815bce8940bc28e42e6b924825bb31df2a4bff5 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 19 Mar 2010 17:58:53 +0800 Subject: KVM: MMU: check reserved bits only if CR4.PSE=1 or CR4.PAE=1 - Check reserved bits only if CR4.PAE=1 or CR4.PSE=1 when guest #PF occurs - Fix a typo in reset_rsvds_bits_mask() Signed-off-by: Xiao Guangrong Reviewed-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 064c3efb49dc..83d2ebce9ea9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2297,13 +2297,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) /* no rsvd bits for 2 level 4K page table entries */ context->rsvd_bits_mask[0][1] = 0; context->rsvd_bits_mask[0][0] = 0; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + + if (!is_pse(vcpu)) { + context->rsvd_bits_mask[1][1] = 0; + break; + } + if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; case PT32E_ROOT_LEVEL: context->rsvd_bits_mask[0][2] = @@ -2316,7 +2322,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; break; case PT64_ROOT_LEVEL: context->rsvd_bits_mask[0][3] = exb_bit_rsvd | @@ -2334,7 +2340,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; break; } } -- cgit v1.2.3 From 84b0c8c6a6f87b62bca93727dee12ec59e32e597 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Mar 2010 10:16:40 +0200 Subject: KVM: MMU: Disassociate direct maps from guest levels Direct maps are linear translations for a section of memory, used for real mode or with large pages. As such, they are independent of the guest levels. Teach the mmu about this by making page->role.glevels = 0 for direct maps. This allows direct maps to be shared among real mode and the various paging modes. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 83d2ebce9ea9..1cc60d3f445b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1329,6 +1329,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role = vcpu->arch.mmu.base_role; role.level = level; role.direct = direct; + if (role.direct) + role.glevels = 0; role.access = access; if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); -- cgit v1.2.3 From 805d32dea4dfb8319aaf7e73a681ad410a5e331a Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 1 Apr 2010 16:50:45 +0800 Subject: KVM: MMU: cleanup/fix mmu audit code This patch does: - 'sp' parameter in inspect_spte_fn() is not used, so remove it - fix 'kvm' and 'slots' is not defined in count_rmaps() - fix a bug in inspect_spte_has_rmap() Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1cc60d3f445b..6ed7d633aefe 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3182,8 +3182,7 @@ static gva_t canonicalize(gva_t gva) } -typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, - u64 *sptep); +typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, inspect_spte_fn fn) @@ -3199,7 +3198,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, child = page_header(ent & PT64_BASE_ADDR_MASK); __mmu_spte_walk(kvm, child, fn); } else - fn(kvm, sp, &sp->spt[i]); + fn(kvm, &sp->spt[i]); } } } @@ -3290,6 +3289,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu) static int count_rmaps(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; + struct kvm_memslots *slots; int nmaps = 0; int i, j, k, idx; @@ -3323,7 +3324,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) return nmaps; } -void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) +void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) { unsigned long *rmapp; struct kvm_mmu_page *rev_sp; @@ -3339,14 +3340,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) printk(KERN_ERR "%s: no memslot for gfn %ld\n", audit_msg, gfn); printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", - audit_msg, sptep - rev_sp->spt, + audit_msg, (long int)(sptep - rev_sp->spt), rev_sp->gfn); dump_stack(); return; } rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], - is_large_pte(*sptep)); + rev_sp->role.level); if (!*rmapp) { if (!printk_ratelimit()) return; @@ -3381,7 +3382,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) continue; if (!(ent & PT_WRITABLE_MASK)) continue; - inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); + inspect_spte_has_rmap(vcpu->kvm, &pt[i]); } } return; -- cgit v1.2.3 From f84cbb0561d7cefb5fc050ee99d8c82ec9ce883d Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 6 Apr 2010 18:29:05 +0800 Subject: KVM: MMU: remove unused field kvm_mmu_page.oos_link is not used, so remove it Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/mmu.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26c629a062db..0c49c888be6b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -187,8 +187,6 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; - struct list_head oos_link; - /* * The following two entries are used to key the shadow page in the * hash table. diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6ed7d633aefe..ec8900b6692a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -923,7 +923,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&sp->oos_link); bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; sp->parent_pte = parent_pte; -- cgit v1.2.3 From 5b7e0102ae744e9175b905f4267a81393bdb7a75 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 14 Apr 2010 19:20:03 +0300 Subject: KVM: MMU: Replace role.glevels with role.cr4_pae There is no real distinction between glevels=3 and glevels=4; both have exactly the same format and the code is treated exactly the same way. Drop role.glevels and replace is with role.cr4_pae (which is meaningful). This simplifies the code a bit. As a side effect, it allows sharing shadow page tables between pae and longmode guest page tables at the same guest page. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 12 ++++++------ arch/x86/kvm/mmutrace.h | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3602728d54de..707d272ae4a1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -171,8 +171,8 @@ struct kvm_pte_chain { union kvm_mmu_page_role { unsigned word; struct { - unsigned glevels:4; unsigned level:4; + unsigned cr4_pae:1; unsigned quadrant:2; unsigned pad_for_nice_hex_output:6; unsigned direct:1; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ec8900b6692a..51aa580d0aa5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1206,7 +1206,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - if (sp->role.glevels != vcpu->arch.mmu.root_level) { + if (sp->role.cr4_pae != !!is_pae(vcpu)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } @@ -1329,7 +1329,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role.level = level; role.direct = direct; if (role.direct) - role.glevels = 0; + role.cr4_pae = 0; role.access = access; if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); @@ -2443,7 +2443,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) else r = paging32_init_context(vcpu); - vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; + vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); return r; } @@ -2532,7 +2532,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - if (sp->role.glevels == PT32_ROOT_LEVEL) + if (!sp->role.cr4_pae) paging32_update_pte(vcpu, sp, spte, new); else paging64_update_pte(vcpu, sp, spte, new); @@ -2681,7 +2681,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) continue; - pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; + pte_size = sp->role.cr4_pae ? 8 : 4; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; if (misaligned || flooded) { @@ -2705,7 +2705,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, page_offset = offset; level = sp->role.level; npte = 1; - if (sp->role.glevels == PT32_ROOT_LEVEL) { + if (!sp->role.cr4_pae) { page_offset <<= 1; /* 32->64 */ /* * A 32-bit pde maps 4MB while the shadow pdes map diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 1fe956ab7617..3851f1f3030c 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -28,9 +28,10 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ + trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s %spge" \ " %snxe root %u %s%c", \ - __entry->gfn, role.level, role.glevels, \ + __entry->gfn, role.level, \ + role.cr4_pae ? " pae" : "", \ role.quadrant, \ role.direct ? " direct" : "", \ access_str[role.access], \ -- cgit v1.2.3 From 1b8c7934a4063653095fb9fa88f9169f5b52f52b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 16 Apr 2010 21:23:41 +0800 Subject: KVM: MMU: remove unused struct kvm_unsync_walk Remove 'struct kvm_unsync_walk' since it's not used. Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 51aa580d0aa5..b57be03d442c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -173,11 +173,6 @@ struct kvm_shadow_walk_iterator { shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) - -struct kvm_unsync_walk { - int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); -}; - typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); static struct kmem_cache *pte_chain_cache; -- cgit v1.2.3 From 6b18493d60e7f54a0feb771fa141b5fcb72ce1e4 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 16 Apr 2010 21:29:17 +0800 Subject: KVM: MMU: remove unused parameter in mmu_parent_walk() 'vcpu' is unused, remove it Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b57be03d442c..45f3aa5213c5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -173,7 +173,7 @@ struct kvm_shadow_walk_iterator { shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) -typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); +typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); static struct kmem_cache *pte_chain_cache; static struct kmem_cache *rmap_desc_cache; @@ -1001,8 +1001,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, } -static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - mmu_parent_walk_fn fn) +static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) { struct kvm_pte_chain *pte_chain; struct hlist_node *node; @@ -1011,8 +1010,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!sp->multimapped && sp->parent_pte) { parent_sp = page_header(__pa(sp->parent_pte)); - fn(vcpu, parent_sp); - mmu_parent_walk(vcpu, parent_sp, fn); + fn(parent_sp); + mmu_parent_walk(parent_sp, fn); return; } hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) @@ -1020,8 +1019,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!pte_chain->parent_ptes[i]) break; parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); - fn(vcpu, parent_sp); - mmu_parent_walk(vcpu, parent_sp, fn); + fn(parent_sp); + mmu_parent_walk(parent_sp, fn); } } @@ -1058,16 +1057,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) } } -static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +static int unsync_walk_fn(struct kvm_mmu_page *sp) { kvm_mmu_update_parents_unsync(sp); return 1; } -static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) +static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - mmu_parent_walk(vcpu, sp, unsync_walk_fn); + mmu_parent_walk(sp, unsync_walk_fn); kvm_mmu_update_parents_unsync(sp); } @@ -1345,7 +1343,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, mmu_page_add_parent_pte(vcpu, sp, parent_pte); if (sp->unsync_children) { set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); - kvm_mmu_mark_parents_unsync(vcpu, sp); + kvm_mmu_mark_parents_unsync(sp); } trace_kvm_mmu_get_page(sp, false); return sp; @@ -1759,7 +1757,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; - kvm_mmu_mark_parents_unsync(vcpu, sp); + kvm_mmu_mark_parents_unsync(sp); mmu_convert_notrap(sp); return 0; -- cgit v1.2.3 From 3246af0ece6c61689847417977733f0b12dc4b6f Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 16 Apr 2010 16:35:54 +0800 Subject: KVM: MMU: cleanup for hlist walk restart Quote from Avi: |Just change the assignment to a 'goto restart;' please, |I don't like playing with list_for_each internals. Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 45f3aa5213c5..7a17db1cdcd6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1565,13 +1565,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) r = 0; index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; +restart: hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) if (sp->gfn == gfn && !sp->role.direct) { pgprintk("%s: gfn %lx role %x\n", __func__, gfn, sp->role.word); r = 1; if (kvm_mmu_zap_page(kvm, sp)) - n = bucket->first; + goto restart; } return r; } @@ -1585,13 +1586,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; +restart: hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) { pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); if (kvm_mmu_zap_page(kvm, sp)) - nn = bucket->first; + goto restart; } } } @@ -2671,6 +2673,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; + +restart: hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) continue; @@ -2691,7 +2695,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("misaligned: gpa %llx bytes %d role %x\n", gpa, bytes, sp->role.word); if (kvm_mmu_zap_page(vcpu->kvm, sp)) - n = bucket->first; + goto restart; ++vcpu->kvm->stat.mmu_flooded; continue; } @@ -2900,10 +2904,11 @@ void kvm_mmu_zap_all(struct kvm *kvm) struct kvm_mmu_page *sp, *node; spin_lock(&kvm->mmu_lock); +restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) if (kvm_mmu_zap_page(kvm, sp)) - node = container_of(kvm->arch.active_mmu_pages.next, - struct kvm_mmu_page, link); + goto restart; + spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); -- cgit v1.2.3 From 90d83dc3d49f5101addae962ccc1b4aff66b68d8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 19 Apr 2010 17:41:23 +0800 Subject: KVM: use the correct RCU API for PROVE_RCU=y The RCU/SRCU API have already changed for proving RCU usage. I got the following dmesg when PROVE_RCU=y because we used incorrect API. This patch coverts rcu_deference() to srcu_dereference() or family API. =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- arch/x86/kvm/mmu.c:3020 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by qemu-system-x86/8550: #0: (&kvm->slots_lock){+.+.+.}, at: [] kvm_set_memory_region+0x29/0x50 [kvm] #1: (&(&kvm->mmu_lock)->rlock){+.+...}, at: [] kvm_arch_commit_memory_region+0xa6/0xe2 [kvm] stack backtrace: Pid: 8550, comm: qemu-system-x86 Not tainted 2.6.34-rc4-tip-01028-g939eab1 #27 Call Trace: [] lockdep_rcu_dereference+0xaa/0xb3 [] kvm_mmu_calculate_mmu_pages+0x44/0x7d [kvm] [] kvm_arch_commit_memory_region+0xb7/0xe2 [kvm] [] __kvm_set_memory_region+0x636/0x6e2 [kvm] [] kvm_set_memory_region+0x37/0x50 [kvm] [] vmx_set_tss_addr+0x46/0x5a [kvm_intel] [] kvm_arch_vm_ioctl+0x17a/0xcf8 [kvm] [] ? unlock_page+0x27/0x2c [] ? __do_fault+0x3a9/0x3e1 [] kvm_vm_ioctl+0x364/0x38d [kvm] [] ? up_read+0x23/0x3d [] vfs_ioctl+0x32/0xa6 [] do_vfs_ioctl+0x495/0x4db [] ? fget_light+0xc2/0x241 [] ? do_sys_open+0x104/0x116 [] ? retint_swapgs+0xe/0x13 [] sys_ioctl+0x47/0x6a [] system_call_fastpath+0x16/0x1b Signed-off-by: Lai Jiangshan Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/s390/kvm/kvm-s390.h | 2 +- arch/x86/kvm/mmu.c | 7 ++++--- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- arch/x86/kvm/x86.h | 7 +++++++ include/linux/kvm_host.h | 7 +++++++ virt/kvm/iommu.c | 4 ++-- virt/kvm/kvm_main.c | 13 ++++++++----- 9 files changed, 33 insertions(+), 15 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d7bac1f75af0..d5f4e9161201 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1381,7 +1381,7 @@ static void kvm_release_vm_pages(struct kvm *kvm) int i, j; unsigned long base_gfn; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; i++) { memslot = &slots->memslots[i]; base_gfn = memslot->base_gfn; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 60f09ab3672c..cfa9d1777457 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -72,7 +72,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) struct kvm_memslots *memslots; idx = srcu_read_lock(&vcpu->kvm->srcu); - memslots = rcu_dereference(vcpu->kvm->memslots); + memslots = kvm_memslots(vcpu->kvm); mem = &memslots->memslots[0]; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7a17db1cdcd6..0682a393ad90 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -787,7 +787,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, int retval = 0; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; i++) { struct kvm_memory_slot *memslot = &slots->memslots[i]; @@ -3016,7 +3016,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) unsigned int nr_pages = 0; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); + for (i = 0; i < slots->nmemslots; i++) nr_pages += slots->memslots[i].npages; @@ -3292,7 +3293,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) int i, j, k, idx; idx = srcu_read_lock(&kvm->srcu); - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *m = &slots->memslots[i]; struct kvm_rmap_desc *d; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0b896ac7e4bb..d0a10b5612e9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1558,7 +1558,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) struct kvm_memslots *slots; gfn_t base_gfn; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); base_gfn = kvm->memslots->memslots[0].base_gfn + kvm->memslots->memslots[0].npages - 3; return base_gfn << PAGE_SHIFT; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 58a96e6a234c..638248c96999 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2497,7 +2497,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) struct kvm_mem_alias *alias; struct kvm_mem_aliases *aliases; - aliases = rcu_dereference(kvm->arch.aliases); + aliases = kvm_aliases(kvm); for (i = 0; i < aliases->naliases; ++i) { alias = &aliases->aliases[i]; @@ -2516,7 +2516,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) struct kvm_mem_alias *alias; struct kvm_mem_aliases *aliases; - aliases = rcu_dereference(kvm->arch.aliases); + aliases = kvm_aliases(kvm); for (i = 0; i < aliases->naliases; ++i) { alias = &aliases->aliases[i]; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b7a404722d2b..f4b54458285b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -65,6 +65,13 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return kvm_read_cr0_bits(vcpu, X86_CR0_PG); } +static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) +{ + return rcu_dereference_check(kvm->arch.aliases, + srcu_read_lock_held(&kvm->srcu) + || lockdep_is_held(&kvm->slots_lock)); +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 55830638aeb1..1ed030bad59e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -250,6 +250,13 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); +static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) +{ + return rcu_dereference_check(kvm->memslots, + srcu_read_lock_held(&kvm->srcu) + || lockdep_is_held(&kvm->slots_lock)); +} + #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 80fd3ad3b2de..37ca71ebdba8 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) int i, r = 0; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; i++) { r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); @@ -217,7 +217,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) int i; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; i++) { kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d6351a34b297..4901ec5061ba 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -834,7 +834,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memslots *slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; @@ -856,7 +856,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memslots *slots = kvm_memslots(kvm); gfn = unalias_gfn_instantiation(kvm, gfn); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { @@ -900,7 +900,7 @@ out: int memslot_id(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *memslot = NULL; gfn = unalias_gfn(kvm, gfn); @@ -1994,7 +1994,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { int i; - struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); + struct kvm_io_bus *bus; + + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); for (i = 0; i < bus->dev_count; i++) if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) return 0; @@ -2006,8 +2008,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { int i; - struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); + struct kvm_io_bus *bus; + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); for (i = 0; i < bus->dev_count; i++) if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) return 0; -- cgit v1.2.3 From b2fc15a5ef6679a5f87fee012a836294532bc628 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Fri, 16 Apr 2010 17:18:54 +0800 Subject: KVM: MMU: Remove unused varialbe in rmap_next() Remove unused varialbe in rmap_next() Signed-off-by: Gui Jianfeng Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0682a393ad90..7eff794457f3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -647,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) { struct kvm_rmap_desc *desc; - struct kvm_rmap_desc *prev_desc; u64 *prev_spte; int i; @@ -659,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) return NULL; } desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); - prev_desc = NULL; prev_spte = NULL; while (desc) { for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { -- cgit v1.2.3 From 2a059bf444dd7758ccf48f217cd981570132be85 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Fri, 16 Apr 2010 17:19:48 +0800 Subject: KVM: Get rid of dead function gva_to_page() Nobody use gva_to_page() anymore, get rid of it. Signed-off-by: Gui Jianfeng Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 14 -------------- include/linux/kvm_host.h | 1 - 2 files changed, 15 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7eff794457f3..d2a110e870fa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1618,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) } } -struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) -{ - struct page *page; - - gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); - - if (gpa == UNMAPPED_GVA) - return NULL; - - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - - return page; -} - /* * The function is based on mtrr_type_lookup() in * arch/x86/kernel/cpu/mtrr/generic.c diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1ed030bad59e..ce027d518096 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -260,7 +260,6 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } -struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); extern struct page *bad_page; extern pfn_t bad_pfn; -- cgit v1.2.3 From 77a1a715707d0f60ce0cfbe44070527a0a561f01 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 16 Apr 2010 16:21:42 +0800 Subject: KVM: MMU: cleanup for function unaccount_shadowed() Since gfn is not changed in the for loop, we do not need to call gfn_to_memslot_unaliased() under the loop, and it is safe to move it out. Signed-off-by: Wei Yongjun Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d2a110e870fa..ddfa8658fb6d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -431,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) int i; gfn = unalias_gfn(kvm, gfn); + slot = gfn_to_memslot_unaliased(kvm, gfn); for (i = PT_DIRECTORY_LEVEL; i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { - slot = gfn_to_memslot_unaliased(kvm, gfn); write_count = slot_largepage_idx(gfn, slot, i); *write_count -= 1; WARN_ON(*write_count < 0); -- cgit v1.2.3 From 5a7388c2d2faa2cc70c2d4717c8d7836d55459e0 Mon Sep 17 00:00:00 2001 From: Eric Northup Date: Mon, 26 Apr 2010 17:00:05 -0700 Subject: KVM: MMU: fix hashing for TDP and non-paging modes For TDP mode, avoid creating multiple page table roots for the single guest-to-host physical address map by fixing the inputs used for the shadow page table hash in mmu_alloc_roots(). Signed-off-by: Eric Northup Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ddfa8658fb6d..9696d654b01f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2059,10 +2059,12 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu->arch.mmu.root_hpa; ASSERT(!VALID_PAGE(root)); - if (tdp_enabled) - direct = 1; if (mmu_check_root(vcpu, root_gfn)) return 1; + if (tdp_enabled) { + direct = 1; + root_gfn = 0; + } sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, direct, ACC_ALL, NULL); @@ -2072,8 +2074,6 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) return 0; } direct = !is_paging(vcpu); - if (tdp_enabled) - direct = 1; for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -2089,6 +2089,10 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) root_gfn = 0; if (mmu_check_root(vcpu, root_gfn)) return 1; + if (tdp_enabled) { + direct = 1; + root_gfn = i << 30; + } sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, direct, ACC_ALL, NULL); -- cgit v1.2.3 From d35b8dd9355805f17225fdbfee4bc704d7bf7547 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Tue, 27 Apr 2010 10:39:49 +0800 Subject: KVM: Fix mmu shrinker error kvm_mmu_remove_one_alloc_mmu_page() assumes kvm_mmu_zap_page() only reclaims only one sp, but that's not the case. This will cause mmu shrinker returns a wrong number. This patch fix the counting error. Signed-off-by: Gui Jianfeng Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9696d654b01f..18d2f584945b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2902,13 +2902,13 @@ restart: kvm_flush_remote_tlbs(kvm); } -static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) +static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); - kvm_mmu_zap_page(kvm, page); + return kvm_mmu_zap_page(kvm, page) + 1; } static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) @@ -2920,7 +2920,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - int npages, idx; + int npages, idx, freed_pages; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); @@ -2928,8 +2928,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) kvm->arch.n_free_mmu_pages; cache_count += npages; if (!kvm_freed && nr_to_scan > 0 && npages > 0) { - kvm_mmu_remove_one_alloc_mmu_page(kvm); - cache_count--; + freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); + cache_count -= freed_pages; kvm_freed = kvm; } nr_to_scan--; -- cgit v1.2.3 From 5e1b3ddbf220d2256a6a0d676a219ed76b8048d0 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 28 Apr 2010 11:55:06 +0800 Subject: KVM: MMU: move unsync/sync tracpoints to proper place Move unsync/sync tracepoints to the proper place, it's good for us to obtain unsync page live time Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 18d2f584945b..51eb6d6abd86 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1189,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); + trace_kvm_mmu_sync_page(sp); sp->unsync = 0; --kvm->stat.mmu_unsync; } @@ -1202,7 +1203,6 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 1; } - trace_kvm_mmu_sync_page(sp); if (rmap_write_protect(vcpu->kvm, sp->gfn)) kvm_flush_remote_tlbs(vcpu->kvm); kvm_unlink_unsync_page(vcpu->kvm, sp); @@ -1730,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) struct kvm_mmu_page *s; struct hlist_node *node, *n; - trace_kvm_mmu_unsync_page(sp); index = kvm_page_table_hashfn(sp->gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; /* don't unsync if pagetable is shadowed with multiple roles */ @@ -1740,6 +1739,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (s->role.word != sp->role.word) return 1; } + trace_kvm_mmu_unsync_page(sp); ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; -- cgit v1.2.3 From 8facbbff071ff2b19268d3732e31badc60471e21 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 4 May 2010 12:58:32 +0300 Subject: KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots On svm, kvm_read_pdptr() may require reading guest memory, which can sleep. Push the spinlock into mmu_alloc_roots(), and only take it after we've read the pdptr. Tested-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 51eb6d6abd86..de996380ec26 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2065,11 +2065,13 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) direct = 1; root_gfn = 0; } + spin_lock(&vcpu->kvm->mmu_lock); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = root; return 0; } @@ -2093,11 +2095,14 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) direct = 1; root_gfn = i << 30; } + spin_lock(&vcpu->kvm->mmu_lock); sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); + vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; } vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); @@ -2466,7 +2471,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) goto out; spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); + spin_unlock(&vcpu->kvm->mmu_lock); r = mmu_alloc_roots(vcpu); + spin_lock(&vcpu->kvm->mmu_lock); mmu_sync_roots(vcpu); spin_unlock(&vcpu->kvm->mmu_lock); if (r) -- cgit v1.2.3 From 3dbe141595faa48a067add3e47bba3205b79d33c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 May 2010 11:48:18 +0300 Subject: KVM: MMU: Segregate shadow pages with different cr0.wp When cr0.wp=0, we may shadow a gpte having u/s=1 and r/w=0 with an spte having u/s=0 and r/w=1. This allows excessive access if the guest sets cr0.wp=1 and accesses through this spte. Fix by making cr0.wp part of the base role; we'll have different sptes for the two cases and the problem disappears. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- Documentation/kvm/mmu.txt | 2 ++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/Documentation/kvm/mmu.txt b/Documentation/kvm/mmu.txt index 0cc28fb84f4f..aaed6ab9d7ab 100644 --- a/Documentation/kvm/mmu.txt +++ b/Documentation/kvm/mmu.txt @@ -163,6 +163,8 @@ Shadow pages contain the following information: 32-bit or 64-bit gptes are in use). role.cr4_nxe: Contains the value of efer.nxe for which the page is valid. + role.cr0_wp: + Contains the value of cr0.wp for which the page is valid. gfn: Either the guest page table containing the translations shadowed by this page, or the base page frame for linear translations. See role.direct. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3f0007b076da..76f5483cffec 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -179,6 +179,7 @@ union kvm_mmu_page_role { unsigned access:3; unsigned invalid:1; unsigned nxe:1; + unsigned cr0_wp:1; }; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index de996380ec26..81563e76e28f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -217,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); -static int is_write_protection(struct kvm_vcpu *vcpu) +static bool is_write_protection(struct kvm_vcpu *vcpu) { return kvm_read_cr0_bits(vcpu, X86_CR0_WP); } @@ -2432,6 +2432,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) r = paging32_init_context(vcpu); vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); + vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); return r; } -- cgit v1.2.3 From 3be2264be3c00865116f997dc53ebcc90fe7fc4b Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 28 May 2010 09:44:59 -0300 Subject: KVM: MMU: invalidate and flush on spte small->large page size change Always invalidate spte and flush TLBs when changing page size, to make sure different sized translations for the same address are never cached in a CPU's TLB. Currently the only case where this occurs is when a non-leaf spte pointer is overwritten by a leaf, large spte entry. This can happen after dirty logging is disabled on a memslot, for example. Noticed by Andrea. KVM-Stable-Tag Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 81563e76e28f..6fbcb48d5a9b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1870,6 +1870,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, child = page_header(pte & PT64_BASE_ADDR_MASK); mmu_page_remove_parent_pte(child, sptep); + __set_spte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); } else if (pfn != spte_to_pfn(*sptep)) { pgprintk("hfn old %lx new %lx\n", spte_to_pfn(*sptep), pfn); -- cgit v1.2.3 From 69325a122580d3a7b26589e8efdd6663001c3297 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 27 May 2010 14:35:58 +0300 Subject: KVM: MMU: Remove user access when allowing kernel access to gpte.w=0 page If cr0.wp=0, we have to allow the guest kernel access to a page with pte.w=0. We do that by setting spte.w=1, since the host cr0.wp must remain set so the host can write protect pages. Once we allow write access, we must remove user access otherwise we mistakenly allow the user to write the page. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6fbcb48d5a9b..a6f695d76928 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1815,6 +1815,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_WRITABLE_MASK; + if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK)) + spte &= ~PT_USER_MASK; + /* * Optimization: for pte sync, if spte was writable the hash * lookup is unnecessary (and expensive). Write protection -- cgit v1.2.3