summaryrefslogtreecommitdiffstats
path: root/drivers/kvm/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r--drivers/kvm/mmu.c154
1 files changed, 112 insertions, 42 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index cab26f301eab..e8e228118de9 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -52,11 +52,15 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
static int dbg = 1;
#endif
+#ifndef MMU_DEBUG
+#define ASSERT(x) do { } while (0)
+#else
#define ASSERT(x) \
if (!(x)) { \
printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
__FILE__, __LINE__, #x); \
}
+#endif
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
@@ -159,6 +163,9 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more;
};
+static struct kmem_cache *pte_chain_cache;
+static struct kmem_cache *rmap_desc_cache;
+
static int is_write_protection(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & CR0_WP_MASK;
@@ -196,14 +203,15 @@ static int is_rmap_pte(u64 pte)
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
- size_t objsize, int min)
+ struct kmem_cache *base_cache, int min,
+ gfp_t gfp_flags)
{
void *obj;
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
- obj = kzalloc(objsize, GFP_NOWAIT);
+ obj = kmem_cache_zalloc(base_cache, gfp_flags);
if (!obj)
return -ENOMEM;
cache->objects[cache->nobjs++] = obj;
@@ -217,20 +225,35 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
kfree(mc->objects[--mc->nobjs]);
}
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
{
int r;
r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
- sizeof(struct kvm_pte_chain), 4);
+ pte_chain_cache, 4, gfp_flags);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
- sizeof(struct kvm_rmap_desc), 1);
+ rmap_desc_cache, 1, gfp_flags);
out:
return r;
}
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
+ if (r < 0) {
+ spin_unlock(&vcpu->kvm->lock);
+ kvm_arch_ops->vcpu_put(vcpu);
+ r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
+ kvm_arch_ops->vcpu_load(vcpu);
+ spin_lock(&vcpu->kvm->lock);
+ }
+ return r;
+}
+
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
@@ -390,13 +413,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
struct kvm *kvm = vcpu->kvm;
struct page *page;
- struct kvm_memory_slot *slot;
struct kvm_rmap_desc *desc;
u64 *spte;
- slot = gfn_to_memslot(kvm, gfn);
- BUG_ON(!slot);
- page = gfn_to_page(slot, gfn);
+ page = gfn_to_page(kvm, gfn);
+ BUG_ON(!page);
while (page_private(page)) {
if (!(page_private(page) & 1))
@@ -417,6 +438,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
}
}
+#ifdef MMU_DEBUG
static int is_empty_shadow_page(hpa_t page_hpa)
{
u64 *pos;
@@ -431,15 +453,15 @@ static int is_empty_shadow_page(hpa_t page_hpa)
}
return 1;
}
+#endif
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
struct kvm_mmu_page *page_head = page_header(page_hpa);
ASSERT(is_empty_shadow_page(page_hpa));
- list_del(&page_head->link);
page_head->page_hpa = page_hpa;
- list_add(&page_head->link, &vcpu->free_pages);
+ list_move(&page_head->link, &vcpu->free_pages);
++vcpu->kvm->n_free_mmu_pages;
}
@@ -457,11 +479,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
return NULL;
page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
- list_del(&page->link);
- list_add(&page->link, &vcpu->kvm->active_mmu_pages);
+ list_move(&page->link, &vcpu->kvm->active_mmu_pages);
ASSERT(is_empty_shadow_page(page->page_hpa));
page->slot_bitmap = 0;
- page->global = 1;
page->multimapped = 0;
page->parent_pte = parent_pte;
--vcpu->kvm->n_free_mmu_pages;
@@ -569,6 +589,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gva_t gaddr,
unsigned level,
int metaphysical,
+ unsigned hugepage_access,
u64 *parent_pte)
{
union kvm_mmu_page_role role;
@@ -582,6 +603,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
role.glevels = vcpu->mmu.root_level;
role.level = level;
role.metaphysical = metaphysical;
+ role.hugepage_access = hugepage_access;
if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
@@ -669,10 +691,8 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
if (!page->root_count) {
hlist_del(&page->hash_link);
kvm_mmu_free_page(vcpu, page->page_hpa);
- } else {
- list_del(&page->link);
- list_add(&page->link, &vcpu->kvm->active_mmu_pages);
- }
+ } else
+ list_move(&page->link, &vcpu->kvm->active_mmu_pages);
}
static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -714,14 +734,12 @@ hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
- struct kvm_memory_slot *slot;
struct page *page;
ASSERT((gpa & HPA_ERR_MASK) == 0);
- slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
- if (!slot)
+ page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ if (!page)
return gpa | HPA_ERR_MASK;
- page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
| (gpa & (PAGE_SIZE-1));
}
@@ -735,6 +753,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
return gpa_to_hpa(vcpu, gpa);
}
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
+{
+ gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
+
+ if (gpa == UNMAPPED_GVA)
+ return NULL;
+ return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
+}
+
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
@@ -772,7 +799,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1,
- 1, &table[index]);
+ 1, 0, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
return -ENOMEM;
@@ -804,10 +831,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->mmu.pae_root[i];
- ASSERT(VALID_PAGE(root));
- root &= PT64_BASE_ADDR_MASK;
- page = page_header(root);
- --page->root_count;
+ if (root) {
+ ASSERT(VALID_PAGE(root));
+ root &= PT64_BASE_ADDR_MASK;
+ page = page_header(root);
+ --page->root_count;
+ }
vcpu->mmu.pae_root[i] = INVALID_PAGE;
}
vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -827,7 +856,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
page = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, 0, NULL);
+ PT64_ROOT_LEVEL, 0, 0, NULL);
root = page->page_hpa;
++page->root_count;
vcpu->mmu.root_hpa = root;
@@ -838,13 +867,17 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
hpa_t root = vcpu->mmu.pae_root[i];
ASSERT(!VALID_PAGE(root));
- if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
+ if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
+ if (!is_present_pte(vcpu->pdptrs[i])) {
+ vcpu->mmu.pae_root[i] = 0;
+ continue;
+ }
root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
- else if (vcpu->mmu.root_level == 0)
+ } else if (vcpu->mmu.root_level == 0)
root_gfn = 0;
page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, !is_paging(vcpu),
- NULL);
+ 0, NULL);
root = page->page_hpa;
++page->root_count;
vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
@@ -903,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
- ++kvm_stat.tlb_flush;
+ ++vcpu->stat.tlb_flush;
kvm_arch_ops->tlb_flush(vcpu);
}
@@ -918,11 +951,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
}
-static void mark_pagetable_nonglobal(void *shadow_pte)
-{
- page_header(__pa(shadow_pte))->global = 0;
-}
-
static inline void set_pte_common(struct kvm_vcpu *vcpu,
u64 *shadow_pte,
gpa_t gaddr,
@@ -940,9 +968,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
*shadow_pte |= access_bits;
- if (!(*shadow_pte & PT_GLOBAL_MASK))
- mark_pagetable_nonglobal(shadow_pte);
-
if (is_error_hpa(paddr)) {
*shadow_pte |= gaddr;
*shadow_pte |= PT_SHADOW_IO_MARK;
@@ -1316,6 +1341,51 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
}
}
+void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
+{
+ destroy_kvm_mmu(vcpu);
+
+ while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
+ struct kvm_mmu_page *page;
+
+ page = container_of(vcpu->kvm->active_mmu_pages.next,
+ struct kvm_mmu_page, link);
+ kvm_mmu_zap_page(vcpu, page);
+ }
+
+ mmu_free_memory_caches(vcpu);
+ kvm_arch_ops->tlb_flush(vcpu);
+ init_kvm_mmu(vcpu);
+}
+
+void kvm_mmu_module_exit(void)
+{
+ if (pte_chain_cache)
+ kmem_cache_destroy(pte_chain_cache);
+ if (rmap_desc_cache)
+ kmem_cache_destroy(rmap_desc_cache);
+}
+
+int kvm_mmu_module_init(void)
+{
+ pte_chain_cache = kmem_cache_create("kvm_pte_chain",
+ sizeof(struct kvm_pte_chain),
+ 0, 0, NULL, NULL);
+ if (!pte_chain_cache)
+ goto nomem;
+ rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
+ sizeof(struct kvm_rmap_desc),
+ 0, 0, NULL, NULL);
+ if (!rmap_desc_cache)
+ goto nomem;
+
+ return 0;
+
+nomem:
+ kvm_mmu_module_exit();
+ return -ENOMEM;
+}
+
#ifdef AUDIT
static const char *audit_msg;
@@ -1338,7 +1408,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
u64 ent = pt[i];
- if (!ent & PT_PRESENT_MASK)
+ if (!(ent & PT_PRESENT_MASK))
continue;
va = canonicalize(va);
@@ -1360,7 +1430,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
static void audit_mappings(struct kvm_vcpu *vcpu)
{
- int i;
+ unsigned i;
if (vcpu->mmu.root_level == 4)
audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);