summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/kvm/paging_tmpl.h
diff options
context:
space:
mode:
author David Woodhouse <David.Woodhouse@intel.com> 2010-10-30 13:35:11 +0200
committer David Woodhouse <David.Woodhouse@intel.com> 2010-10-30 13:35:11 +0200
commit 67577927e8d7a1f4b09b4992df640eadc6aacb36 (patch)
tree 2e9efe6b5745965faf0dcc084d4613d9356263f9 /arch/x86/kvm/paging_tmpl.h
parent MTD: Fix wrong check register_blkdev return value (diff)
parent Merge branch 'next-devicetree' of git://git.secretlab.ca/git/linux-2.6 (diff)
download linux-67577927e8d7a1f4b09b4992df640eadc6aacb36.tar.xz
download linux-67577927e8d7a1f4b09b4992df640eadc6aacb36.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Conflicts:
	drivers/mtd/mtd_blkdevs.c

Merge Grant's device-tree bits so that we can apply the subsequent fixes.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r-- arch/x86/kvm/paging_tmpl.h 202
1 files changed, 164 insertions, 38 deletions
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 51ef9097960d..cd7a833a3b52 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -7,7 +7,7 @@
* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -67,6 +67,7 @@ struct guest_walker {
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t ptes[PT_MAX_FULL_LEVELS];
+ pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
@@ -104,7 +105,7 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
- if (is_nx(vcpu))
+ if (vcpu->arch.mmu.nx)
access &= ~(gpte >> PT64_NX_SHIFT);
#endif
return access;
@@ -113,26 +114,32 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
/*
* Fetch a guest pte for a guest virtual address
*/
-static int FNAME(walk_addr)(struct guest_walker *walker,
- struct kvm_vcpu *vcpu, gva_t addr,
- int write_fault, int user_fault, int fetch_fault)
+static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+ struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gva_t addr, u32 access)
{
pt_element_t pte;
gfn_t table_gfn;
unsigned index, pt_access, uninitialized_var(pte_access);
gpa_t pte_gpa;
bool eperm, present, rsvd_fault;
+ int offset, write_fault, user_fault, fetch_fault;
+
+ write_fault = access & PFERR_WRITE_MASK;
+ user_fault = access & PFERR_USER_MASK;
+ fetch_fault = access & PFERR_FETCH_MASK;
trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
fetch_fault);
walk:
present = true;
eperm = rsvd_fault = false;
- walker->level = vcpu->arch.mmu.root_level;
- pte = vcpu->arch.cr3;
+ walker->level = mmu->root_level;
+ pte = mmu->get_cr3(vcpu);
+
#if PTTYPE == 64
- if (!is_long_mode(vcpu)) {
- pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
+ if (walker->level == PT32E_ROOT_LEVEL) {
+ pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte)) {
present = false;
@@ -142,7 +149,7 @@ walk:
}
#endif
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
- (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+ (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
pt_access = ACC_ALL;
@@ -150,12 +157,14 @@ walk:
index = PT_INDEX(addr, walker->level);
table_gfn = gpte_to_gfn(pte);
- pte_gpa = gfn_to_gpa(table_gfn);
- pte_gpa += index * sizeof(pt_element_t);
+ offset = index * sizeof(pt_element_t);
+ pte_gpa = gfn_to_gpa(table_gfn) + offset;
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) {
+ if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
+ offset, sizeof(pte),
+ PFERR_USER_MASK|PFERR_WRITE_MASK)) {
present = false;
break;
}
@@ -167,7 +176,7 @@ walk:
break;
}
- if (is_rsvd_bits_set(vcpu, pte, walker->level)) {
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
rsvd_fault = true;
break;
}
@@ -204,17 +213,28 @@ walk:
(PTTYPE == 64 || is_pse(vcpu))) ||
((walker->level == PT_PDPE_LEVEL) &&
is_large_pte(pte) &&
- is_long_mode(vcpu))) {
+ mmu->root_level == PT64_ROOT_LEVEL)) {
int lvl = walker->level;
+ gpa_t real_gpa;
+ gfn_t gfn;
+ u32 ac;
- walker->gfn = gpte_to_gfn_lvl(pte, lvl);
- walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
- >> PAGE_SHIFT;
+ gfn = gpte_to_gfn_lvl(pte, lvl);
+ gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
if (PTTYPE == 32 &&
walker->level == PT_DIRECTORY_LEVEL &&
is_cpuid_PSE36())
- walker->gfn += pse36_gfn_delta(pte);
+ gfn += pse36_gfn_delta(pte);
+
+ ac = write_fault | fetch_fault | user_fault;
+
+ real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
+ ac);
+ if (real_gpa == UNMAPPED_GVA)
+ return 0;
+
+ walker->gfn = real_gpa >> PAGE_SHIFT;
break;
}
@@ -249,18 +269,36 @@ error:
walker->error_code = 0;
if (present)
walker->error_code |= PFERR_PRESENT_MASK;
- if (write_fault)
- walker->error_code |= PFERR_WRITE_MASK;
- if (user_fault)
- walker->error_code |= PFERR_USER_MASK;
- if (fetch_fault && is_nx(vcpu))
+
+ walker->error_code |= write_fault | user_fault;
+
+ if (fetch_fault && mmu->nx)
walker->error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker->error_code |= PFERR_RSVD_MASK;
+
+ vcpu->arch.fault.address = addr;
+ vcpu->arch.fault.error_code = walker->error_code;
+
trace_kvm_mmu_walker_error(walker->error_code);
return 0;
}
+static int FNAME(walk_addr)(struct guest_walker *walker,
+ struct kvm_vcpu *vcpu, gva_t addr, u32 access)
+{
+ return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
+ access);
+}
+
+static int FNAME(walk_addr_nested)(struct guest_walker *walker,
+ struct kvm_vcpu *vcpu, gva_t addr,
+ u32 access)
+{
+ return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
+ addr, access);
+}
+
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte)
{
@@ -302,14 +340,87 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
struct guest_walker *gw, int level)
{
- int r;
pt_element_t curr_pte;
-
- r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1],
+ gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
+ u64 mask;
+ int r, index;
+
+ if (level == PT_PAGE_TABLE_LEVEL) {
+ mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
+ base_gpa = pte_gpa & ~mask;
+ index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
+
+ r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
+ gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
+ curr_pte = gw->prefetch_ptes[index];
+ } else
+ r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa,
&curr_pte, sizeof(curr_pte));
+
return r || curr_pte != gw->ptes[level - 1];
}
+static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
+ u64 *sptep)
+{
+ struct kvm_mmu_page *sp;
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ pt_element_t *gptep = gw->prefetch_ptes;
+ u64 *spte;
+ int i;
+
+ sp = page_header(__pa(sptep));
+
+ if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ return;
+
+ if (sp->role.direct)
+ return __direct_pte_prefetch(vcpu, sp, sptep);
+
+ i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
+ spte = sp->spt + i;
+
+ for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
+ pt_element_t gpte;
+ unsigned pte_access;
+ gfn_t gfn;
+ pfn_t pfn;
+ bool dirty;
+
+ if (spte == sptep)
+ continue;
+
+ if (*spte != shadow_trap_nonpresent_pte)
+ continue;
+
+ gpte = gptep[i];
+
+ if (!is_present_gpte(gpte) ||
+ is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
+ if (!sp->unsync)
+ __set_spte(spte, shadow_notrap_nonpresent_pte);
+ continue;
+ }
+
+ if (!(gpte & PT_ACCESSED_MASK))
+ continue;
+
+ pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ gfn = gpte_to_gfn(gpte);
+ dirty = is_dirty_gpte(gpte);
+ pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+ (pte_access & ACC_WRITE_MASK) && dirty);
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ break;
+ }
+
+ mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+ dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
+ pfn, true, true);
+ }
+}
+
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
@@ -391,6 +502,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
user_fault, write_fault, dirty, ptwrite, it.level,
gw->gfn, pfn, false, true);
+ FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return it.sptep;
@@ -420,7 +532,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
{
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
- int fetch_fault = error_code & PFERR_FETCH_MASK;
struct guest_walker walker;
u64 *sptep;
int write_pt = 0;
@@ -430,7 +541,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
unsigned long mmu_seq;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
- kvm_mmu_audit(vcpu, "pre page fault");
r = mmu_topup_memory_caches(vcpu);
if (r)
@@ -439,15 +549,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
/*
* Look up the guest pte for the faulting address.
*/
- r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
- fetch_fault);
+ r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
/*
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
- inject_page_fault(vcpu, addr, walker.error_code);
+ inject_page_fault(vcpu);
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
return 0;
}
@@ -468,6 +577,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
+
+ trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
level, &write_pt, pfn);
@@ -479,7 +590,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
++vcpu->stat.pf_fixed;
- kvm_mmu_audit(vcpu, "post page fault (fixed)");
+ trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
return write_pt;
@@ -556,10 +667,25 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
gpa_t gpa = UNMAPPED_GVA;
int r;
- r = FNAME(walk_addr)(&walker, vcpu, vaddr,
- !!(access & PFERR_WRITE_MASK),
- !!(access & PFERR_USER_MASK),
- !!(access & PFERR_FETCH_MASK));
+ r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
+
+ if (r) {
+ gpa = gfn_to_gpa(walker.gfn);
+ gpa |= vaddr & ~PAGE_MASK;
+ } else if (error)
+ *error = walker.error_code;
+
+ return gpa;
+}
+
+static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
+ u32 access, u32 *error)
+{
+ struct guest_walker walker;
+ gpa_t gpa = UNMAPPED_GVA;
+ int r;
+
+ r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
if (r) {
gpa = gfn_to_gpa(walker.gfn);
@@ -638,7 +764,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return -EINVAL;
gfn = gpte_to_gfn(gpte);
- if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
|| gfn != sp->gfns[i] || !is_present_gpte(gpte)
|| !(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;