From bbacc0c111c3c5d1f3192b8cc1642b9c3954f80d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:09 -0700 Subject: KVM: Rename KVM_MEMORY_SLOTS -> KVM_USER_MEM_SLOTS It's easy to confuse KVM_MEMORY_SLOTS and KVM_MEM_SLOTS_NUM. One is the user accessible slots and the other is user + private. Make this more obvious. Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 71d0c90b62bf..80dcc53a1aba 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1549,7 +1549,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= KVM_USER_MEM_SLOTS) goto out; memslot = id_to_memslot(kvm->memslots, log->slot); -- cgit v1.2.3 From f82a8cfe9354f5cdea55ebeceba3fd19051d3ee8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:21 -0700 Subject: KVM: struct kvm_memory_slot.user_alloc -> bool There's no need for this to be an int, it holds a boolean. Move to the end of the struct for alignment. Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 6 +++--- arch/powerpc/kvm/powerpc.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 12 ++++++------ virt/kvm/kvm_main.c | 8 ++++---- 7 files changed, 22 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 9bacfe207b43..ad3126a58644 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -955,7 +955,7 @@ long kvm_arch_vm_ioctl(struct file *filp, kvm_mem.guest_phys_addr; kvm_userspace_mem.memory_size = kvm_mem.memory_size; r = kvm_vm_ioctl_set_memory_region(kvm, - &kvm_userspace_mem, 0); + &kvm_userspace_mem, false); if (r) goto out; break; @@ -1580,7 +1580,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { unsigned long i; unsigned long pfn; @@ -1611,7 +1611,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { return; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 70739a089560..be83fca2e8fd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -412,7 +412,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } @@ -420,7 +420,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { kvmppc_core_commit_memory_region(kvm, mem, old); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c9011bfaabbe..f718bc65835c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -928,7 +928,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { /* A few sanity checks. We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a @@ -958,7 +958,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { int rc; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9120ae1901e4..b3101e368079 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3667,7 +3667,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); if (r) goto out; @@ -3697,7 +3697,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); if (r) goto out; @@ -4251,7 +4251,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem, 0); + ret = kvm_set_memory_region(kvm, &tss_mem, false); if (ret) return ret; kvm->arch.tss_addr = addr; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 816074757c96..1c9c834b72f0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6839,7 +6839,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int npages = memslot->npages; int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; @@ -6875,7 +6875,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5a3581ceb036..d897f035749f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -270,8 +270,8 @@ struct kvm_memory_slot { unsigned long *dirty_bitmap; struct kvm_arch_memory_slot arch; unsigned long userspace_addr; - int user_alloc; int id; + bool user_alloc; }; static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) @@ -451,10 +451,10 @@ id_to_memslot(struct kvm_memslots *slots, int id) int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc); + bool user_alloc); int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc); + bool user_alloc); void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); @@ -462,11 +462,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc); + bool user_alloc); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc); + bool user_alloc); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); /* flush all memory translations */ @@ -553,7 +553,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc); + bool user_alloc); int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5f0638cb6968..42c1eb73a0e3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -709,7 +709,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) */ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int r; gfn_t base_gfn; @@ -889,7 +889,7 @@ EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int r; @@ -903,7 +903,7 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; @@ -2148,7 +2148,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); break; } case KVM_GET_DIRTY_LOG: { -- cgit v1.2.3 From 5a33169ed29060df71627103e6968078b42de945 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 14 Dec 2012 23:46:03 +0100 Subject: KVM: PPC: Only WARN on invalid emulation When we hit an emulation result that we didn't expect, that is an error, but it's nothing that warrants a BUG(), because it can be guest triggered. So instead, let's only WARN() the user that this happened. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be83fca2e8fd..e2225e5b8a4c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -237,7 +237,8 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_HOST; break; default: - BUG(); + WARN_ON(1); + r = RESUME_GUEST; } return r; -- cgit v1.2.3 From 50c7bb80b5bd5a9962905306dd2292eeb9857d46 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 14 Dec 2012 23:42:05 +0100 Subject: KVM: PPC: Book3S: PR: Enable alternative instruction for SC 1 When running on top of pHyp, the hypercall instruction "sc 1" goes straight into pHyp without trapping in supervisor mode. So if we want to support PAPR guest in this configuration we need to add a second way of accessing PAPR hypercalls, preferably with the exact same semantics except for the instruction. So let's overlay an officially reserved instruction and emulate PAPR hypercalls whenever we hit that one. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_emulate.c | 28 ++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 5 +++++ 3 files changed, 34 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 572aa7530619..5f5f69abd281 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -44,6 +44,7 @@ enum emulation_result { EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. go again */ + EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ }; extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index d31a716f7f2b..c88161bed8df 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -34,6 +34,8 @@ #define OP_31_XOP_MTSRIN 242 #define OP_31_XOP_TLBIEL 274 #define OP_31_XOP_TLBIE 306 +/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */ +#define OP_31_XOP_FAKE_SC1 308 #define OP_31_XOP_SLBMTE 402 #define OP_31_XOP_SLBIE 434 #define OP_31_XOP_SLBIA 498 @@ -170,6 +172,32 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmu.tlbie(vcpu, addr, large); break; } +#ifdef CONFIG_KVM_BOOK3S_64_PR + case OP_31_XOP_FAKE_SC1: + { + /* SC 1 papr hypercalls */ + ulong cmd = kvmppc_get_gpr(vcpu, 3); + int i; + + if ((vcpu->arch.shared->msr & MSR_PR) || + !vcpu->arch.papr_enabled) { + emulated = EMULATE_FAIL; + break; + } + + if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) + break; + + run->papr_hcall.nr = cmd; + for (i = 0; i < 9; ++i) { + ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); + run->papr_hcall.args[i] = gpr; + } + + emulated = EMULATE_DO_PAPR; + break; + } +#endif case OP_31_XOP_EIOIO: break; case OP_31_XOP_SLBMTE: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 28d38adeca73..73ed11c41bac 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -760,6 +760,11 @@ program_interrupt: run->exit_reason = KVM_EXIT_MMIO; r = RESUME_HOST_NV; break; + case EMULATE_DO_PAPR: + run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->arch.hcall_needed = 1; + r = RESUME_HOST_NV; + break; default: BUG(); } -- cgit v1.2.3 From f2be655004ddc36f2c5fc5e541d481dcd782ab83 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 20 Dec 2012 04:52:39 +0000 Subject: KVM: PPC: Fix mfspr/mtspr MMUCFG emulation On mfspr/mtspr emulation path Book3E's MMUCFG SPR with value 1015 clashes with G4's MSSSR0 SPR. Move MSSSR0 emulation from generic part to Books3S. MSSSR0 also clashes with Book3S's DABRX SPR. DABRX was not explicitly handled so Book3S execution flow will behave as before. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_emulate.c | 2 ++ arch/powerpc/kvm/emulate.c | 5 ----- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index c88161bed8df..836c56975e21 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -455,6 +455,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_PMC3_GEKKO: case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: + case SPRN_MSSSR0: break; unprivileged: default: @@ -551,6 +552,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_PMC3_GEKKO: case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: + case SPRN_MSSSR0: *spr_val = 0; break; default: diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index b0855e5d8905..71abcf4e2bda 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -149,8 +149,6 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_TBWL: break; case SPRN_TBWU: break; - case SPRN_MSSSR0: break; - case SPRN_DEC: vcpu->arch.dec = spr_val; kvmppc_emulate_dec(vcpu); @@ -201,9 +199,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_PIR: spr_val = vcpu->vcpu_id; break; - case SPRN_MSSSR0: - spr_val = 0; - break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. -- cgit v1.2.3 From b8c649a99d582a6d8afd8457ba6145c624b8a76f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 20 Dec 2012 04:52:39 +0000 Subject: KVM: PPC: BookE: Allow irq deliveries to inject requests When injecting an interrupt into guest context, we usually don't need to check for requests anymore. At least not until today. With the introduction of EPR, we will have to create a request when the guest has successfully accepted an external interrupt though. So we need to prepare the interrupt delivery to abort guest entry gracefully. Otherwise we'd delay the EPR request. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 69f114015780..964f4475f55c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -581,6 +581,11 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) kvmppc_core_check_exceptions(vcpu); + if (vcpu->requests) { + /* Exception delivery raised request; start over */ + return 1; + } + if (vcpu->arch.shared->msr & MSR_WE) { local_irq_enable(); kvm_vcpu_block(vcpu); -- cgit v1.2.3 From 37ecb257f68ce4fb7c7048a1123bbcbbe36d9575 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jan 2013 18:02:14 +0100 Subject: KVM: PPC: BookE: Emulate mfspr on EPR The EPR register is potentially valid for PR KVM as well, so we need to emulate accesses to it. It's only defined for reading, so only handle the mfspr case. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke_emulate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 4685b8cf2249..27a4b2877c10 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -269,6 +269,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_ESR: *spr_val = vcpu->arch.shared->esr; break; + case SPRN_EPR: + *spr_val = vcpu->arch.epr; + break; case SPRN_CSRR0: *spr_val = vcpu->arch.csrr0; break; -- cgit v1.2.3 From 1c810636556c8d53a37406b34a64d9b9b0161aa6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jan 2013 18:12:48 +0100 Subject: KVM: PPC: BookE: Implement EPR exit The External Proxy Facility in FSL BookE chips allows the interrupt controller to automatically acknowledge an interrupt as soon as a core gets its pending external interrupt delivered. Today, user space implements the interrupt controller, so we need to check on it during such a cycle. This patch implements logic for user space to enable EPR exiting, disable EPR exiting and EPR exiting itself, so that user space can acknowledge an interrupt when an external interrupt has successfully been delivered into the guest vcpu. Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++++++++++++-- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_ppc.h | 9 +++++++++ arch/powerpc/kvm/booke.c | 14 ++++++++++++- arch/powerpc/kvm/powerpc.c | 10 ++++++++++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 6 ++++++ 7 files changed, 79 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4fc2bfcb16d5..a98ed09269d7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2246,8 +2246,8 @@ executed a memory-mapped I/O instruction which could not be satisfied by kvm. The 'data' member contains the written data if 'is_write' is true, and should be filled by application code otherwise. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR - and KVM_EXIT_PAPR the corresponding +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR, + KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace @@ -2366,6 +2366,25 @@ interrupt for the target subchannel has been dequeued and subchannel_id, subchannel_nr, io_int_parm and io_int_word contain the parameters for that interrupt. ipb is needed for instruction parameter decoding. + /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; + +On FSL BookE PowerPC chips, the interrupt controller has a fast patch +interrupt acknowledge path to the core. When the core successfully +delivers an interrupt, it automatically populates the EPR register with +the interrupt vector number and acknowledges the interrupt inside +the interrupt controller. + +In case the interrupt controller lives in user space, we need to do +the interrupt acknowledge cycle through it to fetch the next to be +delivered interrupt vector using this exit. + +It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an +external interrupt has just been delivered into the guest. User space +should put the acknowledged interrupt vector into the 'epr' field. + /* Fix the size of the union. */ char padding[256]; }; @@ -2501,3 +2520,20 @@ handled in-kernel, while the other I/O instructions are passed to userspace. When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST SUBCHANNEL intercepts. + +6.5 KVM_CAP_PPC_EPR + +Architectures: ppc +Parameters: args[0] defines whether the proxy facility is active +Returns: 0 on success; -1 on error + +This capability enables or disables the delivery of interrupts through the +external proxy facility. + +When enabled (args[0] != 0), every time the guest gets an external interrupt +delivered, it automatically exits into user space with a KVM_EXIT_EPR exit +to receive the topmost interrupt vector. + +When disabled (args[0] == 0), behavior is as if this facility is unsupported. + +When this capability is enabled, KVM_EXIT_EPR can occur. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ab49c6cf891c..8a72d59467eb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -520,6 +520,8 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; + u8 epr_enabled; + u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 5f5f69abd281..493630e209c8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -264,6 +264,15 @@ static inline void kvm_linear_init(void) {} #endif +static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GEPR, epr); +#elif defined(CONFIG_BOOKE) + vcpu->arch.epr = epr; +#endif +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg); int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 964f4475f55c..940ec806187e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -306,7 +306,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, { int allowed = 0; ulong msr_mask = 0; - bool update_esr = false, update_dear = false; + bool update_esr = false, update_dear = false, update_epr = false; ulong crit_raw = vcpu->arch.shared->critical; ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; @@ -330,6 +330,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + update_epr = true; + switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: case BOOKE_IRQPRIO_DATA_STORAGE: @@ -408,6 +411,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); + if (update_epr == true) + kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); new_msr &= msr_mask; #if defined(CONFIG_64BIT) @@ -615,6 +620,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) r = 0; } + if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) { + vcpu->run->epr.epr = 0; + vcpu->arch.epr_needed = true; + vcpu->run->exit_reason = KVM_EXIT_EPR; + r = 0; + } + return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e2225e5b8a4c..934413cd3a1b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -306,6 +306,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: case KVM_CAP_PPC_BOOKE_WATCHDOG: + case KVM_CAP_PPC_EPR: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -721,6 +722,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) for (i = 0; i < 9; ++i) kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); vcpu->arch.hcall_needed = 0; +#ifdef CONFIG_BOOKE + } else if (vcpu->arch.epr_needed) { + kvmppc_set_epr(vcpu, run->epr.epr); + vcpu->arch.epr_needed = 0; +#endif } r = kvmppc_vcpu_run(run, vcpu); @@ -762,6 +768,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; + case KVM_CAP_PPC_EPR: + r = 0; + vcpu->arch.epr_enabled = cap->args[0]; + break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: r = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cbe0d683e2e5..4dd7d7531e69 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -122,6 +122,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_WATCHDOG 18 #define KVM_REQ_MASTERCLOCK_UPDATE 19 #define KVM_REQ_MCLOCK_INPROGRESS 20 +#define KVM_REQ_EPR_EXIT 21 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8bb0bf83afc5..9a2db5767ed5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -169,6 +169,7 @@ struct kvm_pit_config { #define KVM_EXIT_S390_UCONTROL 20 #define KVM_EXIT_WATCHDOG 21 #define KVM_EXIT_S390_TSCH 22 +#define KVM_EXIT_EPR 23 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -295,6 +296,10 @@ struct kvm_run { __u32 ipb; __u8 dequeued; } s390_tsch; + /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; /* Fix the size of the union. */ char padding[256]; }; @@ -656,6 +661,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 #define KVM_CAP_S390_CSS_SUPPORT 85 +#define KVM_CAP_PPC_EPR 86 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 324b3e63167bce69e6622c2be182595790bf7e38 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jan 2013 18:28:51 +0100 Subject: KVM: PPC: BookE: Add EPR ONE_REG sync We need to be able to read and write the contents of the EPR register from user space. This patch implements that logic through the ONE_REG API and declares its (never implemented) SREGS counterpart as deprecated. Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/uapi/asm/kvm.h | 6 +++++- arch/powerpc/kvm/booke.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a98ed09269d7..09905cbcbb0b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1774,6 +1774,7 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_SLB | 128 PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 + PPC | KVM_REG_PPC_EPR | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 2fba8a66fb10..16064d00adb9 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -114,7 +114,10 @@ struct kvm_regs { /* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ #define KVM_SREGS_E_SPE (1 << 9) -/* External Proxy (EXP) -- EPR */ +/* + * DEPRECATED! USE ONE_REG FOR THIS ONE! + * External Proxy (EXP) -- EPR + */ #define KVM_SREGS_EXP (1 << 10) /* External PID (E.PD) -- EPSC/EPLC */ @@ -412,5 +415,6 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) +#define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 940ec806187e..8779cd4c52d9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -300,6 +300,15 @@ static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) #endif } +static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GEPR); +#else + return vcpu->arch.epr; +#endif +} + /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -1405,6 +1414,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); break; } + case KVM_REG_PPC_EPR: { + u32 epr = get_guest_epr(vcpu); + r = put_user(epr, (u32 __user *)(long)reg->addr); + break; + } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); @@ -1437,6 +1451,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) (u64 __user *)(long)reg->addr, sizeof(u64)); break; } + case KVM_REG_PPC_EPR: { + u32 new_epr; + r = get_user(new_epr, (u32 __user *)(long)reg->addr); + if (!r) + kvmppc_set_epr(vcpu, new_epr); + break; + } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { u32 new_epcr; -- cgit v1.2.3 From 9445ef0181ebe0b74c3f2a23191a6f3d9a92b14b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 18 Jan 2013 02:25:23 +0100 Subject: KVM: PPC: E500: Move write_stlbe higher Later patches want to call the function and it doesn't have dependencies on anything below write_host_tlbe. Move it higher up in the file. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index cf3f18012371..d38ad63fdcb9 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -156,6 +156,22 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, } } +/* sesel is for tlb1 only */ +static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, + int stlbsel, int sesel) +{ + int stid; + + preempt_disable(); + stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); + + stlbe->mas1 |= MAS1_TID(stid); + write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); + preempt_enable(); +} + #ifdef CONFIG_KVM_E500V2 void kvmppc_map_magic(struct kvm_vcpu *vcpu) { @@ -834,22 +850,6 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) return EMULATE_DONE; } -/* sesel is for tlb1 only */ -static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe, - int stlbsel, int sesel) -{ - int stid; - - preempt_disable(); - stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); - - stlbe->mas1 |= MAS1_TID(stid); - write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); - preempt_enable(); -} - int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); -- cgit v1.2.3 From 523f0e5421c12610527c620b983b443f329e3a32 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 18 Jan 2013 02:27:14 +0100 Subject: KVM: PPC: E500: Explicitly mark shadow maps invalid When we invalidate shadow TLB maps on the host, we don't mark them as not valid. But we should. Fix this by removing the E500_TLB_VALID from their flags when invalidating. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d38ad63fdcb9..8efb2acee2bf 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -204,9 +204,13 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, { struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, tlbsel, esel); + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; - if (tlbsel == 1 && - vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) { + /* Don't bother with unmapped entries */ + if (!(ref->flags & E500_TLB_VALID)) + return; + + if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; int hw_tlb_indx; unsigned long flags; @@ -224,7 +228,7 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, } mb(); vcpu_e500->g2h_tlb1_map[esel] = 0; - vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP; + ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); local_irq_restore(flags); return; @@ -232,6 +236,9 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); + + /* Mark the TLB as not backed by the host anymore */ + ref->flags &= ~E500_TLB_VALID; } static int tlb0_set_base(gva_t addr, int sets, int ways) -- cgit v1.2.3 From 2c378fd779d2b37aed64cb44caa607707edc51d3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 18 Jan 2013 02:31:01 +0100 Subject: KVM: PPC: E500: Propagate errors when shadow mapping When shadow mapping a page, mapping this page can fail. In that case we don't have a shadow map. Take this case into account, otherwise we might end up writing bogus TLB entries into the host TLB. While at it, also move the write_stlbe() calls into the respective TLBn handlers. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 69 +++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 8efb2acee2bf..3777167e5f31 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -432,7 +432,7 @@ static inline void kvmppc_e500_setup_stlbe( #endif } -static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, +static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, struct tlbe_ref *ref) @@ -551,7 +551,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (is_error_noslot_pfn(pfn)) { printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", (long)gfn); - return; + return -EINVAL; } /* Align guest and physical address to page map boundaries */ @@ -571,22 +571,33 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Drop refcount on page, so that mmu notifiers can clear it */ kvm_release_pfn_clean(pfn); + + return 0; } /* XXX only map the one-one case, for now use TLB0 */ -static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, - int esel, - struct kvm_book3e_206_tlb_entry *stlbe) +static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, + int esel, + struct kvm_book3e_206_tlb_entry *stlbe) { struct kvm_book3e_206_tlb_entry *gtlbe; struct tlbe_ref *ref; + int stlbsel = 0; + int sesel = 0; + int r; gtlbe = get_entry(vcpu_e500, 0, esel); ref = &vcpu_e500->gtlb_priv[0][esel].ref; - kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), + r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), get_tlb_raddr(gtlbe) >> PAGE_SHIFT, gtlbe, 0, stlbe, ref); + if (r) + return r; + + write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + + return 0; } /* Caller must ensure that the specified guest TLB entry is safe to insert into @@ -597,25 +608,32 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { struct tlbe_ref *ref; - unsigned int victim; + unsigned int sesel; + int r; + int stlbsel = 1; - victim = vcpu_e500->host_tlb1_nv++; + sesel = vcpu_e500->host_tlb1_nv++; if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) vcpu_e500->host_tlb1_nv = 0; - ref = &vcpu_e500->tlb_refs[1][victim]; - kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); + ref = &vcpu_e500->tlb_refs[1][sesel]; + r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, + ref); + if (r) + return r; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; - if (vcpu_e500->h2g_tlb1_rmap[victim]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim]; - vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim); + if (vcpu_e500->h2g_tlb1_rmap[sesel]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->h2g_tlb1_rmap[victim] = esel; + vcpu_e500->h2g_tlb1_rmap[sesel] = esel; - return victim; + write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + + return 0; } static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) @@ -1034,30 +1052,27 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; int tlbsel = tlbsel_of(index); int esel = esel_of(index); - int stlbsel, sesel; gtlbe = get_entry(vcpu_e500, tlbsel, esel); switch (tlbsel) { case 0: - stlbsel = 0; - sesel = 0; /* unused */ priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - /* Only triggers after clear_tlb_refs */ - if (unlikely(!(priv->ref.flags & E500_TLB_VALID))) + /* Triggers after clear_tlb_refs or on initial mapping */ + if (!(priv->ref.flags & E500_TLB_VALID)) { kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); - else + } else { kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, &priv->ref, eaddr, &stlbe); + write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0); + } break; case 1: { gfn_t gfn = gpaddr >> PAGE_SHIFT; - - stlbsel = 1; - sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, - gtlbe, &stlbe, esel); + kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe, + esel); break; } @@ -1065,8 +1080,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, BUG(); break; } - - write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); } /************* MMU Notifiers *************/ -- cgit v1.2.3 From 9d98b3ff949dab3bafa2c50856ce9e1f88497f9a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 17 Jan 2013 19:23:28 +0100 Subject: KVM: PPC: e500: Call kvmppc_mmu_map for initial mapping When emulating tlbwe, we want to automatically map the entry that just got written in our shadow TLB map, because chances are quite high that it's going to be used very soon. Today this happens explicitly, duplicating all the logic that is in kvmppc_mmu_map() already. Just call that one instead. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 3777167e5f31..48d1a4f1f5ff 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -878,8 +878,8 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; - int tlbsel, esel, stlbsel, sesel; + struct kvm_book3e_206_tlb_entry *gtlbe; + int tlbsel, esel; int recal = 0; tlbsel = get_tlb_tlbsel(vcpu); @@ -917,40 +917,16 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { - u64 eaddr; - u64 raddr; + u64 eaddr = get_tlb_eaddr(gtlbe); + u64 raddr = get_tlb_raddr(gtlbe); - switch (tlbsel) { - case 0: - /* TLB0 */ + if (tlbsel == 0) { gtlbe->mas1 &= ~MAS1_TSIZE(~0); gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); - - stlbsel = 0; - kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); - sesel = 0; /* unused */ - - break; - - case 1: - /* TLB1 */ - eaddr = get_tlb_eaddr(gtlbe); - raddr = get_tlb_raddr(gtlbe); - - /* Create a 4KB mapping on the host. - * If the guest wanted a large page, - * only the first 4KB is mapped here and the rest - * are mapped on the fly. */ - stlbsel = 1; - sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel); - break; - - default: - BUG(); } - write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); + /* Premap the faulting page */ + kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel)); } kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); -- cgit v1.2.3 From b71c9e2fb72cf538aadbc59ea719639a1e2191fa Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 11 Jan 2013 15:22:45 +0100 Subject: KVM: PPC: E500: Split host and guest MMU parts This patch splits the file e500_tlb.c into e500_mmu.c (guest TLB handling) and e500_mmu_host.c (host TLB handling). The main benefit of this split is readability and maintainability. It's just a lot harder to write dirty code :). Signed-off-by: Alexander Graf --- arch/powerpc/kvm/Makefile | 9 +- arch/powerpc/kvm/e500_mmu.c | 811 ++++++++++++++++++++++ arch/powerpc/kvm/e500_mmu_host.c | 672 ++++++++++++++++++ arch/powerpc/kvm/e500_mmu_host.h | 20 + arch/powerpc/kvm/e500_tlb.c | 1426 -------------------------------------- 5 files changed, 1509 insertions(+), 1429 deletions(-) create mode 100644 arch/powerpc/kvm/e500_mmu.c create mode 100644 arch/powerpc/kvm/e500_mmu_host.c create mode 100644 arch/powerpc/kvm/e500_mmu_host.h delete mode 100644 arch/powerpc/kvm/e500_tlb.c (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1e473d46322c..b772eded8c26 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,7 +10,8 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ eventfd.o) CFLAGS_44x_tlb.o := -I. -CFLAGS_e500_tlb.o := -I. +CFLAGS_e500_mmu.o := -I. +CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. common-objs-y += powerpc.o emulate.o @@ -35,7 +36,8 @@ kvm-e500-objs := \ booke_emulate.o \ booke_interrupts.o \ e500.o \ - e500_tlb.o \ + e500_mmu.o \ + e500_mmu_host.o \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) @@ -45,7 +47,8 @@ kvm-e500mc-objs := \ booke_emulate.o \ bookehv_interrupts.o \ e500mc.o \ - e500_tlb.o \ + e500_mmu.o \ + e500_mmu_host.o \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c new file mode 100644 index 000000000000..c3d1721aa1b8 --- /dev/null +++ b/arch/powerpc/kvm/e500_mmu.c @@ -0,0 +1,811 @@ +/* + * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * Scott Wood, scottwood@freescale.com + * Ashish Kalra, ashish.kalra@freescale.com + * Varun Sethi, varun.sethi@freescale.com + * Alexander Graf, agraf@suse.de + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.c, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "e500.h" +#include "trace.h" +#include "timing.h" +#include "e500_mmu_host.h" + +static inline unsigned int gtlb0_get_next_victim( + struct kvmppc_vcpu_e500 *vcpu_e500) +{ + unsigned int victim; + + victim = vcpu_e500->gtlb_nv[0]++; + if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways)) + vcpu_e500->gtlb_nv[0] = 0; + + return victim; +} + +static int tlb0_set_base(gva_t addr, int sets, int ways) +{ + int set_base; + + set_base = (addr >> PAGE_SHIFT) & (sets - 1); + set_base *= ways; + + return set_base; +} + +static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr) +{ + return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets, + vcpu_e500->gtlb_params[0].ways); +} + +static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel = get_tlb_esel_bit(vcpu); + + if (tlbsel == 0) { + esel &= vcpu_e500->gtlb_params[0].ways - 1; + esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2); + } else { + esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; + } + + return esel; +} + +/* Search the guest TLB for a matching entry. */ +static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, + gva_t eaddr, int tlbsel, unsigned int pid, int as) +{ + int size = vcpu_e500->gtlb_params[tlbsel].entries; + unsigned int set_base, offset; + int i; + + if (tlbsel == 0) { + set_base = gtlb0_set_base(vcpu_e500, eaddr); + size = vcpu_e500->gtlb_params[0].ways; + } else { + if (eaddr < vcpu_e500->tlb1_min_eaddr || + eaddr > vcpu_e500->tlb1_max_eaddr) + return -1; + set_base = 0; + } + + offset = vcpu_e500->gtlb_offset[tlbsel]; + + for (i = 0; i < size; i++) { + struct kvm_book3e_206_tlb_entry *tlbe = + &vcpu_e500->gtlb_arch[offset + set_base + i]; + unsigned int tid; + + if (eaddr < get_tlb_eaddr(tlbe)) + continue; + + if (eaddr > get_tlb_end(tlbe)) + continue; + + tid = get_tlb_tid(tlbe); + if (tid && (tid != pid)) + continue; + + if (!get_tlb_v(tlbe)) + continue; + + if (get_tlb_ts(tlbe) != as && as != -1) + continue; + + return set_base + i; + } + + return -1; +} + +static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, + unsigned int eaddr, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int victim, tsized; + int tlbsel; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; + victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; + tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; + + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) + | MAS1_TID(get_tlbmiss_tid(vcpu)) + | MAS1_TSIZE(tsized); + vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) + | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); + vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1) + | (get_cur_pid(vcpu) << 16) + | (as ? MAS6_SAS : 0); +} + +static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int size = vcpu_e500->gtlb_params[1].entries; + unsigned int offset; + gva_t eaddr; + int i; + + vcpu_e500->tlb1_min_eaddr = ~0UL; + vcpu_e500->tlb1_max_eaddr = 0; + offset = vcpu_e500->gtlb_offset[1]; + + for (i = 0; i < size; i++) { + struct kvm_book3e_206_tlb_entry *tlbe = + &vcpu_e500->gtlb_arch[offset + i]; + + if (!get_tlb_v(tlbe)) + continue; + + eaddr = get_tlb_eaddr(tlbe); + vcpu_e500->tlb1_min_eaddr = + min(vcpu_e500->tlb1_min_eaddr, eaddr); + + eaddr = get_tlb_end(tlbe); + vcpu_e500->tlb1_max_eaddr = + max(vcpu_e500->tlb1_max_eaddr, eaddr); + } +} + +static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned long start, end, size; + + size = get_tlb_bytes(gtlbe); + start = get_tlb_eaddr(gtlbe) & ~(size - 1); + end = start + size - 1; + + return vcpu_e500->tlb1_min_eaddr == start || + vcpu_e500->tlb1_max_eaddr == end; +} + +/* This function is supposed to be called for a adding a new valid tlb entry */ +static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned long start, end, size; + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (!get_tlb_v(gtlbe)) + return; + + size = get_tlb_bytes(gtlbe); + start = get_tlb_eaddr(gtlbe) & ~(size - 1); + end = start + size - 1; + + vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start); + vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end); +} + +static inline int kvmppc_e500_gtlbe_invalidate( + struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); + + if (unlikely(get_tlb_iprot(gtlbe))) + return -1; + + if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) + kvmppc_recalc_tlb1map_range(vcpu_e500); + + gtlbe->mas1 = 0; + + return 0; +} + +int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) +{ + int esel; + + if (value & MMUCSR0_TLB0FI) + for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); + if (value & MMUCSR0_TLB1FI) + for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); + + /* Invalidate all vcpu id mappings */ + kvmppc_e500_tlbil_all(vcpu_e500); + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int ia; + int esel, tlbsel; + + ia = (ea >> 2) & 0x1; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = (ea >> 3) & 0x1; + + if (ia) { + /* invalidate all entries */ + for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; + esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } else { + ea &= 0xfffff000; + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, + get_cur_pid(vcpu), -1); + if (esel >= 0) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } + + /* Invalidate all vcpu id mappings */ + kvmppc_e500_tlbil_all(vcpu_e500); + + return EMULATE_DONE; +} + +static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, + int pid, int type) +{ + struct kvm_book3e_206_tlb_entry *tlbe; + int tid, esel; + + /* invalidate all entries */ + for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) { + tlbe = get_entry(vcpu_e500, tlbsel, esel); + tid = get_tlb_tid(tlbe); + if (type == 0 || tid == pid) { + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } + } +} + +static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, + gva_t ea) +{ + int tlbsel, esel; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1); + if (esel >= 0) { + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + break; + } + } +} + +int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int pid = get_cur_spid(vcpu); + + if (type == 0 || type == 1) { + tlbilx_all(vcpu_e500, 0, pid, type); + tlbilx_all(vcpu_e500, 1, pid, type); + } else if (type == 3) { + tlbilx_one(vcpu_e500, pid, ea); + } + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel, esel; + struct kvm_book3e_206_tlb_entry *gtlbe; + + tlbsel = get_tlb_tlbsel(vcpu); + esel = get_tlb_esel(vcpu, tlbsel); + + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + vcpu->arch.shared->mas0 &= ~MAS0_NV(~0); + vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = gtlbe->mas1; + vcpu->arch.shared->mas2 = gtlbe->mas2; + vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int as = !!get_cur_sas(vcpu); + unsigned int pid = get_cur_spid(vcpu); + int esel, tlbsel; + struct kvm_book3e_206_tlb_entry *gtlbe = NULL; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); + if (esel >= 0) { + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + break; + } + } + + if (gtlbe) { + esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1; + + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = gtlbe->mas1; + vcpu->arch.shared->mas2 = gtlbe->mas2; + vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; + } else { + int victim; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1; + victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; + + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) + | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = + (vcpu->arch.shared->mas6 & MAS6_SPID0) + | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) + | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); + vcpu->arch.shared->mas2 &= MAS2_EPN; + vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & + MAS2_ATTRIB_MASK; + vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | + MAS3_U2 | MAS3_U3; + } + + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_entry *gtlbe; + int tlbsel, esel; + int recal = 0; + + tlbsel = get_tlb_tlbsel(vcpu); + esel = get_tlb_esel(vcpu, tlbsel); + + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + + if (get_tlb_v(gtlbe)) { + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + if ((tlbsel == 1) && + kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) + recal = 1; + } + + gtlbe->mas1 = vcpu->arch.shared->mas1; + gtlbe->mas2 = vcpu->arch.shared->mas2; + if (!(vcpu->arch.shared->msr & MSR_CM)) + gtlbe->mas2 &= 0xffffffffUL; + gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; + + trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, + gtlbe->mas2, gtlbe->mas7_3); + + if (tlbsel == 1) { + /* + * If a valid tlb1 entry is overwritten then recalculate the + * min/max TLB1 map address range otherwise no need to look + * in tlb1 array. + */ + if (recal) + kvmppc_recalc_tlb1map_range(vcpu_e500); + else + kvmppc_set_tlb1map_range(vcpu, gtlbe); + } + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ + if (tlbe_is_host_safe(vcpu, gtlbe)) { + u64 eaddr = get_tlb_eaddr(gtlbe); + u64 raddr = get_tlb_raddr(gtlbe); + + if (tlbsel == 0) { + gtlbe->mas1 &= ~MAS1_TSIZE(~0); + gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); + } + + /* Premap the faulting page */ + kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel)); + } + + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); + return EMULATE_DONE; +} + +static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, + gva_t eaddr, unsigned int pid, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel, tlbsel; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); + if (esel >= 0) + return index_of(tlbsel, esel); + } + + return -1; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); + if (index < 0) { + tr->valid = 0; + return 0; + } + + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + + +int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); +} + +void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); +} + +gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, + gva_t eaddr) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_entry *gtlbe; + u64 pgmask; + + gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index)); + pgmask = get_tlb_bytes(gtlbe) - 1; + + return get_tlb_raddr(gtlbe) | (eaddr & pgmask); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ +} + +/*****************************************/ + +static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int i; + + clear_tlb1_bitmap(vcpu_e500); + kfree(vcpu_e500->g2h_tlb1_map); + + clear_tlb_refs(vcpu_e500); + kfree(vcpu_e500->gtlb_priv[0]); + kfree(vcpu_e500->gtlb_priv[1]); + + if (vcpu_e500->shared_tlb_pages) { + vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch, + PAGE_SIZE))); + + for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) { + set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]); + put_page(vcpu_e500->shared_tlb_pages[i]); + } + + vcpu_e500->num_shared_tlb_pages = 0; + + kfree(vcpu_e500->shared_tlb_pages); + vcpu_e500->shared_tlb_pages = NULL; + } else { + kfree(vcpu_e500->gtlb_arch); + } + + vcpu_e500->gtlb_arch = NULL; +} + +void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.mas0 = vcpu->arch.shared->mas0; + sregs->u.e.mas1 = vcpu->arch.shared->mas1; + sregs->u.e.mas2 = vcpu->arch.shared->mas2; + sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; + sregs->u.e.mas4 = vcpu->arch.shared->mas4; + sregs->u.e.mas6 = vcpu->arch.shared->mas6; + + sregs->u.e.mmucfg = vcpu->arch.mmucfg; + sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0]; + sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1]; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; +} + +int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu->arch.shared->mas0 = sregs->u.e.mas0; + vcpu->arch.shared->mas1 = sregs->u.e.mas1; + vcpu->arch.shared->mas2 = sregs->u.e.mas2; + vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; + vcpu->arch.shared->mas4 = sregs->u.e.mas4; + vcpu->arch.shared->mas6 = sregs->u.e.mas6; + } + + return 0; +} + +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, + struct kvm_config_tlb *cfg) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_params params; + char *virt; + struct page **pages; + struct tlbe_priv *privs[2] = {}; + u64 *g2h_bitmap = NULL; + size_t array_len; + u32 sets; + int num_pages, ret, i; + + if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV) + return -EINVAL; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)cfg->params, + sizeof(params))) + return -EFAULT; + + if (params.tlb_sizes[1] > 64) + return -EINVAL; + if (params.tlb_ways[1] != params.tlb_sizes[1]) + return -EINVAL; + if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0) + return -EINVAL; + if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0) + return -EINVAL; + + if (!is_power_of_2(params.tlb_ways[0])) + return -EINVAL; + + sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]); + if (!is_power_of_2(sets)) + return -EINVAL; + + array_len = params.tlb_sizes[0] + params.tlb_sizes[1]; + array_len *= sizeof(struct kvm_book3e_206_tlb_entry); + + if (cfg->array_len < array_len) + return -EINVAL; + + num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - + cfg->array / PAGE_SIZE; + pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); + if (ret < 0) + goto err_pages; + + if (ret != num_pages) { + num_pages = ret; + ret = -EFAULT; + goto err_put_page; + } + + virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); + if (!virt) { + ret = -ENOMEM; + goto err_put_page; + } + + privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], + GFP_KERNEL); + privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], + GFP_KERNEL); + + if (!privs[0] || !privs[1]) { + ret = -ENOMEM; + goto err_privs; + } + + g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], + GFP_KERNEL); + if (!g2h_bitmap) { + ret = -ENOMEM; + goto err_privs; + } + + free_gtlb(vcpu_e500); + + vcpu_e500->gtlb_priv[0] = privs[0]; + vcpu_e500->gtlb_priv[1] = privs[1]; + vcpu_e500->g2h_tlb1_map = g2h_bitmap; + + vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) + (virt + (cfg->array & (PAGE_SIZE - 1))); + + vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0]; + vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1]; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; + + vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; + + vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + if (params.tlb_sizes[0] <= 2048) + vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; + vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; + + vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; + vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + + vcpu_e500->shared_tlb_pages = pages; + vcpu_e500->num_shared_tlb_pages = num_pages; + + vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0]; + vcpu_e500->gtlb_params[0].sets = sets; + + vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; + vcpu_e500->gtlb_params[1].sets = 1; + + kvmppc_recalc_tlb1map_range(vcpu_e500); + return 0; + +err_privs: + kfree(privs[0]); + kfree(privs[1]); + +err_put_page: + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + +err_pages: + kfree(pages); + return ret; +} + +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, + struct kvm_dirty_tlb *dirty) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + kvmppc_recalc_tlb1map_range(vcpu_e500); + clear_tlb_refs(vcpu_e500); + return 0; +} + +int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; + int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); + int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; + + if (e500_mmu_host_init(vcpu_e500)) + goto err; + + vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; + vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; + + vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM; + vcpu_e500->gtlb_params[0].sets = + KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; + + vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; + vcpu_e500->gtlb_params[1].sets = 1; + + vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); + if (!vcpu_e500->gtlb_arch) + return -ENOMEM; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; + + vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[0].entries, + GFP_KERNEL); + if (!vcpu_e500->gtlb_priv[0]) + goto err; + + vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->gtlb_priv[1]) + goto err; + + vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * + vcpu_e500->gtlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->g2h_tlb1_map) + goto err; + + /* Init TLB configuration register */ + vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; + vcpu->arch.tlbcfg[0] |= + vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; + + vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; + vcpu->arch.tlbcfg[1] |= + vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; + + kvmppc_recalc_tlb1map_range(vcpu_e500); + return 0; + +err: + free_gtlb(vcpu_e500); + return -1; +} + +void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + free_gtlb(vcpu_e500); + e500_mmu_host_uninit(vcpu_e500); +} diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c new file mode 100644 index 000000000000..4c32d6510133 --- /dev/null +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -0,0 +1,672 @@ +/* + * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * Scott Wood, scottwood@freescale.com + * Ashish Kalra, ashish.kalra@freescale.com + * Varun Sethi, varun.sethi@freescale.com + * Alexander Graf, agraf@suse.de + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.c, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "e500.h" +#include "trace.h" +#include "timing.h" +#include "e500_mmu_host.h" + +#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) + +static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; + +static inline unsigned int tlb1_max_shadow_size(void) +{ + /* reserve one entry for magic page */ + return host_tlb_params[1].entries - tlbcam_index - 1; +} + +static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +{ + /* Mask off reserved bits. */ + mas3 &= MAS3_ATTRIB_MASK; + +#ifndef CONFIG_KVM_BOOKE_HV + if (!usermode) { + /* Guest is in supervisor mode, + * so we need to translate guest + * supervisor permissions into user permissions. */ + mas3 &= ~E500_TLB_USER_PERM_MASK; + mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; + } + mas3 |= E500_TLB_SUPER_PERM_MASK; +#endif + return mas3; +} + +static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) +{ +#ifdef CONFIG_SMP + return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; +#else + return mas2 & MAS2_ATTRIB_MASK; +#endif +} + +/* + * writing shadow tlb entry to host TLB + */ +static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, + uint32_t mas0) +{ + unsigned long flags; + + local_irq_save(flags); + mtspr(SPRN_MAS0, mas0); + mtspr(SPRN_MAS1, stlbe->mas1); + mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); + mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); + mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_MAS8, stlbe->mas8); +#endif + asm volatile("isync; tlbwe" : : : "memory"); + +#ifdef CONFIG_KVM_BOOKE_HV + /* Must clear mas8 for other host tlbwe's */ + mtspr(SPRN_MAS8, 0); + isync(); +#endif + local_irq_restore(flags); + + trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, + stlbe->mas2, stlbe->mas7_3); +} + +/* + * Acquire a mas0 with victim hint, as if we just took a TLB miss. + * + * We don't care about the address we're searching for, other than that it's + * in the right set and is not present in the TLB. Using a zero PID and a + * userspace address means we don't have to set and then restore MAS5, or + * calculate a proper MAS6 value. + */ +static u32 get_host_mas0(unsigned long eaddr) +{ + unsigned long flags; + u32 mas0; + + local_irq_save(flags); + mtspr(SPRN_MAS6, 0); + asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); + mas0 = mfspr(SPRN_MAS0); + local_irq_restore(flags); + + return mas0; +} + +/* sesel is for tlb1 only */ +static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) +{ + u32 mas0; + + if (tlbsel == 0) { + mas0 = get_host_mas0(stlbe->mas2); + __write_host_tlbe(stlbe, mas0); + } else { + __write_host_tlbe(stlbe, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(sesel))); + } +} + +/* sesel is for tlb1 only */ +static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, + int stlbsel, int sesel) +{ + int stid; + + preempt_disable(); + stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); + + stlbe->mas1 |= MAS1_TID(stid); + write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); + preempt_enable(); +} + +#ifdef CONFIG_KVM_E500V2 +/* XXX should be a hook in the gva2hpa translation */ +void kvmppc_map_magic(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_entry magic; + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + unsigned int stid; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + + preempt_disable(); + stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); + + magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | + MAS1_TSIZE(BOOK3E_PAGESZ_4K); + magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; + magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | + MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; + magic.mas8 = 0; + + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + preempt_enable(); +} +#endif + +void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, + int esel) +{ + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; + + /* Don't bother with unmapped entries */ + if (!(ref->flags & E500_TLB_VALID)) + return; + + if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { + u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; + int hw_tlb_indx; + unsigned long flags; + + local_irq_save(flags); + while (tmp) { + hw_tlb_indx = __ilog2_u64(tmp & -tmp); + mtspr(SPRN_MAS0, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); + mtspr(SPRN_MAS1, 0); + asm volatile("tlbwe"); + vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; + tmp &= tmp - 1; + } + mb(); + vcpu_e500->g2h_tlb1_map[esel] = 0; + ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); + local_irq_restore(flags); + + return; + } + + /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ + kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); + + /* Mark the TLB as not backed by the host anymore */ + ref->flags &= ~E500_TLB_VALID; +} + +static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); +} + +static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, + struct kvm_book3e_206_tlb_entry *gtlbe, + pfn_t pfn) +{ + ref->pfn = pfn; + ref->flags = E500_TLB_VALID; + + if (tlbe_is_writable(gtlbe)) + kvm_set_pfn_dirty(pfn); +} + +static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) +{ + if (ref->flags & E500_TLB_VALID) { + trace_kvm_booke206_ref_release(ref->pfn, ref->flags); + ref->flags = 0; + } +} + +void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + if (vcpu_e500->g2h_tlb1_map) + memset(vcpu_e500->g2h_tlb1_map, 0, + sizeof(u64) * vcpu_e500->gtlb_params[1].entries); + if (vcpu_e500->h2g_tlb1_rmap) + memset(vcpu_e500->h2g_tlb1_rmap, 0, + sizeof(unsigned int) * host_tlb_params[1].entries); +} + +static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int tlbsel = 0; + int i; + + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->gtlb_priv[tlbsel][i].ref; + kvmppc_e500_ref_release(ref); + } +} + +void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int stlbsel = 1; + int i; + + kvmppc_e500_tlbil_all(vcpu_e500); + + for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->tlb_refs[stlbsel][i]; + kvmppc_e500_ref_release(ref); + } + + clear_tlb_privs(vcpu_e500); +} + +void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + clear_tlb_refs(vcpu_e500); + clear_tlb1_bitmap(vcpu_e500); +} + +/* TID must be supplied by the caller */ +static void kvmppc_e500_setup_stlbe( + struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe, + int tsize, struct tlbe_ref *ref, u64 gvaddr, + struct kvm_book3e_206_tlb_entry *stlbe) +{ + pfn_t pfn = ref->pfn; + u32 pr = vcpu->arch.shared->msr & MSR_PR; + + BUG_ON(!(ref->flags & E500_TLB_VALID)); + + /* Force IPROT=0 for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) | + e500_shadow_mas2_attrib(gtlbe->mas2, pr); + stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | + e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + +#ifdef CONFIG_KVM_BOOKE_HV + stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; +#endif +} + +static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, + struct tlbe_ref *ref) +{ + struct kvm_memory_slot *slot; + unsigned long pfn = 0; /* silence GCC warning */ + unsigned long hva; + int pfnmap = 0; + int tsize = BOOK3E_PAGESZ_4K; + + /* + * Translate guest physical to true physical, acquiring + * a page reference if it is normal, non-reserved memory. + * + * gfn_to_memslot() must succeed because otherwise we wouldn't + * have gotten this far. Eventually we should just pass the slot + * pointer through from the first lookup. + */ + slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); + hva = gfn_to_hva_memslot(slot, gfn); + + if (tlbsel == 1) { + struct vm_area_struct *vma; + down_read(¤t->mm->mmap_sem); + + vma = find_vma(current->mm, hva); + if (vma && hva >= vma->vm_start && + (vma->vm_flags & VM_PFNMAP)) { + /* + * This VMA is a physically contiguous region (e.g. + * /dev/mem) that bypasses normal Linux page + * management. Find the overlap between the + * vma and the memslot. + */ + + unsigned long start, end; + unsigned long slot_start, slot_end; + + pfnmap = 1; + + start = vma->vm_pgoff; + end = start + + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + + pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); + + slot_start = pfn - (gfn - slot->base_gfn); + slot_end = slot_start + slot->npages; + + if (start < slot_start) + start = slot_start; + if (end > slot_end) + end = slot_end; + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + + /* + * Now find the largest tsize (up to what the guest + * requested) that will cover gfn, stay within the + * range, and for which gfn and pfn are mutually + * aligned. + */ + + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { + unsigned long gfn_start, gfn_end, tsize_pages; + tsize_pages = 1 << (tsize - 2); + + gfn_start = gfn & ~(tsize_pages - 1); + gfn_end = gfn_start + tsize_pages; + + if (gfn_start + pfn - gfn < start) + continue; + if (gfn_end + pfn - gfn > end) + continue; + if ((gfn & (tsize_pages - 1)) != + (pfn & (tsize_pages - 1))) + continue; + + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn &= ~(tsize_pages - 1); + break; + } + } else if (vma && hva >= vma->vm_start && + (vma->vm_flags & VM_HUGETLB)) { + unsigned long psize = vma_kernel_pagesize(vma); + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * Take the largest page size that satisfies both host + * and guest mapping + */ + tsize = min(__ilog2(psize) - 10, tsize); + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + } + + up_read(¤t->mm->mmap_sem); + } + + if (likely(!pfnmap)) { + unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); + pfn = gfn_to_pfn_memslot(slot, gfn); + if (is_error_noslot_pfn(pfn)) { + printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", + (long)gfn); + return -EINVAL; + } + + /* Align guest and physical address to page map boundaries */ + pfn &= ~(tsize_pages - 1); + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + } + + /* Drop old ref and setup new one. */ + kvmppc_e500_ref_release(ref); + kvmppc_e500_ref_setup(ref, gtlbe, pfn); + + kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, + ref, gvaddr, stlbe); + + /* Clear i-cache for new pages */ + kvmppc_mmu_flush_icache(pfn); + + /* Drop refcount on page, so that mmu notifiers can clear it */ + kvm_release_pfn_clean(pfn); + + return 0; +} + +/* XXX only map the one-one case, for now use TLB0 */ +static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, + struct kvm_book3e_206_tlb_entry *stlbe) +{ + struct kvm_book3e_206_tlb_entry *gtlbe; + struct tlbe_ref *ref; + int stlbsel = 0; + int sesel = 0; + int r; + + gtlbe = get_entry(vcpu_e500, 0, esel); + ref = &vcpu_e500->gtlb_priv[0][esel].ref; + + r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), + get_tlb_raddr(gtlbe) >> PAGE_SHIFT, + gtlbe, 0, stlbe, ref); + if (r) + return r; + + write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + + return 0; +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +/* XXX for both one-one and one-to-many , for now use TLB1 */ +static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, int esel) +{ + struct tlbe_ref *ref; + unsigned int sesel; + int r; + int stlbsel = 1; + + sesel = vcpu_e500->host_tlb1_nv++; + + if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) + vcpu_e500->host_tlb1_nv = 0; + + ref = &vcpu_e500->tlb_refs[1][sesel]; + r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, + ref); + if (r) + return r; + + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + if (vcpu_e500->h2g_tlb1_rmap[sesel]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); + } + vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + + write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + + return 0; +} + +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, + unsigned int index) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe_priv *priv; + struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; + int tlbsel = tlbsel_of(index); + int esel = esel_of(index); + + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + + switch (tlbsel) { + case 0: + priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; + + /* Triggers after clear_tlb_refs or on initial mapping */ + if (!(priv->ref.flags & E500_TLB_VALID)) { + kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); + } else { + kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, + &priv->ref, eaddr, &stlbe); + write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0); + } + break; + + case 1: { + gfn_t gfn = gpaddr >> PAGE_SHIFT; + kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe, + esel); + break; + } + + default: + BUG(); + break; + } +} + +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_unmap_hva(hva); + + /* + * Flush all shadow tlb entries everywhere. This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + +int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; + host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + /* + * This should never happen on real e500 hardware, but is + * architecturally possible -- e.g. in some weird nested + * virtualization case. + */ + if (host_tlb_params[0].entries == 0 || + host_tlb_params[1].entries == 0) { + pr_err("%s: need to know host tlb size\n", __func__); + return -ENODEV; + } + + host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> + TLBnCFG_ASSOC_SHIFT; + host_tlb_params[1].ways = host_tlb_params[1].entries; + + if (!is_power_of_2(host_tlb_params[0].entries) || + !is_power_of_2(host_tlb_params[0].ways) || + host_tlb_params[0].entries < host_tlb_params[0].ways || + host_tlb_params[0].ways == 0) { + pr_err("%s: bad tlb0 host config: %u entries %u ways\n", + __func__, host_tlb_params[0].entries, + host_tlb_params[0].ways); + return -ENODEV; + } + + host_tlb_params[0].sets = + host_tlb_params[0].entries / host_tlb_params[0].ways; + host_tlb_params[1].sets = 1; + + vcpu_e500->tlb_refs[0] = + kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, + GFP_KERNEL); + if (!vcpu_e500->tlb_refs[0]) + goto err; + + vcpu_e500->tlb_refs[1] = + kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->tlb_refs[1]) + goto err; + + vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * + host_tlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->h2g_tlb1_rmap) + goto err; + + return 0; + +err: + kfree(vcpu_e500->tlb_refs[0]); + kfree(vcpu_e500->tlb_refs[1]); + return -EINVAL; +} + +void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->h2g_tlb1_rmap); + kfree(vcpu_e500->tlb_refs[0]); + kfree(vcpu_e500->tlb_refs[1]); +} diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h new file mode 100644 index 000000000000..9e4d4a20e694 --- /dev/null +++ b/arch/powerpc/kvm/e500_mmu_host.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef KVM_E500_MMU_HOST_H +#define KVM_E500_MMU_HOST_H + +void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, + int esel); + +void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500); +void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500); +int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500); +void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); + +#endif /* KVM_E500_MMU_HOST_H */ diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c deleted file mode 100644 index 48d1a4f1f5ff..000000000000 --- a/arch/powerpc/kvm/e500_tlb.c +++ /dev/null @@ -1,1426 +0,0 @@ -/* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Yu Liu, yu.liu@freescale.com - * Scott Wood, scottwood@freescale.com - * Ashish Kalra, ashish.kalra@freescale.com - * Varun Sethi, varun.sethi@freescale.com - * - * Description: - * This file is based on arch/powerpc/kvm/44x_tlb.c, - * by Hollis Blanchard . - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "e500.h" -#include "trace.h" -#include "timing.h" - -#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) - -static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; - -static inline unsigned int gtlb0_get_next_victim( - struct kvmppc_vcpu_e500 *vcpu_e500) -{ - unsigned int victim; - - victim = vcpu_e500->gtlb_nv[0]++; - if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways)) - vcpu_e500->gtlb_nv[0] = 0; - - return victim; -} - -static inline unsigned int tlb1_max_shadow_size(void) -{ - /* reserve one entry for magic page */ - return host_tlb_params[1].entries - tlbcam_index - 1; -} - -static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) -{ - return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); -} - -static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) -{ - /* Mask off reserved bits. */ - mas3 &= MAS3_ATTRIB_MASK; - -#ifndef CONFIG_KVM_BOOKE_HV - if (!usermode) { - /* Guest is in supervisor mode, - * so we need to translate guest - * supervisor permissions into user permissions. */ - mas3 &= ~E500_TLB_USER_PERM_MASK; - mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; - } - mas3 |= E500_TLB_SUPER_PERM_MASK; -#endif - return mas3; -} - -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) -{ -#ifdef CONFIG_SMP - return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; -#else - return mas2 & MAS2_ATTRIB_MASK; -#endif -} - -/* - * writing shadow tlb entry to host TLB - */ -static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, - uint32_t mas0) -{ - unsigned long flags; - - local_irq_save(flags); - mtspr(SPRN_MAS0, mas0); - mtspr(SPRN_MAS1, stlbe->mas1); - mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); - mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); - mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_MAS8, stlbe->mas8); -#endif - asm volatile("isync; tlbwe" : : : "memory"); - -#ifdef CONFIG_KVM_BOOKE_HV - /* Must clear mas8 for other host tlbwe's */ - mtspr(SPRN_MAS8, 0); - isync(); -#endif - local_irq_restore(flags); - - trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, - stlbe->mas2, stlbe->mas7_3); -} - -/* - * Acquire a mas0 with victim hint, as if we just took a TLB miss. - * - * We don't care about the address we're searching for, other than that it's - * in the right set and is not present in the TLB. Using a zero PID and a - * userspace address means we don't have to set and then restore MAS5, or - * calculate a proper MAS6 value. - */ -static u32 get_host_mas0(unsigned long eaddr) -{ - unsigned long flags; - u32 mas0; - - local_irq_save(flags); - mtspr(SPRN_MAS6, 0); - asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); - mas0 = mfspr(SPRN_MAS0); - local_irq_restore(flags); - - return mas0; -} - -/* sesel is for tlb1 only */ -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) -{ - u32 mas0; - - if (tlbsel == 0) { - mas0 = get_host_mas0(stlbe->mas2); - __write_host_tlbe(stlbe, mas0); - } else { - __write_host_tlbe(stlbe, - MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(sesel))); - } -} - -/* sesel is for tlb1 only */ -static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe, - int stlbsel, int sesel) -{ - int stid; - - preempt_disable(); - stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); - - stlbe->mas1 |= MAS1_TID(stid); - write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); - preempt_enable(); -} - -#ifdef CONFIG_KVM_E500V2 -void kvmppc_map_magic(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry magic; - ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; - unsigned int stid; - pfn_t pfn; - - pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; - get_page(pfn_to_page(pfn)); - - preempt_disable(); - stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); - - magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | - MAS1_TSIZE(BOOK3E_PAGESZ_4K); - magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; - magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | - MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; - magic.mas8 = 0; - - __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); - preempt_enable(); -} -#endif - -static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) -{ - struct kvm_book3e_206_tlb_entry *gtlbe = - get_entry(vcpu_e500, tlbsel, esel); - struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; - - /* Don't bother with unmapped entries */ - if (!(ref->flags & E500_TLB_VALID)) - return; - - if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { - u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; - int hw_tlb_indx; - unsigned long flags; - - local_irq_save(flags); - while (tmp) { - hw_tlb_indx = __ilog2_u64(tmp & -tmp); - mtspr(SPRN_MAS0, - MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); - mtspr(SPRN_MAS1, 0); - asm volatile("tlbwe"); - vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; - tmp &= tmp - 1; - } - mb(); - vcpu_e500->g2h_tlb1_map[esel] = 0; - ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); - local_irq_restore(flags); - - return; - } - - /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ - kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); - - /* Mark the TLB as not backed by the host anymore */ - ref->flags &= ~E500_TLB_VALID; -} - -static int tlb0_set_base(gva_t addr, int sets, int ways) -{ - int set_base; - - set_base = (addr >> PAGE_SHIFT) & (sets - 1); - set_base *= ways; - - return set_base; -} - -static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr) -{ - return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets, - vcpu_e500->gtlb_params[0].ways); -} - -static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int esel = get_tlb_esel_bit(vcpu); - - if (tlbsel == 0) { - esel &= vcpu_e500->gtlb_params[0].ways - 1; - esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2); - } else { - esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; - } - - return esel; -} - -/* Search the guest TLB for a matching entry. */ -static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, - gva_t eaddr, int tlbsel, unsigned int pid, int as) -{ - int size = vcpu_e500->gtlb_params[tlbsel].entries; - unsigned int set_base, offset; - int i; - - if (tlbsel == 0) { - set_base = gtlb0_set_base(vcpu_e500, eaddr); - size = vcpu_e500->gtlb_params[0].ways; - } else { - if (eaddr < vcpu_e500->tlb1_min_eaddr || - eaddr > vcpu_e500->tlb1_max_eaddr) - return -1; - set_base = 0; - } - - offset = vcpu_e500->gtlb_offset[tlbsel]; - - for (i = 0; i < size; i++) { - struct kvm_book3e_206_tlb_entry *tlbe = - &vcpu_e500->gtlb_arch[offset + set_base + i]; - unsigned int tid; - - if (eaddr < get_tlb_eaddr(tlbe)) - continue; - - if (eaddr > get_tlb_end(tlbe)) - continue; - - tid = get_tlb_tid(tlbe); - if (tid && (tid != pid)) - continue; - - if (!get_tlb_v(tlbe)) - continue; - - if (get_tlb_ts(tlbe) != as && as != -1) - continue; - - return set_base + i; - } - - return -1; -} - -static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, - struct kvm_book3e_206_tlb_entry *gtlbe, - pfn_t pfn) -{ - ref->pfn = pfn; - ref->flags = E500_TLB_VALID; - - if (tlbe_is_writable(gtlbe)) - kvm_set_pfn_dirty(pfn); -} - -static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) -{ - if (ref->flags & E500_TLB_VALID) { - trace_kvm_booke206_ref_release(ref->pfn, ref->flags); - ref->flags = 0; - } -} - -static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - if (vcpu_e500->g2h_tlb1_map) - memset(vcpu_e500->g2h_tlb1_map, 0, - sizeof(u64) * vcpu_e500->gtlb_params[1].entries); - if (vcpu_e500->h2g_tlb1_rmap) - memset(vcpu_e500->h2g_tlb1_rmap, 0, - sizeof(unsigned int) * host_tlb_params[1].entries); -} - -static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int tlbsel = 0; - int i; - - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); - } -} - -static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int stlbsel = 1; - int i; - - kvmppc_e500_tlbil_all(vcpu_e500); - - for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->tlb_refs[stlbsel][i]; - kvmppc_e500_ref_release(ref); - } - - clear_tlb_privs(vcpu_e500); -} - -void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - clear_tlb_refs(vcpu_e500); - clear_tlb1_bitmap(vcpu_e500); -} - -static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, - unsigned int eaddr, int as) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - unsigned int victim, tsized; - int tlbsel; - - /* since we only have two TLBs, only lower bit is used. */ - tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; - victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; - tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; - - vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) - | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) - | MAS1_TID(get_tlbmiss_tid(vcpu)) - | MAS1_TSIZE(tsized); - vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) - | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); - vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; - vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1) - | (get_cur_pid(vcpu) << 16) - | (as ? MAS6_SAS : 0); -} - -/* TID must be supplied by the caller */ -static inline void kvmppc_e500_setup_stlbe( - struct kvm_vcpu *vcpu, - struct kvm_book3e_206_tlb_entry *gtlbe, - int tsize, struct tlbe_ref *ref, u64 gvaddr, - struct kvm_book3e_206_tlb_entry *stlbe) -{ - pfn_t pfn = ref->pfn; - u32 pr = vcpu->arch.shared->msr & MSR_PR; - - BUG_ON(!(ref->flags & E500_TLB_VALID)); - - /* Force IPROT=0 for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) | - e500_shadow_mas2_attrib(gtlbe->mas2, pr); - stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | - e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); - -#ifdef CONFIG_KVM_BOOKE_HV - stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; -#endif -} - -static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, - int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, - struct tlbe_ref *ref) -{ - struct kvm_memory_slot *slot; - unsigned long pfn = 0; /* silence GCC warning */ - unsigned long hva; - int pfnmap = 0; - int tsize = BOOK3E_PAGESZ_4K; - - /* - * Translate guest physical to true physical, acquiring - * a page reference if it is normal, non-reserved memory. - * - * gfn_to_memslot() must succeed because otherwise we wouldn't - * have gotten this far. Eventually we should just pass the slot - * pointer through from the first lookup. - */ - slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); - hva = gfn_to_hva_memslot(slot, gfn); - - if (tlbsel == 1) { - struct vm_area_struct *vma; - down_read(¤t->mm->mmap_sem); - - vma = find_vma(current->mm, hva); - if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_PFNMAP)) { - /* - * This VMA is a physically contiguous region (e.g. - * /dev/mem) that bypasses normal Linux page - * management. Find the overlap between the - * vma and the memslot. - */ - - unsigned long start, end; - unsigned long slot_start, slot_end; - - pfnmap = 1; - - start = vma->vm_pgoff; - end = start + - ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); - - pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); - - slot_start = pfn - (gfn - slot->base_gfn); - slot_end = slot_start + slot->npages; - - if (start < slot_start) - start = slot_start; - if (end > slot_end) - end = slot_end; - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - - /* - * Now find the largest tsize (up to what the guest - * requested) that will cover gfn, stay within the - * range, and for which gfn and pfn are mutually - * aligned. - */ - - for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { - unsigned long gfn_start, gfn_end, tsize_pages; - tsize_pages = 1 << (tsize - 2); - - gfn_start = gfn & ~(tsize_pages - 1); - gfn_end = gfn_start + tsize_pages; - - if (gfn_start + pfn - gfn < start) - continue; - if (gfn_end + pfn - gfn > end) - continue; - if ((gfn & (tsize_pages - 1)) != - (pfn & (tsize_pages - 1))) - continue; - - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); - pfn &= ~(tsize_pages - 1); - break; - } - } else if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_HUGETLB)) { - unsigned long psize = vma_kernel_pagesize(vma); - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * Take the largest page size that satisfies both host - * and guest mapping - */ - tsize = min(__ilog2(psize) - 10, tsize); - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - } - - up_read(¤t->mm->mmap_sem); - } - - if (likely(!pfnmap)) { - unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); - pfn = gfn_to_pfn_memslot(slot, gfn); - if (is_error_noslot_pfn(pfn)) { - printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", - (long)gfn); - return -EINVAL; - } - - /* Align guest and physical address to page map boundaries */ - pfn &= ~(tsize_pages - 1); - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); - } - - /* Drop old ref and setup new one. */ - kvmppc_e500_ref_release(ref); - kvmppc_e500_ref_setup(ref, gtlbe, pfn); - - kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, - ref, gvaddr, stlbe); - - /* Clear i-cache for new pages */ - kvmppc_mmu_flush_icache(pfn); - - /* Drop refcount on page, so that mmu notifiers can clear it */ - kvm_release_pfn_clean(pfn); - - return 0; -} - -/* XXX only map the one-one case, for now use TLB0 */ -static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, - int esel, - struct kvm_book3e_206_tlb_entry *stlbe) -{ - struct kvm_book3e_206_tlb_entry *gtlbe; - struct tlbe_ref *ref; - int stlbsel = 0; - int sesel = 0; - int r; - - gtlbe = get_entry(vcpu_e500, 0, esel); - ref = &vcpu_e500->gtlb_priv[0][esel].ref; - - r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), - get_tlb_raddr(gtlbe) >> PAGE_SHIFT, - gtlbe, 0, stlbe, ref); - if (r) - return r; - - write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); - - return 0; -} - -/* Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. */ -/* XXX for both one-one and one-to-many , for now use TLB1 */ -static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe, int esel) -{ - struct tlbe_ref *ref; - unsigned int sesel; - int r; - int stlbsel = 1; - - sesel = vcpu_e500->host_tlb1_nv++; - - if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) - vcpu_e500->host_tlb1_nv = 0; - - ref = &vcpu_e500->tlb_refs[1][sesel]; - r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - ref); - if (r) - return r; - - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; - if (vcpu_e500->h2g_tlb1_rmap[sesel]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; - vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); - } - vcpu_e500->h2g_tlb1_rmap[sesel] = esel; - - write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); - - return 0; -} - -static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int size = vcpu_e500->gtlb_params[1].entries; - unsigned int offset; - gva_t eaddr; - int i; - - vcpu_e500->tlb1_min_eaddr = ~0UL; - vcpu_e500->tlb1_max_eaddr = 0; - offset = vcpu_e500->gtlb_offset[1]; - - for (i = 0; i < size; i++) { - struct kvm_book3e_206_tlb_entry *tlbe = - &vcpu_e500->gtlb_arch[offset + i]; - - if (!get_tlb_v(tlbe)) - continue; - - eaddr = get_tlb_eaddr(tlbe); - vcpu_e500->tlb1_min_eaddr = - min(vcpu_e500->tlb1_min_eaddr, eaddr); - - eaddr = get_tlb_end(tlbe); - vcpu_e500->tlb1_max_eaddr = - max(vcpu_e500->tlb1_max_eaddr, eaddr); - } -} - -static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500, - struct kvm_book3e_206_tlb_entry *gtlbe) -{ - unsigned long start, end, size; - - size = get_tlb_bytes(gtlbe); - start = get_tlb_eaddr(gtlbe) & ~(size - 1); - end = start + size - 1; - - return vcpu_e500->tlb1_min_eaddr == start || - vcpu_e500->tlb1_max_eaddr == end; -} - -/* This function is supposed to be called for a adding a new valid tlb entry */ -static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu, - struct kvm_book3e_206_tlb_entry *gtlbe) -{ - unsigned long start, end, size; - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - - if (!get_tlb_v(gtlbe)) - return; - - size = get_tlb_bytes(gtlbe); - start = get_tlb_eaddr(gtlbe) & ~(size - 1); - end = start + size - 1; - - vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start); - vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end); -} - -static inline int kvmppc_e500_gtlbe_invalidate( - struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) -{ - struct kvm_book3e_206_tlb_entry *gtlbe = - get_entry(vcpu_e500, tlbsel, esel); - - if (unlikely(get_tlb_iprot(gtlbe))) - return -1; - - if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) - kvmppc_recalc_tlb1map_range(vcpu_e500); - - gtlbe->mas1 = 0; - - return 0; -} - -int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) -{ - int esel; - - if (value & MMUCSR0_TLB0FI) - for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++) - kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); - if (value & MMUCSR0_TLB1FI) - for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) - kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); - - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); - - return EMULATE_DONE; -} - -int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - unsigned int ia; - int esel, tlbsel; - - ia = (ea >> 2) & 0x1; - - /* since we only have two TLBs, only lower bit is used. */ - tlbsel = (ea >> 3) & 0x1; - - if (ia) { - /* invalidate all entries */ - for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; - esel++) - kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); - } else { - ea &= 0xfffff000; - esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, - get_cur_pid(vcpu), -1); - if (esel >= 0) - kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); - } - - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); - - return EMULATE_DONE; -} - -static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, - int pid, int type) -{ - struct kvm_book3e_206_tlb_entry *tlbe; - int tid, esel; - - /* invalidate all entries */ - for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) { - tlbe = get_entry(vcpu_e500, tlbsel, esel); - tid = get_tlb_tid(tlbe); - if (type == 0 || tid == pid) { - inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); - kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); - } - } -} - -static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, - gva_t ea) -{ - int tlbsel, esel; - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1); - if (esel >= 0) { - inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); - kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); - break; - } - } -} - -int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int pid = get_cur_spid(vcpu); - - if (type == 0 || type == 1) { - tlbilx_all(vcpu_e500, 0, pid, type); - tlbilx_all(vcpu_e500, 1, pid, type); - } else if (type == 3) { - tlbilx_one(vcpu_e500, pid, ea); - } - - return EMULATE_DONE; -} - -int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int tlbsel, esel; - struct kvm_book3e_206_tlb_entry *gtlbe; - - tlbsel = get_tlb_tlbsel(vcpu); - esel = get_tlb_esel(vcpu, tlbsel); - - gtlbe = get_entry(vcpu_e500, tlbsel, esel); - vcpu->arch.shared->mas0 &= ~MAS0_NV(~0); - vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu->arch.shared->mas1 = gtlbe->mas1; - vcpu->arch.shared->mas2 = gtlbe->mas2; - vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; - - return EMULATE_DONE; -} - -int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int as = !!get_cur_sas(vcpu); - unsigned int pid = get_cur_spid(vcpu); - int esel, tlbsel; - struct kvm_book3e_206_tlb_entry *gtlbe = NULL; - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); - if (esel >= 0) { - gtlbe = get_entry(vcpu_e500, tlbsel, esel); - break; - } - } - - if (gtlbe) { - esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1; - - vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) - | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu->arch.shared->mas1 = gtlbe->mas1; - vcpu->arch.shared->mas2 = gtlbe->mas2; - vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; - } else { - int victim; - - /* since we only have two TLBs, only lower bit is used. */ - tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1; - victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; - - vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) - | MAS0_ESEL(victim) - | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); - vcpu->arch.shared->mas1 = - (vcpu->arch.shared->mas6 & MAS6_SPID0) - | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) - | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); - vcpu->arch.shared->mas2 &= MAS2_EPN; - vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & - MAS2_ATTRIB_MASK; - vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | - MAS3_U2 | MAS3_U3; - } - - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; -} - -int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry *gtlbe; - int tlbsel, esel; - int recal = 0; - - tlbsel = get_tlb_tlbsel(vcpu); - esel = get_tlb_esel(vcpu, tlbsel); - - gtlbe = get_entry(vcpu_e500, tlbsel, esel); - - if (get_tlb_v(gtlbe)) { - inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); - if ((tlbsel == 1) && - kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) - recal = 1; - } - - gtlbe->mas1 = vcpu->arch.shared->mas1; - gtlbe->mas2 = vcpu->arch.shared->mas2; - if (!(vcpu->arch.shared->msr & MSR_CM)) - gtlbe->mas2 &= 0xffffffffUL; - gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; - - trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, - gtlbe->mas2, gtlbe->mas7_3); - - if (tlbsel == 1) { - /* - * If a valid tlb1 entry is overwritten then recalculate the - * min/max TLB1 map address range otherwise no need to look - * in tlb1 array. - */ - if (recal) - kvmppc_recalc_tlb1map_range(vcpu_e500); - else - kvmppc_set_tlb1map_range(vcpu, gtlbe); - } - - /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ - if (tlbe_is_host_safe(vcpu, gtlbe)) { - u64 eaddr = get_tlb_eaddr(gtlbe); - u64 raddr = get_tlb_raddr(gtlbe); - - if (tlbsel == 0) { - gtlbe->mas1 &= ~MAS1_TSIZE(~0); - gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); - } - - /* Premap the faulting page */ - kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel)); - } - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; -} - -static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, - gva_t eaddr, unsigned int pid, int as) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int esel, tlbsel; - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); - if (esel >= 0) - return index_of(tlbsel, esel); - } - - return -1; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); - if (index < 0) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; - - return 0; -} - - -int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - - return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); -} - -int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); - - return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); -} - -void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - - kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); -} - -void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); - - kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); -} - -gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, - gva_t eaddr) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry *gtlbe; - u64 pgmask; - - gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index)); - pgmask = get_tlb_bytes(gtlbe) - 1; - - return get_tlb_raddr(gtlbe) | (eaddr & pgmask); -} - -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, - unsigned int index) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe_priv *priv; - struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; - int tlbsel = tlbsel_of(index); - int esel = esel_of(index); - - gtlbe = get_entry(vcpu_e500, tlbsel, esel); - - switch (tlbsel) { - case 0: - priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - - /* Triggers after clear_tlb_refs or on initial mapping */ - if (!(priv->ref.flags & E500_TLB_VALID)) { - kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); - } else { - kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, - &priv->ref, eaddr, &stlbe); - write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0); - } - break; - - case 1: { - gfn_t gfn = gpaddr >> PAGE_SHIFT; - kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe, - esel); - break; - } - - default: - BUG(); - break; - } -} - -/************* MMU Notifiers *************/ - -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -{ - trace_kvm_unmap_hva(hva); - - /* - * Flush all shadow tlb entries everywhere. This is slow, but - * we are 100% sure that we catch the to be unmapped page - */ - kvm_flush_remote_tlbs(kvm); - - return 0; -} - -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) -{ - /* kvm_unmap_hva flushes everything anyways */ - kvm_unmap_hva(kvm, start); - - return 0; -} - -int kvm_age_hva(struct kvm *kvm, unsigned long hva) -{ - /* XXX could be more clever ;) */ - return 0; -} - -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - /* XXX could be more clever ;) */ - return 0; -} - -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) -{ - /* The page will get remapped properly on its next fault */ - kvm_unmap_hva(kvm, hva); -} - -/*****************************************/ - -static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int i; - - clear_tlb1_bitmap(vcpu_e500); - kfree(vcpu_e500->g2h_tlb1_map); - - clear_tlb_refs(vcpu_e500); - kfree(vcpu_e500->gtlb_priv[0]); - kfree(vcpu_e500->gtlb_priv[1]); - - if (vcpu_e500->shared_tlb_pages) { - vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch, - PAGE_SIZE))); - - for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) { - set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]); - put_page(vcpu_e500->shared_tlb_pages[i]); - } - - vcpu_e500->num_shared_tlb_pages = 0; - - kfree(vcpu_e500->shared_tlb_pages); - vcpu_e500->shared_tlb_pages = NULL; - } else { - kfree(vcpu_e500->gtlb_arch); - } - - vcpu_e500->gtlb_arch = NULL; -} - -void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - sregs->u.e.mas0 = vcpu->arch.shared->mas0; - sregs->u.e.mas1 = vcpu->arch.shared->mas1; - sregs->u.e.mas2 = vcpu->arch.shared->mas2; - sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; - sregs->u.e.mas4 = vcpu->arch.shared->mas4; - sregs->u.e.mas6 = vcpu->arch.shared->mas6; - - sregs->u.e.mmucfg = vcpu->arch.mmucfg; - sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0]; - sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1]; - sregs->u.e.tlbcfg[2] = 0; - sregs->u.e.tlbcfg[3] = 0; -} - -int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { - vcpu->arch.shared->mas0 = sregs->u.e.mas0; - vcpu->arch.shared->mas1 = sregs->u.e.mas1; - vcpu->arch.shared->mas2 = sregs->u.e.mas2; - vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; - vcpu->arch.shared->mas4 = sregs->u.e.mas4; - vcpu->arch.shared->mas6 = sregs->u.e.mas6; - } - - return 0; -} - -int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, - struct kvm_config_tlb *cfg) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_params params; - char *virt; - struct page **pages; - struct tlbe_priv *privs[2] = {}; - u64 *g2h_bitmap = NULL; - size_t array_len; - u32 sets; - int num_pages, ret, i; - - if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV) - return -EINVAL; - - if (copy_from_user(¶ms, (void __user *)(uintptr_t)cfg->params, - sizeof(params))) - return -EFAULT; - - if (params.tlb_sizes[1] > 64) - return -EINVAL; - if (params.tlb_ways[1] != params.tlb_sizes[1]) - return -EINVAL; - if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0) - return -EINVAL; - if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0) - return -EINVAL; - - if (!is_power_of_2(params.tlb_ways[0])) - return -EINVAL; - - sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]); - if (!is_power_of_2(sets)) - return -EINVAL; - - array_len = params.tlb_sizes[0] + params.tlb_sizes[1]; - array_len *= sizeof(struct kvm_book3e_206_tlb_entry); - - if (cfg->array_len < array_len) - return -EINVAL; - - num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - - cfg->array / PAGE_SIZE; - pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); - if (!pages) - return -ENOMEM; - - ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); - if (ret < 0) - goto err_pages; - - if (ret != num_pages) { - num_pages = ret; - ret = -EFAULT; - goto err_put_page; - } - - virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); - if (!virt) { - ret = -ENOMEM; - goto err_put_page; - } - - privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], - GFP_KERNEL); - privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], - GFP_KERNEL); - - if (!privs[0] || !privs[1]) { - ret = -ENOMEM; - goto err_privs; - } - - g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], - GFP_KERNEL); - if (!g2h_bitmap) { - ret = -ENOMEM; - goto err_privs; - } - - free_gtlb(vcpu_e500); - - vcpu_e500->gtlb_priv[0] = privs[0]; - vcpu_e500->gtlb_priv[1] = privs[1]; - vcpu_e500->g2h_tlb1_map = g2h_bitmap; - - vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) - (virt + (cfg->array & (PAGE_SIZE - 1))); - - vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0]; - vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1]; - - vcpu_e500->gtlb_offset[0] = 0; - vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; - - vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; - - vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - if (params.tlb_sizes[0] <= 2048) - vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; - vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; - - vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; - vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; - - vcpu_e500->shared_tlb_pages = pages; - vcpu_e500->num_shared_tlb_pages = num_pages; - - vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0]; - vcpu_e500->gtlb_params[0].sets = sets; - - vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; - vcpu_e500->gtlb_params[1].sets = 1; - - kvmppc_recalc_tlb1map_range(vcpu_e500); - return 0; - -err_privs: - kfree(privs[0]); - kfree(privs[1]); - -err_put_page: - for (i = 0; i < num_pages; i++) - put_page(pages[i]); - -err_pages: - kfree(pages); - return ret; -} - -int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, - struct kvm_dirty_tlb *dirty) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - kvmppc_recalc_tlb1map_range(vcpu_e500); - clear_tlb_refs(vcpu_e500); - return 0; -} - -int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; - int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); - int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; - - host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; - host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - - /* - * This should never happen on real e500 hardware, but is - * architecturally possible -- e.g. in some weird nested - * virtualization case. - */ - if (host_tlb_params[0].entries == 0 || - host_tlb_params[1].entries == 0) { - pr_err("%s: need to know host tlb size\n", __func__); - return -ENODEV; - } - - host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> - TLBnCFG_ASSOC_SHIFT; - host_tlb_params[1].ways = host_tlb_params[1].entries; - - if (!is_power_of_2(host_tlb_params[0].entries) || - !is_power_of_2(host_tlb_params[0].ways) || - host_tlb_params[0].entries < host_tlb_params[0].ways || - host_tlb_params[0].ways == 0) { - pr_err("%s: bad tlb0 host config: %u entries %u ways\n", - __func__, host_tlb_params[0].entries, - host_tlb_params[0].ways); - return -ENODEV; - } - - host_tlb_params[0].sets = - host_tlb_params[0].entries / host_tlb_params[0].ways; - host_tlb_params[1].sets = 1; - - vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; - vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; - - vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM; - vcpu_e500->gtlb_params[0].sets = - KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; - - vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; - vcpu_e500->gtlb_params[1].sets = 1; - - vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); - if (!vcpu_e500->gtlb_arch) - return -ENOMEM; - - vcpu_e500->gtlb_offset[0] = 0; - vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; - - vcpu_e500->tlb_refs[0] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[0]) - goto err; - - vcpu_e500->tlb_refs[1] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[1]) - goto err; - - vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * - vcpu_e500->gtlb_params[0].entries, - GFP_KERNEL); - if (!vcpu_e500->gtlb_priv[0]) - goto err; - - vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * - vcpu_e500->gtlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->gtlb_priv[1]) - goto err; - - vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * - vcpu_e500->gtlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->g2h_tlb1_map) - goto err; - - vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * - host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->h2g_tlb1_rmap) - goto err; - - /* Init TLB configuration register */ - vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & - ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; - vcpu->arch.tlbcfg[0] |= - vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; - - vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & - ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; - vcpu->arch.tlbcfg[1] |= - vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; - - kvmppc_recalc_tlb1map_range(vcpu_e500); - return 0; - -err: - free_gtlb(vcpu_e500); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); - return -1; -} - -void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - free_gtlb(vcpu_e500); - kfree(vcpu_e500->h2g_tlb1_rmap); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); -} -- cgit v1.2.3 From c015c62b13498629809185eb0ff04e3f13d1afb6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 17 Jan 2013 17:54:36 +0100 Subject: KVM: PPC: e500: Implement TLB1-in-TLB0 mapping When a host mapping fault happens in a guest TLB1 entry today, we map the translated guest entry into the host's TLB1. This isn't particularly clever when the guest is mapped by normal 4k pages, since these would be a lot better to put into TLB0 instead. This patch adds the required logic to map 4k TLB1 shadow maps into the host's TLB0. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h | 1 + arch/powerpc/kvm/e500_mmu_host.c | 65 ++++++++++++++++++++++++++++------------ 2 files changed, 47 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index c70d37ed770a..41cefd43655f 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -28,6 +28,7 @@ #define E500_TLB_VALID 1 #define E500_TLB_BITMAP 2 +#define E500_TLB_TLB0 (1 << 2) struct tlbe_ref { pfn_t pfn; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 4c32d6510133..9a150bced298 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -216,10 +216,21 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, vcpu_e500->g2h_tlb1_map[esel] = 0; ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); local_irq_restore(flags); + } - return; + if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) { + /* + * TLB1 entry is backed by 4k pages. This should happen + * rarely and is not worth optimizing. Invalidate everything. + */ + kvmppc_e500_tlbil_all(vcpu_e500); + ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); } + /* Already invalidated in between */ + if (!(ref->flags & E500_TLB_VALID)) + return; + /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); @@ -487,38 +498,54 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, return 0; } +static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, + struct tlbe_ref *ref, + int esel) +{ + unsigned int sesel = vcpu_e500->host_tlb1_nv++; + + if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) + vcpu_e500->host_tlb1_nv = 0; + + vcpu_e500->tlb_refs[1][sesel] = *ref; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + if (vcpu_e500->h2g_tlb1_rmap[sesel]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); + } + vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + + return sesel; +} + /* Caller must ensure that the specified guest TLB entry is safe to insert into * the shadow TLB. */ -/* XXX for both one-one and one-to-many , for now use TLB1 */ +/* For both one-one and one-to-many */ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref *ref; - unsigned int sesel; + struct tlbe_ref ref; + int sesel; int r; - int stlbsel = 1; - - sesel = vcpu_e500->host_tlb1_nv++; - - if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) - vcpu_e500->host_tlb1_nv = 0; - ref = &vcpu_e500->tlb_refs[1][sesel]; + ref.flags = 0; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - ref); + &ref); if (r) return r; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; - if (vcpu_e500->h2g_tlb1_rmap[sesel]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; - vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); + /* Use TLB0 when we can only map a page with 4k */ + if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) { + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0; + write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0); + return 0; } - vcpu_e500->h2g_tlb1_rmap[sesel] = esel; - write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + /* Otherwise map into TLB1 */ + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); + write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; } -- cgit v1.2.3 From 483ba97c0fafaade3a0ee61217ca5123b0d85c03 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 18 Jan 2013 15:13:19 +0100 Subject: KVM: PPC: E500: Make clear_tlb_refs and clear_tlb1_bitmap static Host shadow TLB flushing is logic that the guest TLB code should have no insight about. Declare the internal clear_tlb_refs and clear_tlb1_bitmap functions static to the host TLB handling file. Instead of these, we can use the already exported kvmppc_core_flush_tlb(). This gives us a common API across the board to say "please flush any pending host shadow translation". Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu.c | 6 ++---- arch/powerpc/kvm/e500_mmu_host.c | 4 ++-- arch/powerpc/kvm/e500_mmu_host.h | 2 -- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index c3d1721aa1b8..623a192c2658 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -541,10 +541,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; - clear_tlb1_bitmap(vcpu_e500); + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); kfree(vcpu_e500->g2h_tlb1_map); - - clear_tlb_refs(vcpu_e500); kfree(vcpu_e500->gtlb_priv[0]); kfree(vcpu_e500->gtlb_priv[1]); @@ -735,7 +733,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); kvmppc_recalc_tlb1map_range(vcpu_e500); - clear_tlb_refs(vcpu_e500); + kvmppc_core_flush_tlb(vcpu); return 0; } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 9a150bced298..a222edfb9a9b 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -262,7 +262,7 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) } } -void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) +static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) { if (vcpu_e500->g2h_tlb1_map) memset(vcpu_e500->g2h_tlb1_map, 0, @@ -284,7 +284,7 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) } } -void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) +static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) { int stlbsel = 1; int i; diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h index 9e4d4a20e694..7624835b76c7 100644 --- a/arch/powerpc/kvm/e500_mmu_host.h +++ b/arch/powerpc/kvm/e500_mmu_host.h @@ -12,8 +12,6 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel); -void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500); -void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500); int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500); void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); -- cgit v1.2.3 From b9e3e208935e95ad62bd1b1bc4408c23a9ae3ada Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 18 Jan 2013 15:22:08 +0100 Subject: KVM: PPC: E500: Remove kvmppc_e500_tlbil_all usage from guest TLB code The guest TLB handling code should not have any insight into how the host TLB shadow code works. kvmppc_e500_tlbil_all() is a function that is used for distinction between e500v2 and e500mc (E.HV) on how to flush shadow entries. This function really is private between the e500.c/e500mc.c file and e500_mmu_host.c. Instead of this one, use the public kvmppc_core_flush_tlb() function to flush all shadow TLB entries. As a nice side effect, with this we also end up flushing TLB1 entries which we forgot to do before. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 623a192c2658..5c4475983f78 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -239,8 +239,8 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); + /* Invalidate all host shadow mappings */ + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); return EMULATE_DONE; } @@ -269,8 +269,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); + /* Invalidate all host shadow mappings */ + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); return EMULATE_DONE; } -- cgit v1.2.3 From ffe129ecd79779221fdb03305049ec8b5a8beb0f Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 15 Jan 2013 22:20:42 +0000 Subject: KVM: PPC: booke: use vcpu reference from thread_struct Like other places, use thread_struct to get vcpu reference. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h | 2 -- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/booke_interrupts.S | 6 ++---- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 97d37278ea2d..11ae3d8ba3a2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -919,8 +919,6 @@ #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 #endif -#define SPRN_SPRG_RVCPU SPRN_SPRG1 -#define SPRN_SPRG_WVCPU SPRN_SPRG1 #endif #ifdef CONFIG_8xx diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4e23ba2f3ca7..46f6afd2172a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -117,7 +117,7 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif -#ifdef CONFIG_KVM_BOOKE_HV +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); #endif diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index bb46b32f9813..ca16d57f7686 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -56,7 +56,8 @@ _GLOBAL(kvmppc_handler_\ivor_nr) /* Get pointer to vcpu and record exit number. */ mtspr \scratch , r4 - mfspr r4, SPRN_SPRG_RVCPU + mfspr r4, SPRN_SPRG_THREAD + lwz r4, THREAD_KVM_VCPU(r4) stw r3, VCPU_GPR(R3)(r4) stw r5, VCPU_GPR(R5)(r4) stw r6, VCPU_GPR(R6)(r4) @@ -402,9 +403,6 @@ lightweight_exit: lwz r8, kvmppc_booke_handlers@l(r8) mtspr SPRN_IVPR, r8 - /* Save vcpu pointer for the exception handlers. */ - mtspr SPRN_SPRG_WVCPU, r4 - lwz r5, VCPU_SHARED(r4) /* Can't switch the stack pointer until after IVPR is switched, -- cgit v1.2.3 From 1d542d9c2bbca9b99835fef6a938b9ae9dd7ca2a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 15 Jan 2013 22:24:39 +0000 Subject: KVM: PPC: booke: Allow multiple exception types Current kvmppc_booke_handlers uses the same macro (KVM_HANDLER) and all handlers are considered to be the same size. This will not be the case if we want to use different macros for different handlers. This patch improves the kvmppc_booke_handler so that it can support different macros for different handlers. Signed-off-by: Liu Yu [bharat.bhushan@freescale.com: Substantial changes] Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 2 -- arch/powerpc/kvm/booke.c | 14 +++++++++----- arch/powerpc/kvm/booke.h | 1 + arch/powerpc/kvm/booke_interrupts.S | 37 ++++++++++++++++++++++++++++++++++--- arch/powerpc/kvm/e500.c | 16 ++++++++++------ 5 files changed, 54 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 493630e209c8..44a657adf416 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -49,8 +49,6 @@ enum emulation_result { extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -extern char kvmppc_handlers_start[]; -extern unsigned long kvmppc_handler_len; extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8779cd4c52d9..d2f502d209ff 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1594,7 +1594,9 @@ int __init kvmppc_booke_init(void) { #ifndef CONFIG_KVM_BOOKE_HV unsigned long ivor[16]; + unsigned long *handler = kvmppc_booke_handler_addr; unsigned long max_ivor = 0; + unsigned long handler_len; int i; /* We install our own exception handlers by hijacking IVPR. IVPR must @@ -1627,14 +1629,16 @@ int __init kvmppc_booke_init(void) for (i = 0; i < 16; i++) { if (ivor[i] > max_ivor) - max_ivor = ivor[i]; + max_ivor = i; + handler_len = handler[i + 1] - handler[i]; memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); + (void *)handler[i], handler_len); } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + handler_len = handler[max_ivor + 1] - handler[max_ivor]; + flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + + ivor[max_ivor] + handler_len); #endif /* !BOOKE_HV */ return 0; } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index e9b88e433f64..5fd1ba693579 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -65,6 +65,7 @@ (1 << BOOKE_IRQPRIO_CRITICAL)) extern unsigned long kvmppc_booke_handlers; +extern unsigned long kvmppc_booke_handler_addr[]; void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index ca16d57f7686..eae848376440 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -74,6 +74,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr) bctr .endm +.macro KVM_HANDLER_ADDR ivor_nr + .long kvmppc_handler_\ivor_nr +.endm + +.macro KVM_HANDLER_END + .long kvmppc_handlers_end +.endm + _GLOBAL(kvmppc_handlers_start) KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0 @@ -94,9 +102,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 - -_GLOBAL(kvmppc_handler_len) - .long kvmppc_handler_1 - kvmppc_handler_0 +_GLOBAL(kvmppc_handlers_end) /* Registers: * SPRG_SCRATCH0: guest r4 @@ -461,6 +467,31 @@ lightweight_exit: lwz r4, VCPU_GPR(R4)(r4) rfi + .data + .align 4 + .globl kvmppc_booke_handler_addr +kvmppc_booke_handler_addr: +KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE +KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE +KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT +KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM +KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DECREMENTER +KVM_HANDLER_ADDR BOOKE_INTERRUPT_FIT +KVM_HANDLER_ADDR BOOKE_INTERRUPT_WATCHDOG +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DTLB_MISS +KVM_HANDLER_ADDR BOOKE_INTERRUPT_ITLB_MISS +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DEBUG +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_DATA +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_ROUND +KVM_HANDLER_END /*Always keep this in end*/ + #ifdef CONFIG_SPE _GLOBAL(kvmppc_save_guest_spe) cmpi 0,r3,0 diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index b479ed77c515..6dd4de7802bf 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -491,6 +491,9 @@ static int __init kvmppc_e500_init(void) { int r, i; unsigned long ivor[3]; + /* Process remaining handlers above the generic first 16 */ + unsigned long *handler = &kvmppc_booke_handler_addr[16]; + unsigned long handler_len; unsigned long max_ivor = 0; r = kvmppc_core_check_processor_compat(); @@ -506,15 +509,16 @@ static int __init kvmppc_e500_init(void) ivor[1] = mfspr(SPRN_IVOR33); ivor[2] = mfspr(SPRN_IVOR34); for (i = 0; i < 3; i++) { - if (ivor[i] > max_ivor) - max_ivor = ivor[i]; + if (ivor[i] > ivor[max_ivor]) + max_ivor = i; + handler_len = handler[i + 1] - handler[i]; memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + (i + 16) * kvmppc_handler_len, - kvmppc_handler_len); + (void *)handler[i], handler_len); } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + handler_len = handler[max_ivor + 1] - handler[max_ivor]; + flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + + ivor[max_ivor] + handler_len); return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); } -- cgit v1.2.3 From 011da8996263f799a469a761ee15c998d7ef1acb Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 31 Jan 2013 14:17:38 +0100 Subject: KVM: PPC: BookE: Handle alignment interrupts When the guest triggers an alignment interrupt, we don't handle it properly today and instead BUG_ON(). This really shouldn't happen. Instead, we should just pass the interrupt back into the guest so it can deal with it. Reported-by: Gao Guanhua-B22826 Tested-by: Gao Guanhua-B22826 Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 16 +++++++++++++++- arch/powerpc/kvm/booke_interrupts.S | 6 ++++-- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index d2f502d209ff..020923e43134 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -182,6 +182,14 @@ static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); } +static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags) +{ + vcpu->arch.queued_dear = dear_flags; + vcpu->arch.queued_esr = esr_flags; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT); +} + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; @@ -345,6 +353,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: case BOOKE_IRQPRIO_DATA_STORAGE: + case BOOKE_IRQPRIO_ALIGNMENT: update_dear = true; /* fall through */ case BOOKE_IRQPRIO_INST_STORAGE: @@ -358,7 +367,6 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_SPE_FP_DATA: case BOOKE_IRQPRIO_SPE_FP_ROUND: case BOOKE_IRQPRIO_AP_UNAVAIL: - case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; @@ -971,6 +979,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_ALIGNMENT: + kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear, + vcpu->arch.fault_esr); + r = RESUME_GUEST; + break; + #ifdef CONFIG_KVM_BOOKE_HV case BOOKE_INTERRUPT_HV_SYSCALL: if (!(vcpu->arch.shared->msr & MSR_PR)) { diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index eae848376440..f4bb55c96517 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -45,12 +45,14 @@ (1<