Diffstat (limited to 'arch'): 59 files changed, 2953 insertions(+), 1276 deletions(-)
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index aa9e785c59c2..7841b8a60657 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -134,7 +134,13 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); void pnv_power9_force_smt4_catch(void); void pnv_power9_force_smt4_release(void); +/* Transaction memory related */ void tm_enable(void); void tm_disable(void); void tm_abort(uint8_t cause); + +struct kvm_vcpu; +void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); +void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index e7377b73cfec..1f345a0b6ba2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -104,6 +104,7 @@ struct kvmppc_vcore { ulong vtb; /* virtual timebase */ ulong conferring_threads; unsigned int halt_poll_ns; + atomic_t online_count; }; struct kvmppc_vcpu_book3s { @@ -209,6 +210,7 @@ extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); +extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); @@ -256,6 +258,21 @@ extern int kvmppc_hcall_impl_pr(unsigned long cmd); extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu); +void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu); +void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu); +void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu); +#else +static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} +#endif + +void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); + extern int kvm_irq_bypass; static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) @@ -274,12 +291,12 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { - vcpu->arch.gpr[num] = val; + vcpu->arch.regs.gpr[num] = val; } static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) { - return vcpu->arch.gpr[num]; + return vcpu->arch.regs.gpr[num]; } static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) @@ -294,42 +311,42 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.xer = val; + vcpu->arch.regs.xer = val; } static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) { - return vcpu->arch.xer; + return vcpu->arch.regs.xer; } static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.ctr = val; + vcpu->arch.regs.ctr = val; } static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) { - return vcpu->arch.ctr; + return 
vcpu->arch.regs.ctr; } static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.lr = val; + vcpu->arch.regs.link = val; } static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) { - return vcpu->arch.lr; + return vcpu->arch.regs.link; } static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.pc = val; + vcpu->arch.regs.nip = val; } static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) { - return vcpu->arch.pc; + return vcpu->arch.regs.nip; } static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c424e44f4c00..dc435a5af7d6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -483,15 +483,15 @@ static inline u64 sanitize_msr(u64 msr) static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) { vcpu->arch.cr = vcpu->arch.cr_tm; - vcpu->arch.xer = vcpu->arch.xer_tm; - vcpu->arch.lr = vcpu->arch.lr_tm; - vcpu->arch.ctr = vcpu->arch.ctr_tm; + vcpu->arch.regs.xer = vcpu->arch.xer_tm; + vcpu->arch.regs.link = vcpu->arch.lr_tm; + vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; vcpu->arch.amr = vcpu->arch.amr_tm; vcpu->arch.ppr = vcpu->arch.ppr_tm; vcpu->arch.dscr = vcpu->arch.dscr_tm; vcpu->arch.tar = vcpu->arch.tar_tm; - memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm, - sizeof(vcpu->arch.gpr)); + memcpy(vcpu->arch.regs.gpr, vcpu->arch.gpr_tm, + sizeof(vcpu->arch.regs.gpr)); vcpu->arch.fp = vcpu->arch.fp_tm; vcpu->arch.vr = vcpu->arch.vr_tm; vcpu->arch.vrsave = vcpu->arch.vrsave_tm; @@ -500,15 +500,15 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) { vcpu->arch.cr_tm = vcpu->arch.cr; - vcpu->arch.xer_tm = vcpu->arch.xer; - vcpu->arch.lr_tm = vcpu->arch.lr; - vcpu->arch.ctr_tm = vcpu->arch.ctr; + vcpu->arch.xer_tm = vcpu->arch.regs.xer; + vcpu->arch.lr_tm = vcpu->arch.regs.link; + vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; vcpu->arch.amr_tm = vcpu->arch.amr; vcpu->arch.ppr_tm = vcpu->arch.ppr; vcpu->arch.dscr_tm = vcpu->arch.dscr; vcpu->arch.tar_tm = vcpu->arch.tar; - memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr, - sizeof(vcpu->arch.gpr)); + memcpy(vcpu->arch.gpr_tm, vcpu->arch.regs.gpr, + sizeof(vcpu->arch.regs.gpr)); vcpu->arch.fp_tm = vcpu->arch.fp; vcpu->arch.vr_tm = vcpu->arch.vr; vcpu->arch.vrsave_tm = vcpu->arch.vrsave; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index bc6e29e4dfd4..d513e3ed1c65 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -36,12 +36,12 @@ static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { - vcpu->arch.gpr[num] = val; + vcpu->arch.regs.gpr[num] = val; } static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) { - return vcpu->arch.gpr[num]; + return vcpu->arch.regs.gpr[num]; } static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) @@ -56,12 +56,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.xer = val; + vcpu->arch.regs.xer = val; } static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) { - return vcpu->arch.xer; + return vcpu->arch.regs.xer; } static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) @@ -72,32 +72,32 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { - 
vcpu->arch.ctr = val; + vcpu->arch.regs.ctr = val; } static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) { - return vcpu->arch.ctr; + return vcpu->arch.regs.ctr; } static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.lr = val; + vcpu->arch.regs.link = val; } static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) { - return vcpu->arch.lr; + return vcpu->arch.regs.link; } static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) { - vcpu->arch.pc = val; + vcpu->arch.regs.nip = val; } static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) { - return vcpu->arch.pc; + return vcpu->arch.regs.nip; } static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 17498e9a26e4..fa4efa7e88f7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -269,7 +269,6 @@ struct kvm_arch { unsigned long host_lpcr; unsigned long sdr1; unsigned long host_sdr1; - int tlbie_lock; unsigned long lpcr; unsigned long vrma_slb_v; int mmu_ready; @@ -454,6 +453,12 @@ struct mmio_hpte_cache { #define KVMPPC_VSX_COPY_WORD 1 #define KVMPPC_VSX_COPY_DWORD 2 #define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3 +#define KVMPPC_VSX_COPY_WORD_LOAD_DUMP 4 + +#define KVMPPC_VMX_COPY_BYTE 8 +#define KVMPPC_VMX_COPY_HWORD 9 +#define KVMPPC_VMX_COPY_WORD 10 +#define KVMPPC_VMX_COPY_DWORD 11 struct openpic; @@ -486,7 +491,7 @@ struct kvm_vcpu_arch { struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; #endif - ulong gpr[32]; + struct pt_regs regs; struct thread_fp_state fp; @@ -521,14 +526,10 @@ struct kvm_vcpu_arch { u32 qpr[32]; #endif - ulong pc; - ulong ctr; - ulong lr; #ifdef CONFIG_PPC_BOOK3S ulong tar; #endif - ulong xer; u32 cr; #ifdef CONFIG_PPC_BOOK3S @@ -626,7 +627,6 @@ struct kvm_vcpu_arch { struct thread_vr_state vr_tm; u32 vrsave_tm; /* also USPRG0 */ - #endif #ifdef CONFIG_KVM_EXIT_TIMING @@ -681,16 +681,17 @@ struct kvm_vcpu_arch { * Number of simulations for vsx. * If we use 2*8bytes to simulate 1*16bytes, * then the number should be 2 and - * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD. + * mmio_copy_type=KVMPPC_VSX_COPY_DWORD. * If we use 4*4bytes to simulate 1*16bytes, * the number should be 4 and * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD. 
*/ u8 mmio_vsx_copy_nums; u8 mmio_vsx_offset; - u8 mmio_vsx_copy_type; u8 mmio_vsx_tx_sx_enabled; u8 mmio_vmx_copy_nums; + u8 mmio_vmx_offset; + u8 mmio_copy_type; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; @@ -772,6 +773,8 @@ struct kvm_vcpu_arch { u64 busy_preempt; u32 emul_inst; + + u32 online; #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index abe7032cdb54..e991821dd7fa 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,7 +52,7 @@ enum emulation_result { EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; -enum instruction_type { +enum instruction_fetch_type { INST_GENERIC, INST_SC, /* system call */ }; @@ -81,10 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int mmio_sign_extend); -extern int kvmppc_handle_load128_by2x64(struct kvm_run *run, - struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian); -extern int kvmppc_handle_store128_by2x64(struct kvm_run *run, - struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian); +extern int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_default_endian); +extern int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rs, unsigned int bytes, int is_default_endian); extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); @@ -93,7 +93,7 @@ extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, int is_default_endian); extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, - enum instruction_type type, u32 *inst); + enum instruction_fetch_type type, u32 *inst); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); @@ -265,6 +265,8 @@ union kvmppc_one_reg { vector128 vval; u64 vsxval[2]; u32 vsx32val[4]; + u16 vsx16val[8]; + u8 vsx8val[16]; struct { u64 addr; u64 length; @@ -324,13 +326,14 @@ struct kvmppc_ops { int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); int (*set_smt_mode)(struct kvm *kvm, unsigned long mode, unsigned long flags); + void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr); }; extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_pr_ops; static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu, - enum instruction_type type, u32 *inst) + enum instruction_fetch_type type, u32 *inst) { int ret = EMULATE_DONE; u32 fetched_inst; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 75c5b2cd9d66..562568414cf4 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -385,6 +385,7 @@ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ #define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */ #define SPRN_PMCR 0x374 /* Power Management Control Register */ +#define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */ /* HFSCR and FSCR bit numbers are the same */ #define FSCR_SCV_LG 12 /* Enable System Call Vectored */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 833ed9a16adf..1b32b56a03d3 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -633,6 
+633,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) +#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9fc9e0977009..0a0544335950 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -426,20 +426,20 @@ int main(void) OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid); - OFFSET(VCPU_GPRS, kvm_vcpu, arch.gpr); + OFFSET(VCPU_GPRS, kvm_vcpu, arch.regs.gpr); OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave); OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr); #ifdef CONFIG_ALTIVEC OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr); #endif - OFFSET(VCPU_XER, kvm_vcpu, arch.xer); - OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); - OFFSET(VCPU_LR, kvm_vcpu, arch.lr); + OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer); + OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr); + OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link); #ifdef CONFIG_PPC_BOOK3S OFFSET(VCPU_TAR, kvm_vcpu, arch.tar); #endif OFFSET(VCPU_CR, kvm_vcpu, arch.cr); - OFFSET(VCPU_PC, kvm_vcpu, arch.pc); + OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr); OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0); @@ -696,10 +696,10 @@ int main(void) #else /* CONFIG_PPC_BOOK3S */ OFFSET(VCPU_CR, kvm_vcpu, arch.cr); - OFFSET(VCPU_XER, kvm_vcpu, arch.xer); - OFFSET(VCPU_LR, kvm_vcpu, arch.lr); - OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); - OFFSET(VCPU_PC, kvm_vcpu, arch.pc); + OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer); + OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link); + OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr); + OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip); OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9); OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst); OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 4b19da8c87ae..f872c04bb5b1 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -63,6 +63,9 @@ kvm-pr-y := \ book3s_64_mmu.o \ book3s_32_mmu.o +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + tm.o + ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_rmhandlers.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 97d4a112648f..edaf4720d156 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -134,7 +134,7 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); + kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -256,18 +256,15 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, { kvmppc_set_dar(vcpu, dar); kvmppc_set_dsisr(vcpu, flags); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0); } -EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); /* used by kvm_hv */ +EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); void 
kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) { - u64 msr = kvmppc_get_msr(vcpu); - msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); - msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, flags); } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_inst_storage); static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -450,8 +447,8 @@ int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, return r; } -int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, - u32 *inst) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_fetch_type type, u32 *inst) { ulong pc = kvmppc_get_pc(vcpu); int r; @@ -509,8 +506,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - regs->pc = kvmppc_get_pc(vcpu); regs->cr = kvmppc_get_cr(vcpu); regs->ctr = kvmppc_get_ctr(vcpu); @@ -532,7 +527,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); - vcpu_put(vcpu); return 0; } @@ -540,8 +534,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - kvmppc_set_pc(vcpu, regs->pc); kvmppc_set_cr(vcpu, regs->cr); kvmppc_set_ctr(vcpu, regs->ctr); @@ -562,7 +554,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); - vcpu_put(vcpu); return 0; } diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4ad5e287b8bc..14ef03501d21 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -31,4 +31,10 @@ extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, extern int kvmppc_book3s_init_pr(void); extern void kvmppc_book3s_exit_pr(void); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); +#else +static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {} +#endif + #endif diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 1992676c7a94..45c8ea4a0487 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -52,7 +52,7 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) { #ifdef DEBUG_MMU_PTE_IP - return vcpu->arch.pc == DEBUG_MMU_PTE_IP; + return vcpu->arch.regs.nip == DEBUG_MMU_PTE_IP; #else return true; #endif diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index a93d719edc90..cf9d686e8162 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -38,7 +38,16 @@ static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) { - kvmppc_set_msr(vcpu, vcpu->arch.intr_msr); + unsigned long msr = vcpu->arch.intr_msr; + unsigned long cur_msr = kvmppc_get_msr(vcpu); + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(cur_msr)) + msr |= MSR_TS_S; + else + msr |= cur_msr & MSR_TS_MASK; + + kvmppc_set_msr(vcpu, msr); } static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 1b3fcafc685e..7f3a8cf5d66f 100644 --- 
a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -272,6 +272,9 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; + if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) + return -EINVAL; + /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ host_lpid = mfspr(SPRN_LPID); rsvd_lpid = LPID_RSVD; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 481da8f93fa4..176f911ee983 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -139,44 +139,24 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return 0; } -#ifdef CONFIG_PPC_64K_PAGES -#define MMU_BASE_PSIZE MMU_PAGE_64K -#else -#define MMU_BASE_PSIZE MMU_PAGE_4K -#endif - static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, unsigned int pshift) { - int psize = MMU_BASE_PSIZE; - - if (pshift >= PUD_SHIFT) - psize = MMU_PAGE_1G; - else if (pshift >= PMD_SHIFT) - psize = MMU_PAGE_2M; - addr &= ~0xfffUL; - addr |= mmu_psize_defs[psize].ap << 5; - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) - : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) - asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) - : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); + unsigned long psize = PAGE_SIZE; + + if (pshift) + psize = 1UL << pshift; + + addr &= ~(psize - 1); + radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize); } -static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) +static void kvmppc_radix_flush_pwc(struct kvm *kvm) { - unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ - - asm volatile("ptesync": : :"memory"); - /* RIC=1 PRS=0 R=1 IS=2 */ - asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1) - : : "r" (rb), "r" (kvm->arch.lpid) : "memory"); - asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); + radix__flush_pwc_lpid(kvm->arch.lpid); } -unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, +static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, unsigned long clr, unsigned long set, unsigned long addr, unsigned int shift) { @@ -228,6 +208,167 @@ static void kvmppc_pmd_free(pmd_t *pmdp) kmem_cache_free(kvm_pmd_cache, pmdp); } +static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, + unsigned long gpa, unsigned int shift) + +{ + unsigned long page_size = 1ul << shift; + unsigned long old; + + old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift); + kvmppc_radix_tlbie_page(kvm, gpa, shift); + if (old & _PAGE_DIRTY) { + unsigned long gfn = gpa >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + memslot = gfn_to_memslot(kvm, gfn); + if (memslot && memslot->dirty_bitmap) + kvmppc_update_dirty_map(memslot, gfn, page_size); + } +} + +/* + * kvmppc_free_p?d are used to free existing page tables, and recursively + * descend and clear and free children. + * Callers are responsible for flushing the PWC. + * + * When page tables are being unmapped/freed as part of page fault path + * (full == false), ptes are not expected. There is code to unmap them + * and emit a warning if encountered, but there may already be data + * corruption due to the unexpected mappings. 
+ */ +static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full) +{ + if (full) { + memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); + } else { + pte_t *p = pte; + unsigned long it; + + for (it = 0; it < PTRS_PER_PTE; ++it, ++p) { + if (pte_val(*p) == 0) + continue; + WARN_ON_ONCE(1); + kvmppc_unmap_pte(kvm, p, + pte_pfn(*p) << PAGE_SHIFT, + PAGE_SHIFT); + } + } + + kvmppc_pte_free(pte); +} + +static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full) +{ + unsigned long im; + pmd_t *p = pmd; + + for (im = 0; im < PTRS_PER_PMD; ++im, ++p) { + if (!pmd_present(*p)) + continue; + if (pmd_is_leaf(*p)) { + if (full) { + pmd_clear(p); + } else { + WARN_ON_ONCE(1); + kvmppc_unmap_pte(kvm, (pte_t *)p, + pte_pfn(*(pte_t *)p) << PAGE_SHIFT, + PMD_SHIFT); + } + } else { + pte_t *pte; + + pte = pte_offset_map(p, 0); + kvmppc_unmap_free_pte(kvm, pte, full); + pmd_clear(p); + } + } + kvmppc_pmd_free(pmd); +} + +static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud) +{ + unsigned long iu; + pud_t *p = pud; + + for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) { + if (!pud_present(*p)) + continue; + if (pud_huge(*p)) { + pud_clear(p); + } else { + pmd_t *pmd; + + pmd = pmd_offset(p, 0); + kvmppc_unmap_free_pmd(kvm, pmd, true); + pud_clear(p); + } + } + pud_free(kvm->mm, pud); +} + +void kvmppc_free_radix(struct kvm *kvm) +{ + unsigned long ig; + pgd_t *pgd; + + if (!kvm->arch.pgtable) + return; + pgd = kvm->arch.pgtable; + for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { + pud_t *pud; + + if (!pgd_present(*pgd)) + continue; + pud = pud_offset(pgd, 0); + kvmppc_unmap_free_pud(kvm, pud); + pgd_clear(pgd); + } + pgd_free(kvm->mm, kvm->arch.pgtable); + kvm->arch.pgtable = NULL; +} + +static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd, + unsigned long gpa) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + + /* + * Clearing the pmd entry then flushing the PWC ensures that the pte + * page no longer be cached by the MMU, so can be freed without + * flushing the PWC again. + */ + pmd_clear(pmd); + kvmppc_radix_flush_pwc(kvm); + + kvmppc_unmap_free_pte(kvm, pte, false); +} + +static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, + unsigned long gpa) +{ + pmd_t *pmd = pmd_offset(pud, 0); + + /* + * Clearing the pud entry then flushing the PWC ensures that the pmd + * page and any children pte pages will no longer be cached by the MMU, + * so can be freed without flushing the PWC again. + */ + pud_clear(pud); + kvmppc_radix_flush_pwc(kvm); + + kvmppc_unmap_free_pmd(kvm, pmd, false); +} + +/* + * There are a number of bits which may differ between different faults to + * the same partition scope entry. RC bits, in the course of cleaning and + * aging. And the write bit can change, either the access could have been + * upgraded, or a read fault could happen concurrently with a write fault + * that sets those bits first. 
+ */ +#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)) + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, unsigned int level, unsigned long mmu_seq) { @@ -235,7 +376,6 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, pud_t *pud, *new_pud = NULL; pmd_t *pmd, *new_pmd = NULL; pte_t *ptep, *new_ptep = NULL; - unsigned long old; int ret; /* Traverse the guest's 2nd-level tree, allocate new levels needed */ @@ -273,42 +413,39 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, if (pud_huge(*pud)) { unsigned long hgpa = gpa & PUD_MASK; + /* Check if we raced and someone else has set the same thing */ + if (level == 2) { + if (pud_raw(*pud) == pte_raw(pte)) { + ret = 0; + goto out_unlock; + } + /* Valid 1GB page here already, add our extra bits */ + WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) & + PTE_BITS_MUST_MATCH); + kvmppc_radix_update_pte(kvm, (pte_t *)pud, + 0, pte_val(pte), hgpa, PUD_SHIFT); + ret = 0; + goto out_unlock; + } /* * If we raced with another CPU which has just put * a 1GB pte in after we saw a pmd page, try again. */ - if (level <= 1 && !new_pmd) { + if (!new_pmd) { ret = -EAGAIN; goto out_unlock; } - /* Check if we raced and someone else has set the same thing */ - if (level == 2 && pud_raw(*pud) == pte_raw(pte)) { - ret = 0; - goto out_unlock; - } /* Valid 1GB page here already, remove it */ - old = kvmppc_radix_update_pte(kvm, (pte_t *)pud, - ~0UL, 0, hgpa, PUD_SHIFT); - kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT); - if (old & _PAGE_DIRTY) { - unsigned long gfn = hgpa >> PAGE_SHIFT; - struct kvm_memory_slot *memslot; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && memslot->dirty_bitmap) - kvmppc_update_dirty_map(memslot, - gfn, PUD_SIZE); - } + kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT); } if (level == 2) { if (!pud_none(*pud)) { /* * There's a page table page here, but we wanted to * install a large page, so remove and free the page - * table page. new_pmd will be NULL since level == 2. + * table page. */ - new_pmd = pmd_offset(pud, 0); - pud_clear(pud); - kvmppc_radix_flush_pwc(kvm, gpa); + kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa); } kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte); ret = 0; @@ -324,42 +461,40 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, if (pmd_is_leaf(*pmd)) { unsigned long lgpa = gpa & PMD_MASK; + /* Check if we raced and someone else has set the same thing */ + if (level == 1) { + if (pmd_raw(*pmd) == pte_raw(pte)) { + ret = 0; + goto out_unlock; + } + /* Valid 2MB page here already, add our extra bits */ + WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) & + PTE_BITS_MUST_MATCH); + kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), + 0, pte_val(pte), lgpa, PMD_SHIFT); + ret = 0; + goto out_unlock; + } + /* * If we raced with another CPU which has just put * a 2MB pte in after we saw a pte page, try again. 
*/ - if (level == 0 && !new_ptep) { + if (!new_ptep) { ret = -EAGAIN; goto out_unlock; } - /* Check if we raced and someone else has set the same thing */ - if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) { - ret = 0; - goto out_unlock; - } /* Valid 2MB page here already, remove it */ - old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), - ~0UL, 0, lgpa, PMD_SHIFT); - kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT); - if (old & _PAGE_DIRTY) { - unsigned long gfn = lgpa >> PAGE_SHIFT; - struct kvm_memory_slot *memslot; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && memslot->dirty_bitmap) - kvmppc_update_dirty_map(memslot, - gfn, PMD_SIZE); - } + kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT); } if (level == 1) { if (!pmd_none(*pmd)) { /* * There's a page table page here, but we wanted to * install a large page, so remove and free the page - * table page. new_ptep will be NULL since level == 1. + * table page. */ - new_ptep = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvmppc_radix_flush_pwc(kvm, gpa); + kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa); } kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); ret = 0; @@ -378,12 +513,12 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, ret = 0; goto out_unlock; } - /* PTE was previously valid, so invalidate it */ - old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, - 0, gpa, 0); - kvmppc_radix_tlbie_page(kvm, gpa, 0); - if (old & _PAGE_DIRTY) - mark_page_dirty(kvm, gpa >> PAGE_SHIFT); + /* Valid page here already, add our extra bits */ + WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) & + PTE_BITS_MUST_MATCH); + kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0); + ret = 0; + goto out_unlock; } kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); ret = 0; @@ -565,9 +700,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long mask = (1ul << shift) - PAGE_SIZE; pte = __pte(pte_val(pte) | (hva & mask)); } - if (!(writing || upgrade_write)) - pte = __pte(pte_val(pte) & ~ _PAGE_WRITE); - pte = __pte(pte_val(pte) | _PAGE_EXEC); + pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); + if (writing || upgrade_write) { + if (pte_val(pte) & _PAGE_WRITE) + pte = __pte(pte_val(pte) | _PAGE_DIRTY); + } else { + pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + } } /* Allocate space in the tree and write the PTE */ @@ -734,51 +873,6 @@ int kvmppc_init_vm_radix(struct kvm *kvm) return 0; } -void kvmppc_free_radix(struct kvm *kvm) -{ - unsigned long ig, iu, im; - pte_t *pte; - pmd_t *pmd; - pud_t *pud; - pgd_t *pgd; - - if (!kvm->arch.pgtable) - return; - pgd = kvm->arch.pgtable; - for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { - if (!pgd_present(*pgd)) - continue; - pud = pud_offset(pgd, 0); - for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) { - if (!pud_present(*pud)) - continue; - if (pud_huge(*pud)) { - pud_clear(pud); - continue; - } - pmd = pmd_offset(pud, 0); - for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { - if (pmd_is_leaf(*pmd)) { - pmd_clear(pmd); - continue; - } - if (!pmd_present(*pmd)) - continue; - pte = pte_offset_map(pmd, 0); - memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); - kvmppc_pte_free(pte); - pmd_clear(pmd); - } - kvmppc_pmd_free(pmd_offset(pud, 0)); - pud_clear(pud); - } - pud_free(kvm->mm, pud_offset(pgd, 0)); - pgd_clear(pgd); - } - pgd_free(kvm->mm, kvm->arch.pgtable); - kvm->arch.pgtable = NULL; -} - static void pte_ctor(void *addr) { memset(addr, 0, RADIX_PTE_TABLE_SIZE); diff --git a/arch/powerpc/kvm/book3s_64_vio.c 
b/arch/powerpc/kvm/book3s_64_vio.c index 4dffa611376d..d066e37551ec 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -176,14 +176,12 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!tbltmp) continue; - /* - * Make sure hardware table parameters are exactly the same; - * this is used in the TCE handlers where boundary checks - * use only the first attached table. - */ - if ((tbltmp->it_page_shift == stt->page_shift) && - (tbltmp->it_offset == stt->offset) && - (tbltmp->it_size == stt->size)) { + /* Make sure hardware table parameters are compatible */ + if ((tbltmp->it_page_shift <= stt->page_shift) && + (tbltmp->it_offset << tbltmp->it_page_shift == + stt->offset << stt->page_shift) && + (tbltmp->it_size << tbltmp->it_page_shift == + stt->size << stt->page_shift)) { /* * Reference the table to avoid races with * add/remove DMA windows. @@ -237,7 +235,7 @@ static void release_spapr_tce_table(struct rcu_head *head) kfree(stt); } -static int kvm_spapr_tce_fault(struct vm_fault *vmf) +static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) { struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; struct page *page; @@ -302,7 +300,8 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, int ret = -ENOMEM; int i; - if (!args->size) + if (!args->size || args->page_shift < 12 || args->page_shift > 34 || + (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) return -EINVAL; size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); @@ -396,7 +395,7 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, return H_SUCCESS; } -static long kvmppc_tce_iommu_unmap(struct kvm *kvm, +static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry) { enum dma_data_direction dir = DMA_NONE; @@ -416,7 +415,24 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, return ret; } -long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, +static long kvmppc_tce_iommu_unmap(struct kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry) +{ + unsigned long i, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0; i < subpages; ++i) { + ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + +long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry, unsigned long ua, enum dma_data_direction dir) { @@ -453,6 +469,27 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, return 0; } +static long kvmppc_tce_iommu_map(struct kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + unsigned long i, pgoff, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0, pgoff = 0; i < subpages; + ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) { + + ret = kvmppc_tce_iommu_do_map(kvm, tbl, + io_entry + i, ua + pgoff, dir); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { @@ -491,10 +528,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { if (dir == DMA_NONE) - ret = 
kvmppc_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry); else - ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, + ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); if (ret == H_SUCCESS) @@ -570,7 +607,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - ret = kvmppc_tce_iommu_map(vcpu->kvm, + ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry + i, ua, iommu_tce_direction(tce)); @@ -615,10 +652,10 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - unsigned long entry = ioba >> stit->tbl->it_page_shift; + unsigned long entry = ioba >> stt->page_shift; for (i = 0; i < npages; ++i) { - ret = kvmppc_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry + i); if (ret == H_SUCCESS) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 6651f736a0b1..925fc316a104 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -221,7 +221,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, return H_SUCCESS; } -static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, +static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry) { enum dma_data_direction dir = DMA_NONE; @@ -245,7 +245,24 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, return ret; } -static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, +static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry) +{ + unsigned long i, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0; i < subpages; ++i) { + ret = kvmppc_rm_tce_iommu_do_unmap(kvm, tbl, io_entry + i); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + +static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry, unsigned long ua, enum dma_data_direction dir) { @@ -290,6 +307,27 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, return 0; } +static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, + struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + unsigned long i, pgoff, ret = H_SUCCESS; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry * subpages; + + for (i = 0, pgoff = 0; i < subpages; + ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) { + + ret = kvmppc_rm_tce_iommu_do_map(kvm, tbl, + io_entry + i, ua + pgoff, dir); + if (ret != H_SUCCESS) + break; + } + + return ret; +} + long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { @@ -327,10 +365,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { if (dir == DMA_NONE) - ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry); else - ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); if (ret == H_SUCCESS) @@ -477,7 +515,7 @@ long kvmppc_rm_h_put_tce_indirect(struct 
kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry + i, ua, iommu_tce_direction(tce)); @@ -526,10 +564,10 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, return H_PARAMETER; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - unsigned long entry = ioba >> stit->tbl->it_page_shift; + unsigned long entry = ioba >> stt->page_shift; for (i = 0; i < npages; ++i) { - ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt, stit->tbl, entry + i); if (ret == H_SUCCESS) @@ -571,7 +609,7 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, page = stt->pages[idx / TCES_PER_PAGE]; tbl = (u64 *)page_address(page); - vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; + vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 68d68983948e..36b11c5a0dbb 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -23,7 +23,9 @@ #include <asm/reg.h> #include <asm/switch_to.h> #include <asm/time.h> +#include <asm/tm.h> #include "book3s.h" +#include <asm/asm-prototypes.h> #define OP_19_XOP_RFID 18 #define OP_19_XOP_RFI 50 @@ -47,6 +49,12 @@ #define OP_31_XOP_EIOIO 854 #define OP_31_XOP_SLBMFEE 915 +#define OP_31_XOP_TBEGIN 654 +#define OP_31_XOP_TABORT 910 + +#define OP_31_XOP_TRECLAIM 942 +#define OP_31_XOP_TRCHKPT 1006 + /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ #define OP_31_XOP_DCBZ 1010 @@ -87,6 +95,157 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level) return true; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu) +{ + memcpy(&vcpu->arch.gpr_tm[0], &vcpu->arch.regs.gpr[0], + sizeof(vcpu->arch.gpr_tm)); + memcpy(&vcpu->arch.fp_tm, &vcpu->arch.fp, + sizeof(struct thread_fp_state)); + memcpy(&vcpu->arch.vr_tm, &vcpu->arch.vr, + sizeof(struct thread_vr_state)); + vcpu->arch.ppr_tm = vcpu->arch.ppr; + vcpu->arch.dscr_tm = vcpu->arch.dscr; + vcpu->arch.amr_tm = vcpu->arch.amr; + vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; + vcpu->arch.tar_tm = vcpu->arch.tar; + vcpu->arch.lr_tm = vcpu->arch.regs.link; + vcpu->arch.cr_tm = vcpu->arch.cr; + vcpu->arch.xer_tm = vcpu->arch.regs.xer; + vcpu->arch.vrsave_tm = vcpu->arch.vrsave; +} + +static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu) +{ + memcpy(&vcpu->arch.regs.gpr[0], &vcpu->arch.gpr_tm[0], + sizeof(vcpu->arch.regs.gpr)); + memcpy(&vcpu->arch.fp, &vcpu->arch.fp_tm, + sizeof(struct thread_fp_state)); + memcpy(&vcpu->arch.vr, &vcpu->arch.vr_tm, + sizeof(struct thread_vr_state)); + vcpu->arch.ppr = vcpu->arch.ppr_tm; + vcpu->arch.dscr = vcpu->arch.dscr_tm; + vcpu->arch.amr = vcpu->arch.amr_tm; + vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; + vcpu->arch.tar = vcpu->arch.tar_tm; + vcpu->arch.regs.link = vcpu->arch.lr_tm; + vcpu->arch.cr = vcpu->arch.cr_tm; + vcpu->arch.regs.xer = vcpu->arch.xer_tm; + vcpu->arch.vrsave = vcpu->arch.vrsave_tm; +} + +static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) +{ + unsigned long guest_msr = kvmppc_get_msr(vcpu); + int fc_val = ra_val ? 
ra_val : 1; + uint64_t texasr; + + /* CR0 = 0 | MSR[TS] | 0 */ + vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | + (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) + << CR0_SHIFT); + + preempt_disable(); + tm_enable(); + texasr = mfspr(SPRN_TEXASR); + kvmppc_save_tm_pr(vcpu); + kvmppc_copyfrom_vcpu_tm(vcpu); + + /* failure recording depends on Failure Summary bit */ + if (!(texasr & TEXASR_FS)) { + texasr &= ~TEXASR_FC; + texasr |= ((u64)fc_val << TEXASR_FC_LG) | TEXASR_FS; + + texasr &= ~(TEXASR_PR | TEXASR_HV); + if (kvmppc_get_msr(vcpu) & MSR_PR) + texasr |= TEXASR_PR; + + if (kvmppc_get_msr(vcpu) & MSR_HV) + texasr |= TEXASR_HV; + + vcpu->arch.texasr = texasr; + vcpu->arch.tfiar = kvmppc_get_pc(vcpu); + mtspr(SPRN_TEXASR, texasr); + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); + } + tm_disable(); + /* + * treclaim need quit to non-transactional state. + */ + guest_msr &= ~(MSR_TS_MASK); + kvmppc_set_msr(vcpu, guest_msr); + preempt_enable(); + + if (vcpu->arch.shadow_fscr & FSCR_TAR) + mtspr(SPRN_TAR, vcpu->arch.tar); +} + +static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu) +{ + unsigned long guest_msr = kvmppc_get_msr(vcpu); + + preempt_disable(); + /* + * need flush FP/VEC/VSX to vcpu save area before + * copy. + */ + kvmppc_giveup_ext(vcpu, MSR_VSX); + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + kvmppc_copyto_vcpu_tm(vcpu); + kvmppc_save_tm_sprs(vcpu); + + /* + * as a result of trecheckpoint. set TS to suspended. + */ + guest_msr &= ~(MSR_TS_MASK); + guest_msr |= MSR_TS_S; + kvmppc_set_msr(vcpu, guest_msr); + kvmppc_restore_tm_pr(vcpu); + preempt_enable(); +} + +/* emulate tabort. at guest privilege state */ +void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) +{ + /* currently we only emulate tabort. but no emulation of other + * tabort variants since there is no kernel usage of them at + * present. + */ + unsigned long guest_msr = kvmppc_get_msr(vcpu); + uint64_t org_texasr; + + preempt_disable(); + tm_enable(); + org_texasr = mfspr(SPRN_TEXASR); + tm_abort(ra_val); + + /* CR0 = 0 | MSR[TS] | 0 */ + vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | + (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) + << CR0_SHIFT); + + vcpu->arch.texasr = mfspr(SPRN_TEXASR); + /* failure recording depends on Failure Summary bit, + * and tabort will be treated as nops in non-transactional + * state. + */ + if (!(org_texasr & TEXASR_FS) && + MSR_TM_ACTIVE(guest_msr)) { + vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV); + if (guest_msr & MSR_PR) + vcpu->arch.texasr |= TEXASR_PR; + + if (guest_msr & MSR_HV) + vcpu->arch.texasr |= TEXASR_HV; + + vcpu->arch.tfiar = kvmppc_get_pc(vcpu); + } + tm_disable(); + preempt_enable(); +} + +#endif + int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -117,11 +276,28 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case 19: switch (get_xop(inst)) { case OP_19_XOP_RFID: - case OP_19_XOP_RFI: + case OP_19_XOP_RFI: { + unsigned long srr1 = kvmppc_get_srr1(vcpu); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsigned long cur_msr = kvmppc_get_msr(vcpu); + + /* + * add rules to fit in ISA specification regarding TM + * state transistion in TM disable/Suspended state, + * and target TM state is TM inactive(00) state. (the + * change should be suppressed). 
+ */ + if (((cur_msr & MSR_TM) == 0) && + ((srr1 & MSR_TM) == 0) && + MSR_TM_SUSPENDED(cur_msr) && + !MSR_TM_ACTIVE(srr1)) + srr1 |= MSR_TS_S; +#endif kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu)); - kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu)); + kvmppc_set_msr(vcpu, srr1); *advance = 0; break; + } default: emulated = EMULATE_FAIL; @@ -304,6 +480,140 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case OP_31_XOP_TBEGIN: + { + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + if (!(kvmppc_get_msr(vcpu) & MSR_PR)) { + preempt_disable(); + vcpu->arch.cr = (CR0_TBEGIN_FAILURE | + (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT))); + + vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT | + (((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) + << TEXASR_FC_LG)); + + if ((inst >> 21) & 0x1) + vcpu->arch.texasr |= TEXASR_ROT; + + if (kvmppc_get_msr(vcpu) & MSR_HV) + vcpu->arch.texasr |= TEXASR_HV; + + vcpu->arch.tfhar = kvmppc_get_pc(vcpu) + 4; + vcpu->arch.tfiar = kvmppc_get_pc(vcpu); + + kvmppc_restore_tm_sprs(vcpu); + preempt_enable(); + } else + emulated = EMULATE_FAIL; + break; + } + case OP_31_XOP_TABORT: + { + ulong guest_msr = kvmppc_get_msr(vcpu); + unsigned long ra_val = 0; + + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + /* only emulate for privilege guest, since problem state + * guest can run with TM enabled and we don't expect to + * trap at here for that case. + */ + WARN_ON(guest_msr & MSR_PR); + + if (ra) + ra_val = kvmppc_get_gpr(vcpu, ra); + + kvmppc_emulate_tabort(vcpu, ra_val); + break; + } + case OP_31_XOP_TRECLAIM: + { + ulong guest_msr = kvmppc_get_msr(vcpu); + unsigned long ra_val = 0; + + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + /* generate interrupts based on priorities */ + if (guest_msr & MSR_PR) { + /* Privileged Instruction type Program Interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); + emulated = EMULATE_AGAIN; + break; + } + + if (!MSR_TM_ACTIVE(guest_msr)) { + /* TM bad thing interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + emulated = EMULATE_AGAIN; + break; + } + + if (ra) + ra_val = kvmppc_get_gpr(vcpu, ra); + kvmppc_emulate_treclaim(vcpu, ra_val); + break; + } + case OP_31_XOP_TRCHKPT: + { + ulong guest_msr = kvmppc_get_msr(vcpu); + unsigned long texasr; + + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + /* generate interrupt based on priorities */ + if (guest_msr & MSR_PR) { + /* Privileged Instruction type Program Intr */ + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); + emulated = EMULATE_AGAIN; + break; + } + + tm_enable(); + texasr = mfspr(SPRN_TEXASR); + tm_disable(); + + if (MSR_TM_ACTIVE(guest_msr) || + !(texasr & (TEXASR_FS))) { + /* TM bad thing interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + emulated = EMULATE_AGAIN; + break; + } + + kvmppc_emulate_trchkpt(vcpu); + break; + } +#endif default: emulated = EMULATE_FAIL; } @@ -465,13 +775,38 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong 
spr_val) break; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case SPRN_TFHAR: - vcpu->arch.tfhar = spr_val; - break; case SPRN_TEXASR: - vcpu->arch.texasr = spr_val; - break; case SPRN_TFIAR: - vcpu->arch.tfiar = spr_val; + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)) && + !((MSR_TM_SUSPENDED(kvmppc_get_msr(vcpu))) && + (sprn == SPRN_TFHAR))) { + /* it is illegal to mtspr() TM regs in + * other than non-transactional state, with + * the exception of TFHAR in suspend state. + */ + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + emulated = EMULATE_AGAIN; + break; + } + + tm_enable(); + if (sprn == SPRN_TFHAR) + mtspr(SPRN_TFHAR, spr_val); + else if (sprn == SPRN_TEXASR) + mtspr(SPRN_TEXASR, spr_val); + else + mtspr(SPRN_TFIAR, spr_val); + tm_disable(); + break; #endif #endif @@ -618,13 +953,25 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val break; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case SPRN_TFHAR: - *spr_val = vcpu->arch.tfhar; - break; case SPRN_TEXASR: - *spr_val = vcpu->arch.texasr; - break; case SPRN_TFIAR: - *spr_val = vcpu->arch.tfiar; + if (!cpu_has_feature(CPU_FTR_TM)) + break; + + if (!(kvmppc_get_msr(vcpu) & MSR_TM)) { + kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG); + emulated = EMULATE_AGAIN; + break; + } + + tm_enable(); + if (sprn == SPRN_TFHAR) + *spr_val = mfspr(SPRN_TFHAR); + else if (sprn == SPRN_TEXASR) + *spr_val = mfspr(SPRN_TEXASR); + else if (sprn == SPRN_TFIAR) + *spr_val = mfspr(SPRN_TFIAR); + tm_disable(); break; #endif #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8858ab8b6ca4..de686b340f4a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -123,6 +123,32 @@ static bool no_mixing_hpt_and_radix; static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +/* + * RWMR values for POWER8. These control the rate at which PURR + * and SPURR count and should be set according to the number of + * online threads in the vcore being run. 
+ */ +#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9 +#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9 +#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA + +static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = { + RWMR_RPA_P8_1THREAD, + RWMR_RPA_P8_1THREAD, + RWMR_RPA_P8_2THREAD, + RWMR_RPA_P8_3THREAD, + RWMR_RPA_P8_4THREAD, + RWMR_RPA_P8_5THREAD, + RWMR_RPA_P8_6THREAD, + RWMR_RPA_P8_7THREAD, + RWMR_RPA_P8_8THREAD, +}; + static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, int *ip) { @@ -371,13 +397,13 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id); pr_err("pc = %.16lx msr = %.16llx trap = %x\n", - vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap); + vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap); for (r = 0; r < 16; ++r) pr_err("r%2d = %.16lx r%d = %.16lx\n", r, kvmppc_get_gpr(vcpu, r), r+16, kvmppc_get_gpr(vcpu, r+16)); pr_err("ctr = %.16lx lr = %.16lx\n", - vcpu->arch.ctr, vcpu->arch.lr); + vcpu->arch.regs.ctr, vcpu->arch.regs.link); pr_err("srr0 = %.16llx srr1 = %.16llx\n", vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1); pr_err("sprg0 = %.16llx sprg1 = %.16llx\n", @@ -385,7 +411,7 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) pr_err("sprg2 = %.16llx sprg3 = %.16llx\n", vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3); pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n", - vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr); + vcpu->arch.cr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr); pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar); pr_err("fault dar = %.16lx dsisr = %.8x\n", vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); @@ -1526,6 +1552,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.dec_expires + vcpu->arch.vcore->tb_offset); break; + case KVM_REG_PPC_ONLINE: + *val = get_reg_val(id, vcpu->arch.online); + break; default: r = -EINVAL; break; @@ -1757,6 +1786,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.dec_expires = set_reg_val(id, *val) - vcpu->arch.vcore->tb_offset; break; + case KVM_REG_PPC_ONLINE: + i = set_reg_val(id, *val); + if (i && !vcpu->arch.online) + atomic_inc(&vcpu->arch.vcore->online_count); + else if (!i && vcpu->arch.online) + atomic_dec(&vcpu->arch.vcore->online_count); + vcpu->arch.online = i; + break; default: r = -EINVAL; break; @@ -2850,6 +2887,25 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } } + /* + * On POWER8, set RWMR register. + * Since it only affects PURR and SPURR, it doesn't affect + * the host, so we don't save/restore the host value. + */ + if (is_power8) { + unsigned long rwmr_val = RWMR_RPA_P8_8THREAD; + int n_online = atomic_read(&vc->online_count); + + /* + * Use the 8-thread value if we're doing split-core + * or if the vcore's online count looks bogus. 
+ */ + if (split == 1 && threads_per_subcore == MAX_SMT_THREADS && + n_online >= 1 && n_online <= MAX_SMT_THREADS) + rwmr_val = p8_rwmr_values[n_online]; + mtspr(SPRN_RWMR, rwmr_val); + } + /* Start all the threads */ active = 0; for (sub = 0; sub < core_info.n_subcores; ++sub) { @@ -2902,6 +2958,32 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) for (sub = 0; sub < core_info.n_subcores; ++sub) spin_unlock(&core_info.vc[sub]->lock); + if (kvm_is_radix(vc->kvm)) { + int tmp = pcpu; + + /* + * Do we need to flush the process scoped TLB for the LPAR? + * + * On POWER9, individual threads can come in here, but the + * TLB is shared between the 4 threads in a core, hence + * invalidating on one thread invalidates for all. + * Thus we make all 4 threads use the same bit here. + * + * Hash must be flushed in realmode in order to use tlbiel. + */ + mtspr(SPRN_LPID, vc->kvm->arch.lpid); + isync(); + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + tmp &= ~0x3UL; + + if (cpumask_test_cpu(tmp, &vc->kvm->arch.need_tlb_flush)) { + radix__local_flush_tlb_lpid_guest(vc->kvm->arch.lpid); + /* Clear the bit after the TLB flush */ + cpumask_clear_cpu(tmp, &vc->kvm->arch.need_tlb_flush); + } + } + /* * Interrupts will be enabled once we get into the guest, * so tell lockdep that we're about to enable interrupts. @@ -3356,6 +3438,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } #endif + /* + * Force online to 1 for the sake of old userspace which doesn't + * set it. + */ + if (!vcpu->arch.online) { + atomic_inc(&vcpu->arch.vcore->online_count); + vcpu->arch.online = 1; + } + kvmppc_core_prepare_to_enter(vcpu); /* No need to go into the guest when all we'll do is come back out */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index de18299f92b7..d4a3f4da409b 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -18,6 +18,7 @@ #include <linux/cma.h> #include <linux/bitops.h> +#include <asm/asm-prototypes.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -211,9 +212,9 @@ long kvmppc_h_random(struct kvm_vcpu *vcpu) /* Only need to do the expensive mfmsr() on radix */ if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR)) - r = powernv_get_random_long(&vcpu->arch.gpr[4]); + r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]); else - r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]); + r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]); if (r) return H_SUCCESS; @@ -562,7 +563,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - vcpu->arch.gpr[5] = get_tb(); + vcpu->arch.regs.gpr[5] = get_tb(); if (xive_enabled()) { if (is_rm()) return xive_rm_h_xirr(vcpu); @@ -633,7 +634,19 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) void kvmppc_bad_interrupt(struct pt_regs *regs) { - die("Bad interrupt in KVM entry/exit code", regs, SIGABRT); + /* + * 100 could happen at any time, 200 can happen due to invalid real + * address access for example (or any time due to a hardware problem). 
+ */ + if (TRAP(regs) == 0x100) { + get_paca()->in_nmi++; + system_reset_exception(regs); + get_paca()->in_nmi--; + } else if (TRAP(regs) == 0x200) { + machine_check_exception(regs); + } else { + die("Bad interrupt in KVM entry/exit code", regs, SIGABRT); + } panic("Bad KVM trap"); } diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 0e8493033288..82f2ff9410b6 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -137,7 +137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* * We return here in virtual mode after the guest exits * with something that we can't handle in real mode. - * Interrupts are enabled again at this point. + * Interrupts are still hard-disabled. */ /* diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 78e6a392330f..1f22d9e977d4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -418,7 +418,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel) { return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, - vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]); + vcpu->arch.pgdir, true, + &vcpu->arch.regs.gpr[4]); } #ifdef __BIG_ENDIAN__ @@ -434,24 +435,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r) (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); } -static inline int try_lock_tlbie(unsigned int *lock) -{ - unsigned int tmp, old; - unsigned int token = LOCK_TOKEN; - - asm volatile("1:lwarx %1,0,%2\n" - " cmpwi cr0,%1,0\n" - " bne 2f\n" - " stwcx. %3,0,%2\n" - " bne- 1b\n" - " isync\n" - "2:" - : "=&r" (tmp), "=&r" (old) - : "r" (lock), "r" (token) - : "cc", "memory"); - return old == 0; -} - static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long npages, int global, bool need_sync) { @@ -463,8 +446,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, * the RS field, this is backwards-compatible with P7 and P8. 
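Since the four SMT threads of a POWER9 core share a TLB, the radix TLB-flush hunk in kvmppc_run_core() above keeps only one need_tlb_flush bit per core: each thread masks its CPU number down to the core's first thread before testing and clearing the bit. A minimal, standalone illustration of that mapping (the helper name is made up):

#include <stdio.h>

/*
 * On CPUs where the TLB is shared by the 4 threads of a core (POWER9),
 * fold the CPU number onto thread 0 of its core so that all threads
 * agree on a single flush-pending bit; otherwise use the CPU as-is.
 */
static int tlb_flush_bit(int cpu, int shared_core_tlb)
{
	return shared_core_tlb ? (cpu & ~0x3) : cpu;
}

int main(void)
{
	for (int cpu = 40; cpu < 44; cpu++)
		printf("cpu %d -> bit %d\n", cpu, tlb_flush_bit(cpu, 1));
	return 0;
}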
*/ if (global) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) { @@ -483,7 +464,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, } asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; } else { if (need_sync) asm volatile("ptesync" : : : "memory"); @@ -561,13 +541,13 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index, unsigned long avpn) { return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, - &vcpu->arch.gpr[4]); + &vcpu->arch.regs.gpr[4]); } long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - unsigned long *args = &vcpu->arch.gpr[4]; + unsigned long *args = &vcpu->arch.regs.gpr[4]; __be64 *hp, *hptes[4]; unsigned long tlbrb[4]; long int i, j, k, n, found, indexes[4]; @@ -787,8 +767,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); r &= ~HPTE_GR_RESERVED; } - vcpu->arch.gpr[4 + i * 2] = v; - vcpu->arch.gpr[5 + i * 2] = r; + vcpu->arch.regs.gpr[4 + i * 2] = v; + vcpu->arch.regs.gpr[5 + i * 2] = r; } return H_SUCCESS; } @@ -834,7 +814,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, } } } - vcpu->arch.gpr[4] = gr; + vcpu->arch.regs.gpr[4] = gr; ret = H_SUCCESS; out: unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); @@ -881,7 +861,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, kvmppc_set_dirty_from_hpte(kvm, v, gr); } } - vcpu->arch.gpr[4] = gr; + vcpu->arch.regs.gpr[4] = gr; ret = H_SUCCESS; out: unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 2a862618f072..758d1d23215e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -517,7 +517,7 @@ unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu) } while (!icp_rm_try_update(icp, old_state, new_state)); /* Return the result in GPR4 */ - vcpu->arch.gpr[4] = xirr; + vcpu->arch.regs.gpr[4] = xirr; return check_too_hard(xics, icp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b97d261d3b89..153988d878e8 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -39,8 +39,6 @@ BEGIN_FTR_SECTION; \ extsw reg, reg; \ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) -#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) - /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 @@ -639,6 +637,10 @@ kvmppc_hv_entry: /* Primary thread switches to guest partition. */ cmpwi r6,0 bne 10f + + /* Radix has already switched LPID and flushed core TLB */ + bne cr7, 22f + lwz r7,KVM_LPID(r9) BEGIN_FTR_SECTION ld r6,KVM_SDR1(r9) @@ -650,7 +652,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync - /* See if we need to flush the TLB */ + /* See if we need to flush the TLB. 
Hash has to be done in RM */ lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ BEGIN_FTR_SECTION /* @@ -677,15 +679,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) li r7,0x800 /* IS field = 0b10 */ ptesync li r0,0 /* RS for P9 version of tlbiel */ - bne cr7, 29f 28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ addi r7,r7,0x1000 bdnz 28b - b 30f -29: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */ - addi r7,r7,0x1000 - bdnz 29b -30: ptesync + ptesync 23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ andc r7,r7,r8 stdcx. r7,0,r6 @@ -799,7 +796,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - bl kvmppc_restore_tm + mr r3, r4 + ld r4, VCPU_MSR(r3) + bl kvmppc_restore_tm_hv + ld r4, HSTATE_KVM_VCPU(r13) 91: #endif @@ -1783,7 +1783,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - bl kvmppc_save_tm + mr r3, r9 + ld r4, VCPU_MSR(r3) + bl kvmppc_save_tm_hv + ld r9, HSTATE_KVM_VCPU(r13) 91: #endif @@ -2686,8 +2689,9 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - ld r9, HSTATE_KVM_VCPU(r13) - bl kvmppc_save_tm + ld r3, HSTATE_KVM_VCPU(r13) + ld r4, VCPU_MSR(r3) + bl kvmppc_save_tm_hv 91: #endif @@ -2805,7 +2809,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ - bl kvmppc_restore_tm + mr r3, r4 + ld r4, VCPU_MSR(r3) + bl kvmppc_restore_tm_hv + ld r4, HSTATE_KVM_VCPU(r13) 91: #endif @@ -3126,11 +3133,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Save transactional state and TM-related registers. - * Called with r9 pointing to the vcpu struct. + * Called with r3 pointing to the vcpu struct and r4 containing + * the guest MSR value. * This can modify all checkpointed registers, but - * restores r1, r2 and r9 (vcpu pointer) before exit. + * restores r1 and r2 before exit. */ -kvmppc_save_tm: +kvmppc_save_tm_hv: + /* See if we need to handle fake suspend mode */ +BEGIN_FTR_SECTION + b __kvmppc_save_tm +END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) + + lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ + cmpwi r0, 0 + beq __kvmppc_save_tm + + /* The following code handles the fake_suspend = 1 case */ mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -PPC_MIN_STKFRM(r1) @@ -3141,59 +3159,37 @@ kvmppc_save_tm: rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG mtmsrd r8 - ld r5, VCPU_MSR(r9) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 - beq 1f /* TM not active in guest. */ - - std r1, HSTATE_HOST_R1(r13) - li r3, TM_CAUSE_KVM_RESCHED - -BEGIN_FTR_SECTION - lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ - cmpwi r0, 0 - beq 3f rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */ beq 4f -BEGIN_FTR_SECTION_NESTED(96) +BEGIN_FTR_SECTION bl pnv_power9_force_smt4_catch -END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) nop - b 6f -3: - /* Emulation of the treclaim instruction needs TEXASR before treclaim */ - mfspr r6, SPRN_TEXASR - std r6, VCPU_ORIG_TEXASR(r9) -6: -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) - /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + std r1, HSTATE_HOST_R1(r13) + + /* Clear the MSR RI since r1, r13 may be foobar. 
*/ li r5, 0 mtmsrd r5, 1 - /* All GPRs are volatile at this point. */ + /* We have to treclaim here because that's the only way to do S->N */ + li r3, TM_CAUSE_KVM_RESCHED TRECLAIM(R3) - /* Temporarily store r13 and r9 so we have some regs to play with */ - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9, PACATMSCRATCH(r13) - - /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */ -BEGIN_FTR_SECTION - lbz r9, HSTATE_FAKE_SUSPEND(r13) - cmpwi r9, 0 - beq 2f /* * We were in fake suspend, so we are not going to save the * register state as the guest checkpointed state (since * we already have it), therefore we can now use any volatile GPR. */ - /* Reload stack pointer and TOC. */ + /* Reload PACA pointer, stack pointer and TOC. */ + GET_PACA(r13) ld r1, HSTATE_HOST_R1(r13) ld r2, PACATOC(r13) + /* Set MSR RI now we have r1 and r13 back. */ li r5, MSR_RI mtmsrd r5, 1 + HMT_MEDIUM ld r6, HSTATE_DSCR(r13) mtspr SPRN_DSCR, r6 @@ -3208,85 +3204,9 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) li r0, PSSCR_FAKE_SUSPEND andc r3, r3, r0 mtspr SPRN_PSSCR, r3 - ld r9, HSTATE_KVM_VCPU(r13) - /* Don't save TEXASR, use value from last exit in real suspend state */ - b 11f -2: -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + /* Don't save TEXASR, use value from last exit in real suspend state */ ld r9, HSTATE_KVM_VCPU(r13) - - /* Get a few more GPRs free. */ - std r29, VCPU_GPRS_TM(29)(r9) - std r30, VCPU_GPRS_TM(30)(r9) - std r31, VCPU_GPRS_TM(31)(r9) - - /* Save away PPR and DSCR soon so don't run with user values. */ - mfspr r31, SPRN_PPR - HMT_MEDIUM - mfspr r30, SPRN_DSCR - ld r29, HSTATE_DSCR(r13) - mtspr SPRN_DSCR, r29 - - /* Save all but r9, r13 & r29-r31 */ - reg = 0 - .rept 29 - .if (reg != 9) && (reg != 13) - std reg, VCPU_GPRS_TM(reg)(r9) - .endif - reg = reg + 1 - .endr - /* ... now save r13 */ - GET_SCRATCH0(r4) - std r4, VCPU_GPRS_TM(13)(r9) - /* ... and save r9 */ - ld r4, PACATMSCRATCH(r13) - std r4, VCPU_GPRS_TM(9)(r9) - - /* Reload stack pointer and TOC. */ - ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - - /* Set MSR RI now we have r1 and r13 back. */ - li r5, MSR_RI - mtmsrd r5, 1 - - /* Save away checkpinted SPRs. */ - std r31, VCPU_PPR_TM(r9) - std r30, VCPU_DSCR_TM(r9) - mflr r5 - mfcr r6 - mfctr r7 - mfspr r8, SPRN_AMR - mfspr r10, SPRN_TAR - mfxer r11 - std r5, VCPU_LR_TM(r9) - stw r6, VCPU_CR_TM(r9) - std r7, VCPU_CTR_TM(r9) - std r8, VCPU_AMR_TM(r9) - std r10, VCPU_TAR_TM(r9) - std r11, VCPU_XER_TM(r9) - - /* Restore r12 as trap number. */ - lwz r12, VCPU_TRAP(r9) - - /* Save FP/VSX. */ - addi r3, r9, VCPU_FPRS_TM - bl store_fp_state - addi r3, r9, VCPU_VRS_TM - bl store_vr_state - mfspr r6, SPRN_VRSAVE - stw r6, VCPU_VRSAVE_TM(r9) -1: - /* - * We need to save these SPRs after the treclaim so that the software - * error code is recorded correctly in the TEXASR. Also the user may - * change these outside of a transaction, so they must always be - * context switched. - */ - mfspr r7, SPRN_TEXASR - std r7, VCPU_TEXASR(r9) -11: mfspr r5, SPRN_TFHAR mfspr r6, SPRN_TFIAR std r5, VCPU_TFHAR(r9) @@ -3299,149 +3219,63 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) /* * Restore transactional state and TM-related registers. - * Called with r4 pointing to the vcpu struct. + * Called with r3 pointing to the vcpu struct + * and r4 containing the guest MSR value. * This potentially modifies all checkpointed registers. - * It restores r1, r2, r4 from the PACA. + * It restores r1 and r2 from the PACA. 
*/ -kvmppc_restore_tm: +kvmppc_restore_tm_hv: + /* + * If we are doing TM emulation for the guest on a POWER9 DD2, + * then we don't actually do a trechkpt -- we either set up + * fake-suspend mode, or emulate a TM rollback. + */ +BEGIN_FTR_SECTION + b __kvmppc_restore_tm +END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) mflr r0 std r0, PPC_LR_STKOFF(r1) - /* Turn on TM/FP/VSX/VMX so we can restore them. */ + li r0, 0 + stb r0, HSTATE_FAKE_SUSPEND(r13) + + /* Turn on TM so we can restore TM SPRs */ mfmsr r5 - li r6, MSR_TM >> 32 - sldi r6, r6, 32 - or r5, r5, r6 - ori r5, r5, MSR_FP - oris r5, r5, (MSR_VEC | MSR_VSX)@h + li r0, 1 + rldimi r5, r0, MSR_TM_LG, 63-MSR_TM_LG mtmsrd r5 /* * The user may change these outside of a transaction, so they must * always be context switched. */ - ld r5, VCPU_TFHAR(r4) - ld r6, VCPU_TFIAR(r4) - ld r7, VCPU_TEXASR(r4) + ld r5, VCPU_TFHAR(r3) + ld r6, VCPU_TFIAR(r3) + ld r7, VCPU_TEXASR(r3) mtspr SPRN_TFHAR, r5 mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 - li r0, 0 - stb r0, HSTATE_FAKE_SUSPEND(r13) - ld r5, VCPU_MSR(r4) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + rldicl. r5, r4, 64 - MSR_TS_S_LG, 62 beqlr /* TM not active in guest */ - std r1, HSTATE_HOST_R1(r13) - /* Make sure the failure summary is set, otherwise we'll program check - * when we trechkpt. It's possible that this might have been not set - * on a kvmppc_set_one_reg() call but we shouldn't let this crash the - * host. - */ + /* Make sure the failure summary is set */ oris r7, r7, (TEXASR_FS)@h mtspr SPRN_TEXASR, r7 - /* - * If we are doing TM emulation for the guest on a POWER9 DD2, - * then we don't actually do a trechkpt -- we either set up - * fake-suspend mode, or emulate a TM rollback. - */ -BEGIN_FTR_SECTION - b .Ldo_tm_fake_load -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) - - /* - * We need to load up the checkpointed state for the guest. - * We need to do this early as it will blow away any GPRs, VSRs and - * some SPRs. - */ - - mr r31, r4 - addi r3, r31, VCPU_FPRS_TM - bl load_fp_state - addi r3, r31, VCPU_VRS_TM - bl load_vr_state - mr r4, r31 - lwz r7, VCPU_VRSAVE_TM(r4) - mtspr SPRN_VRSAVE, r7 - - ld r5, VCPU_LR_TM(r4) - lwz r6, VCPU_CR_TM(r4) - ld r7, VCPU_CTR_TM(r4) - ld r8, VCPU_AMR_TM(r4) - ld r9, VCPU_TAR_TM(r4) - ld r10, VCPU_XER_TM(r4) - mtlr r5 - mtcr r6 - mtctr r7 - mtspr SPRN_AMR, r8 - mtspr SPRN_TAR, r9 - mtxer r10 - - /* - * Load up PPR and DSCR values but don't put them in the actual SPRs - * till the last moment to avoid running with userspace PPR and DSCR for - * too long. - */ - ld r29, VCPU_DSCR_TM(r4) - ld r30, VCPU_PPR_TM(r4) - - std r2, PACATMSCRATCH(r13) /* Save TOC */ - - /* Clear the MSR RI since r1, r13 are all going to be foobar. */ - li r5, 0 - mtmsrd r5, 1 - - /* Load GPRs r0-r28 */ - reg = 0 - .rept 29 - ld reg, VCPU_GPRS_TM(reg)(r31) - reg = reg + 1 - .endr - - mtspr SPRN_DSCR, r29 - mtspr SPRN_PPR, r30 - - /* Load final GPRs */ - ld 29, VCPU_GPRS_TM(29)(r31) - ld 30, VCPU_GPRS_TM(30)(r31) - ld 31, VCPU_GPRS_TM(31)(r31) - - /* TM checkpointed state is now setup. All GPRs are now volatile. */ - TRECHKPT - - /* Now let's get back the state we need. */ - HMT_MEDIUM - GET_PACA(r13) - ld r29, HSTATE_DSCR(r13) - mtspr SPRN_DSCR, r29 - ld r4, HSTATE_KVM_VCPU(r13) - ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATMSCRATCH(r13) - - /* Set the MSR RI since we have our registers back. 
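The kvmppc_restore_tm_hv path above only takes the special exit on CPUs that need the POWER9 TM "HV assist" workaround (CPU_FTR_P9_TM_HV_ASSIST); there it never issues a real trechkpt, but instead either arms fake-suspend mode or simulates a rollback, depending on the guest's MSR[TS] value. A hedged C model of just that three-way decision (the enums are illustrative, not kernel definitions):

#include <stdio.h>

enum guest_ts { TS_NONE = 0, TS_SUSPENDED = 1, TS_TRANSACTIONAL = 2 };
enum tm_action { DO_NOTHING, ARM_FAKE_SUSPEND, EMULATE_ROLLBACK };

/*
 * Mirrors the branch structure after the TM SPRs are restored:
 * TS = 00 -> nothing more to do, TS = 01 (suspended) -> fake suspend,
 * TS = 10 (transactional) -> simulate a rollback to TFHAR.
 */
static enum tm_action restore_tm_action(enum guest_ts ts)
{
	if (ts == TS_NONE)
		return DO_NOTHING;
	return (ts == TS_SUSPENDED) ? ARM_FAKE_SUSPEND : EMULATE_ROLLBACK;
}

int main(void)
{
	printf("%d %d %d\n", restore_tm_action(TS_NONE),
	       restore_tm_action(TS_SUSPENDED),
	       restore_tm_action(TS_TRANSACTIONAL));
	return 0;
}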
*/ - li r5, MSR_RI - mtmsrd r5, 1 -9: - ld r0, PPC_LR_STKOFF(r1) - mtlr r0 - blr - -.Ldo_tm_fake_load: cmpwi r5, 1 /* check for suspended state */ bgt 10f stb r5, HSTATE_FAKE_SUSPEND(r13) - b 9b /* and return */ + b 9f /* and return */ 10: stdu r1, -PPC_MIN_STKFRM(r1) /* guest is in transactional state, so simulate rollback */ - mr r3, r4 bl kvmhv_emulate_tm_rollback nop - ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ addi r1, r1, PPC_MIN_STKFRM - b 9b -#endif +9: ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ /* * We come here if we get any exception or interrupt while we are @@ -3572,6 +3406,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) bcl 20, 31, .+4 5: mflr r3 addi r3, r3, 9f - 5b + li r4, -1 + rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */ ld r4, PACAKMSR(r13) mtspr SPRN_SRR0, r3 mtspr SPRN_SRR1, r4 diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index bf710ad3a6d7..008285058f9b 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -19,7 +19,7 @@ static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause) u64 texasr, tfiar; u64 msr = vcpu->arch.shregs.msr; - tfiar = vcpu->arch.pc & ~0x3ull; + tfiar = vcpu->arch.regs.nip & ~0x3ull; texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT; if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) texasr |= TEXASR_SUSP; @@ -57,8 +57,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) (newmsr & MSR_TM))); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = vcpu->arch.shregs.srr0; + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = vcpu->arch.shregs.srr0; return RESUME_GUEST; case PPC_INST_RFEBB: @@ -90,8 +90,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.bescr = bescr; msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = msr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = vcpu->arch.ebbrr; + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = vcpu->arch.ebbrr; return RESUME_GUEST; case PPC_INST_MTMSRD: diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c index d98ccfd2b88c..b2c7c6fca4f9 100644 --- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -35,8 +35,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) return 0; newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = vcpu->arch.shregs.srr0; + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = vcpu->arch.shregs.srr0; return 1; case PPC_INST_RFEBB: @@ -58,8 +58,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) mtspr(SPRN_BESCR, bescr); msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = msr; - vcpu->arch.cfar = vcpu->arch.pc - 4; - vcpu->arch.pc = mfspr(SPRN_EBBRR); + vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.regs.nip = mfspr(SPRN_EBBRR); return 1; case PPC_INST_MTMSRD: @@ -103,7 +103,7 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) { vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ - vcpu->arch.pc = vcpu->arch.tfhar; + vcpu->arch.regs.nip = vcpu->arch.tfhar; copy_from_checkpoint(vcpu); vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; } diff --git a/arch/powerpc/kvm/book3s_pr.c 
b/arch/powerpc/kvm/book3s_pr.c index d3f304d06adf..c3b8006f0eac 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -42,6 +42,8 @@ #include <linux/highmem.h> #include <linux/module.h> #include <linux/miscdevice.h> +#include <asm/asm-prototypes.h> +#include <asm/tm.h> #include "book3s.h" @@ -53,7 +55,9 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr); -static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); +#ifdef CONFIG_PPC_BOOK3S_64 +static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac); +#endif /* Some compatibility defines */ #ifdef CONFIG_PPC_BOOK3S_32 @@ -114,6 +118,8 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) if (kvmppc_is_split_real(vcpu)) kvmppc_fixup_split_real(vcpu); + + kvmppc_restore_tm_pr(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -133,6 +139,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + kvmppc_save_tm_pr(vcpu); /* Enable AIL if supported */ if (cpu_has_feature(CPU_FTR_HVMODE) && @@ -147,25 +154,25 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->gpr[0] = vcpu->arch.gpr[0]; - svcpu->gpr[1] = vcpu->arch.gpr[1]; - svcpu->gpr[2] = vcpu->arch.gpr[2]; - svcpu->gpr[3] = vcpu->arch.gpr[3]; - svcpu->gpr[4] = vcpu->arch.gpr[4]; - svcpu->gpr[5] = vcpu->arch.gpr[5]; - svcpu->gpr[6] = vcpu->arch.gpr[6]; - svcpu->gpr[7] = vcpu->arch.gpr[7]; - svcpu->gpr[8] = vcpu->arch.gpr[8]; - svcpu->gpr[9] = vcpu->arch.gpr[9]; - svcpu->gpr[10] = vcpu->arch.gpr[10]; - svcpu->gpr[11] = vcpu->arch.gpr[11]; - svcpu->gpr[12] = vcpu->arch.gpr[12]; - svcpu->gpr[13] = vcpu->arch.gpr[13]; + svcpu->gpr[0] = vcpu->arch.regs.gpr[0]; + svcpu->gpr[1] = vcpu->arch.regs.gpr[1]; + svcpu->gpr[2] = vcpu->arch.regs.gpr[2]; + svcpu->gpr[3] = vcpu->arch.regs.gpr[3]; + svcpu->gpr[4] = vcpu->arch.regs.gpr[4]; + svcpu->gpr[5] = vcpu->arch.regs.gpr[5]; + svcpu->gpr[6] = vcpu->arch.regs.gpr[6]; + svcpu->gpr[7] = vcpu->arch.regs.gpr[7]; + svcpu->gpr[8] = vcpu->arch.regs.gpr[8]; + svcpu->gpr[9] = vcpu->arch.regs.gpr[9]; + svcpu->gpr[10] = vcpu->arch.regs.gpr[10]; + svcpu->gpr[11] = vcpu->arch.regs.gpr[11]; + svcpu->gpr[12] = vcpu->arch.regs.gpr[12]; + svcpu->gpr[13] = vcpu->arch.regs.gpr[13]; svcpu->cr = vcpu->arch.cr; - svcpu->xer = vcpu->arch.xer; - svcpu->ctr = vcpu->arch.ctr; - svcpu->lr = vcpu->arch.lr; - svcpu->pc = vcpu->arch.pc; + svcpu->xer = vcpu->arch.regs.xer; + svcpu->ctr = vcpu->arch.regs.ctr; + svcpu->lr = vcpu->arch.regs.link; + svcpu->pc = vcpu->arch.regs.nip; #ifdef CONFIG_PPC_BOOK3S_64 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; #endif @@ -182,10 +189,45 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) svcpu_put(svcpu); } +static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) +{ + ulong guest_msr = kvmppc_get_msr(vcpu); + ulong smsr = guest_msr; + + /* Guest MSR values */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE | + MSR_TM | MSR_TS_MASK; +#else + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE; +#endif + /* Process MSR values */ + smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; + /* External providers the guest reserved */ + smsr |= (guest_msr & vcpu->arch.guest_owned_ext); + /* 64-bit Process MSR values */ +#ifdef CONFIG_PPC_BOOK3S_64 + smsr |= MSR_ISF | MSR_HV; +#endif +#ifdef 
CONFIG_PPC_TRANSACTIONAL_MEM + /* + * in guest privileged state, we want to fail all TM transactions. + * So disable MSR TM bit so that all tbegin. will be able to be + * trapped into host. + */ + if (!(guest_msr & MSR_PR)) + smsr &= ~MSR_TM; +#endif + vcpu->arch.shadow_msr = smsr; +} + /* Copy data touched by real-mode code from shadow vcpu back to vcpu */ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + ulong old_msr; +#endif /* * Maybe we were already preempted and synced the svcpu from @@ -194,25 +236,25 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) if (!svcpu->in_use) goto out; - vcpu->arch.gpr[0] = svcpu->gpr[0]; - vcpu->arch.gpr[1] = svcpu->gpr[1]; - vcpu->arch.gpr[2] = svcpu->gpr[2]; - vcpu->arch.gpr[3] = svcpu->gpr[3]; - vcpu->arch.gpr[4] = svcpu->gpr[4]; - vcpu->arch.gpr[5] = svcpu->gpr[5]; - vcpu->arch.gpr[6] = svcpu->gpr[6]; - vcpu->arch.gpr[7] = svcpu->gpr[7]; - vcpu->arch.gpr[8] = svcpu->gpr[8]; - vcpu->arch.gpr[9] = svcpu->gpr[9]; - vcpu->arch.gpr[10] = svcpu->gpr[10]; - vcpu->arch.gpr[11] = svcpu->gpr[11]; - vcpu->arch.gpr[12] = svcpu->gpr[12]; - vcpu->arch.gpr[13] = svcpu->gpr[13]; + vcpu->arch.regs.gpr[0] = svcpu->gpr[0]; + vcpu->arch.regs.gpr[1] = svcpu->gpr[1]; + vcpu->arch.regs.gpr[2] = svcpu->gpr[2]; + vcpu->arch.regs.gpr[3] = svcpu->gpr[3]; + vcpu->arch.regs.gpr[4] = svcpu->gpr[4]; + vcpu->arch.regs.gpr[5] = svcpu->gpr[5]; + vcpu->arch.regs.gpr[6] = svcpu->gpr[6]; + vcpu->arch.regs.gpr[7] = svcpu->gpr[7]; + vcpu->arch.regs.gpr[8] = svcpu->gpr[8]; + vcpu->arch.regs.gpr[9] = svcpu->gpr[9]; + vcpu->arch.regs.gpr[10] = svcpu->gpr[10]; + vcpu->arch.regs.gpr[11] = svcpu->gpr[11]; + vcpu->arch.regs.gpr[12] = svcpu->gpr[12]; + vcpu->arch.regs.gpr[13] = svcpu->gpr[13]; vcpu->arch.cr = svcpu->cr; - vcpu->arch.xer = svcpu->xer; - vcpu->arch.ctr = svcpu->ctr; - vcpu->arch.lr = svcpu->lr; - vcpu->arch.pc = svcpu->pc; + vcpu->arch.regs.xer = svcpu->xer; + vcpu->arch.regs.ctr = svcpu->ctr; + vcpu->arch.regs.link = svcpu->lr; + vcpu->arch.regs.nip = svcpu->pc; vcpu->arch.shadow_srr1 = svcpu->shadow_srr1; vcpu->arch.fault_dar = svcpu->fault_dar; vcpu->arch.fault_dsisr = svcpu->fault_dsisr; @@ -228,12 +270,116 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb; if (cpu_has_feature(CPU_FTR_ARCH_207S)) vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Unlike other MSR bits, MSR[TS]bits can be changed at guest without + * notifying host: + * modified by unprivileged instructions like "tbegin"/"tend"/ + * "tresume"/"tsuspend" in PR KVM guest. + * + * It is necessary to sync here to calculate a correct shadow_msr. + * + * privileged guest's tbegin will be failed at present. So we + * only take care of problem state guest. 
+ */ + old_msr = kvmppc_get_msr(vcpu); + if (unlikely((old_msr & MSR_PR) && + (vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) != + (old_msr & (MSR_TS_MASK)))) { + old_msr &= ~(MSR_TS_MASK); + old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK)); + kvmppc_set_msr_fast(vcpu, old_msr); + kvmppc_recalc_shadow_msr(vcpu); + } +#endif + svcpu->in_use = false; out: svcpu_put(svcpu); } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) +{ + tm_enable(); + vcpu->arch.tfhar = mfspr(SPRN_TFHAR); + vcpu->arch.texasr = mfspr(SPRN_TEXASR); + vcpu->arch.tfiar = mfspr(SPRN_TFIAR); + tm_disable(); +} + +void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) +{ + tm_enable(); + mtspr(SPRN_TFHAR, vcpu->arch.tfhar); + mtspr(SPRN_TEXASR, vcpu->arch.texasr); + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); + tm_disable(); +} + +/* loadup math bits which is enabled at kvmppc_get_msr() but not enabled at + * hardware. + */ +static void kvmppc_handle_lost_math_exts(struct kvm_vcpu *vcpu) +{ + ulong exit_nr; + ulong ext_diff = (kvmppc_get_msr(vcpu) & ~vcpu->arch.guest_owned_ext) & + (MSR_FP | MSR_VEC | MSR_VSX); + + if (!ext_diff) + return; + + if (ext_diff == MSR_FP) + exit_nr = BOOK3S_INTERRUPT_FP_UNAVAIL; + else if (ext_diff == MSR_VEC) + exit_nr = BOOK3S_INTERRUPT_ALTIVEC; + else + exit_nr = BOOK3S_INTERRUPT_VSX; + + kvmppc_handle_ext(vcpu, exit_nr, ext_diff); +} + +void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) +{ + if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) { + kvmppc_save_tm_sprs(vcpu); + return; + } + + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + kvmppc_giveup_ext(vcpu, MSR_VSX); + + preempt_disable(); + _kvmppc_save_tm_pr(vcpu, mfmsr()); + preempt_enable(); +} + +void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) +{ + if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) { + kvmppc_restore_tm_sprs(vcpu); + if (kvmppc_get_msr(vcpu) & MSR_TM) { + kvmppc_handle_lost_math_exts(vcpu); + if (vcpu->arch.fscr & FSCR_TAR) + kvmppc_handle_fac(vcpu, FSCR_TAR_LG); + } + return; + } + + preempt_disable(); + _kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu)); + preempt_enable(); + + if (kvmppc_get_msr(vcpu) & MSR_TM) { + kvmppc_handle_lost_math_exts(vcpu); + if (vcpu->arch.fscr & FSCR_TAR) + kvmppc_handle_fac(vcpu, FSCR_TAR_LG); + } +} +#endif + static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) { int r = 1; /* Indicate we want to get back into the guest */ @@ -306,32 +452,29 @@ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) /*****************************************/ -static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) -{ - ulong guest_msr = kvmppc_get_msr(vcpu); - ulong smsr = guest_msr; - - /* Guest MSR values */ - smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE; - /* Process MSR values */ - smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; - /* External providers the guest reserved */ - smsr |= (guest_msr & vcpu->arch.guest_owned_ext); - /* 64-bit Process MSR values */ -#ifdef CONFIG_PPC_BOOK3S_64 - smsr |= MSR_ISF | MSR_HV; -#endif - vcpu->arch.shadow_msr = smsr; -} - static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) { - ulong old_msr = kvmppc_get_msr(vcpu); + ulong old_msr; + + /* For PAPR guest, make sure MSR reflects guest mode */ + if (vcpu->arch.papr_enabled) + msr = (msr & ~MSR_HV) | MSR_ME; #ifdef EXIT_DEBUG printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* We should never target guest MSR to TS=10 && PR=0, + * since we always fail transaction for guest privilege + * state. 
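The MSR[TS] resync added to kvmppc_copy_from_svcpu() above handles the fact that tbegin./tend./tsuspend./tresume. can flip the guest's transaction-state bits without trapping, so after an exit the guest MSR is patched from the TS bits the hardware left in shadow SRR1. A standalone C model of that fix-up (the MSR bit positions used here are the usual 64-bit Book3S ones, stated as an assumption rather than taken from this diff):

#include <stdint.h>
#include <stdio.h>

#define MSR_PR		(1ULL << 14)	/* problem state           */
#define MSR_TS_MASK	(3ULL << 33)	/* transaction-state field  */

/* Splice the hardware-reported TS bits into a problem-state guest's MSR. */
static uint64_t resync_ts(uint64_t guest_msr, uint64_t shadow_srr1)
{
	if ((guest_msr & MSR_PR) &&
	    (shadow_srr1 & MSR_TS_MASK) != (guest_msr & MSR_TS_MASK)) {
		guest_msr &= ~MSR_TS_MASK;
		guest_msr |= shadow_srr1 & MSR_TS_MASK;
	}
	return guest_msr;
}

int main(void)
{
	uint64_t msr = MSR_PR;		/* guest MSR still says TS = 00 */
	uint64_t srr1 = 1ULL << 33;	/* hardware reports TS = 01     */

	printf("resynced MSR = %#llx\n",
	       (unsigned long long)resync_ts(msr, srr1));
	return 0;
}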
+ */ + if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr)) + kvmppc_emulate_tabort(vcpu, + TM_CAUSE_KVM_FAC_UNAV | TM_CAUSE_PERSISTENT); +#endif + + old_msr = kvmppc_get_msr(vcpu); msr &= to_book3s(vcpu)->msr_mask; kvmppc_set_msr_fast(vcpu, msr); kvmppc_recalc_shadow_msr(vcpu); @@ -387,6 +530,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) /* Preload FPU if it's enabled */ if (kvmppc_get_msr(vcpu) & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (kvmppc_get_msr(vcpu) & MSR_TM) + kvmppc_handle_lost_math_exts(vcpu); +#endif } void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) @@ -584,24 +732,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.may_execute = !data; } - if (page_found == -ENOENT) { - /* Page not found in guest PTE entries */ - u64 ssrr1 = vcpu->arch.shadow_srr1; - u64 msr = kvmppc_get_msr(vcpu); - kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); - kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr); - kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL)); - kvmppc_book3s_queue_irqprio(vcpu, vec); - } else if (page_found == -EPERM) { - /* Storage protection */ - u32 dsisr = vcpu->arch.fault_dsisr; - u64 ssrr1 = vcpu->arch.shadow_srr1; - u64 msr = kvmppc_get_msr(vcpu); - kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); - dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT; - kvmppc_set_dsisr(vcpu, dsisr); - kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL)); - kvmppc_book3s_queue_irqprio(vcpu, vec); + if (page_found == -ENOENT || page_found == -EPERM) { + /* Page not found in guest PTE entries, or protection fault */ + u64 flags; + + if (page_found == -EPERM) + flags = DSISR_PROTFAULT; + else + flags = DSISR_NOHPTE; + if (data) { + flags |= vcpu->arch.fault_dsisr & DSISR_ISSTORE; + kvmppc_core_queue_data_storage(vcpu, eaddr, flags); + } else { + kvmppc_core_queue_inst_storage(vcpu, flags); + } } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); @@ -683,7 +827,7 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) } /* Give up facility (TAR / EBB / DSCR) */ -static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) +void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) { #ifdef CONFIG_PPC_BOOK3S_64 if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) { @@ -802,7 +946,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_BOOK3S_64 -static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac) +void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac) { /* Inject the Interrupt Cause field and trigger a guest interrupt */ vcpu->arch.fscr &= ~(0xffULL << 56); @@ -864,6 +1008,18 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) break; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Since we disabled MSR_TM at privilege state, the mfspr instruction + * for TM spr can trigger TM fac unavailable. In this case, the + * emulation is handled by kvmppc_emulate_fac(), which invokes + * kvmppc_emulate_mfspr() finally. But note the mfspr can include + * RT for NV registers. So it need to restore those NV reg to reflect + * the update. 
+ */ + if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR)) + return RESUME_GUEST_NV; +#endif + return RESUME_GUEST; } @@ -872,7 +1028,12 @@ void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { /* TAR got dropped, drop it in shadow too */ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + } else if (!(vcpu->arch.fscr & FSCR_TAR) && (fscr & FSCR_TAR)) { + vcpu->arch.fscr = fscr; + kvmppc_handle_fac(vcpu, FSCR_TAR_LG); + return; } + vcpu->arch.fscr = fscr; } #endif @@ -1017,10 +1178,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } else { - u64 msr = kvmppc_get_msr(vcpu); - msr |= shadow_srr1 & 0x58000000; - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + kvmppc_core_queue_inst_storage(vcpu, + shadow_srr1 & 0x58000000); r = RESUME_GUEST; } break; @@ -1059,9 +1218,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); srcu_read_unlock(&vcpu->kvm->srcu, idx); } else { - kvmppc_set_dar(vcpu, dar); - kvmppc_set_dsisr(vcpu, fault_dsisr); - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr); r = RESUME_GUEST; } break; @@ -1092,10 +1249,13 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_EXTERNAL: case BOOK3S_INTERRUPT_EXTERNAL_LEVEL: case BOOK3S_INTERRUPT_EXTERNAL_HV: + case BOOK3S_INTERRUPT_H_VIRT: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: + case BOOK3S_INTERRUPT_SYSTEM_RESET: r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_PROGRAM: @@ -1225,8 +1385,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, } #ifdef CONFIG_PPC_BOOK3S_64 case BOOK3S_INTERRUPT_FAC_UNAVAIL: - kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); - r = RESUME_GUEST; + r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); break; #endif case BOOK3S_INTERRUPT_MACHINE_CHECK: @@ -1379,6 +1538,73 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, else *val = get_reg_val(id, 0); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case KVM_REG_PPC_TFHAR: + *val = get_reg_val(id, vcpu->arch.tfhar); + break; + case KVM_REG_PPC_TFIAR: + *val = get_reg_val(id, vcpu->arch.tfiar); + break; + case KVM_REG_PPC_TEXASR: + *val = get_reg_val(id, vcpu->arch.texasr); + break; + case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: + *val = get_reg_val(id, + vcpu->arch.gpr_tm[id-KVM_REG_PPC_TM_GPR0]); + break; + case KVM_REG_PPC_TM_VSR0 ... 
KVM_REG_PPC_TM_VSR63: + { + int i, j; + + i = id - KVM_REG_PPC_TM_VSR0; + if (i < 32) + for (j = 0; j < TS_FPRWIDTH; j++) + val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j]; + else { + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + val->vval = vcpu->arch.vr_tm.vr[i-32]; + else + r = -ENXIO; + } + break; + } + case KVM_REG_PPC_TM_CR: + *val = get_reg_val(id, vcpu->arch.cr_tm); + break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; + case KVM_REG_PPC_TM_LR: + *val = get_reg_val(id, vcpu->arch.lr_tm); + break; + case KVM_REG_PPC_TM_CTR: + *val = get_reg_val(id, vcpu->arch.ctr_tm); + break; + case KVM_REG_PPC_TM_FPSCR: + *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr); + break; + case KVM_REG_PPC_TM_AMR: + *val = get_reg_val(id, vcpu->arch.amr_tm); + break; + case KVM_REG_PPC_TM_PPR: + *val = get_reg_val(id, vcpu->arch.ppr_tm); + break; + case KVM_REG_PPC_TM_VRSAVE: + *val = get_reg_val(id, vcpu->arch.vrsave_tm); + break; + case KVM_REG_PPC_TM_VSCR: + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]); + else + r = -ENXIO; + break; + case KVM_REG_PPC_TM_DSCR: + *val = get_reg_val(id, vcpu->arch.dscr_tm); + break; + case KVM_REG_PPC_TM_TAR: + *val = get_reg_val(id, vcpu->arch.tar_tm); + break; +#endif default: r = -EINVAL; break; @@ -1412,6 +1638,72 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case KVM_REG_PPC_TFHAR: + vcpu->arch.tfhar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TFIAR: + vcpu->arch.tfiar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TEXASR: + vcpu->arch.texasr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: + vcpu->arch.gpr_tm[id - KVM_REG_PPC_TM_GPR0] = + set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63: + { + int i, j; + + i = id - KVM_REG_PPC_TM_VSR0; + if (i < 32) + for (j = 0; j < TS_FPRWIDTH; j++) + vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j]; + else + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vr_tm.vr[i-32] = val->vval; + else + r = -ENXIO; + break; + } + case KVM_REG_PPC_TM_CR: + vcpu->arch.cr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_LR: + vcpu->arch.lr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_CTR: + vcpu->arch.ctr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_FPSCR: + vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_AMR: + vcpu->arch.amr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_PPR: + vcpu->arch.ppr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VRSAVE: + vcpu->arch.vrsave_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_VSCR: + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val); + else + r = -ENXIO; + break; + case KVM_REG_PPC_TM_DSCR: + vcpu->arch.dscr_tm = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TM_TAR: + vcpu->arch.tar_tm = set_reg_val(id, *val); + break; +#endif default: r = -EINVAL; break; @@ -1687,6 +1979,17 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, return 0; } + +static int kvm_configure_mmu_pr(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + /* Require flags and process table base and size to all be zero. 
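The hunks above let PR KVM's get/set_one_reg handle the checkpointed TM state (KVM_REG_PPC_TFHAR, KVM_REG_PPC_TM_GPR0..31, KVM_REG_PPC_TM_VSR0..63 and friends). As a usage sketch, userspace reads such a register through the generic one_reg ioctl; this assumes powerpc uapi headers and an already-created vCPU fd, with error handling trimmed:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read the checkpointed TFHAR of a vCPU via KVM_GET_ONE_REG. */
static int read_tm_tfhar(int vcpu_fd, uint64_t *tfhar)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_TFHAR,
		.addr = (uint64_t)(uintptr_t)tfhar,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}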
*/ + if (cfg->flags || cfg->process_table) + return -EINVAL; + return 0; +} + #else static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, struct kvm_ppc_smmu_info *info) @@ -1735,9 +2038,12 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) static int kvmppc_core_check_processor_compat_pr(void) { /* - * Disable KVM for Power9 untill the required bits merged. + * PR KVM can work on POWER9 inside a guest partition + * running in HPT mode. It can't work if we are using + * radix translation (because radix provides no way for + * a process to have unique translations in quadrant 3). */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) return -EIO; return 0; } @@ -1781,7 +2087,9 @@ static struct kvmppc_ops kvm_ops_pr = { .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, #ifdef CONFIG_PPC_BOOK3S_64 .hcall_implemented = kvmppc_hcall_impl_pr, + .configure_mmu = kvm_configure_mmu_pr, #endif + .giveup_ext = kvmppc_giveup_ext, }; diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 93a180ceefad..98ccc7ec5d48 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -383,6 +383,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) */ PPC_LL r6, HSTATE_HOST_MSR(r13) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * We don't want to change MSR[TS] bits via rfi here. + * The actual TM handling logic will be in host with + * recovered DR/IR bits after HSTATE_VMHANDLER. + * And MSR_TM can be enabled in HOST_MSR so rfid may + * not suppress this change and can lead to exception. + * Manually set MSR to prevent TS state change here. + */ + mfmsr r7 + rldicl r7, r7, 64 - MSR_TS_S_LG, 62 + rldimi r6, r7, MSR_TS_S_LG, 63 - MSR_TS_T_LG +#endif PPC_LL r8, HSTATE_VMHANDLER(r13) #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 99c3620b40d9..6e41ba7ec8f4 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -334,7 +334,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu) */ /* Return interrupt and old CPPR in GPR4 */ - vcpu->arch.gpr[4] = hirq | (old_cppr << 24); + vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24); return H_SUCCESS; } @@ -369,7 +369,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll); /* Return interrupt and old CPPR in GPR4 */ - vcpu->arch.gpr[4] = hirq | (xc->cppr << 24); + vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 876d4f294fdd..a9ca016da670 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -77,8 +77,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr); - printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("pc: %08lx msr: %08llx\n", vcpu->arch.regs.nip, + vcpu->arch.shared->msr); + printk("lr: %08lx ctr: %08lx\n", vcpu->arch.regs.link, + vcpu->arch.regs.ctr); printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0, vcpu->arch.shared->srr1); @@ -491,24 +493,25 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (allowed) { switch (int_class) { case INT_CLASS_NONCRIT: - set_guest_srr(vcpu, vcpu->arch.pc, + set_guest_srr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; case INT_CLASS_CRIT: - set_guest_csrr(vcpu, 
vcpu->arch.pc, + set_guest_csrr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; case INT_CLASS_DBG: - set_guest_dsrr(vcpu, vcpu->arch.pc, + set_guest_dsrr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; case INT_CLASS_MC: - set_guest_mcsrr(vcpu, vcpu->arch.pc, + set_guest_mcsrr(vcpu, vcpu->arch.regs.nip, vcpu->arch.shared->msr); break; } - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; + vcpu->arch.regs.nip = vcpu->arch.ivpr | + vcpu->arch.ivor[priority]; if (update_esr == true) kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) @@ -826,7 +829,7 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, vcpu->arch.pc, vcpu->arch.last_inst); + __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. */ run->hw.hardware_exit_reason = ~0ULL << 32; @@ -875,7 +878,7 @@ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) */ vcpu->arch.dbsr = 0; run->debug.arch.status = 0; - run->debug.arch.address = vcpu->arch.pc; + run->debug.arch.address = vcpu->arch.regs.nip; if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; @@ -971,7 +974,7 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, case EMULATE_FAIL: pr_debug("%s: load instruction from guest address %lx failed\n", - __func__, vcpu->arch.pc); + __func__, vcpu->arch.regs.nip); /* For debugging, encode the failing instruction and * report it to userspace. */ run->hw.hardware_exit_reason = ~0ULL << 32; @@ -1169,7 +1172,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_SPE_FP_DATA: case BOOKE_INTERRUPT_SPE_FP_ROUND: printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n", - __func__, exit_nr, vcpu->arch.pc); + __func__, exit_nr, vcpu->arch.regs.nip); run->hw.hardware_exit_reason = exit_nr; r = RESUME_HOST; break; @@ -1299,7 +1302,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } case BOOKE_INTERRUPT_ITLB_MISS: { - unsigned long eaddr = vcpu->arch.pc; + unsigned long eaddr = vcpu->arch.regs.nip; gpa_t gpaddr; gfn_t gfn; int gtlb_index; @@ -1391,7 +1394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int i; int r; - vcpu->arch.pc = 0; + vcpu->arch.regs.nip = 0; vcpu->arch.shared->pir = vcpu->vcpu_id; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ kvmppc_set_msr(vcpu, 0); @@ -1440,10 +1443,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu_load(vcpu); - regs->pc = vcpu->arch.pc; + regs->pc = vcpu->arch.regs.nip; regs->cr = kvmppc_get_cr(vcpu); - regs->ctr = vcpu->arch.ctr; - regs->lr = vcpu->arch.lr; + regs->ctr = vcpu->arch.regs.ctr; + regs->lr = vcpu->arch.regs.link; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; regs->srr0 = kvmppc_get_srr0(vcpu); @@ -1471,10 +1474,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu_load(vcpu); - vcpu->arch.pc = regs->pc; + vcpu->arch.regs.nip = regs->pc; kvmppc_set_cr(vcpu, regs->cr); - vcpu->arch.ctr = regs->ctr; - vcpu->arch.lr = regs->lr; + vcpu->arch.regs.ctr = regs->ctr; + vcpu->arch.regs.link = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); kvmppc_set_srr0(vcpu, regs->srr0); diff --git a/arch/powerpc/kvm/booke_emulate.c 
b/arch/powerpc/kvm/booke_emulate.c index a82f64502de1..d23e582f0fee 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -34,19 +34,19 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.shared->srr0; + vcpu->arch.regs.nip = vcpu->arch.shared->srr0; kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.dsrr0; + vcpu->arch.regs.nip = vcpu->arch.dsrr0; kvmppc_set_msr(vcpu, vcpu->arch.dsrr1); } static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.csrr0; + vcpu->arch.regs.nip = vcpu->arch.csrr0; kvmppc_set_msr(vcpu, vcpu->arch.csrr1); } diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 990db69a1d0b..3f8189eb56ed 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -53,7 +53,7 @@ static int dbell2prio(ulong param) static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb) { - ulong param = vcpu->arch.gpr[rb]; + ulong param = vcpu->arch.regs.gpr[rb]; int prio = dbell2prio(param); if (prio < 0) @@ -65,7 +65,7 @@ static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb) static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) { - ulong param = vcpu->arch.gpr[rb]; + ulong param = vcpu->arch.regs.gpr[rb]; int prio = dbell2prio(rb); int pir = param & PPC_DBELL_PIR_MASK; int i; @@ -94,7 +94,7 @@ static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_oc(inst)) { case EHPRIV_OC_DEBUG: run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.address = vcpu->arch.pc; + run->debug.arch.address = vcpu->arch.regs.nip; run->debug.arch.status = 0; kvmppc_account_exit(vcpu, DEBUG_EXITS); emulated = EMULATE_EXIT_USER; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index ddbf8f0284c0..24296f4cadc6 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -513,7 +513,7 @@ void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) { unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.regs.nip, as); } void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index c878b4ffb86f..8f2985e46f6f 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -625,8 +625,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } #ifdef CONFIG_KVM_BOOKE_HV -int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, - u32 *instr) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_fetch_type type, u32 *instr) { gva_t geaddr; hpa_t addr; @@ -715,8 +715,8 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, return EMULATE_DONE; } #else -int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, - u32 *instr) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_fetch_type type, u32 *instr) { return EMULATE_AGAIN; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index a382e15135e6..afde788be141 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -31,6 +31,7 @@ #include <asm/kvm_ppc.h> #include <asm/disassemble.h> #include <asm/ppc-opcode.h> +#include <asm/sstep.h> #include "timing.h" #include "trace.h" @@ 
-84,8 +85,9 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; u32 inst; int ra, rs, rt; - enum emulation_result emulated; + enum emulation_result emulated = EMULATE_FAIL; int advance = 1; + struct instruction_op op; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); @@ -107,580 +109,276 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst); vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE; + vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; vcpu->arch.mmio_sp64_extend = 0; vcpu->arch.mmio_sign_extend = 0; vcpu->arch.mmio_vmx_copy_nums = 0; + vcpu->arch.mmio_vmx_offset = 0; + vcpu->arch.mmio_host_swabbed = 0; - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { - case OP_31_XOP_LWZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LWZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; + emulated = EMULATE_FAIL; + vcpu->arch.regs.msr = vcpu->arch.shared->msr; + vcpu->arch.regs.ccr = vcpu->arch.cr; + if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { + int type = op.type & INSTR_TYPE_MASK; + int size = GETSIZE(op.type); - case OP_31_XOP_LBZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_31_XOP_LBZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; + switch (type) { + case LOAD: { + int instr_byte_swap = op.type & BYTEREV; - case OP_31_XOP_STDX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); - break; + if (op.type & SIGNEXT) + emulated = kvmppc_handle_loads(run, vcpu, + op.reg, size, !instr_byte_swap); + else + emulated = kvmppc_handle_load(run, vcpu, + op.reg, size, !instr_byte_swap); - case OP_31_XOP_STDUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); - case OP_31_XOP_STWX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 1); - break; - - case OP_31_XOP_STWUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STBX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - break; - - case OP_31_XOP_STBUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHAUX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STHX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - break; - - case OP_31_XOP_STHUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 
- break; - - case OP_31_XOP_DCBST: - case OP_31_XOP_DCBF: - case OP_31_XOP_DCBI: - /* Do nothing. The guest is performing dcbi because - * hardware DMA is not snooped by the dcache, but - * emulated DMA either goes through the dcache as - * normal writes, or the host kernel has handled dcache - * coherence. */ - break; - - case OP_31_XOP_LWBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); - break; - - case OP_31_XOP_STWBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 0); break; - - case OP_31_XOP_LHBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); - break; - - case OP_31_XOP_STHBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 0); - break; - - case OP_31_XOP_LDBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0); - break; - - case OP_31_XOP_STDBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 0); - break; - - case OP_31_XOP_LDX: - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - - case OP_31_XOP_LDUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LWAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LWAUX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - + } #ifdef CONFIG_PPC_FPU - case OP_31_XOP_LFSX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; - - case OP_31_XOP_LFSUX: + case LOAD_FP: if (kvmppc_check_fp_disabled(vcpu)) return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case OP_31_XOP_LFDX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - break; + if (op.type & FPCONV) + vcpu->arch.mmio_sp64_extend = 1; - case OP_31_XOP_LFDUX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LFIWAX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_loads(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; + if (op.type & SIGNEXT) + emulated = kvmppc_handle_loads(run, vcpu, + KVM_MMIO_REG_FPR|op.reg, size, 1); + else + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|op.reg, size, 1); - case OP_31_XOP_LFIWZX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); - case OP_31_XOP_STFSX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 4, 1); break; - - case OP_31_XOP_STFSUX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STFDX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = 
kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 8, 1); - break; - - case OP_31_XOP_STFDUX: - if (kvmppc_check_fp_disabled(vcpu)) +#endif +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (kvmppc_check_altivec_disabled(vcpu)) return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case OP_31_XOP_STFIWX: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), 4, 1); + /* Hardware enforces alignment of VMX accesses */ + vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1); + vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1); + + if (size == 16) { /* lvx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_DWORD; + } else if (size == 4) { /* lvewx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_WORD; + } else if (size == 2) { /* lvehx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_HWORD; + } else if (size == 1) { /* lvebx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_BYTE; + } else + break; + + vcpu->arch.mmio_vmx_offset = + (vcpu->arch.vaddr_accessed & 0xf)/size; + + if (size == 16) { + vcpu->arch.mmio_vmx_copy_nums = 2; + emulated = kvmppc_handle_vmx_load(run, + vcpu, KVM_MMIO_REG_VMX|op.reg, + 8, 1); + } else { + vcpu->arch.mmio_vmx_copy_nums = 1; + emulated = kvmppc_handle_vmx_load(run, vcpu, + KVM_MMIO_REG_VMX|op.reg, + size, 1); + } break; #endif - #ifdef CONFIG_VSX - case OP_31_XOP_LXSDX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 8, 1, 0); - break; - - case OP_31_XOP_LXSSPX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 0); - break; + case LOAD_VSX: { + int io_size_each; + + if (op.vsx_flags & VSX_CHECK_VEC) { + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + } else { + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + } + + if (op.vsx_flags & VSX_FPCONV) + vcpu->arch.mmio_sp64_extend = 1; + + if (op.element_size == 8) { + if (op.vsx_flags & VSX_SPLAT) + vcpu->arch.mmio_copy_type = + KVMPPC_VSX_COPY_DWORD_LOAD_DUMP; + else + vcpu->arch.mmio_copy_type = + KVMPPC_VSX_COPY_DWORD; + } else if (op.element_size == 4) { + if (op.vsx_flags & VSX_SPLAT) + vcpu->arch.mmio_copy_type = + KVMPPC_VSX_COPY_WORD_LOAD_DUMP; + else + vcpu->arch.mmio_copy_type = + KVMPPC_VSX_COPY_WORD; + } else + break; + + if (size < op.element_size) { + /* precision convert case: lxsspx, etc */ + vcpu->arch.mmio_vsx_copy_nums = 1; + io_size_each = size; + } else { /* lxvw4x, lxvd2x, etc */ + vcpu->arch.mmio_vsx_copy_nums = + size/op.element_size; + io_size_each = op.element_size; + } - case OP_31_XOP_LXSIWAX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 1); - break; - - case OP_31_XOP_LXSIWZX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 
0); + KVM_MMIO_REG_VSX | (op.reg & 0x1f), + io_size_each, 1, op.type & SIGNEXT); break; + } +#endif + case STORE: + /* if need byte reverse, op.val has been reversed by + * analyse_instr(). + */ + emulated = kvmppc_handle_store(run, vcpu, op.val, + size, 1); - case OP_31_XOP_LXVD2X: - /* - * In this case, the official load/store process is like this: - * Step1, exit from vm by page fault isr, then kvm save vsr. - * Please see guest_exit_cont->store_fp_state->SAVE_32VSRS - * as reference. - * - * Step2, copy data between memory and VCPU - * Notice: for LXVD2X/STXVD2X/LXVW4X/STXVW4X, we use - * 2copies*8bytes or 4copies*4bytes - * to simulate one copy of 16bytes. - * Also there is an endian issue here, we should notice the - * layout of memory. - * Please see MARCO of LXVD2X_ROT/STXVD2X_ROT as more reference. - * If host is little-endian, kvm will call XXSWAPD for - * LXVD2X_ROT/STXVD2X_ROT. - * So, if host is little-endian, - * the postion of memeory should be swapped. - * - * Step3, return to guest, kvm reset register. - * Please see kvmppc_hv_entry->load_fp_state->REST_32VSRS - * as reference. - */ - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 2; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 8, 1, 0); - break; + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); - case OP_31_XOP_LXVW4X: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 4; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 4, 1, 0); break; - - case OP_31_XOP_LXVDSX: - if (kvmppc_check_vsx_disabled(vcpu)) +#ifdef CONFIG_PPC_FPU + case STORE_FP: + if (kvmppc_check_fp_disabled(vcpu)) return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = - KVMPPC_VSX_COPY_DWORD_LOAD_DUMP; - emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX|rt, 8, 1, 0); - break; - case OP_31_XOP_STXSDX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 8, 1); - break; + /* The FP registers need to be flushed so that + * kvmppc_handle_store() can read actual FP vals + * from vcpu->arch. 
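That flush is what the giveup_ext hook invoked just below provides: with PR KVM's lazy state handling the guest's floating-point values may still sit in the real FP registers, and the copy in vcpu->arch is only current once the host has taken the unit back. A minimal stand-alone sketch of the idea (all names here are illustrative, not kernel APIs):

#include <stdio.h>
#include <string.h>

/* Toy model of lazy FP state: while the guest owns the unit the live
 * values sit in "hardware" registers and the memory image is stale. */
struct vcpu_model {
	double hw_fpr[32];	/* stands in for the physical FPRs */
	double mem_fpr[32];	/* stands in for vcpu->arch.fp.fpr[] */
	int fp_owned;		/* guest currently owns MSR_FP state */
};

static void giveup_fp(struct vcpu_model *v)
{
	if (!v->fp_owned)
		return;
	memcpy(v->mem_fpr, v->hw_fpr, sizeof(v->mem_fpr));
	v->fp_owned = 0;
}

/* MMIO store emulation: flush first, then read the memory image. */
static double emulate_stfd(struct vcpu_model *v, int rs)
{
	giveup_fp(v);
	return v->mem_fpr[rs];
}

int main(void)
{
	struct vcpu_model v = { .fp_owned = 1 };

	v.hw_fpr[3] = 2.5;	/* guest updated f3 while it was running */
	v.mem_fpr[3] = 0.0;	/* stale in-memory copy */
	printf("value to store: %g\n", emulate_stfd(&v, 3));	/* 2.5 */
	return 0;
}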
+ */ + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, + MSR_FP); - case OP_31_XOP_STXSSPX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 4, 1); - break; + if (op.type & FPCONV) + vcpu->arch.mmio_sp64_extend = 1; - case OP_31_XOP_STXSIWX: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_offset = 1; - vcpu->arch.mmio_vsx_copy_nums = 1; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 4, 1); - break; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, op.reg), size, 1); - case OP_31_XOP_STXVD2X: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 2; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 8, 1); - break; + if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) + kvmppc_set_gpr(vcpu, op.update_reg, op.ea); - case OP_31_XOP_STXVW4X: - if (kvmppc_check_vsx_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_vsx_copy_nums = 4; - vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; - emulated = kvmppc_handle_vsx_store(run, vcpu, - rs, 4, 1); break; -#endif /* CONFIG_VSX */ - +#endif #ifdef CONFIG_ALTIVEC - case OP_31_XOP_LVX: + case STORE_VMX: if (kvmppc_check_altivec_disabled(vcpu)) return EMULATE_DONE; - vcpu->arch.vaddr_accessed &= ~0xFULL; - vcpu->arch.paddr_accessed &= ~0xFULL; - vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_load128_by2x64(run, vcpu, - KVM_MMIO_REG_VMX|rt, 1); - break; - case OP_31_XOP_STVX: - if (kvmppc_check_altivec_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.vaddr_accessed &= ~0xFULL; - vcpu->arch.paddr_accessed &= ~0xFULL; - vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_store128_by2x64(run, vcpu, - rs, 1); - break; -#endif /* CONFIG_ALTIVEC */ + /* Hardware enforces alignment of VMX accesses. 
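Because the hardware ignores the low-order address bits for VMX accesses, the emulation below can mask the faulting address down to the element size and derive which element of the 16-byte vector register is touched as (vaddr & 0xf) / size; a full 16-byte lvx/stvx is then split into two 8-byte MMIO copies. A stand-alone sketch of that arithmetic (illustrative only, not kernel code):

#include <stdio.h>

struct vmx_mmio_plan {
	unsigned long vaddr;	/* address masked down to the element size */
	int offset;		/* element index within the 16-byte VR */
	int copy_nums;		/* number of MMIO copies to issue */
	int io_size;		/* bytes moved per copy */
};

/* size is the instruction's access size: 1, 2, 4 or 16 bytes */
static struct vmx_mmio_plan vmx_plan(unsigned long vaddr, int size)
{
	struct vmx_mmio_plan p;

	p.vaddr = vaddr & ~((unsigned long)size - 1);
	p.offset = (p.vaddr & 0xf) / size;
	if (size == 16) {		/* lvx/stvx: two 8-byte copies */
		p.copy_nums = 2;
		p.io_size = 8;
	} else {			/* lvebx/lvehx/lvewx and the stores */
		p.copy_nums = 1;
		p.io_size = size;
	}
	return p;
}

int main(void)
{
	struct vmx_mmio_plan p = vmx_plan(0x10006, 4);	/* e.g. stvewx at ...6 */

	printf("addr=%#lx element=%d copies=%dx%d bytes\n",
	       p.vaddr, p.offset, p.copy_nums, p.io_size);
	return 0;
}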
*/ + vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1); + vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1); + + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, + MSR_VEC); + if (size == 16) { /* stvx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_DWORD; + } else if (size == 4) { /* stvewx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_WORD; + } else if (size == 2) { /* stvehx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_HWORD; + } else if (size == 1) { /* stvebx */ + vcpu->arch.mmio_copy_type = + KVMPPC_VMX_COPY_BYTE; + } else + break; + + vcpu->arch.mmio_vmx_offset = + (vcpu->arch.vaddr_accessed & 0xf)/size; + + if (size == 16) { + vcpu->arch.mmio_vmx_copy_nums = 2; + emulated = kvmppc_handle_vmx_store(run, + vcpu, op.reg, 8, 1); + } else { + vcpu->arch.mmio_vmx_copy_nums = 1; + emulated = kvmppc_handle_vmx_store(run, + vcpu, op.reg, size, 1); + } - default: - emulated = EMULATE_FAIL; break; - } - break; - - case OP_LWZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - -#ifdef CONFIG_PPC_FPU - case OP_STFS: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 4, 1); - break; - - case OP_STFSU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STFD: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 8, 1); - break; - - case OP_STFDU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_store(run, vcpu, - VCPU_FPR(vcpu, rs), - 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; #endif +#ifdef CONFIG_VSX + case STORE_VSX: { + int io_size_each; + + if (op.vsx_flags & VSX_CHECK_VEC) { + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + } else { + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + } + + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, + MSR_VSX); + + if (op.vsx_flags & VSX_FPCONV) + vcpu->arch.mmio_sp64_extend = 1; + + if (op.element_size == 8) + vcpu->arch.mmio_copy_type = + KVMPPC_VSX_COPY_DWORD; + else if (op.element_size == 4) + vcpu->arch.mmio_copy_type = + KVMPPC_VSX_COPY_WORD; + else + break; + + if (size < op.element_size) { + /* precise conversion case, like stxsspx */ + vcpu->arch.mmio_vsx_copy_nums = 1; + io_size_each = size; + } else { /* stxvw4x, stxvd2x, etc */ + vcpu->arch.mmio_vsx_copy_nums = + size/op.element_size; + io_size_each = op.element_size; + } - case OP_LD: - rt = get_rt(inst); - switch (inst & 3) { - case 0: /* ld */ - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - case 1: /* ldu */ - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case 2: /* lwa */ - emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); + emulated = kvmppc_handle_vsx_store(run, vcpu, + op.reg & 0x1f, io_size_each, 1); break; - default: - emulated = EMULATE_FAIL; } - break; - - case OP_LWZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LBZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_LBZU: - emulated = 
kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STW: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - case OP_STD: - rs = get_rs(inst); - switch (inst & 3) { - case 0: /* std */ - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); - break; - case 1: /* stdu */ - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); +#endif + case CACHEOP: + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. + */ + emulated = EMULATE_DONE; break; default: - emulated = EMULATE_FAIL; + break; } - break; - - case OP_STWU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STB: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - break; - - case OP_STBU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_LHZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHA: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STH: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - break; - - case OP_STHU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - -#ifdef CONFIG_PPC_FPU - case OP_LFS: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - break; - - case OP_LFSU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LFD: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - break; - - case OP_LFDU: - if (kvmppc_check_fp_disabled(vcpu)) - return EMULATE_DONE; - emulated = kvmppc_handle_load(run, vcpu, - KVM_MMIO_REG_FPR|rt, 8, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; -#endif - - default: - emulated = EMULATE_FAIL; - break; } if (emulated == EMULATE_FAIL) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3764d000872e..0e8c20c5eaac 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -648,9 +648,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_CAP_PPC_HTM: - r = hv_enabled && - (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); + r = !!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || + (hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); break; #endif default: @@ -907,6 
+906,26 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu, } } +static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu, + u32 gpr) +{ + union kvmppc_one_reg val; + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (vcpu->arch.mmio_vsx_tx_sx_enabled) { + val.vsx32val[0] = gpr; + val.vsx32val[1] = gpr; + val.vsx32val[2] = gpr; + val.vsx32val[3] = gpr; + VCPU_VSX_VR(vcpu, index) = val.vval; + } else { + val.vsx32val[0] = gpr; + val.vsx32val[1] = gpr; + VCPU_VSX_FPR(vcpu, index, 0) = val.vsxval[0]; + VCPU_VSX_FPR(vcpu, index, 1) = val.vsxval[0]; + } +} + static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, u32 gpr32) { @@ -933,30 +952,110 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, #endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC +static inline int kvmppc_get_vmx_offset_generic(struct kvm_vcpu *vcpu, + int index, int element_size) +{ + int offset; + int elts = sizeof(vector128)/element_size; + + if ((index < 0) || (index >= elts)) + return -1; + + if (kvmppc_need_byteswap(vcpu)) + offset = elts - index - 1; + else + offset = index; + + return offset; +} + +static inline int kvmppc_get_vmx_dword_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 8); +} + +static inline int kvmppc_get_vmx_word_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 4); +} + +static inline int kvmppc_get_vmx_hword_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 2); +} + +static inline int kvmppc_get_vmx_byte_offset(struct kvm_vcpu *vcpu, + int index) +{ + return kvmppc_get_vmx_offset_generic(vcpu, index, 1); +} + + static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu, - u64 gpr) + u64 gpr) { + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_dword_offset(vcpu, + vcpu->arch.mmio_vmx_offset); int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; - u32 hi, lo; - u32 di; -#ifdef __BIG_ENDIAN - hi = gpr >> 32; - lo = gpr & 0xffffffff; -#else - lo = gpr >> 32; - hi = gpr & 0xffffffff; -#endif + if (offset == -1) + return; + + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsxval[offset] = gpr; + VCPU_VSX_VR(vcpu, index) = val.vval; +} + +static inline void kvmppc_set_vmx_word(struct kvm_vcpu *vcpu, + u32 gpr32) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_word_offset(vcpu, + vcpu->arch.mmio_vmx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; - di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */ - if (di > 1) + if (offset == -1) return; - if (vcpu->arch.mmio_host_swabbed) - di = 1 - di; + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsx32val[offset] = gpr32; + VCPU_VSX_VR(vcpu, index) = val.vval; +} + +static inline void kvmppc_set_vmx_hword(struct kvm_vcpu *vcpu, + u16 gpr16) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_hword_offset(vcpu, + vcpu->arch.mmio_vmx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (offset == -1) + return; + + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsx16val[offset] = gpr16; + VCPU_VSX_VR(vcpu, index) = val.vval; +} + +static inline void kvmppc_set_vmx_byte(struct kvm_vcpu *vcpu, + u8 gpr8) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vmx_byte_offset(vcpu, + vcpu->arch.mmio_vmx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (offset == -1) + return; - VCPU_VSX_VR(vcpu, index).u[di * 2] = hi; - VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo; + val.vval = VCPU_VSX_VR(vcpu, 
index); + val.vsx8val[offset] = gpr8; + VCPU_VSX_VR(vcpu, index) = val.vval; } #endif /* CONFIG_ALTIVEC */ @@ -1041,6 +1140,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); break; case KVM_MMIO_REG_FPR: + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_FP); + VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; break; #ifdef CONFIG_PPC_BOOK3S @@ -1054,18 +1156,36 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, #endif #ifdef CONFIG_VSX case KVM_MMIO_REG_VSX: - if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD) + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VSX); + + if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_DWORD) kvmppc_set_vsr_dword(vcpu, gpr); - else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD) + else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_WORD) kvmppc_set_vsr_word(vcpu, gpr); - else if (vcpu->arch.mmio_vsx_copy_type == + else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_DWORD_LOAD_DUMP) kvmppc_set_vsr_dword_dump(vcpu, gpr); + else if (vcpu->arch.mmio_copy_type == + KVMPPC_VSX_COPY_WORD_LOAD_DUMP) + kvmppc_set_vsr_word_dump(vcpu, gpr); break; #endif #ifdef CONFIG_ALTIVEC case KVM_MMIO_REG_VMX: - kvmppc_set_vmx_dword(vcpu, gpr); + if (vcpu->kvm->arch.kvm_ops->giveup_ext) + vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VEC); + + if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_DWORD) + kvmppc_set_vmx_dword(vcpu, gpr); + else if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_WORD) + kvmppc_set_vmx_word(vcpu, gpr); + else if (vcpu->arch.mmio_copy_type == + KVMPPC_VMX_COPY_HWORD) + kvmppc_set_vmx_hword(vcpu, gpr); + else if (vcpu->arch.mmio_copy_type == + KVMPPC_VMX_COPY_BYTE) + kvmppc_set_vmx_byte(vcpu, gpr); break; #endif default: @@ -1228,7 +1348,7 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) u32 dword_offset, word_offset; union kvmppc_one_reg reg; int vsx_offset = 0; - int copy_type = vcpu->arch.mmio_vsx_copy_type; + int copy_type = vcpu->arch.mmio_copy_type; int result = 0; switch (copy_type) { @@ -1344,14 +1464,16 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, #endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC -/* handle quadword load access in two halves */ -int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rt, int is_default_endian) +int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_default_endian) { enum emulation_result emulated = EMULATE_DONE; + if (vcpu->arch.mmio_vsx_copy_nums > 2) + return EMULATE_FAIL; + while (vcpu->arch.mmio_vmx_copy_nums) { - emulated = __kvmppc_handle_load(run, vcpu, rt, 8, + emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0); if (emulated != EMULATE_DONE) @@ -1359,55 +1481,127 @@ int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.paddr_accessed += run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; + vcpu->arch.mmio_vmx_offset++; } return emulated; } -static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val) +int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { - vector128 vrs = VCPU_VSX_VR(vcpu, rs); - u32 di; - u64 w0, w1; + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; - di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */ - if (di > 1) + vmx_offset = + 
kvmppc_get_vmx_dword_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) return -1; - if (vcpu->arch.mmio_host_swabbed) - di = 1 - di; + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsxval[vmx_offset]; - w0 = vrs.u[di * 2]; - w1 = vrs.u[di * 2 + 1]; + return result; +} -#ifdef __BIG_ENDIAN - *val = (w0 << 32) | w1; -#else - *val = (w1 << 32) | w0; -#endif - return 0; +int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +{ + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; + + vmx_offset = + kvmppc_get_vmx_word_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) + return -1; + + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsx32val[vmx_offset]; + + return result; +} + +int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +{ + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; + + vmx_offset = + kvmppc_get_vmx_hword_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) + return -1; + + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsx16val[vmx_offset]; + + return result; } -/* handle quadword store in two halves */ -int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rs, int is_default_endian) +int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +{ + union kvmppc_one_reg reg; + int vmx_offset = 0; + int result = 0; + + vmx_offset = + kvmppc_get_vmx_byte_offset(vcpu, vcpu->arch.mmio_vmx_offset); + + if (vmx_offset == -1) + return -1; + + reg.vval = VCPU_VSX_VR(vcpu, index); + *val = reg.vsx8val[vmx_offset]; + + return result; +} + +int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rs, unsigned int bytes, int is_default_endian) { u64 val = 0; + unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; + if (vcpu->arch.mmio_vsx_copy_nums > 2) + return EMULATE_FAIL; + vcpu->arch.io_gpr = rs; while (vcpu->arch.mmio_vmx_copy_nums) { - if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1) + switch (vcpu->arch.mmio_copy_type) { + case KVMPPC_VMX_COPY_DWORD: + if (kvmppc_get_vmx_dword(vcpu, index, &val) == -1) + return EMULATE_FAIL; + + break; + case KVMPPC_VMX_COPY_WORD: + if (kvmppc_get_vmx_word(vcpu, index, &val) == -1) + return EMULATE_FAIL; + break; + case KVMPPC_VMX_COPY_HWORD: + if (kvmppc_get_vmx_hword(vcpu, index, &val) == -1) + return EMULATE_FAIL; + break; + case KVMPPC_VMX_COPY_BYTE: + if (kvmppc_get_vmx_byte(vcpu, index, &val) == -1) + return EMULATE_FAIL; + break; + default: return EMULATE_FAIL; + } - emulated = kvmppc_handle_store(run, vcpu, val, 8, + emulated = kvmppc_handle_store(run, vcpu, val, bytes, is_default_endian); if (emulated != EMULATE_DONE) break; vcpu->arch.paddr_accessed += run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; + vcpu->arch.mmio_vmx_offset++; } return emulated; @@ -1422,11 +1616,11 @@ static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu, vcpu->arch.paddr_accessed += run->mmio.len; if (!vcpu->mmio_is_write) { - emulated = kvmppc_handle_load128_by2x64(run, vcpu, - vcpu->arch.io_gpr, 1); + emulated = kvmppc_handle_vmx_load(run, vcpu, + vcpu->arch.io_gpr, run->mmio.len, 1); } else { - emulated = kvmppc_handle_store128_by2x64(run, vcpu, - vcpu->arch.io_gpr, 1); + emulated = kvmppc_handle_vmx_store(run, vcpu, + vcpu->arch.io_gpr, run->mmio.len, 1); } switch (emulated) { @@ -1570,8 +1764,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) } #endif #ifdef CONFIG_ALTIVEC - if 
(vcpu->arch.mmio_vmx_copy_nums > 0) + if (vcpu->arch.mmio_vmx_copy_nums > 0) { vcpu->arch.mmio_vmx_copy_nums--; + vcpu->arch.mmio_vmx_offset++; + } if (vcpu->arch.mmio_vmx_copy_nums > 0) { r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run); @@ -1784,16 +1980,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void __user *argp = (void __user *)arg; long r; - vcpu_load(vcpu); - switch (ioctl) { case KVM_ENABLE_CAP: { struct kvm_enable_cap cap; r = -EFAULT; + vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + vcpu_put(vcpu); break; } @@ -1815,9 +2011,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; + vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); + vcpu_put(vcpu); break; } #endif @@ -1826,7 +2024,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } out: - vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S new file mode 100644 index 000000000000..90e330f21356 --- /dev/null +++ b/arch/powerpc/kvm/tm.S @@ -0,0 +1,384 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Derived from book3s_hv_rmhandlers.S, which is: + * + * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + */ + +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/export.h> +#include <asm/tm.h> +#include <asm/cputable.h> + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) + +/* + * Save transactional state and TM-related registers. + * Called with: + * - r3 pointing to the vcpu struct + * - r4 points to the MSR with current TS bits: + * (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR). + * This can modify all checkpointed registers, but + * restores r1, r2 before exit. + */ +_GLOBAL(__kvmppc_save_tm) + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + /* Turn on TM. */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + ori r8, r8, MSR_FP + oris r8, r8, (MSR_VEC | MSR_VSX)@h + mtmsrd r8 + + rldicl. r4, r4, 64 - MSR_TS_S_LG, 62 + beq 1f /* TM not active in guest. */ + + std r1, HSTATE_SCRATCH2(r13) + std r3, HSTATE_SCRATCH1(r13) + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +BEGIN_FTR_SECTION + /* Emulation of the treclaim instruction needs TEXASR before treclaim */ + mfspr r6, SPRN_TEXASR + std r6, VCPU_ORIG_TEXASR(r3) +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) +#endif + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + li r3, TM_CAUSE_KVM_RESCHED + + /* All GPRs are volatile at this point. */ + TRECLAIM(R3) + + /* Temporarily store r13 and r9 so we have some regs to play with */ + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9, PACATMSCRATCH(r13) + ld r9, HSTATE_SCRATCH1(r13) + + /* Get a few more GPRs free. */ + std r29, VCPU_GPRS_TM(29)(r9) + std r30, VCPU_GPRS_TM(30)(r9) + std r31, VCPU_GPRS_TM(31)(r9) + + /* Save away PPR and DSCR soon so don't run with user values. 
*/ + mfspr r31, SPRN_PPR + HMT_MEDIUM + mfspr r30, SPRN_DSCR +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 +#endif + + /* Save all but r9, r13 & r29-r31 */ + reg = 0 + .rept 29 + .if (reg != 9) && (reg != 13) + std reg, VCPU_GPRS_TM(reg)(r9) + .endif + reg = reg + 1 + .endr + /* ... now save r13 */ + GET_SCRATCH0(r4) + std r4, VCPU_GPRS_TM(13)(r9) + /* ... and save r9 */ + ld r4, PACATMSCRATCH(r13) + std r4, VCPU_GPRS_TM(9)(r9) + + /* Reload stack pointer and TOC. */ + ld r1, HSTATE_SCRATCH2(r13) + ld r2, PACATOC(r13) + + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + + /* Save away checkpinted SPRs. */ + std r31, VCPU_PPR_TM(r9) + std r30, VCPU_DSCR_TM(r9) + mflr r5 + mfcr r6 + mfctr r7 + mfspr r8, SPRN_AMR + mfspr r10, SPRN_TAR + mfxer r11 + std r5, VCPU_LR_TM(r9) + stw r6, VCPU_CR_TM(r9) + std r7, VCPU_CTR_TM(r9) + std r8, VCPU_AMR_TM(r9) + std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) + + /* Restore r12 as trap number. */ + lwz r12, VCPU_TRAP(r9) + + /* Save FP/VSX. */ + addi r3, r9, VCPU_FPRS_TM + bl store_fp_state + addi r3, r9, VCPU_VRS_TM + bl store_vr_state + mfspr r6, SPRN_VRSAVE + stw r6, VCPU_VRSAVE_TM(r9) +1: + /* + * We need to save these SPRs after the treclaim so that the software + * error code is recorded correctly in the TEXASR. Also the user may + * change these outside of a transaction, so they must always be + * context switched. + */ + mfspr r7, SPRN_TEXASR + std r7, VCPU_TEXASR(r9) +11: + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr + +/* + * _kvmppc_save_tm_pr() is a wrapper around __kvmppc_save_tm(), so that it can + * be invoked from C function by PR KVM only. + */ +_GLOBAL(_kvmppc_save_tm_pr) + mflr r5 + std r5, PPC_LR_STKOFF(r1) + stdu r1, -SWITCH_FRAME_SIZE(r1) + SAVE_NVGPRS(r1) + + /* save MSR since TM/math bits might be impacted + * by __kvmppc_save_tm(). + */ + mfmsr r5 + SAVE_GPR(5, r1) + + /* also save DSCR/CR/TAR so that it can be recovered later */ + mfspr r6, SPRN_DSCR + SAVE_GPR(6, r1) + + mfcr r7 + stw r7, _CCR(r1) + + mfspr r8, SPRN_TAR + SAVE_GPR(8, r1) + + bl __kvmppc_save_tm + + REST_GPR(8, r1) + mtspr SPRN_TAR, r8 + + ld r7, _CCR(r1) + mtcr r7 + + REST_GPR(6, r1) + mtspr SPRN_DSCR, r6 + + /* need preserve current MSR's MSR_TS bits */ + REST_GPR(5, r1) + mfmsr r6 + rldicl r6, r6, 64 - MSR_TS_S_LG, 62 + rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG + mtmsrd r5 + + REST_NVGPRS(r1) + addi r1, r1, SWITCH_FRAME_SIZE + ld r5, PPC_LR_STKOFF(r1) + mtlr r5 + blr + +EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr); + +/* + * Restore transactional state and TM-related registers. + * Called with: + * - r3 pointing to the vcpu struct. + * - r4 is the guest MSR with desired TS bits: + * For HV KVM, it is VCPU_MSR + * For PR KVM, it is provided by caller + * This potentially modifies all checkpointed registers. + * It restores r1, r2 from the PACA. + */ +_GLOBAL(__kvmppc_restore_tm) + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + /* Turn on TM/FP/VSX/VMX so we can restore them. */ + mfmsr r5 + li r6, MSR_TM >> 32 + sldi r6, r6, 32 + or r5, r5, r6 + ori r5, r5, MSR_FP + oris r5, r5, (MSR_VEC | MSR_VSX)@h + mtmsrd r5 + + /* + * The user may change these outside of a transaction, so they must + * always be context switched. + */ + ld r5, VCPU_TFHAR(r3) + ld r6, VCPU_TFIAR(r3) + ld r7, VCPU_TEXASR(r3) + mtspr SPRN_TFHAR, r5 + mtspr SPRN_TFIAR, r6 + mtspr SPRN_TEXASR, r7 + + mr r5, r4 + rldicl. 
r5, r5, 64 - MSR_TS_S_LG, 62 + beqlr /* TM not active in guest */ + std r1, HSTATE_SCRATCH2(r13) + + /* Make sure the failure summary is set, otherwise we'll program check + * when we trechkpt. It's possible that this might have been not set + * on a kvmppc_set_one_reg() call but we shouldn't let this crash the + * host. + */ + oris r7, r7, (TEXASR_FS)@h + mtspr SPRN_TEXASR, r7 + + /* + * We need to load up the checkpointed state for the guest. + * We need to do this early as it will blow away any GPRs, VSRs and + * some SPRs. + */ + + mr r31, r3 + addi r3, r31, VCPU_FPRS_TM + bl load_fp_state + addi r3, r31, VCPU_VRS_TM + bl load_vr_state + mr r3, r31 + lwz r7, VCPU_VRSAVE_TM(r3) + mtspr SPRN_VRSAVE, r7 + + ld r5, VCPU_LR_TM(r3) + lwz r6, VCPU_CR_TM(r3) + ld r7, VCPU_CTR_TM(r3) + ld r8, VCPU_AMR_TM(r3) + ld r9, VCPU_TAR_TM(r3) + ld r10, VCPU_XER_TM(r3) + mtlr r5 + mtcr r6 + mtctr r7 + mtspr SPRN_AMR, r8 + mtspr SPRN_TAR, r9 + mtxer r10 + + /* + * Load up PPR and DSCR values but don't put them in the actual SPRs + * till the last moment to avoid running with userspace PPR and DSCR for + * too long. + */ + ld r29, VCPU_DSCR_TM(r3) + ld r30, VCPU_PPR_TM(r3) + + std r2, PACATMSCRATCH(r13) /* Save TOC */ + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* Load GPRs r0-r28 */ + reg = 0 + .rept 29 + ld reg, VCPU_GPRS_TM(reg)(r31) + reg = reg + 1 + .endr + + mtspr SPRN_DSCR, r29 + mtspr SPRN_PPR, r30 + + /* Load final GPRs */ + ld 29, VCPU_GPRS_TM(29)(r31) + ld 30, VCPU_GPRS_TM(30)(r31) + ld 31, VCPU_GPRS_TM(31)(r31) + + /* TM checkpointed state is now setup. All GPRs are now volatile. */ + TRECHKPT + + /* Now let's get back the state we need. */ + HMT_MEDIUM + GET_PACA(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 +#endif + ld r1, HSTATE_SCRATCH2(r13) + ld r2, PACATMSCRATCH(r13) + + /* Set the MSR RI since we have our registers back. */ + li r5, MSR_RI + mtmsrd r5, 1 + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr + +/* + * _kvmppc_restore_tm_pr() is a wrapper around __kvmppc_restore_tm(), so that it + * can be invoked from C function by PR KVM only. 
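Like _kvmppc_save_tm_pr above, the wrapper that follows saves the caller's MSR on entry and, when restoring it, splices in the MSR_TS bits that are live after __kvmppc_restore_tm has run, since the low-level routine may change the transaction state. In C the rldicl/rldimi pair amounts to roughly the following (assuming the usual MSR_TS_S_LG = 33 and MSR_TS_T_LG = 34 bit positions; illustrative only):

#include <stdint.h>
#include <stdio.h>

#define MSR_TS_S_LG	33	/* transaction suspended */
#define MSR_TS_T_LG	34	/* transaction active */
#define MSR_TS_MASK	(3ULL << MSR_TS_S_LG)

/* Keep everything from the saved MSR except the TS field, which is
 * taken from the MSR that is live right now (the rldicl/rldimi pair). */
static uint64_t splice_ts(uint64_t saved_msr, uint64_t current_msr)
{
	uint64_t ts = (current_msr >> MSR_TS_S_LG) & 3;

	return (saved_msr & ~MSR_TS_MASK) | (ts << MSR_TS_S_LG);
}

int main(void)
{
	uint64_t saved = 0x8000000000001033ULL;		/* some entry-time MSR */
	uint64_t now = saved | (1ULL << MSR_TS_S_LG);	/* TS = suspended */

	printf("MSR to restore: %#llx\n",
	       (unsigned long long)splice_ts(saved, now));
	return 0;
}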
+ */ +_GLOBAL(_kvmppc_restore_tm_pr) + mflr r5 + std r5, PPC_LR_STKOFF(r1) + stdu r1, -SWITCH_FRAME_SIZE(r1) + SAVE_NVGPRS(r1) + + /* save MSR to avoid TM/math bits change */ + mfmsr r5 + SAVE_GPR(5, r1) + + /* also save DSCR/CR/TAR so that it can be recovered later */ + mfspr r6, SPRN_DSCR + SAVE_GPR(6, r1) + + mfcr r7 + stw r7, _CCR(r1) + + mfspr r8, SPRN_TAR + SAVE_GPR(8, r1) + + bl __kvmppc_restore_tm + + REST_GPR(8, r1) + mtspr SPRN_TAR, r8 + + ld r7, _CCR(r1) + mtcr r7 + + REST_GPR(6, r1) + mtspr SPRN_DSCR, r6 + + /* need preserve current MSR's MSR_TS bits */ + REST_GPR(5, r1) + mfmsr r6 + rldicl r6, r6, 64 - MSR_TS_S_LG, 62 + rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG + mtmsrd r5 + + REST_NVGPRS(r1) + addi r1, r1, SWITCH_FRAME_SIZE + ld r5, PPC_LR_STKOFF(r1) + mtlr r5 + blr + +EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr); +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 42e581a268e1..f12680c9b947 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -32,6 +32,7 @@ config RISCV select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS select HAVE_GENERIC_DMA_COHERENT + select HAVE_PERF_EVENTS select IRQ_DOMAIN select NO_BOOTMEM select RISCV_ISA_A if SMP @@ -193,6 +194,19 @@ config RISCV_ISA_C config RISCV_ISA_A def_bool y +menu "supported PMU type" + depends on PERF_EVENTS + +config RISCV_BASE_PMU + bool "Base Performance Monitoring Unit" + def_bool y + help + A base PMU that serves as a reference implementation and has limited + feature of perf. It can run on any RISC-V machines so serves as the + fallback, but this option can also be disable to reduce kernel size. + +endmenu + endmenu menu "Kernel type" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 76e958a5414a..6d4a5f6c3f4f 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -71,6 +71,9 @@ KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) # architectures. It's faster to have GCC emit only aligned accesses. 
KBUILD_CFLAGS += $(call cc-option,-mstrict-align) +# arch specific predefines for sparse +CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) + head-y := arch/riscv/kernel/head.o core-y += arch/riscv/kernel/ arch/riscv/mm/ diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index bca0eee733b0..07326466871b 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -44,6 +44,7 @@ CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_HVC_RISCV_SBI=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 4286a5f83876..576ffdca06ba 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h +generic-y += local64.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += module.h diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index efd89a88d2d0..8f13074413a7 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -47,7 +47,7 @@ static inline void flush_dcache_page(struct page *page) #else /* CONFIG_SMP */ -#define flush_icache_all() sbi_remote_fence_i(0) +#define flush_icache_all() sbi_remote_fence_i(NULL) void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h new file mode 100644 index 000000000000..0e638a0c3feb --- /dev/null +++ b/arch/riscv/include/asm/perf_event.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 SiFive + * Copyright (C) 2018 Andes Technology Corporation + * + */ + +#ifndef _ASM_RISCV_PERF_EVENT_H +#define _ASM_RISCV_PERF_EVENT_H + +#include <linux/perf_event.h> +#include <linux/ptrace.h> + +#define RISCV_BASE_COUNTERS 2 + +/* + * The RISCV_MAX_COUNTERS parameter should be specified. + */ + +#ifdef CONFIG_RISCV_BASE_PMU +#define RISCV_MAX_COUNTERS 2 +#endif + +#ifndef RISCV_MAX_COUNTERS +#error "Please provide a valid RISCV_MAX_COUNTERS for the PMU." +#endif + +/* + * These are the indexes of bits in counteren register *minus* 1, + * except for cycle. It would be coherent if it can directly mapped + * to counteren bit definition, but there is a *time* register at + * counteren[1]. Per-cpu structure is scarce resource here. + * + * According to the spec, an implementation can support counter up to + * mhpmcounter31, but many high-end processors has at most 6 general + * PMCs, we give the definition to MHPMCOUNTER8 here. 
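Since counteren[1] is taken by the time CSR, the event indexes defined below are offset from the counteren bit positions: cycle maps to bit 0, while every other index maps to bit index + 1. A small illustration of that mapping (the helper name is made up for this note):

#include <stdio.h>

#define RISCV_PMU_CYCLE		0
#define RISCV_PMU_INSTRET	1
#define RISCV_PMU_MHPMCOUNTER3	2

/* counteren bits: 0 = CY (cycle), 1 = TM (time), 2 = IR (instret),
 * 3..31 = hpmcounter3..31, so index -> bit is +1 except for cycle. */
static int counteren_bit(int idx)
{
	return idx == RISCV_PMU_CYCLE ? 0 : idx + 1;
}

int main(void)
{
	printf("cycle -> bit %d, instret -> bit %d, hpmcounter3 -> bit %d\n",
	       counteren_bit(RISCV_PMU_CYCLE),
	       counteren_bit(RISCV_PMU_INSTRET),
	       counteren_bit(RISCV_PMU_MHPMCOUNTER3));
	return 0;
}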
+ */ +#define RISCV_PMU_CYCLE 0 +#define RISCV_PMU_INSTRET 1 +#define RISCV_PMU_MHPMCOUNTER3 2 +#define RISCV_PMU_MHPMCOUNTER4 3 +#define RISCV_PMU_MHPMCOUNTER5 4 +#define RISCV_PMU_MHPMCOUNTER6 5 +#define RISCV_PMU_MHPMCOUNTER7 6 +#define RISCV_PMU_MHPMCOUNTER8 7 + +#define RISCV_OP_UNSUPP (-EOPNOTSUPP) + +struct cpu_hw_events { + /* # currently enabled events*/ + int n_events; + /* currently enabled events */ + struct perf_event *events[RISCV_MAX_COUNTERS]; + /* vendor-defined PMU data */ + void *platform; +}; + +struct riscv_pmu { + struct pmu *pmu; + + /* generic hw/cache events table */ + const int *hw_events; + const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + /* method used to map hw/cache events */ + int (*map_hw_event)(u64 config); + int (*map_cache_event)(u64 config); + + /* max generic hw events in map */ + int max_events; + /* number total counters, 2(base) + x(general) */ + int num_counters; + /* the width of the counter */ + int counter_width; + + /* vendor-defined PMU features */ + void *platform; + + irqreturn_t (*handle_irq)(int irq_num, void *dev); + int irq; +}; + +#endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 7b209aec355d..85c2d8bae957 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -49,7 +49,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, #include <asm/sbi.h> -#define flush_tlb_all() sbi_remote_sfence_vma(0, 0, -1) +#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) #define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) #define flush_tlb_range(vma, start, end) \ sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 14b0b22fb578..473cfc84e412 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -392,19 +392,21 @@ do { \ }) -extern unsigned long __must_check __copy_user(void __user *to, +extern unsigned long __must_check __asm_copy_to_user(void __user *to, + const void *from, unsigned long n); +extern unsigned long __must_check __asm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - return __copy_user(to, from, n); + return __asm_copy_to_user(to, from, n); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - return __copy_user(to, from, n); + return __asm_copy_from_user(to, from, n); } extern long strncpy_from_user(char *dest, const char __user *src, long count); diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 8586dd96c2f0..e1274fc03af4 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -39,4 +39,6 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o + clean: diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index ce9bdc57a2a1..5721624886a1 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -126,5 +126,5 @@ do_trace: RESTORE_ABI_STATE ret ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) #endif +EXPORT_SYMBOL(_mcount) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 5dddba301d0a..1d5e9b934b8c 100644 --- 
a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -17,6 +17,17 @@ #include <linux/errno.h> #include <linux/moduleloader.h> +static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) +{ + if (v != (u32)v) { + pr_err("%s: value %016llx out of range for 32-bit field\n", + me->name, v); + return -EINVAL; + } + *location = v; + return 0; +} + static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) { *(u64 *)location = v; @@ -265,6 +276,7 @@ static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, Elf_Addr v) = { + [R_RISCV_32] = apply_r_riscv_32_rela, [R_RISCV_64] = apply_r_riscv_64_rela, [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, [R_RISCV_JAL] = apply_r_riscv_jal_rela, diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c new file mode 100644 index 000000000000..b0e10c4e9f77 --- /dev/null +++ b/arch/riscv/kernel/perf_event.c @@ -0,0 +1,485 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> + * Copyright (C) 2009 Google, Inc., Stephane Eranian + * Copyright 2014 Tilera Corporation. All Rights Reserved. + * Copyright (C) 2018 Andes Technology Corporation + * + * Perf_events support for RISC-V platforms. + * + * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough + * functionality for perf event to fully work, this file provides + * the very basic framework only. + * + * For platform portings, please check Documentations/riscv/pmu.txt. + * + * The Copyright line includes x86 and tile ones. 
+ */ + +#include <linux/kprobes.h> +#include <linux/kernel.h> +#include <linux/kdebug.h> +#include <linux/mutex.h> +#include <linux/bitmap.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/perf_event.h> +#include <linux/atomic.h> +#include <linux/of.h> +#include <asm/perf_event.h> + +static const struct riscv_pmu *riscv_pmu __read_mostly; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +/* + * Hardware & cache maps and their methods + */ + +static const int riscv_hw_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE, + [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET, + [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP, + [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP, + [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP, + [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x +static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +[PERF_COUNT_HW_CACHE_OP_MAX] +[PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, + [C(RESULT_MISS)] = RISCV_OP_UNSUPP, + }, + }, +}; + +static int riscv_map_hw_event(u64 config) +{ + if (config >= riscv_pmu->max_events) + return -EINVAL; + + return riscv_pmu->hw_events[config]; +} + +int riscv_map_cache_decode(u64 config, unsigned int *type, + unsigned int *op, unsigned int *result) +{ + return -ENOENT; +} + +static int riscv_map_cache_event(u64 config) +{ + unsigned int type, op, result; + int err = -ENOENT; + int code; + + err = riscv_map_cache_decode(config, &type, &op, &result); + if (!riscv_pmu->cache_events || err) + 
return err; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + code = (*riscv_pmu->cache_events)[type][op][result]; + if (code == RISCV_OP_UNSUPP) + return -EINVAL; + + return code; +} + +/* + * Low-level functions: reading/writing counters + */ + +static inline u64 read_counter(int idx) +{ + u64 val = 0; + + switch (idx) { + case RISCV_PMU_CYCLE: + val = csr_read(cycle); + break; + case RISCV_PMU_INSTRET: + val = csr_read(instret); + break; + default: + WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS); + return -EINVAL; + } + + return val; +} + +static inline void write_counter(int idx, u64 value) +{ + /* currently not supported */ + WARN_ON_ONCE(1); +} + +/* + * pmu->read: read and update the counter + * + * Other architectures' implementation often have a xxx_perf_event_update + * routine, which can return counter values when called in the IRQ, but + * return void when being called by the pmu->read method. + */ +static void riscv_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + u64 oldval; + int idx = hwc->idx; + u64 delta; + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = read_counter(idx); + + oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count); + } while (oldval != prev_raw_count); + + /* + * delta is the value to update the counter we maintain in the kernel. + */ + delta = (new_raw_count - prev_raw_count) & + ((1ULL << riscv_pmu->counter_width) - 1); + local64_add(delta, &event->count); + /* + * Something like local64_sub(delta, &hwc->period_left) here is + * needed if there is an interrupt for perf. + */ +} + +/* + * State transition functions: + * + * stop()/start() & add()/del() + */ + +/* + * pmu->stop: stop the counter + */ +static void riscv_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + riscv_pmu->pmu->read(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +/* + * pmu->start: start the event. + */ +static void riscv_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + /* + * Set the counter to the period to the next interrupt here, + * if you have any. + */ + } + + hwc->state = 0; + perf_event_update_userpage(event); + + /* + * Since we cannot write to counters, this serves as an initialization + * to the delta-mechanism in pmu->read(); otherwise, the delta would be + * wrong when pmu->read is called for the first time. + */ + local64_set(&hwc->prev_count, read_counter(hwc->idx)); +} + +/* + * pmu->add: add the event to PMU. + */ +static int riscv_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (cpuc->n_events == riscv_pmu->num_counters) + return -ENOSPC; + + /* + * We don't have general conunters, so no binding-event-to-counter + * process here. + * + * Indexing using hwc->config generally not works, since config may + * contain extra information, but here the only info we have in + * hwc->config is the event index. 
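One detail of riscv_pmu_read() above that is easy to miss: the delta is computed modulo 2^counter_width, so a counter that wraps between two raw reads still produces the correct positive increment. A stand-alone rendering of that arithmetic with made-up values:

#include <stdint.h>
#include <stdio.h>

/* Same masking as riscv_pmu_read(): the difference of two raw reads is
 * taken modulo 2^width, so a wrap between reads still yields a positive
 * delta. */
static uint64_t counter_delta(uint64_t prev, uint64_t now, int width)
{
	return (now - prev) & ((1ULL << width) - 1);
}

int main(void)
{
	int width = 63;				/* counter_width of the base PMU */
	uint64_t prev = (1ULL << width) - 5;	/* just below the wrap point */
	uint64_t now = 10;			/* counter wrapped in between */

	printf("delta = %llu\n",		/* prints 15 */
	       (unsigned long long)counter_delta(prev, now, width));
	return 0;
}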
+ */ + hwc->idx = hwc->config; + cpuc->events[hwc->idx] = event; + cpuc->n_events++; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + riscv_pmu->pmu->start(event, PERF_EF_RELOAD); + + return 0; +} + +/* + * pmu->del: delete the event from PMU. + */ +static void riscv_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + cpuc->events[hwc->idx] = NULL; + cpuc->n_events--; + riscv_pmu->pmu->stop(event, PERF_EF_UPDATE); + perf_event_update_userpage(event); +} + +/* + * Interrupt: a skeletion for reference. + */ + +static DEFINE_MUTEX(pmc_reserve_mutex); + +irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) +{ + return IRQ_NONE; +} + +static int reserve_pmc_hardware(void) +{ + int err = 0; + + mutex_lock(&pmc_reserve_mutex); + if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) { + err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq, + IRQF_PERCPU, "riscv-base-perf", NULL); + } + mutex_unlock(&pmc_reserve_mutex); + + return err; +} + +void release_pmc_hardware(void) +{ + mutex_lock(&pmc_reserve_mutex); + if (riscv_pmu->irq >= 0) + free_irq(riscv_pmu->irq, NULL); + mutex_unlock(&pmc_reserve_mutex); +} + +/* + * Event Initialization/Finalization + */ + +static atomic_t riscv_active_events = ATOMIC_INIT(0); + +static void riscv_event_destroy(struct perf_event *event) +{ + if (atomic_dec_return(&riscv_active_events) == 0) + release_pmc_hardware(); +} + +static int riscv_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int err; + int code; + + if (atomic_inc_return(&riscv_active_events) == 1) { + err = reserve_pmc_hardware(); + + if (err) { + pr_warn("PMC hardware not available\n"); + atomic_dec(&riscv_active_events); + return -EBUSY; + } + } + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + code = riscv_pmu->map_hw_event(attr->config); + break; + case PERF_TYPE_HW_CACHE: + code = riscv_pmu->map_cache_event(attr->config); + break; + case PERF_TYPE_RAW: + return -EOPNOTSUPP; + default: + return -ENOENT; + } + + event->destroy = riscv_event_destroy; + if (code < 0) { + event->destroy(event); + return code; + } + + /* + * idx is set to -1 because the index of a general event should not be + * decided until binding to some counter in pmu->add(). + * + * But since we don't have such support, later in pmu->add(), we just + * use hwc->config as the index instead. + */ + hwc->config = code; + hwc->idx = -1; + + return 0; +} + +/* + * Initialization + */ + +static struct pmu min_pmu = { + .name = "riscv-base", + .event_init = riscv_event_init, + .add = riscv_pmu_add, + .del = riscv_pmu_del, + .start = riscv_pmu_start, + .stop = riscv_pmu_stop, + .read = riscv_pmu_read, +}; + +static const struct riscv_pmu riscv_base_pmu = { + .pmu = &min_pmu, + .max_events = ARRAY_SIZE(riscv_hw_event_map), + .map_hw_event = riscv_map_hw_event, + .hw_events = riscv_hw_event_map, + .map_cache_event = riscv_map_cache_event, + .cache_events = &riscv_cache_event_map, + .counter_width = 63, + .num_counters = RISCV_BASE_COUNTERS + 0, + .handle_irq = &riscv_base_pmu_handle_irq, + + /* This means this PMU has no IRQ. 
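With no interrupt there is no overflow handling, so the base PMU supports counting but not sampling, and only the cycle and instruction events are wired up (everything else in the maps is RISCV_OP_UNSUPP). A counting event can still be exercised end to end through the regular perf syscall; a minimal, hedged userspace test (not part of this patch, error handling trimmed, assumes a kernel built with this PMU support):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* or ..._INSTRUCTIONS */
	attr.disabled = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* self */,
		     -1 /* any cpu */, -1 /* no group */, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	for (volatile int i = 0; i < 1000000; i++)
		;				/* something to count */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}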
*/ + .irq = -1, +}; + +static const struct of_device_id riscv_pmu_of_ids[] = { + {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu}, + { /* sentinel value */ } +}; + +int __init init_hw_perf_events(void) +{ + struct device_node *node = of_find_node_by_type(NULL, "pmu"); + const struct of_device_id *of_id; + + riscv_pmu = &riscv_base_pmu; + + if (node) { + of_id = of_match_node(riscv_pmu_of_ids, node); + + if (of_id) + riscv_pmu = of_id->data; + } + + perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW); + return 0; +} +arch_initcall(init_hw_perf_events); diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 551734248748..f247d6d2137c 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -13,6 +13,7 @@ * Assembly functions that may be used (directly or indirectly) by modules */ EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(__asm_copy_to_user); +EXPORT_SYMBOL(__asm_copy_from_user); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index b99d9dd21fd0..81a1952015a6 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -148,7 +148,7 @@ int is_valid_bugaddr(unsigned long pc) if (pc < PAGE_OFFSET) return 0; - if (probe_kernel_address((bug_insn_t __user *)pc, insn)) + if (probe_kernel_address((bug_insn_t *)pc, insn)) return 0; return (insn == __BUG_INSN); } diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 58fb2877c865..399e6f0c2d98 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -13,7 +13,8 @@ _epc: .previous .endm -ENTRY(__copy_user) +ENTRY(__asm_copy_to_user) +ENTRY(__asm_copy_from_user) /* Enable access to user memory */ li t6, SR_SUM @@ -63,7 +64,8 @@ ENTRY(__copy_user) addi a0, a0, 1 bltu a1, a3, 5b j 3b -ENDPROC(__copy_user) +ENDPROC(__asm_copy_to_user) +ENDPROC(__asm_copy_from_user) ENTRY(__clear_user) @@ -84,7 +86,7 @@ ENTRY(__clear_user) bgeu t0, t1, 2f bltu a0, t0, 4f 1: - fixup REG_S, zero, (a0), 10f + fixup REG_S, zero, (a0), 11f addi a0, a0, SZREG bltu a0, t1, 1b 2: @@ -96,12 +98,12 @@ ENTRY(__clear_user) li a0, 0 ret 4: /* Edge case: unalignment */ - fixup sb, zero, (a0), 10f + fixup sb, zero, (a0), 11f addi a0, a0, 1 bltu a0, t0, 4b j 1b 5: /* Edge case: remainder */ - fixup sb, zero, (a0), 10f + fixup sb, zero, (a0), 11f addi a0, a0, 1 bltu a0, a3, 5b j 3b @@ -109,9 +111,14 @@ ENDPROC(__clear_user) .section .fixup,"ax" .balign 4 + /* Fixup code for __copy_user(10) and __clear_user(11) */ 10: /* Disable access to user memory */ csrs sstatus, t6 - sub a0, a3, a0 + mv a0, a2 + ret +11: + csrs sstatus, t6 + mv a0, a1 ret .previous diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 627075e6d875..50ee3bb5a63a 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -188,7 +188,7 @@ static int get_transport_options(struct arglist *def) if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) return (vec_rx | VECTOR_BPF); if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) - return (vec_rx | vec_tx); + return (vec_rx | vec_tx | VECTOR_QDISC_BYPASS); return (vec_rx | vec_tx); } @@ -504,15 +504,19 @@ static struct vector_queue *create_queue( result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL); if (result == NULL) - goto out_fail; + return NULL; result->max_depth = max_size; result->dev = vp->dev; result->mmsg_vector = kmalloc( (sizeof(struct mmsghdr) * max_size), GFP_KERNEL); + if (result->mmsg_vector == 
NULL) + goto out_mmsg_fail; result->skbuff_vector = kmalloc( (sizeof(void *) * max_size), GFP_KERNEL); - if (result->mmsg_vector == NULL || result->skbuff_vector == NULL) - goto out_fail; + if (result->skbuff_vector == NULL) + goto out_skb_fail; + + /* further failures can be handled safely by destroy_queue*/ mmsg_vector = result->mmsg_vector; for (i = 0; i < max_size; i++) { @@ -563,6 +567,11 @@ static struct vector_queue *create_queue( result->head = 0; result->tail = 0; return result; +out_skb_fail: + kfree(result->mmsg_vector); +out_mmsg_fail: + kfree(result); + return NULL; out_fail: destroy_queue(result); return NULL; @@ -1232,9 +1241,8 @@ static int vector_net_open(struct net_device *dev) if ((vp->options & VECTOR_QDISC_BYPASS) != 0) { if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd)) - vp->options = vp->options | VECTOR_BPF; + vp->options |= VECTOR_BPF; } - if ((vp->options & VECTOR_BPF) != 0) vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr); diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index b30d73ca29d0..7adb4e6b658a 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -53,12 +53,6 @@ CON_INITCALL } - .uml.initcall.init : { - __uml_initcall_start = .; - *(.uml.initcall.init) - __uml_initcall_end = .; - } - SECURITY_INIT .exitcall : { diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h index b3f5865a92c9..c66de434a983 100644 --- a/arch/um/include/shared/init.h +++ b/arch/um/include/shared/init.h @@ -64,14 +64,10 @@ struct uml_param { int (*setup_func)(char *, int *); }; -extern initcall_t __uml_initcall_start, __uml_initcall_end; extern initcall_t __uml_postsetup_start, __uml_postsetup_end; extern const char *__uml_help_start, *__uml_help_end; #endif -#define __uml_initcall(fn) \ - static initcall_t __uml_initcall_##fn __uml_init_call = fn - #define __uml_exitcall(fn) \ static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn @@ -108,7 +104,6 @@ extern struct uml_param __uml_setup_start, __uml_setup_end; */ #define __uml_init_setup __used __section(.uml.setup.init) #define __uml_setup_help __used __section(.uml.help.init) -#define __uml_init_call __used __section(.uml.initcall.init) #define __uml_postsetup_call __used __section(.uml.postsetup.init) #define __uml_exit_call __used __section(.uml.exitcall.exit) diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 5f970ece5ac3..f1fee2b91239 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -40,17 +40,6 @@ static void set_stklim(void) } } -static __init void do_uml_initcalls(void) -{ - initcall_t *call; - - call = &__uml_initcall_start; - while (call < &__uml_initcall_end) { - (*call)(); - call++; - } -} - static void last_ditch_exit(int sig) { uml_cleanup(); @@ -151,7 +140,6 @@ int __init main(int argc, char **argv, char **envp) scan_elf_aux(envp); #endif - do_uml_initcalls(); change_sig(SIGPIPE, 0); ret = linux_main(argc, argv); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d0dd35d582da..559a12b6184d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4429,16 +4429,14 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) goto out_vmcs; memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); -#if IS_ENABLED(CONFIG_HYPERV) - if (static_branch_unlikely(&enable_evmcs) && + if (IS_ENABLED(CONFIG_HYPERV) && + static_branch_unlikely(&enable_evmcs) && (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { struct hv_enlightened_vmcs *evmcs = (struct hv_enlightened_vmcs 
*)loaded_vmcs->vmcs; evmcs->hv_enlightenments_control.msr_bitmap = 1; } -#endif - } return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6bcecc325e7e..0046aa70205a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8567,7 +8567,7 @@ int kvm_arch_hardware_setup(void) /* * Make sure the user can only configure tsc_khz values that * fit into a signed integer. - * A min value is not calculated needed because it will always + * A min value is not calculated because it will always * be 1 on all machines. */ u64 max = min(0x7fffffffULL, |