summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm/book3s_hv_rmhandlers.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv_rmhandlers.S')
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S248
1 files changed, 163 insertions, 85 deletions
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..cb44065e2946 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,30 @@
#include <asm/opal.h>
#include <asm/xive-regs.h>
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg) \
+BEGIN_FTR_SECTION; \
+ extsw reg, reg; \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS 160
+#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_TID (SFS-16)
+#define STACK_SLOT_PSSCR (SFS-24)
+#define STACK_SLOT_PID (SFS-32)
+#define STACK_SLOT_IAMR (SFS-40)
+#define STACK_SLOT_CIABR (SFS-48)
+#define STACK_SLOT_DAWR (SFS-56)
+#define STACK_SLOT_DAWRX (SFS-64)
+#define STACK_SLOT_HFSCR (SFS-72)
+
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
@@ -51,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
@@ -135,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stb r0, HSTATE_HWTHREAD_REQ(r13)
/*
- * For external and machine check interrupts, we need
- * to call the Linux handler to process the interrupt.
- * We do that by jumping to absolute address 0x500 for
- * external interrupts, or the machine_check_fwnmi label
- * for machine checks (since firmware might have patched
- * the vector area at 0x200). The [h]rfid at the end of the
- * handler will return to the book3s_hv_interrupts.S code.
- * For other interrupts we do the rfid to get back
- * to the book3s_hv_interrupts.S code here.
+ * For external interrupts we need to call the Linux
+ * handler to process the interrupt. We do that by jumping
+ * to absolute address 0x500 for external interrupts.
+ * The [h]rfid at the end of the handler will return to
+ * the book3s_hv_interrupts.S code. For other interrupts
+ * we do the rfid to get back to the book3s_hv_interrupts.S
+ * code here.
*/
ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r7, HSTATE_HOST_MSR(r13)
+ /* Return the trap number on this thread as the return value */
+ mr r3, r12
+
/*
* If we came back from the guest via a relocation-on interrupt,
* we will be in virtual mode at this point, which makes it a
@@ -158,62 +178,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
- cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq 11f
- cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
- beq 15f /* Invoke the H_DOORBELL handler */
- cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
- beq cr2, 14f /* HMI check */
-
- /* RFI into the highmem handler, or branch to interrupt handler */
+ /* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beq cr1, 13f /* machine check */
RFI
- /* On POWER7, we have external interrupts set to use HSRR0/1 */
-11: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0x500
-
-13: b machine_check_fwnmi
-
-14: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- b hmi_exception_after_realmode
-
-15: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0xe80
-
- /* Virtual-mode return - can't get here for HMI or machine check */
+ /* Virtual-mode return */
.Lvirt_return:
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq 16f
- cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
- beq 17f
- andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
- beq 18f
- mtmsrd r7, 1 /* if so then re-enable them */
-18: mtlr r8
+ mtlr r8
blr
-16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
- mtspr SPRN_HSRR1, r7
- b exc_virt_0x4500_hardware_interrupt
-
-17: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- b exc_virt_0x4e80_h_doorbell
-
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+ /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+ /* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
@@ -295,8 +278,9 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
+ EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
- cmpwi r0, 0
+ cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,25 +303,31 @@ kvm_novcpu_exit:
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
- stw r12, 112-4(r1)
+ stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, 112-4(r1)
+ lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
* We come in here when wakened from nap mode.
* Relocation is off and most register values are lost.
* r13 points to the PACA.
+ * r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
.globl kvm_start_guest
kvm_start_guest:
-
/* Set runlatch bit the minute you wake up from nap */
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
mtspr SPRN_CTRLT, r0
+ /*
+ * Could avoid this and pass it through in r3. For now,
+ * code expects it to be in SRR1.
+ */
+ mtspr SPRN_SRR1,r3
+
ld r2,PACATOC(r13)
li r0,KVM_HWTHREAD_IN_KVM
@@ -390,8 +380,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- lis r6, 0x7fff
- ori r6, r6, 0xffff
+ LOAD_REG_ADDR(r6, decrementer_max)
+ ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
@@ -456,13 +446,15 @@ kvm_no_guest:
/*
* We jump to pnv_wakeup_loss, which will return to the caller
* of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap.
+ * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
+ * requires SRR1 in r12.
*/
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
li r3, 0
+ mfspr r12,SPRN_SRR1
b pnv_wakeup_loss
53: HMT_LOW
@@ -545,11 +537,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
-/* Stack frame offsets */
-#define STACK_SLOT_TID (112-16)
-#define STACK_SLOT_PSSCR (112-24)
-#define STACK_SLOT_PID (112-32)
-
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -565,7 +552,7 @@ kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -112(r1)
+ stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
@@ -749,10 +736,22 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
+ mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
+ std r8, STACK_SLOT_IAMR(r1)
+ mfspr r5, SPRN_HFSCR
+ std r5, STACK_SLOT_HFSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_CIABR
+ mfspr r6, SPRN_DAWR
+ mfspr r7, SPRN_DAWRX
+ std r5, STACK_SLOT_CIABR(r1)
+ std r6, STACK_SLOT_DAWR(r1)
+ std r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
@@ -895,8 +894,10 @@ FTR_SECTION_ELSE
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
+ ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
+ mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
@@ -911,7 +912,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
- stw r3,VCPU_DEC(r4)
+ std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
@@ -968,7 +969,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- cmpwi r3, 512 /* 1 microsecond */
+ EXTEND_HDEC(r3)
+ cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
@@ -1022,7 +1024,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
li r0, BOOK3S_INTERRUPT_EXTERNAL
bne cr1, 12f
mfspr r0, SPRN_DEC
- cmpwi r0, 0
+BEGIN_FTR_SECTION
+ /* On POWER9 check whether the guest has large decrementer enabled */
+ andis. r8, r8, LPCR_LD@h
+ bne 15f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ extsw r0, r0
+15: cmpdi r0, 0
li r0, BOOK3S_INTERRUPT_DECREMENTER
bge 5f
@@ -1032,6 +1040,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
mr r9, r4
bl kvmppc_msr_interrupt
5:
+BEGIN_FTR_SECTION
+ b fast_guest_return
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+ /* On POWER9, check for pending doorbell requests */
+ lbz r0, VCPU_DBELL_REQ(r4)
+ cmpwi r0, 0
+ beq fast_guest_return
+ ld r5, HSTATE_KVM_VCORE(r13)
+ /* Set DPDES register so the CPU will take a doorbell interrupt */
+ li r0, 1
+ mtspr SPRN_DPDES, r0
+ std r0, VCORE_DPDES(r5)
+ /* Make sure other cpus see vcore->dpdes set before dbell req clear */
+ lwsync
+ /* Clear the pending doorbell request */
+ li r0, 0
+ stb r0, VCPU_DBELL_REQ(r4)
/*
* Required state:
@@ -1206,6 +1231,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
+ /*
+ * Now that we have saved away SRR0/1 and HSRR0/1,
+ * interrupts are recoverable in principle, so set MSR_RI.
+ * This becomes important for relocation-on interrupts from
+ * the guest, which we can get in radix mode on POWER9.
+ */
+ li r0, MSR_RI
+ mtmsrd r0, 1
+
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r9, VCPU_TB_RMINTR
mr r4, r9
@@ -1262,6 +1296,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 4f
b guest_exit_cont
3:
+ /* If it's a hypervisor facility unavailable interrupt, save HFSCR */
+ cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
+ bne 14f
+ mfspr r3, SPRN_HFSCR
+ std r3, VCPU_HFSCR(r9)
+ b guest_exit_cont
+14:
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ guest_exit_cont
@@ -1449,12 +1490,18 @@ mc_cont:
mtspr SPRN_SPURR,r4
/* Save DEC */
+ ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
+ /* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+ ld r4, VCORE_LPCR(r3)
+ andis. r4, r4, LPCR_LD@h
+ bne 16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
- add r5,r5,r6
+16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
- ld r3,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_TB_OFFSET(r3)
subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
@@ -1499,17 +1546,19 @@ FTR_SECTION_ELSE
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
rotldi r6, r6, 60
std r6, VCPU_PSSCR(r9)
+ /* Restore host HFSCR value */
+ ld r7, STACK_SLOT_HFSCR(r1)
+ mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
*/
li r0, 0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX, r0
+ mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
+ mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
@@ -1525,6 +1574,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
+ mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
@@ -1670,12 +1720,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Restore host values of some registers */
BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_CIABR(r1)
+ ld r6, STACK_SLOT_DAWR(r1)
+ ld r7, STACK_SLOT_DAWRX(r1)
+ mtspr SPRN_CIABR, r5
+ mtspr SPRN_DAWR, r6
+ mtspr SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
+ ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
+ mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
@@ -1819,8 +1879,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- ld r0, 112+PPC_LR_STKOFF(r1)
- addi r1, r1, 112
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
mtlr r0
blr
@@ -2366,12 +2426,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
- cmpw r3, r4
+BEGIN_FTR_SECTION
+ /* On P9 check whether the guest has large decrementer mode enabled */
+ ld r6, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_LPCR(r6)
+ andis. r6, r6, LPCR_LD@h
+ bne 68f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ extsw r3, r3
+68: EXTEND_HDEC(r4)
+ cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
- extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
@@ -2552,22 +2620,32 @@ machine_check_realmode:
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
- * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
- * machine check interrupt (set HSRR0 to 0x200). And for handled
- * errors (no-fatal), just go back to guest execution with current
- * HSRR0 instead of exiting guest. This new approach will inject
- * machine check to guest for fatal error causing guest to crash.
- *
- * The old code used to return to host for unhandled errors which
- * was causing guest to hang with soft lockups inside guest and
- * makes it difficult to recover guest instance.
+ * For the guest that is FWNMI capable, deliver all the MCE errors
+ * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
+ * reason. This new approach injects machine check errors in guest
+ * address space to guest with additional information in the form
+ * of RTAS event, thus enabling guest kernel to suitably handle
+ * such errors.
*
+ * For the guest that is not FWNMI capable (old QEMU) fallback
+ * to old behaviour for backward compatibility:
+ * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
+ * through machine check interrupt (set HSRR0 to 0x200).
+ * For handled errors (no-fatal), just go back to guest execution
+ * with current HSRR0.
* if we receive machine check with MSR(RI=0) then deliver it to
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
bne mc_cont /* if so, exit to host */
+ /* Check if guest is capable of handling NMI exit */
+ ld r10, VCPU_KVM(r9)
+ lbz r10, KVM_FWNMI(r10)
+ cmpdi r10, 1 /* FWNMI capable? */
+ beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
+
+ /* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)