diff options
Diffstat (limited to 'arch/arm64/kvm/hyp.S')
-rw-r--r-- | arch/arm64/kvm/hyp.S | 656 |
1 files changed, 236 insertions, 420 deletions
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 17a8fb14f428..e5836138ec42 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -230,199 +230,52 @@ stp x24, x25, [x3, #160] .endm -.macro save_debug - // x2: base address for cpu context - // x3: tmp register - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - mrs x20, dbgbcr15_el1 - mrs x19, dbgbcr14_el1 - mrs x18, dbgbcr13_el1 - mrs x17, dbgbcr12_el1 - mrs x16, dbgbcr11_el1 - mrs x15, dbgbcr10_el1 - mrs x14, dbgbcr9_el1 - mrs x13, dbgbcr8_el1 - mrs x12, dbgbcr7_el1 - mrs x11, dbgbcr6_el1 - mrs x10, dbgbcr5_el1 - mrs x9, dbgbcr4_el1 - mrs x8, dbgbcr3_el1 - mrs x7, dbgbcr2_el1 - mrs x6, dbgbcr1_el1 - mrs x5, dbgbcr0_el1 - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - mrs x20, dbgbvr15_el1 - mrs x19, dbgbvr14_el1 - mrs x18, dbgbvr13_el1 - mrs x17, dbgbvr12_el1 - mrs x16, dbgbvr11_el1 - mrs x15, dbgbvr10_el1 - mrs x14, dbgbvr9_el1 - mrs x13, dbgbvr8_el1 - mrs x12, dbgbvr7_el1 - mrs x11, dbgbvr6_el1 - mrs x10, dbgbvr5_el1 - mrs x9, dbgbvr4_el1 - mrs x8, dbgbvr3_el1 - mrs x7, dbgbvr2_el1 - mrs x6, dbgbvr1_el1 - mrs x5, dbgbvr0_el1 - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 - +.macro save_debug type + // x4: pointer to register set + // x5: number of registers to skip + // x6..x22 trashed + + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 + mrs x21, \type\()15_el1 + mrs x20, \type\()14_el1 + mrs x19, \type\()13_el1 + mrs x18, \type\()12_el1 + mrs x17, \type\()11_el1 + mrs x16, \type\()10_el1 + mrs x15, \type\()9_el1 + mrs x14, \type\()8_el1 + mrs x13, \type\()7_el1 + mrs x12, \type\()6_el1 + mrs x11, \type\()5_el1 + mrs x10, \type\()4_el1 + mrs x9, \type\()3_el1 + mrs x8, \type\()2_el1 + mrs x7, \type\()1_el1 + mrs x6, \type\()0_el1 + + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - mrs x20, dbgwcr15_el1 - mrs x19, dbgwcr14_el1 - mrs x18, dbgwcr13_el1 - mrs x17, dbgwcr12_el1 - mrs x16, dbgwcr11_el1 - mrs x15, dbgwcr10_el1 - mrs x14, dbgwcr9_el1 - mrs x13, dbgwcr8_el1 - mrs x12, dbgwcr7_el1 - mrs x11, dbgwcr6_el1 - mrs x10, dbgwcr5_el1 - mrs x9, dbgwcr4_el1 - mrs x8, dbgwcr3_el1 - mrs x7, dbgwcr2_el1 - mrs x6, dbgwcr1_el1 - mrs x5, dbgwcr0_el1 - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - mrs x20, dbgwvr15_el1 - mrs x19, dbgwvr14_el1 - mrs x18, dbgwvr13_el1 - mrs x17, dbgwvr12_el1 - mrs x16, dbgwvr11_el1 - mrs x15, dbgwvr10_el1 - mrs x14, dbgwvr9_el1 - mrs x13, dbgwvr8_el1 - mrs x12, dbgwvr7_el1 - mrs x11, dbgwvr6_el1 - mrs x10, dbgwvr5_el1 - mrs x9, dbgwvr4_el1 - mrs x8, dbgwvr3_el1 - mrs x7, dbgwvr2_el1 - mrs x6, dbgwvr1_el1 - mrs x5, dbgwvr0_el1 - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - mrs x21, mdccint_el1 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + str x21, [x4, #(15 * 8)] + str x20, [x4, #(14 * 8)] + str x19, [x4, #(13 * 8)] + str x18, [x4, #(12 * 8)] + str x17, [x4, #(11 * 8)] + str x16, [x4, #(10 * 8)] + str x15, [x4, #(9 * 8)] + str x14, [x4, #(8 * 8)] + str x13, [x4, #(7 * 8)] + str x12, [x4, #(6 * 8)] + str x11, [x4, #(5 * 8)] + str x10, [x4, #(4 * 8)] + str x9, [x4, #(3 * 8)] + str x8, [x4, #(2 * 8)] + str x7, [x4, #(1 * 8)] + str x6, [x4, #(0 * 8)] .endm .macro restore_sysregs @@ -467,195 +320,52 @@ msr mdscr_el1, x25 .endm -.macro restore_debug - // x2: base address for cpu context - // x3: tmp register +.macro restore_debug type + // x4: pointer to register set + // x5: number of registers to skip + // x6..x22 trashed - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - msr dbgbcr15_el1, x20 - msr dbgbcr14_el1, x19 - msr dbgbcr13_el1, x18 - msr dbgbcr12_el1, x17 - msr dbgbcr11_el1, x16 - msr dbgbcr10_el1, x15 - msr dbgbcr9_el1, x14 - msr dbgbcr8_el1, x13 - msr dbgbcr7_el1, x12 - msr dbgbcr6_el1, x11 - msr dbgbcr5_el1, x10 - msr dbgbcr4_el1, x9 - msr dbgbcr3_el1, x8 - msr dbgbcr2_el1, x7 - msr dbgbcr1_el1, x6 - msr dbgbcr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - msr dbgbvr15_el1, x20 - msr dbgbvr14_el1, x19 - msr dbgbvr13_el1, x18 - msr dbgbvr12_el1, x17 - msr dbgbvr11_el1, x16 - msr dbgbvr10_el1, x15 - msr dbgbvr9_el1, x14 - msr dbgbvr8_el1, x13 - msr dbgbvr7_el1, x12 - msr dbgbvr6_el1, x11 - msr dbgbvr5_el1, x10 - msr dbgbvr4_el1, x9 - msr dbgbvr3_el1, x8 - msr dbgbvr2_el1, x7 - msr dbgbvr1_el1, x6 - msr dbgbvr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - msr dbgwcr15_el1, x20 - msr dbgwcr14_el1, x19 - msr dbgwcr13_el1, x18 - msr dbgwcr12_el1, x17 - msr dbgwcr11_el1, x16 - msr dbgwcr10_el1, x15 - msr dbgwcr9_el1, x14 - msr dbgwcr8_el1, x13 - msr dbgwcr7_el1, x12 - msr dbgwcr6_el1, x11 - msr dbgwcr5_el1, x10 - msr dbgwcr4_el1, x9 - msr dbgwcr3_el1, x8 - msr dbgwcr2_el1, x7 - msr dbgwcr1_el1, x6 - msr dbgwcr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 + ldr x21, [x4, #(15 * 8)] + ldr x20, [x4, #(14 * 8)] + ldr x19, [x4, #(13 * 8)] + ldr x18, [x4, #(12 * 8)] + ldr x17, [x4, #(11 * 8)] + ldr x16, [x4, #(10 * 8)] + ldr x15, [x4, #(9 * 8)] + ldr x14, [x4, #(8 * 8)] + ldr x13, [x4, #(7 * 8)] + ldr x12, [x4, #(6 * 8)] + ldr x11, [x4, #(5 * 8)] + ldr x10, [x4, #(4 * 8)] + ldr x9, [x4, #(3 * 8)] + ldr x8, [x4, #(2 * 8)] + ldr x7, [x4, #(1 * 8)] + ldr x6, [x4, #(0 * 8)] + + adr x22, 1f + add x22, x22, x5, lsl #2 + br x22 1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - msr dbgwvr15_el1, x20 - msr dbgwvr14_el1, x19 - msr dbgwvr13_el1, x18 - msr dbgwvr12_el1, x17 - msr dbgwvr11_el1, x16 - msr dbgwvr10_el1, x15 - msr dbgwvr9_el1, x14 - msr dbgwvr8_el1, x13 - msr dbgwvr7_el1, x12 - msr dbgwvr6_el1, x11 - msr dbgwvr5_el1, x10 - msr dbgwvr4_el1, x9 - msr dbgwvr3_el1, x8 - msr dbgwvr2_el1, x7 - msr dbgwvr1_el1, x6 - msr dbgwvr0_el1, x5 - - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - msr mdccint_el1, x21 + msr \type\()15_el1, x21 + msr \type\()14_el1, x20 + msr \type\()13_el1, x19 + msr \type\()12_el1, x18 + msr \type\()11_el1, x17 + msr \type\()10_el1, x16 + msr \type\()9_el1, x15 + msr \type\()8_el1, x14 + msr \type\()7_el1, x13 + msr \type\()6_el1, x12 + msr \type\()5_el1, x11 + msr \type\()4_el1, x10 + msr \type\()3_el1, x9 + msr \type\()2_el1, x8 + msr \type\()1_el1, x7 + msr \type\()0_el1, x6 .endm .macro skip_32bit_state tmp, target @@ -675,6 +385,14 @@ tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target .endm +/* + * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) + */ +.macro skip_fpsimd_state tmp, target + mrs \tmp, cptr_el2 + tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target +.endm + .macro compute_debug_state target // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY // is set, we do a full save/restore cycle and disable trapping. @@ -713,20 +431,15 @@ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) mrs x4, dacr32_el2 mrs x5, ifsr32_el2 - mrs x6, fpexc32_el2 stp x4, x5, [x3] - str x6, [x3, #16] - skip_debug_state x8, 2f + skip_fpsimd_state x8, 2f + mrs x6, fpexc32_el2 + str x6, [x3, #16] +2: + skip_debug_state x8, 1f mrs x7, dbgvcr32_el2 str x7, [x3, #24] -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - mrs x4, teecr32_el1 - mrs x5, teehbr32_el1 - stp x4, x5, [x3] 1: .endm @@ -743,51 +456,46 @@ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) ldp x4, x5, [x3] - ldr x6, [x3, #16] msr dacr32_el2, x4 msr ifsr32_el2, x5 - msr fpexc32_el2, x6 - skip_debug_state x8, 2f + skip_debug_state x8, 1f ldr x7, [x3, #24] msr dbgvcr32_el2, x7 -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - ldp x4, x5, [x3] - msr teecr32_el1, x4 - msr teehbr32_el1, x5 1: .endm .macro activate_traps ldr x2, [x0, #VCPU_HCR_EL2] + + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + */ + tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state + mov x3, #(1 << 30) + msr fpexc32_el2, x3 + isb +99: msr hcr_el2, x2 mov x2, #CPTR_EL2_TTA + orr x2, x2, #CPTR_EL2_TFP msr cptr_el2, x2 mov x2, #(1 << 15) // Trap CP15 Cr=15 msr hstr_el2, x2 - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) - orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) - - // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap - // if not dirty. - ldr x3, [x0, #VCPU_DEBUG_FLAGS] - tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f - orr x2, x2, #MDCR_EL2_TDA -1: + // Monitor Debug Config - see kvm_arm_setup_debug() + ldr x2, [x0, #VCPU_MDCR_EL2] msr mdcr_el2, x2 .endm .macro deactivate_traps mov x2, #HCR_RW msr hcr_el2, x2 - msr cptr_el2, xzr msr hstr_el2, xzr mrs x2, mdcr_el2 @@ -810,7 +518,11 @@ * Call into the vgic backend for state saving */ .macro save_vgic_state - alternative_insn "bl __save_vgic_v2_state", "bl __save_vgic_v3_state", ARM64_HAS_SYSREG_GIC_CPUIF +alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF + bl __save_vgic_v2_state +alternative_else + bl __save_vgic_v3_state +alternative_endif mrs x24, hcr_el2 mov x25, #HCR_INT_OVERRIDE neg x25, x25 @@ -827,7 +539,11 @@ orr x24, x24, #HCR_INT_OVERRIDE orr x24, x24, x25 msr hcr_el2, x24 - alternative_insn "bl __restore_vgic_v2_state", "bl __restore_vgic_v3_state", ARM64_HAS_SYSREG_GIC_CPUIF +alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF + bl __restore_vgic_v2_state +alternative_else + bl __restore_vgic_v3_state +alternative_endif .endm .macro save_timer_state @@ -840,8 +556,6 @@ mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -849,6 +563,9 @@ str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 @@ -892,21 +609,101 @@ __restore_sysregs: restore_sysregs ret +/* Save debug state */ __save_debug: - save_debug + // x2: ptr to CPU context + // x3: ptr to debug reg struct + // x4/x5/x6-22/x24-26: trashed + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + mov x5, x24 + add x4, x3, #DEBUG_BCR + save_debug dbgbcr + add x4, x3, #DEBUG_BVR + save_debug dbgbvr + + mov x5, x25 + add x4, x3, #DEBUG_WCR + save_debug dbgwcr + add x4, x3, #DEBUG_WVR + save_debug dbgwvr + + mrs x21, mdccint_el1 + str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] ret +/* Restore debug state */ __restore_debug: - restore_debug + // x2: ptr to CPU context + // x3: ptr to debug reg struct + // x4/x5/x6-22/x24-26: trashed + + mrs x26, id_aa64dfr0_el1 + ubfx x24, x26, #12, #4 // Extract BRPs + ubfx x25, x26, #20, #4 // Extract WRPs + mov w26, #15 + sub w24, w26, w24 // How many BPs to skip + sub w25, w26, w25 // How many WPs to skip + + mov x5, x24 + add x4, x3, #DEBUG_BCR + restore_debug dbgbcr + add x4, x3, #DEBUG_BVR + restore_debug dbgbvr + + mov x5, x25 + add x4, x3, #DEBUG_WCR + restore_debug dbgwcr + add x4, x3, #DEBUG_WVR + restore_debug dbgwvr + + ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] + msr mdccint_el1, x21 + ret __save_fpsimd: + skip_fpsimd_state x3, 1f save_fpsimd - ret +1: ret __restore_fpsimd: + skip_fpsimd_state x3, 1f restore_fpsimd - ret +1: ret + +switch_to_guest_fpsimd: + push x4, lr + + mrs x2, cptr_el2 + bic x2, x2, #CPTR_EL2_TFP + msr cptr_el2, x2 + isb + + mrs x0, tpidr_el2 + + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + bl __save_fpsimd + + add x2, x0, #VCPU_CONTEXT + bl __restore_fpsimd + + skip_32bit_state x3, 1f + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] + msr fpexc32_el2, x4 +1: + pop x4, lr + pop x2, x3 + pop x0, x1 + + eret /* * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); @@ -928,10 +725,10 @@ ENTRY(__kvm_vcpu_run) kern_hyp_va x2 save_host_regs - bl __save_fpsimd bl __save_sysregs compute_debug_state 1f + add x3, x0, #VCPU_HOST_DEBUG_STATE bl __save_debug 1: activate_traps @@ -943,13 +740,16 @@ ENTRY(__kvm_vcpu_run) // Guest context add x2, x0, #VCPU_CONTEXT + // We must restore the 32-bit state before the sysregs, thanks + // to Cortex-A57 erratum #852523. + restore_guest_32bit_state bl __restore_sysregs - bl __restore_fpsimd skip_debug_state x3, 1f + ldr x3, [x0, #VCPU_DEBUG_PTR] + kern_hyp_va x3 bl __restore_debug 1: - restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. @@ -967,6 +767,8 @@ __kvm_vcpu_return: bl __save_sysregs skip_debug_state x3, 1f + ldr x3, [x0, #VCPU_DEBUG_PTR] + kern_hyp_va x3 bl __save_debug 1: save_guest_32bit_state @@ -983,12 +785,15 @@ __kvm_vcpu_return: bl __restore_sysregs bl __restore_fpsimd + /* Clear FPSIMD and Trace trapping */ + msr cptr_el2, xzr skip_debug_state x3, 1f // Clear the dirty flag for the next run, as all the state has // already been saved. Note that we nuke the whole 64bit word. // If we ever add more flags, we'll have to be more careful... str xzr, [x0, #VCPU_DEBUG_FLAGS] + add x3, x0, #VCPU_HOST_DEBUG_STATE bl __restore_debug 1: restore_host_regs @@ -1191,6 +996,11 @@ el1_trap: * x1: ESR * x2: ESR_EC */ + + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + cmp x2, #ESR_ELx_EC_FP_ASIMD + b.eq switch_to_guest_fpsimd + cmp x2, #ESR_ELx_EC_DABT_LOW mov x0, #ESR_ELx_EC_IABT_LOW ccmp x2, x0, #4, ne @@ -1285,4 +1095,10 @@ ENTRY(__kvm_hyp_vector) ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) + +ENTRY(__kvm_get_mdcr_el2) + mrs x0, mdcr_el2 + ret +ENDPROC(__kvm_get_mdcr_el2) + .popsection |