Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/Makefile | 1
-rw-r--r--  arch/arm/configs/axm55xx_defconfig | 2
-rw-r--r--  arch/arm/include/asm/arch_gicv3.h | 114
-rw-r--r--  arch/arm/include/asm/kvm_arm.h | 239
-rw-r--r--  arch/arm/include/asm/kvm_asm.h | 77
-rw-r--r--  arch/arm/include/asm/kvm_coproc.h | 36
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 372
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 456
-rw-r--r--  arch/arm/include/asm/kvm_hyp.h | 127
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 435
-rw-r--r--  arch/arm/include/asm/kvm_ras.h | 14
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h | 20
-rw-r--r--  arch/arm/include/asm/pgtable.h | 9
-rw-r--r--  arch/arm/include/asm/sections.h | 6
-rw-r--r--  arch/arm/include/asm/stage2_pgtable.h | 75
-rw-r--r--  arch/arm/include/asm/virt.h | 17
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 314
-rw-r--r--  arch/arm/kernel/asm-offsets.c | 11
-rw-r--r--  arch/arm/kernel/hyp-stub.S | 39
-rw-r--r--  arch/arm/kernel/vmlinux-xip.lds.S | 8
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S | 8
-rw-r--r--  arch/arm/kernel/vmlinux.lds.h | 10
-rw-r--r--  arch/arm/kvm/Kconfig | 59
-rw-r--r--  arch/arm/kvm/Makefile | 43
-rw-r--r--  arch/arm/kvm/coproc.c | 1455
-rw-r--r--  arch/arm/kvm/coproc.h | 130
-rw-r--r--  arch/arm/kvm/coproc_a15.c | 39
-rw-r--r--  arch/arm/kvm/coproc_a7.c | 42
-rw-r--r--  arch/arm/kvm/emulate.c | 166
-rw-r--r--  arch/arm/kvm/guest.c | 387
-rw-r--r--  arch/arm/kvm/handle_exit.c | 175
-rw-r--r--  arch/arm/kvm/hyp/Makefile | 34
-rw-r--r--  arch/arm/kvm/hyp/banked-sr.c | 70
-rw-r--r--  arch/arm/kvm/hyp/cp15-sr.c | 72
-rw-r--r--  arch/arm/kvm/hyp/entry.S | 121
-rw-r--r--  arch/arm/kvm/hyp/hyp-entry.S | 295
-rw-r--r--  arch/arm/kvm/hyp/s2-setup.c | 22
-rw-r--r--  arch/arm/kvm/hyp/switch.c | 242
-rw-r--r--  arch/arm/kvm/hyp/tlb.c | 68
-rw-r--r--  arch/arm/kvm/hyp/vfp.S | 57
-rw-r--r--  arch/arm/kvm/init.S | 157
-rw-r--r--  arch/arm/kvm/interrupts.S | 36
-rw-r--r--  arch/arm/kvm/irq.h | 16
-rw-r--r--  arch/arm/kvm/reset.c | 86
-rw-r--r--  arch/arm/kvm/trace.h | 86
-rw-r--r--  arch/arm/kvm/vgic-v3-coproc.c | 27
-rw-r--r--  arch/arm/mach-exynos/Kconfig | 2
-rw-r--r--  arch/arm/mm/mmu.c | 26
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 3
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 1
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 1
-rw-r--r--  arch/arm64/kvm/guest.c | 1
-rw-r--r--  arch/arm64/kvm/hyp/switch.c | 1
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 1
-rw-r--r--  arch/arm64/kvm/sys_regs_generic_v8.c | 1
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 2
-rw-r--r--  arch/mips/kvm/mips.c | 75
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 3
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_uvmem.h | 6
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 1
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 21
-rw-r--r--  arch/powerpc/kvm/book3s.c | 25
-rw-r--r--  arch/powerpc/kvm/book3s.h | 1
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 119
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 90
-rw-r--r--  arch/powerpc/kvm/book3s_hv_tm.c | 28
-rw-r--r--  arch/powerpc/kvm/book3s_hv_tm_builtin.c | 16
-rw-r--r--  arch/powerpc/kvm/book3s_hv_uvmem.c | 19
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 25
-rw-r--r--  arch/powerpc/kvm/booke.c | 26
-rw-r--r--  arch/powerpc/kvm/booke.h | 2
-rw-r--r--  arch/powerpc/kvm/e500.c | 1
-rw-r--r--  arch/powerpc/kvm/e500_mmu.c | 4
-rw-r--r--  arch/powerpc/kvm/e500mc.c | 1
-rw-r--r--  arch/powerpc/kvm/mpic.c | 1
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 35
-rw-r--r--  arch/powerpc/kvm/timing.h | 1
-rw-r--r--  arch/s390/boot/Makefile | 2
-rw-r--r--  arch/s390/boot/uv.c | 20
-rw-r--r--  arch/s390/include/asm/gmap.h | 6
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 117
-rw-r--r--  arch/s390/include/asm/mmu.h | 2
-rw-r--r--  arch/s390/include/asm/mmu_context.h | 1
-rw-r--r--  arch/s390/include/asm/page.h | 5
-rw-r--r--  arch/s390/include/asm/pgtable.h | 35
-rw-r--r--  arch/s390/include/asm/uv.h | 251
-rw-r--r--  arch/s390/kernel/Makefile | 1
-rw-r--r--  arch/s390/kernel/entry.h | 2
-rw-r--r--  arch/s390/kernel/pgm_check.S | 4
-rw-r--r--  arch/s390/kernel/setup.c | 9
-rw-r--r--  arch/s390/kernel/uv.c | 414
-rw-r--r--  arch/s390/kvm/Makefile | 2
-rw-r--r--  arch/s390/kvm/diag.c | 6
-rw-r--r--  arch/s390/kvm/gaccess.c | 23
-rw-r--r--  arch/s390/kvm/intercept.c | 123
-rw-r--r--  arch/s390/kvm/interrupt.c | 401
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 597
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 51
-rw-r--r--  arch/s390/kvm/priv.c | 13
-rw-r--r--  arch/s390/kvm/pv.c | 303
-rw-r--r--  arch/s390/mm/fault.c | 78
-rw-r--r--  arch/s390/mm/gmap.c | 72
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 105
-rw-r--r--  arch/x86/include/asm/kvm_page_track.h | 3
-rw-r--r--  arch/x86/include/asm/vmx.h | 12
-rw-r--r--  arch/x86/kvm/cpuid.c | 944
-rw-r--r--  arch/x86/kvm/cpuid.h | 151
-rw-r--r--  arch/x86/kvm/emulate.c | 57
-rw-r--r--  arch/x86/kvm/hyperv.c | 8
-rw-r--r--  arch/x86/kvm/i8254.c | 2
-rw-r--r--  arch/x86/kvm/kvm_cache_regs.h | 10
-rw-r--r--  arch/x86/kvm/kvm_emulate.h (renamed from arch/x86/include/asm/kvm_emulate.h) | 43
-rw-r--r--  arch/x86/kvm/lapic.c | 85
-rw-r--r--  arch/x86/kvm/lapic.h | 2
-rw-r--r--  arch/x86/kvm/mmu.h | 10
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 209
-rw-r--r--  arch/x86/kvm/mmu/page_track.c | 16
-rw-r--r--  arch/x86/kvm/mmu/paging_tmpl.h | 4
-rw-r--r--  arch/x86/kvm/pmu.c | 34
-rw-r--r--  arch/x86/kvm/pmu.h | 11
-rw-r--r--  arch/x86/kvm/svm.c | 407
-rw-r--r--  arch/x86/kvm/trace.h | 50
-rw-r--r--  arch/x86/kvm/vmx/capabilities.h | 25
-rw-r--r--  arch/x86/kvm/vmx/evmcs.h | 7
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 183
-rw-r--r--  arch/x86/kvm/vmx/nested.h | 8
-rw-r--r--  arch/x86/kvm/vmx/ops.h | 27
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c | 8
-rw-r--r--  arch/x86/kvm/vmx/vmenter.S | 72
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 665
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 8
-rw-r--r--  arch/x86/kvm/x86.c | 787
-rw-r--r--  arch/x86/kvm/x86.h | 28
141 files changed, 4603 insertions, 8744 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 03bbfc312fe7..66a04f6f4775 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2090,5 +2090,3 @@ source "drivers/firmware/Kconfig"
if CRYPTO
source "arch/arm/crypto/Kconfig"
endif
-
-source "arch/arm/kvm/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 1fc32b611f8a..e1d13d779e08 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -278,7 +278,6 @@ core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/
core-$(CONFIG_FPE_FASTFPE) += $(patsubst $(srctree)/%,%,$(wildcard $(srctree)/arch/arm/fastfpe/))
core-$(CONFIG_VFP) += arch/arm/vfp/
core-$(CONFIG_XEN) += arch/arm/xen/
-core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/
core-$(CONFIG_VDSO) += arch/arm/vdso/
# If we have a machine-specific directory, then include it in the build.
diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig
index 6ea7dafa4c9e..46075216ee6d 100644
--- a/arch/arm/configs/axm55xx_defconfig
+++ b/arch/arm/configs/axm55xx_defconfig
@@ -236,5 +236,3 @@ CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_XCBC=y
CONFIG_CRYPTO_SHA256=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_VIRTUALIZATION=y
-CONFIG_KVM=y
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index c815477b4303..413abfb42989 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -38,71 +38,6 @@
#define ICC_AP1R2 __ICC_AP1Rx(2)
#define ICC_AP1R3 __ICC_AP1Rx(3)
-#define ICC_HSRE __ACCESS_CP15(c12, 4, c9, 5)
-
-#define ICH_VSEIR __ACCESS_CP15(c12, 4, c9, 4)
-#define ICH_HCR __ACCESS_CP15(c12, 4, c11, 0)
-#define ICH_VTR __ACCESS_CP15(c12, 4, c11, 1)
-#define ICH_MISR __ACCESS_CP15(c12, 4, c11, 2)
-#define ICH_EISR __ACCESS_CP15(c12, 4, c11, 3)
-#define ICH_ELRSR __ACCESS_CP15(c12, 4, c11, 5)
-#define ICH_VMCR __ACCESS_CP15(c12, 4, c11, 7)
-
-#define __LR0(x) __ACCESS_CP15(c12, 4, c12, x)
-#define __LR8(x) __ACCESS_CP15(c12, 4, c13, x)
-
-#define ICH_LR0 __LR0(0)
-#define ICH_LR1 __LR0(1)
-#define ICH_LR2 __LR0(2)
-#define ICH_LR3 __LR0(3)
-#define ICH_LR4 __LR0(4)
-#define ICH_LR5 __LR0(5)
-#define ICH_LR6 __LR0(6)
-#define ICH_LR7 __LR0(7)
-#define ICH_LR8 __LR8(0)
-#define ICH_LR9 __LR8(1)
-#define ICH_LR10 __LR8(2)
-#define ICH_LR11 __LR8(3)
-#define ICH_LR12 __LR8(4)
-#define ICH_LR13 __LR8(5)
-#define ICH_LR14 __LR8(6)
-#define ICH_LR15 __LR8(7)
-
-/* LR top half */
-#define __LRC0(x) __ACCESS_CP15(c12, 4, c14, x)
-#define __LRC8(x) __ACCESS_CP15(c12, 4, c15, x)
-
-#define ICH_LRC0 __LRC0(0)
-#define ICH_LRC1 __LRC0(1)
-#define ICH_LRC2 __LRC0(2)
-#define ICH_LRC3 __LRC0(3)
-#define ICH_LRC4 __LRC0(4)
-#define ICH_LRC5 __LRC0(5)
-#define ICH_LRC6 __LRC0(6)
-#define ICH_LRC7 __LRC0(7)
-#define ICH_LRC8 __LRC8(0)
-#define ICH_LRC9 __LRC8(1)
-#define ICH_LRC10 __LRC8(2)
-#define ICH_LRC11 __LRC8(3)
-#define ICH_LRC12 __LRC8(4)
-#define ICH_LRC13 __LRC8(5)
-#define ICH_LRC14 __LRC8(6)
-#define ICH_LRC15 __LRC8(7)
-
-#define __ICH_AP0Rx(x) __ACCESS_CP15(c12, 4, c8, x)
-#define ICH_AP0R0 __ICH_AP0Rx(0)
-#define ICH_AP0R1 __ICH_AP0Rx(1)
-#define ICH_AP0R2 __ICH_AP0Rx(2)
-#define ICH_AP0R3 __ICH_AP0Rx(3)
-
-#define __ICH_AP1Rx(x) __ACCESS_CP15(c12, 4, c9, x)
-#define ICH_AP1R0 __ICH_AP1Rx(0)
-#define ICH_AP1R1 __ICH_AP1Rx(1)
-#define ICH_AP1R2 __ICH_AP1Rx(2)
-#define ICH_AP1R3 __ICH_AP1Rx(3)
-
-/* A32-to-A64 mappings used by VGIC save/restore */
-
#define CPUIF_MAP(a32, a64) \
static inline void write_ ## a64(u32 val) \
{ \
@@ -113,21 +48,6 @@ static inline u32 read_ ## a64(void) \
return read_sysreg(a32); \
} \
-#define CPUIF_MAP_LO_HI(a32lo, a32hi, a64) \
-static inline void write_ ## a64(u64 val) \
-{ \
- write_sysreg(lower_32_bits(val), a32lo);\
- write_sysreg(upper_32_bits(val), a32hi);\
-} \
-static inline u64 read_ ## a64(void) \
-{ \
- u64 val = read_sysreg(a32lo); \
- \
- val |= (u64)read_sysreg(a32hi) << 32; \
- \
- return val; \
-}
-
CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
@@ -138,40 +58,6 @@ CPUIF_MAP(ICC_AP1R1, ICC_AP1R1_EL1)
CPUIF_MAP(ICC_AP1R2, ICC_AP1R2_EL1)
CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
-CPUIF_MAP(ICH_HCR, ICH_HCR_EL2)
-CPUIF_MAP(ICH_VTR, ICH_VTR_EL2)
-CPUIF_MAP(ICH_MISR, ICH_MISR_EL2)
-CPUIF_MAP(ICH_EISR, ICH_EISR_EL2)
-CPUIF_MAP(ICH_ELRSR, ICH_ELRSR_EL2)
-CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2)
-CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2)
-CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2)
-CPUIF_MAP(ICH_AP0R1, ICH_AP0R1_EL2)
-CPUIF_MAP(ICH_AP0R0, ICH_AP0R0_EL2)
-CPUIF_MAP(ICH_AP1R3, ICH_AP1R3_EL2)
-CPUIF_MAP(ICH_AP1R2, ICH_AP1R2_EL2)
-CPUIF_MAP(ICH_AP1R1, ICH_AP1R1_EL2)
-CPUIF_MAP(ICH_AP1R0, ICH_AP1R0_EL2)
-CPUIF_MAP(ICC_HSRE, ICC_SRE_EL2)
-CPUIF_MAP(ICC_SRE, ICC_SRE_EL1)
-
-CPUIF_MAP_LO_HI(ICH_LR15, ICH_LRC15, ICH_LR15_EL2)
-CPUIF_MAP_LO_HI(ICH_LR14, ICH_LRC14, ICH_LR14_EL2)
-CPUIF_MAP_LO_HI(ICH_LR13, ICH_LRC13, ICH_LR13_EL2)
-CPUIF_MAP_LO_HI(ICH_LR12, ICH_LRC12, ICH_LR12_EL2)
-CPUIF_MAP_LO_HI(ICH_LR11, ICH_LRC11, ICH_LR11_EL2)
-CPUIF_MAP_LO_HI(ICH_LR10, ICH_LRC10, ICH_LR10_EL2)
-CPUIF_MAP_LO_HI(ICH_LR9, ICH_LRC9, ICH_LR9_EL2)
-CPUIF_MAP_LO_HI(ICH_LR8, ICH_LRC8, ICH_LR8_EL2)
-CPUIF_MAP_LO_HI(ICH_LR7, ICH_LRC7, ICH_LR7_EL2)
-CPUIF_MAP_LO_HI(ICH_LR6, ICH_LRC6, ICH_LR6_EL2)
-CPUIF_MAP_LO_HI(ICH_LR5, ICH_LRC5, ICH_LR5_EL2)
-CPUIF_MAP_LO_HI(ICH_LR4, ICH_LRC4, ICH_LR4_EL2)
-CPUIF_MAP_LO_HI(ICH_LR3, ICH_LRC3, ICH_LR3_EL2)
-CPUIF_MAP_LO_HI(ICH_LR2, ICH_LRC2, ICH_LR2_EL2)
-CPUIF_MAP_LO_HI(ICH_LR1, ICH_LRC1, ICH_LR1_EL2)
-CPUIF_MAP_LO_HI(ICH_LR0, ICH_LRC0, ICH_LR0_EL2)
-
#define read_gicreg(r) read_##r()
#define write_gicreg(v, r) write_##r(v)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
deleted file mode 100644
index 9c04bd810d07..000000000000
--- a/arch/arm/include/asm/kvm_arm.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM_KVM_ARM_H__
-#define __ARM_KVM_ARM_H__
-
-#include <linux/const.h>
-#include <linux/types.h>
-
-/* Hyp Configuration Register (HCR) bits */
-#define HCR_TGE (1 << 27)
-#define HCR_TVM (1 << 26)
-#define HCR_TTLB (1 << 25)
-#define HCR_TPU (1 << 24)
-#define HCR_TPC (1 << 23)
-#define HCR_TSW (1 << 22)
-#define HCR_TAC (1 << 21)
-#define HCR_TIDCP (1 << 20)
-#define HCR_TSC (1 << 19)
-#define HCR_TID3 (1 << 18)
-#define HCR_TID2 (1 << 17)
-#define HCR_TID1 (1 << 16)
-#define HCR_TID0 (1 << 15)
-#define HCR_TWE (1 << 14)
-#define HCR_TWI (1 << 13)
-#define HCR_DC (1 << 12)
-#define HCR_BSU (3 << 10)
-#define HCR_BSU_IS (1 << 10)
-#define HCR_FB (1 << 9)
-#define HCR_VA (1 << 8)
-#define HCR_VI (1 << 7)
-#define HCR_VF (1 << 6)
-#define HCR_AMO (1 << 5)
-#define HCR_IMO (1 << 4)
-#define HCR_FMO (1 << 3)
-#define HCR_PTW (1 << 2)
-#define HCR_SWIO (1 << 1)
-#define HCR_VM 1
-
-/*
- * The bits we set in HCR:
- * TAC: Trap ACTLR
- * TSC: Trap SMC
- * TVM: Trap VM ops (until MMU and caches are on)
- * TSW: Trap cache operations by set/way
- * TWI: Trap WFI
- * TWE: Trap WFE
- * TIDCP: Trap L2CTLR/L2ECTLR
- * BSU_IS: Upgrade barriers to the inner shareable domain
- * FB: Force broadcast of all maintainance operations
- * AMO: Override CPSR.A and enable signaling with VA
- * IMO: Override CPSR.I and enable signaling with VI
- * FMO: Override CPSR.F and enable signaling with VF
- * SWIO: Turn set/way invalidates into set/way clean+invalidate
- */
-#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
- HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
- HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
-
-/* System Control Register (SCTLR) bits */
-#define SCTLR_TE (1 << 30)
-#define SCTLR_EE (1 << 25)
-#define SCTLR_V (1 << 13)
-
-/* Hyp System Control Register (HSCTLR) bits */
-#define HSCTLR_TE (1 << 30)
-#define HSCTLR_EE (1 << 25)
-#define HSCTLR_FI (1 << 21)
-#define HSCTLR_WXN (1 << 19)
-#define HSCTLR_I (1 << 12)
-#define HSCTLR_C (1 << 2)
-#define HSCTLR_A (1 << 1)
-#define HSCTLR_M 1
-#define HSCTLR_MASK (HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I | \
- HSCTLR_WXN | HSCTLR_FI | HSCTLR_EE | HSCTLR_TE)
-
-/* TTBCR and HTCR Registers bits */
-#define TTBCR_EAE (1 << 31)
-#define TTBCR_IMP (1 << 30)
-#define TTBCR_SH1 (3 << 28)
-#define TTBCR_ORGN1 (3 << 26)
-#define TTBCR_IRGN1 (3 << 24)
-#define TTBCR_EPD1 (1 << 23)
-#define TTBCR_A1 (1 << 22)
-#define TTBCR_T1SZ (7 << 16)
-#define TTBCR_SH0 (3 << 12)
-#define TTBCR_ORGN0 (3 << 10)
-#define TTBCR_IRGN0 (3 << 8)
-#define TTBCR_EPD0 (1 << 7)
-#define TTBCR_T0SZ (7 << 0)
-#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
-
-/* Hyp System Trap Register */
-#define HSTR_T(x) (1 << x)
-#define HSTR_TTEE (1 << 16)
-#define HSTR_TJDBX (1 << 17)
-
-/* Hyp Coprocessor Trap Register */
-#define HCPTR_TCP(x) (1 << x)
-#define HCPTR_TCP_MASK (0x3fff)
-#define HCPTR_TASE (1 << 15)
-#define HCPTR_TTA (1 << 20)
-#define HCPTR_TCPAC (1 << 31)
-
-/* Hyp Debug Configuration Register bits */
-#define HDCR_TDRA (1 << 11)
-#define HDCR_TDOSA (1 << 10)
-#define HDCR_TDA (1 << 9)
-#define HDCR_TDE (1 << 8)
-#define HDCR_HPME (1 << 7)
-#define HDCR_TPM (1 << 6)
-#define HDCR_TPMCR (1 << 5)
-#define HDCR_HPMN_MASK (0x1F)
-
-/*
- * The architecture supports 40-bit IPA as input to the 2nd stage translations
- * and PTRS_PER_S2_PGD becomes 1024, because each entry covers 1GB of address
- * space.
- */
-#define KVM_PHYS_SHIFT (40)
-
-#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
-
-/* Virtualization Translation Control Register (VTCR) bits */
-#define VTCR_SH0 (3 << 12)
-#define VTCR_ORGN0 (3 << 10)
-#define VTCR_IRGN0 (3 << 8)
-#define VTCR_SL0 (3 << 6)
-#define VTCR_S (1 << 4)
-#define VTCR_T0SZ (0xf)
-#define VTCR_MASK (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0 | VTCR_SL0 | \
- VTCR_S | VTCR_T0SZ)
-#define VTCR_HTCR_SH (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0)
-#define VTCR_SL_L2 (0 << 6) /* Starting-level: 2 */
-#define VTCR_SL_L1 (1 << 6) /* Starting-level: 1 */
-#define KVM_VTCR_SL0 VTCR_SL_L1
-/* stage-2 input address range defined as 2^(32-T0SZ) */
-#define KVM_T0SZ (32 - KVM_PHYS_SHIFT)
-#define KVM_VTCR_T0SZ (KVM_T0SZ & VTCR_T0SZ)
-#define KVM_VTCR_S ((KVM_VTCR_T0SZ << 1) & VTCR_S)
-
-/* Virtualization Translation Table Base Register (VTTBR) bits */
-#if KVM_VTCR_SL0 == VTCR_SL_L2 /* see ARM DDI 0406C: B4-1720 */
-#define VTTBR_X (14 - KVM_T0SZ)
-#else
-#define VTTBR_X (5 - KVM_T0SZ)
-#endif
-#define VTTBR_CNP_BIT _AC(1, UL)
-#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
-#define VTTBR_VMID_SHIFT _AC(48, ULL)
-#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
-
-/* Hyp Syndrome Register (HSR) bits */
-#define HSR_EC_SHIFT (26)
-#define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT)
-#define HSR_IL (_AC(1, UL) << 25)
-#define HSR_ISS (HSR_IL - 1)
-#define HSR_ISV_SHIFT (24)
-#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT)
-#define HSR_SRT_SHIFT (16)
-#define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT)
-#define HSR_CM (1 << 8)
-#define HSR_FSC (0x3f)
-#define HSR_FSC_TYPE (0x3c)
-#define HSR_SSE (1 << 21)
-#define HSR_WNR (1 << 6)
-#define HSR_CV_SHIFT (24)
-#define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT)
-#define HSR_COND_SHIFT (20)
-#define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT)
-
-#define FSC_FAULT (0x04)
-#define FSC_ACCESS (0x08)
-#define FSC_PERM (0x0c)
-#define FSC_SEA (0x10)
-#define FSC_SEA_TTW0 (0x14)
-#define FSC_SEA_TTW1 (0x15)
-#define FSC_SEA_TTW2 (0x16)
-#define FSC_SEA_TTW3 (0x17)
-#define FSC_SECC (0x18)
-#define FSC_SECC_TTW0 (0x1c)
-#define FSC_SECC_TTW1 (0x1d)
-#define FSC_SECC_TTW2 (0x1e)
-#define FSC_SECC_TTW3 (0x1f)
-
-/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
-#define HPFAR_MASK (~0xf)
-
-#define HSR_EC_UNKNOWN (0x00)
-#define HSR_EC_WFI (0x01)
-#define HSR_EC_CP15_32 (0x03)
-#define HSR_EC_CP15_64 (0x04)
-#define HSR_EC_CP14_MR (0x05)
-#define HSR_EC_CP14_LS (0x06)
-#define HSR_EC_CP_0_13 (0x07)
-#define HSR_EC_CP10_ID (0x08)
-#define HSR_EC_JAZELLE (0x09)
-#define HSR_EC_BXJ (0x0A)
-#define HSR_EC_CP14_64 (0x0C)
-#define HSR_EC_SVC_HYP (0x11)
-#define HSR_EC_HVC (0x12)
-#define HSR_EC_SMC (0x13)
-#define HSR_EC_IABT (0x20)
-#define HSR_EC_IABT_HYP (0x21)
-#define HSR_EC_DABT (0x24)
-#define HSR_EC_DABT_HYP (0x25)
-#define HSR_EC_MAX (0x3f)
-
-#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)
-
-#define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1)
-
-#define HSR_DABT_S1PTW (_AC(1, UL) << 7)
-#define HSR_DABT_CM (_AC(1, UL) << 8)
-
-#define kvm_arm_exception_type \
- {0, "RESET" }, \
- {1, "UNDEFINED" }, \
- {2, "SOFTWARE" }, \
- {3, "PREF_ABORT" }, \
- {4, "DATA_ABORT" }, \
- {5, "IRQ" }, \
- {6, "FIQ" }, \
- {7, "HVC" }
-
-#define HSRECN(x) { HSR_EC_##x, #x }
-
-#define kvm_arm_exception_class \
- HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
- HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
- HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
- HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
- HSRECN(DABT), HSRECN(DABT_HYP)
-
-
-#endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
deleted file mode 100644
index f615830f9f57..000000000000
--- a/arch/arm/include/asm/kvm_asm.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM_KVM_ASM_H__
-#define __ARM_KVM_ASM_H__
-
-#include <asm/virt.h>
-
-#define ARM_EXIT_WITH_ABORT_BIT 31
-#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_ABORT_BIT))
-#define ARM_EXCEPTION_IS_TRAP(x) \
- (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_PREF_ABORT || \
- ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_DATA_ABORT || \
- ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_HVC)
-#define ARM_ABORT_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_ABORT_BIT))
-
-#define ARM_EXCEPTION_RESET 0
-#define ARM_EXCEPTION_UNDEFINED 1
-#define ARM_EXCEPTION_SOFTWARE 2
-#define ARM_EXCEPTION_PREF_ABORT 3
-#define ARM_EXCEPTION_DATA_ABORT 4
-#define ARM_EXCEPTION_IRQ 5
-#define ARM_EXCEPTION_FIQ 6
-#define ARM_EXCEPTION_HVC 7
-#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
-/*
- * The rr_lo_hi macro swaps a pair of registers depending on
- * current endianness. It is used in conjunction with ldrd and strd
- * instructions that load/store a 64-bit value from/to memory to/from
- * a pair of registers which are used with the mrrc and mcrr instructions.
- * If used with the ldrd/strd instructions, the a1 parameter is the first
- * source/destination register and the a2 parameter is the second
- * source/destination register. Note that the ldrd/strd instructions
- * already swap the bytes within the words correctly according to the
- * endianness setting, but the order of the registers need to be effectively
- * swapped when used with the mrrc/mcrr instructions.
- */
-#ifdef CONFIG_CPU_ENDIAN_BE8
-#define rr_lo_hi(a1, a2) a2, a1
-#else
-#define rr_lo_hi(a1, a2) a1, a2
-#endif
-
-#define kvm_ksym_ref(kva) (kva)
-
-#ifndef __ASSEMBLY__
-struct kvm;
-struct kvm_vcpu;
-
-extern char __kvm_hyp_init[];
-extern char __kvm_hyp_init_end[];
-
-extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
-
-extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
-
-/* no VHE on 32-bit :( */
-static inline int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { BUG(); return 0; }
-
-extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
-
-extern void __init_stage2_translation(void);
-
-extern u64 __vgic_v3_get_ich_vtr_el2(void);
-extern u64 __vgic_v3_read_vmcr(void);
-extern void __vgic_v3_write_vmcr(u32 vmcr);
-extern void __vgic_v3_init_lrs(void);
-
-#endif
-
-#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h
deleted file mode 100644
index a23826117dd6..000000000000
--- a/arch/arm/include/asm/kvm_coproc.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 Rusty Russell IBM Corporation
- */
-
-#ifndef __ARM_KVM_COPROC_H__
-#define __ARM_KVM_COPROC_H__
-#include <linux/kvm_host.h>
-
-void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
-
-struct kvm_coproc_target_table {
- unsigned target;
- const struct coproc_reg *table;
- size_t num;
-};
-void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table);
-
-int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
-
-unsigned long kvm_arm_num_guest_msrs(struct kvm_vcpu *vcpu);
-int kvm_arm_copy_msrindices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-void kvm_coproc_table_init(void);
-
-struct kvm_one_reg;
-int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-#endif /* __ARM_KVM_COPROC_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
deleted file mode 100644
index 3944305e81df..000000000000
--- a/arch/arm/include/asm/kvm_emulate.h
+++ /dev/null
@@ -1,372 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM_KVM_EMULATE_H__
-#define __ARM_KVM_EMULATE_H__
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-#include <asm/cputype.h>
-
-/* arm64 compatibility macros */
-#define PSR_AA32_MODE_FIQ FIQ_MODE
-#define PSR_AA32_MODE_SVC SVC_MODE
-#define PSR_AA32_MODE_ABT ABT_MODE
-#define PSR_AA32_MODE_UND UND_MODE
-#define PSR_AA32_T_BIT PSR_T_BIT
-#define PSR_AA32_F_BIT PSR_F_BIT
-#define PSR_AA32_I_BIT PSR_I_BIT
-#define PSR_AA32_A_BIT PSR_A_BIT
-#define PSR_AA32_E_BIT PSR_E_BIT
-#define PSR_AA32_IT_MASK PSR_IT_MASK
-#define PSR_AA32_GE_MASK 0x000f0000
-#define PSR_AA32_DIT_BIT 0x00200000
-#define PSR_AA32_PAN_BIT 0x00400000
-#define PSR_AA32_SSBS_BIT 0x00800000
-#define PSR_AA32_Q_BIT PSR_Q_BIT
-#define PSR_AA32_V_BIT PSR_V_BIT
-#define PSR_AA32_C_BIT PSR_C_BIT
-#define PSR_AA32_Z_BIT PSR_Z_BIT
-#define PSR_AA32_N_BIT PSR_N_BIT
-
-unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
-
-static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num)
-{
- return vcpu_reg(vcpu, reg_num);
-}
-
-unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu);
-
-static inline unsigned long vpcu_read_spsr(struct kvm_vcpu *vcpu)
-{
- return *__vcpu_spsr(vcpu);
-}
-
-static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
-{
- *__vcpu_spsr(vcpu) = v;
-}
-
-static inline unsigned long host_spsr_to_spsr32(unsigned long spsr)
-{
- return spsr;
-}
-
-static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
- u8 reg_num)
-{
- return *vcpu_reg(vcpu, reg_num);
-}
-
-static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
- unsigned long val)
-{
- *vcpu_reg(vcpu, reg_num) = val;
-}
-
-bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
-void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
-void kvm_inject_undef32(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
-
-static inline void kvm_inject_undefined(struct kvm_vcpu *vcpu)
-{
- kvm_inject_undef32(vcpu);
-}
-
-static inline void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
-{
- kvm_inject_dabt32(vcpu, addr);
-}
-
-static inline void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
-{
- kvm_inject_pabt32(vcpu, addr);
-}
-
-static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
-{
- return kvm_condition_valid32(vcpu);
-}
-
-static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
-{
- kvm_skip_instr32(vcpu, is_wide_instr);
-}
-
-static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr = HCR_GUEST_MASK;
-}
-
-static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
-{
- return (unsigned long *)&vcpu->arch.hcr;
-}
-
-static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr &= ~HCR_TWE;
-}
-
-static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr |= HCR_TWE;
-}
-
-static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
-{
- return true;
-}
-
-static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
-{
- return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_pc;
-}
-
-static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
-{
- return (unsigned long *)&vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr;
-}
-
-static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
-{
- *vcpu_cpsr(vcpu) |= PSR_T_BIT;
-}
-
-static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
-{
- unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
- return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE);
-}
-
-static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
-{
- unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
- return cpsr_mode > USR_MODE;
-}
-
-static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fault.hsr;
-}
-
-static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
-{
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
-
- if (hsr & HSR_CV)
- return (hsr & HSR_COND) >> HSR_COND_SHIFT;
-
- return -1;
-}
-
-static inline unsigned long kvm_vcpu_get_hfar(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fault.hxfar;
-}
-
-static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu)
-{
- return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8;
-}
-
-static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
-}
-
-static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC);
-}
-
-static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_WNR;
-}
-
-static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_SSE;
-}
-
-static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
-{
- return false;
-}
-
-static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
-{
- return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
-}
-
-static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
-}
-
-static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu)
-{
- return !!(kvm_vcpu_get_hsr(vcpu) & HSR_DABT_CM);
-}
-
-/* Get Access Size from a data abort */
-static inline unsigned int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
-{
- switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) {
- case 0:
- return 1;
- case 1:
- return 2;
- case 2:
- return 4;
- default:
- kvm_err("Hardware is weird: SAS 0b11 is reserved\n");
- return 4;
- }
-}
-
-/* This one is not specific to Data Abort */
-static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_IL;
-}
-
-static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) >> HSR_EC_SHIFT;
-}
-
-static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_trap_get_class(vcpu) == HSR_EC_IABT;
-}
-
-static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
-}
-
-static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
-}
-
-static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
-{
- switch (kvm_vcpu_trap_get_fault(vcpu)) {
- case FSC_SEA:
- case FSC_SEA_TTW0:
- case FSC_SEA_TTW1:
- case FSC_SEA_TTW2:
- case FSC_SEA_TTW3:
- case FSC_SECC:
- case FSC_SECC_TTW0:
- case FSC_SECC_TTW1:
- case FSC_SECC_TTW2:
- case FSC_SECC_TTW3:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
-{
- if (kvm_vcpu_trap_is_iabt(vcpu))
- return false;
-
- return kvm_vcpu_dabt_iswrite(vcpu);
-}
-
-static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
-}
-
-static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
-{
- return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
-}
-
-static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
-{
- return false;
-}
-
-static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
- bool flag)
-{
-}
-
-static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
-{
- *vcpu_cpsr(vcpu) |= PSR_E_BIT;
-}
-
-static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
-{
- return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
-}
-
-static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
- unsigned long data,
- unsigned int len)
-{
- if (kvm_vcpu_is_be(vcpu)) {
- switch (len) {
- case 1:
- return data & 0xff;
- case 2:
- return be16_to_cpu(data & 0xffff);
- default:
- return be32_to_cpu(data);
- }
- } else {
- switch (len) {
- case 1:
- return data & 0xff;
- case 2:
- return le16_to_cpu(data & 0xffff);
- default:
- return le32_to_cpu(data);
- }
- }
-}
-
-static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
- unsigned long data,
- unsigned int len)
-{
- if (kvm_vcpu_is_be(vcpu)) {
- switch (len) {
- case 1:
- return data & 0xff;
- case 2:
- return cpu_to_be16(data & 0xffff);
- default:
- return cpu_to_be32(data);
- }
- } else {
- switch (len) {
- case 1:
- return data & 0xff;
- case 2:
- return cpu_to_le16(data & 0xffff);
- default:
- return cpu_to_le32(data);
- }
- }
-}
-
-static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {}
-
-#endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
deleted file mode 100644
index a827b4d60d38..000000000000
--- a/arch/arm/include/asm/kvm_host.h
+++ /dev/null
@@ -1,456 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM_KVM_HOST_H__
-#define __ARM_KVM_HOST_H__
-
-#include <linux/arm-smccc.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/kvm_types.h>
-#include <asm/cputype.h>
-#include <asm/kvm.h>
-#include <asm/kvm_asm.h>
-#include <asm/fpstate.h>
-#include <kvm/arm_arch_timer.h>
-
-#define __KVM_HAVE_ARCH_INTC_INITIALIZED
-
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_HAVE_ONE_REG
-#define KVM_HALT_POLL_NS_DEFAULT 500000
-
-#define KVM_VCPU_MAX_FEATURES 2
-
-#include <kvm/arm_vgic.h>
-
-
-#ifdef CONFIG_ARM_GIC_V3
-#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
-#else
-#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
-#endif
-
-#define KVM_REQ_SLEEP \
- KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
-#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
-#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
-
-DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
-
-static inline int kvm_arm_init_sve(void) { return 0; }
-
-u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int __attribute_const__ kvm_target_cpu(void);
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
-
-struct kvm_vmid {
- /* The VMID generation used for the virt. memory system */
- u64 vmid_gen;
- u32 vmid;
-};
-
-struct kvm_arch {
- /* The last vcpu id that ran on each physical CPU */
- int __percpu *last_vcpu_ran;
-
- /*
- * Anything that is not used directly from assembly code goes
- * here.
- */
-
- /* The VMID generation used for the virt. memory system */
- struct kvm_vmid vmid;
-
- /* Stage-2 page table */
- pgd_t *pgd;
- phys_addr_t pgd_phys;
-
- /* Interrupt controller */
- struct vgic_dist vgic;
- int max_vcpus;
-
- /* Mandated version of PSCI */
- u32 psci_version;
-
- /*
- * If we encounter a data abort without valid instruction syndrome
- * information, report this to user space. User space can (and
- * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
- * supported.
- */
- bool return_nisv_io_abort_to_user;
-};
-
-#define KVM_NR_MEM_OBJS 40
-
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
- int nobjs;
- void *objects[KVM_NR_MEM_OBJS];
-};
-
-struct kvm_vcpu_fault_info {
- u32 hsr; /* Hyp Syndrome Register */
- u32 hxfar; /* Hyp Data/Inst. Fault Address Register */
- u32 hpfar; /* Hyp IPA Fault Address Register */
-};
-
-/*
- * 0 is reserved as an invalid value.
- * Order should be kept in sync with the save/restore code.
- */
-enum vcpu_sysreg {
- __INVALID_SYSREG__,
- c0_MPIDR, /* MultiProcessor ID Register */
- c0_CSSELR, /* Cache Size Selection Register */
- c1_SCTLR, /* System Control Register */
- c1_ACTLR, /* Auxiliary Control Register */
- c1_CPACR, /* Coprocessor Access Control */
- c2_TTBR0, /* Translation Table Base Register 0 */
- c2_TTBR0_high, /* TTBR0 top 32 bits */
- c2_TTBR1, /* Translation Table Base Register 1 */
- c2_TTBR1_high, /* TTBR1 top 32 bits */
- c2_TTBCR, /* Translation Table Base Control R. */
- c3_DACR, /* Domain Access Control Register */
- c5_DFSR, /* Data Fault Status Register */
- c5_IFSR, /* Instruction Fault Status Register */
- c5_ADFSR, /* Auxilary Data Fault Status R */
- c5_AIFSR, /* Auxilary Instrunction Fault Status R */
- c6_DFAR, /* Data Fault Address Register */
- c6_IFAR, /* Instruction Fault Address Register */
- c7_PAR, /* Physical Address Register */
- c7_PAR_high, /* PAR top 32 bits */
- c9_L2CTLR, /* Cortex A15/A7 L2 Control Register */
- c10_PRRR, /* Primary Region Remap Register */
- c10_NMRR, /* Normal Memory Remap Register */
- c12_VBAR, /* Vector Base Address Register */
- c13_CID, /* Context ID Register */
- c13_TID_URW, /* Thread ID, User R/W */
- c13_TID_URO, /* Thread ID, User R/O */
- c13_TID_PRIV, /* Thread ID, Privileged */
- c14_CNTKCTL, /* Timer Control Register (PL1) */
- c10_AMAIR0, /* Auxilary Memory Attribute Indirection Reg0 */
- c10_AMAIR1, /* Auxilary Memory Attribute Indirection Reg1 */
- NR_CP15_REGS /* Number of regs (incl. invalid) */
-};
-
-struct kvm_cpu_context {
- struct kvm_regs gp_regs;
- struct vfp_hard_struct vfp;
- u32 cp15[NR_CP15_REGS];
-};
-
-struct kvm_host_data {
- struct kvm_cpu_context host_ctxt;
-};
-
-typedef struct kvm_host_data kvm_host_data_t;
-
-static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
-{
- /* The host's MPIDR is immutable, so let's set it up at boot time */
- cpu_ctxt->cp15[c0_MPIDR] = read_cpuid_mpidr();
-}
-
-struct vcpu_reset_state {
- unsigned long pc;
- unsigned long r0;
- bool be;
- bool reset;
-};
-
-struct kvm_vcpu_arch {
- struct kvm_cpu_context ctxt;
-
- int target; /* Processor target */
- DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
-
- /* The CPU type we expose to the VM */
- u32 midr;
-
- /* HYP trapping configuration */
- u32 hcr;
-
- /* Exception Information */
- struct kvm_vcpu_fault_info fault;
-
- /* Host FP context */
- struct kvm_cpu_context *host_cpu_context;
-
- /* VGIC state */
- struct vgic_cpu vgic_cpu;
- struct arch_timer_cpu timer_cpu;
-
- /*
- * Anything that is not used directly from assembly code goes
- * here.
- */
-
- /* vcpu power-off state */
- bool power_off;
-
- /* Don't run the guest (internal implementation need) */
- bool pause;
-
- /* Cache some mmu pages needed inside spinlock regions */
- struct kvm_mmu_memory_cache mmu_page_cache;
-
- struct vcpu_reset_state reset_state;
-
- /* Detect first run of a vcpu */
- bool has_run_once;
-};
-
-struct kvm_vm_stat {
- ulong remote_tlb_flush;
-};
-
-struct kvm_vcpu_stat {
- u64 halt_successful_poll;
- u64 halt_attempted_poll;
- u64 halt_poll_invalid;
- u64 halt_wakeup;
- u64 hvc_exit_stat;
- u64 wfe_exit_stat;
- u64 wfi_exit_stat;
- u64 mmio_exit_user;
- u64 mmio_exit_kernel;
- u64 exits;
-};
-
-#define vcpu_cp15(v,r) (v)->arch.ctxt.cp15[r]
-
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
-unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
-int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
-int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
-
-unsigned long __kvm_call_hyp(void *hypfn, ...);
-
-/*
- * The has_vhe() part doesn't get emitted, but is used for type-checking.
- */
-#define kvm_call_hyp(f, ...) \
- do { \
- if (has_vhe()) { \
- f(__VA_ARGS__); \
- } else { \
- __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \
- } \
- } while(0)
-
-#define kvm_call_hyp_ret(f, ...) \
- ({ \
- typeof(f(__VA_ARGS__)) ret; \
- \
- if (has_vhe()) { \
- ret = f(__VA_ARGS__); \
- } else { \
- ret = __kvm_call_hyp(kvm_ksym_ref(f), \
- ##__VA_ARGS__); \
- } \
- \
- ret; \
- })
-
-void force_vm_exit(const cpumask_t *mask);
-int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events);
-
-int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events);
-
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-
-unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
-int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-
-void kvm_arm_halt_guest(struct kvm *kvm);
-void kvm_arm_resume_guest(struct kvm *kvm);
-
-int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-
-int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index);
-
-static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index) {}
-
-/* MMIO helpers */
-void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
-unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
-
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
- phys_addr_t fault_ipa);
-
-static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
- unsigned long hyp_stack_ptr,
- unsigned long vector_ptr)
-{
- /*
- * Call initialization code, and switch to the full blown HYP
- * code. The init code doesn't need to preserve these
- * registers as r0-r3 are already callee saved according to
- * the AAPCS.
- * Note that we slightly misuse the prototype by casting the
- * stack pointer to a void *.
-
- * The PGDs are always passed as the third argument, in order
- * to be passed into r2-r3 to the init code (yes, this is
- * compliant with the PCS!).
- */
-
- __kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
-}
-
-static inline void __cpu_init_stage2(void)
-{
- kvm_call_hyp(__init_stage2_translation);
-}
-
-static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
- return 0;
-}
-
-int kvm_perf_init(void);
-int kvm_perf_teardown(void);
-
-static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
-{
- return SMCCC_RET_NOT_SUPPORTED;
-}
-
-static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
-{
- return GPA_INVALID;
-}
-
-static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
-{
-}
-
-static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
-{
- return false;
-}
-
-void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
-
-struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
-
-static inline bool kvm_arch_requires_vhe(void) { return false; }
-static inline void kvm_arch_hardware_unsetup(void) {}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) {}
-
-static inline void kvm_arm_init_debug(void) {}
-static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
-
-int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr);
-int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr);
-int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr);
-
-/*
- * VFP/NEON switching is all done by the hyp switch code, so no need to
- * coordinate with host context handling for this state:
- */
-static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
-
-static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
-
-#define KVM_BP_HARDEN_UNKNOWN -1
-#define KVM_BP_HARDEN_WA_NEEDED 0
-#define KVM_BP_HARDEN_NOT_REQUIRED 1
-
-static inline int kvm_arm_harden_branch_predictor(void)
-{
- switch(read_cpuid_part()) {
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
- case ARM_CPU_PART_BRAHMA_B15:
- case ARM_CPU_PART_CORTEX_A12:
- case ARM_CPU_PART_CORTEX_A15:
- case ARM_CPU_PART_CORTEX_A17:
- return KVM_BP_HARDEN_WA_NEEDED;
-#endif
- case ARM_CPU_PART_CORTEX_A7:
- return KVM_BP_HARDEN_NOT_REQUIRED;
- default:
- return KVM_BP_HARDEN_UNKNOWN;
- }
-}
-
-#define KVM_SSBD_UNKNOWN -1
-#define KVM_SSBD_FORCE_DISABLE 0
-#define KVM_SSBD_KERNEL 1
-#define KVM_SSBD_FORCE_ENABLE 2
-#define KVM_SSBD_MITIGATED 3
-
-static inline int kvm_arm_have_ssbd(void)
-{
- /* No way to detect it yet, pretend it is not there. */
- return KVM_SSBD_UNKNOWN;
-}
-
-static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
-
-#define __KVM_HAVE_ARCH_VM_ALLOC
-struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
-
-static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
-{
- /*
- * On 32bit ARM, VMs get a static 40bit IPA stage2 setup,
- * so any non-zero value used as type is illegal.
- */
- if (type)
- return -EINVAL;
- return 0;
-}
-
-static inline int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
-{
- return -EINVAL;
-}
-
-static inline bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
-{
- return true;
-}
-
-#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
deleted file mode 100644
index 3c1b55ecc578..000000000000
--- a/arch/arm/include/asm/kvm_hyp.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#ifndef __ARM_KVM_HYP_H__
-#define __ARM_KVM_HYP_H__
-
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-#include <asm/cp15.h>
-#include <asm/kvm_arm.h>
-#include <asm/vfp.h>
-
-#define __hyp_text __section(.hyp.text) notrace
-
-#define __ACCESS_VFP(CRn) \
- "mrc", "mcr", __stringify(p10, 7, %0, CRn, cr0, 0), u32
-
-#define write_special(v, r) \
- asm volatile("msr " __stringify(r) ", %0" : : "r" (v))
-#define read_special(r) ({ \
- u32 __val; \
- asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
- __val; \
-})
-
-#define TTBR0 __ACCESS_CP15_64(0, c2)
-#define TTBR1 __ACCESS_CP15_64(1, c2)
-#define VTTBR __ACCESS_CP15_64(6, c2)
-#define PAR __ACCESS_CP15_64(0, c7)
-#define CNTP_CVAL __ACCESS_CP15_64(2, c14)
-#define CNTV_CVAL __ACCESS_CP15_64(3, c14)
-#define CNTVOFF __ACCESS_CP15_64(4, c14)
-
-#define MIDR __ACCESS_CP15(c0, 0, c0, 0)
-#define CSSELR __ACCESS_CP15(c0, 2, c0, 0)
-#define VPIDR __ACCESS_CP15(c0, 4, c0, 0)
-#define VMPIDR __ACCESS_CP15(c0, 4, c0, 5)
-#define SCTLR __ACCESS_CP15(c1, 0, c0, 0)
-#define CPACR __ACCESS_CP15(c1, 0, c0, 2)
-#define HCR __ACCESS_CP15(c1, 4, c1, 0)
-#define HDCR __ACCESS_CP15(c1, 4, c1, 1)
-#define HCPTR __ACCESS_CP15(c1, 4, c1, 2)
-#define HSTR __ACCESS_CP15(c1, 4, c1, 3)
-#define TTBCR __ACCESS_CP15(c2, 0, c0, 2)
-#define HTCR __ACCESS_CP15(c2, 4, c0, 2)
-#define VTCR __ACCESS_CP15(c2, 4, c1, 2)
-#define DACR __ACCESS_CP15(c3, 0, c0, 0)
-#define DFSR __ACCESS_CP15(c5, 0, c0, 0)
-#define IFSR __ACCESS_CP15(c5, 0, c0, 1)
-#define ADFSR __ACCESS_CP15(c5, 0, c1, 0)
-#define AIFSR __ACCESS_CP15(c5, 0, c1, 1)
-#define HSR __ACCESS_CP15(c5, 4, c2, 0)
-#define DFAR __ACCESS_CP15(c6, 0, c0, 0)
-#define IFAR __ACCESS_CP15(c6, 0, c0, 2)
-#define HDFAR __ACCESS_CP15(c6, 4, c0, 0)
-#define HIFAR __ACCESS_CP15(c6, 4, c0, 2)
-#define HPFAR __ACCESS_CP15(c6, 4, c0, 4)
-#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
-#define BPIALLIS __ACCESS_CP15(c7, 0, c1, 6)
-#define ICIMVAU __ACCESS_CP15(c7, 0, c5, 1)
-#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
-#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
-#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0)
-#define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4)
-#define PRRR __ACCESS_CP15(c10, 0, c2, 0)
-#define NMRR __ACCESS_CP15(c10, 0, c2, 1)
-#define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0)
-#define AMAIR1 __ACCESS_CP15(c10, 0, c3, 1)
-#define VBAR __ACCESS_CP15(c12, 0, c0, 0)
-#define CID __ACCESS_CP15(c13, 0, c0, 1)
-#define TID_URW __ACCESS_CP15(c13, 0, c0, 2)
-#define TID_URO __ACCESS_CP15(c13, 0, c0, 3)
-#define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4)
-#define HTPIDR __ACCESS_CP15(c13, 4, c0, 2)
-#define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0)
-#define CNTP_CTL __ACCESS_CP15(c14, 0, c2, 1)
-#define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1)
-#define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0)
-
-#define VFP_FPEXC __ACCESS_VFP(FPEXC)
-
-/* AArch64 compatibility macros, only for the timer so far */
-#define read_sysreg_el0(r) read_sysreg(r##_EL0)
-#define write_sysreg_el0(v, r) write_sysreg(v, r##_EL0)
-
-#define SYS_CNTP_CTL_EL0 CNTP_CTL
-#define SYS_CNTP_CVAL_EL0 CNTP_CVAL
-#define SYS_CNTV_CTL_EL0 CNTV_CTL
-#define SYS_CNTV_CVAL_EL0 CNTV_CVAL
-
-#define cntvoff_el2 CNTVOFF
-#define cnthctl_el2 CNTHCTL
-
-void __timer_enable_traps(struct kvm_vcpu *vcpu);
-void __timer_disable_traps(struct kvm_vcpu *vcpu);
-
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
-
-void __sysreg_save_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
-
-void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
-
-asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
-asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
-static inline bool __vfp_enabled(void)
-{
- return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
-}
-
-void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
-void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
-
-asmlinkage int __guest_enter(struct kvm_vcpu *vcpu,
- struct kvm_cpu_context *host);
-asmlinkage int __hyp_do_panic(const char *, int, u32);
-
-#endif /* __ARM_KVM_HYP_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
deleted file mode 100644
index 0d84d50bf9ba..000000000000
--- a/arch/arm/include/asm/kvm_mmu.h
+++ /dev/null
@@ -1,435 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM_KVM_MMU_H__
-#define __ARM_KVM_MMU_H__
-
-#include <asm/memory.h>
-#include <asm/page.h>
-
-/*
- * We directly use the kernel VA for the HYP, as we can directly share
- * the mapping (HTTBR "covers" TTBR1).
- */
-#define kern_hyp_va(kva) (kva)
-
-/* Contrary to arm64, there is no need to generate a PC-relative address */
-#define hyp_symbol_addr(s) \
- ({ \
- typeof(s) *addr = &(s); \
- addr; \
- })
-
-#ifndef __ASSEMBLY__
-
-#include <linux/highmem.h>
-#include <asm/cacheflush.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_hyp.h>
-#include <asm/pgalloc.h>
-#include <asm/stage2_pgtable.h>
-
-/* Ensure compatibility with arm64 */
-#define VA_BITS 32
-
-#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT
-#define kvm_phys_size(kvm) (1ULL << kvm_phys_shift(kvm))
-#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - 1ULL)
-#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK
-
-#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t))
-
-int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
- void __iomem **kaddr,
- void __iomem **haddr);
-int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
- void **haddr);
-void free_hyp_pgds(void);
-
-void stage2_unmap_vm(struct kvm *kvm);
-int kvm_alloc_stage2_pgd(struct kvm *kvm);
-void kvm_free_stage2_pgd(struct kvm *kvm);
-int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
- phys_addr_t pa, unsigned long size, bool writable);
-
-int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
-
-void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
-
-phys_addr_t kvm_mmu_get_httbr(void);
-phys_addr_t kvm_get_idmap_vector(void);
-int kvm_mmu_init(void);
-void kvm_clear_hyp_idmap(void);
-
-#define kvm_mk_pmd(ptep) __pmd(__pa(ptep) | PMD_TYPE_TABLE)
-#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE)
-#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; })
-
-#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
-#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
-#define kvm_pfn_pud(pfn, prot) (__pud(0))
-
-#define kvm_pud_pfn(pud) ({ WARN_ON(1); 0; })
-
-
-#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
-/* No support for pud hugepages */
-#define kvm_pud_mkhuge(pud) ( {WARN_ON(1); pud; })
-
-/*
- * The following kvm_*pud*() functions are provided strictly to allow
- * sharing code with arm64. They should never be called in practice.
- */
-static inline void kvm_set_s2pud_readonly(pud_t *pud)
-{
- WARN_ON(1);
-}
-
-static inline bool kvm_s2pud_readonly(pud_t *pud)
-{
- WARN_ON(1);
- return false;
-}
-
-static inline void kvm_set_pud(pud_t *pud, pud_t new_pud)
-{
- WARN_ON(1);
-}
-
-static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
-{
- WARN_ON(1);
- return pud;
-}
-
-static inline pud_t kvm_s2pud_mkexec(pud_t pud)
-{
- WARN_ON(1);
- return pud;
-}
-
-static inline bool kvm_s2pud_exec(pud_t *pud)
-{
- WARN_ON(1);
- return false;
-}
-
-static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
-{
- BUG();
- return pud;
-}
-
-static inline bool kvm_s2pud_young(pud_t pud)
-{
- WARN_ON(1);
- return false;
-}
-
-static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
-{
- pte_val(pte) |= L_PTE_S2_RDWR;
- return pte;
-}
-
-static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
-{
- pmd_val(pmd) |= L_PMD_S2_RDWR;
- return pmd;
-}
-
-static inline pte_t kvm_s2pte_mkexec(pte_t pte)
-{
- pte_val(pte) &= ~L_PTE_XN;
- return pte;
-}
-
-static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
-{
- pmd_val(pmd) &= ~PMD_SECT_XN;
- return pmd;
-}
-
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
-{
- pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
-}
-
-static inline bool kvm_s2pte_readonly(pte_t *pte)
-{
- return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
-}
-
-static inline bool kvm_s2pte_exec(pte_t *pte)
-{
- return !(pte_val(*pte) & L_PTE_XN);
-}
-
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
-{
- pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
-}
-
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
-{
- return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
-}
-
-static inline bool kvm_s2pmd_exec(pmd_t *pmd)
-{
- return !(pmd_val(*pmd) & PMD_SECT_XN);
-}
-
-static inline bool kvm_page_empty(void *ptr)
-{
- struct page *ptr_page = virt_to_page(ptr);
- return page_count(ptr_page) == 1;
-}
-
-#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
-#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(kvm, pudp) false
-
-#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
-#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
-#define hyp_pud_table_empty(pudp) false
-
-struct kvm;
-
-#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
-
-static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
-{
- return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
-}
-
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
- /*
- * Clean the dcache to the Point of Coherency.
- *
- * We need to do this through a kernel mapping (using the
- * user-space mapping has proved to be the wrong
- * solution). For that, we need to kmap one page at a time,
- * and iterate over the range.
- */
-
- VM_BUG_ON(size & ~PAGE_MASK);
-
- while (size) {
- void *va = kmap_atomic_pfn(pfn);
-
- kvm_flush_dcache_to_poc(va, PAGE_SIZE);
-
- size -= PAGE_SIZE;
- pfn++;
-
- kunmap_atomic(va);
- }
-}
-
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
- unsigned long size)
-{
- u32 iclsz;
-
- /*
- * If we are going to insert an instruction page and the icache is
- * either VIPT or PIPT, there is a potential problem where the host
- * (or another VM) may have used the same page as this guest, and we
- * read incorrect data from the icache. If we're using a PIPT cache,
- * we can invalidate just that page, but if we are using a VIPT cache
- * we need to invalidate the entire icache - damn shame - as written
- * in the ARM ARM (DDI 0406C.b - Page B3-1393).
- *
- * VIVT caches are tagged using both the ASID and the VMID and doesn't
- * need any kind of flushing (DDI 0406C.b - Page B3-1392).
- */
-
- VM_BUG_ON(size & ~PAGE_MASK);
-
- if (icache_is_vivt_asid_tagged())
- return;
-
- if (!icache_is_pipt()) {
- /* any kind of VIPT cache */
- __flush_icache_all();
- return;
- }
-
- /*
- * CTR IminLine contains Log2 of the number of words in the
- * cache line, so we can get the number of words as
- * 2 << (IminLine - 1). To get the number of bytes, we
- * multiply by 4 (the number of bytes in a 32-bit word), and
- * get 4 << (IminLine).
- */
- iclsz = 4 << (read_cpuid(CPUID_CACHETYPE) & 0xf);
-
- while (size) {
- void *va = kmap_atomic_pfn(pfn);
- void *end = va + PAGE_SIZE;
- void *addr = va;
-
- do {
- write_sysreg(addr, ICIMVAU);
- addr += iclsz;
- } while (addr < end);
-
- dsb(ishst);
- isb();
-
- size -= PAGE_SIZE;
- pfn++;
-
- kunmap_atomic(va);
- }
-
- /* Check if we need to invalidate the BTB */
- if ((read_cpuid_ext(CPUID_EXT_MMFR1) >> 28) != 4) {
- write_sysreg(0, BPIALLIS);
- dsb(ishst);
- isb();
- }
-}
-
-static inline void __kvm_flush_dcache_pte(pte_t pte)
-{
- void *va = kmap_atomic(pte_page(pte));
-
- kvm_flush_dcache_to_poc(va, PAGE_SIZE);
-
- kunmap_atomic(va);
-}
-
-static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
-{
- unsigned long size = PMD_SIZE;
- kvm_pfn_t pfn = pmd_pfn(pmd);
-
- while (size) {
- void *va = kmap_atomic_pfn(pfn);
-
- kvm_flush_dcache_to_poc(va, PAGE_SIZE);
-
- pfn++;
- size -= PAGE_SIZE;
-
- kunmap_atomic(va);
- }
-}
-
-static inline void __kvm_flush_dcache_pud(pud_t pud)
-{
-}
-
-#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x))
-
-void kvm_set_way_flush(struct kvm_vcpu *vcpu);
-void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
-
-static inline bool __kvm_cpu_uses_extended_idmap(void)
-{
- return false;
-}
-
-static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
-{
- return PTRS_PER_PGD;
-}
-
-static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
- pgd_t *hyp_pgd,
- pgd_t *merged_hyp_pgd,
- unsigned long hyp_idmap_start) { }
-
-static inline unsigned int kvm_get_vmid_bits(void)
-{
- return 8;
-}
-
-/*
- * We are not in the kvm->srcu critical section most of the time, so we take
- * the SRCU read lock here. Since we copy the data from the user page, we
- * can immediately drop the lock again.
- */
-static inline int kvm_read_guest_lock(struct kvm *kvm,
- gpa_t gpa, void *data, unsigned long len)
-{
- int srcu_idx = srcu_read_lock(&kvm->srcu);
- int ret = kvm_read_guest(kvm, gpa, data, len);
-
- srcu_read_unlock(&kvm->srcu, srcu_idx);
-
- return ret;
-}
-
-static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
- const void *data, unsigned long len)
-{
- int srcu_idx = srcu_read_lock(&kvm->srcu);
- int ret = kvm_write_guest(kvm, gpa, data, len);
-
- srcu_read_unlock(&kvm->srcu, srcu_idx);
-
- return ret;
-}
-
-static inline void *kvm_get_hyp_vector(void)
-{
- switch(read_cpuid_part()) {
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
- case ARM_CPU_PART_CORTEX_A12:
- case ARM_CPU_PART_CORTEX_A17:
- {
- extern char __kvm_hyp_vector_bp_inv[];
- return kvm_ksym_ref(__kvm_hyp_vector_bp_inv);
- }
-
- case ARM_CPU_PART_BRAHMA_B15:
- case ARM_CPU_PART_CORTEX_A15:
- {
- extern char __kvm_hyp_vector_ic_inv[];
- return kvm_ksym_ref(__kvm_hyp_vector_ic_inv);
- }
-#endif
- default:
- {
- extern char __kvm_hyp_vector[];
- return kvm_ksym_ref(__kvm_hyp_vector);
- }
- }
-}
-
-static inline int kvm_map_vectors(void)
-{
- return 0;
-}
-
-static inline int hyp_map_aux_data(void)
-{
- return 0;
-}
-
-#define kvm_phys_to_vttbr(addr) (addr)
-
-static inline void kvm_set_ipa_limit(void) {}
-
-static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
-{
- struct kvm_vmid *vmid = &kvm->arch.vmid;
- u64 vmid_field, baddr;
-
- baddr = kvm->arch.pgd_phys;
- vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
- return kvm_phys_to_vttbr(baddr) | vmid_field;
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __ARM_KVM_MMU_H__ */
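The VTTBR composition removed just above is simple enough to show in isolation: the stage-2 pgd physical address supplies the base-address bits and the 8-bit VMID is shifted into the VMID field. A minimal sketch, assuming VTTBR_VMID_SHIFT is 48 as asm/kvm_arm.h defines it for 32-bit ARM, and treating kvm_phys_to_vttbr() as the identity it is here:

#include <stdint.h>

/* Assumed value: asm/kvm_arm.h defines VTTBR_VMID_SHIFT as 48 on 32-bit ARM. */
#define VTTBR_VMID_SHIFT 48

/* Compose a VTTBR the way the removed kvm_get_vttbr() did. */
static uint64_t make_vttbr(uint64_t pgd_phys, uint8_t vmid)
{
	uint64_t vmid_field = (uint64_t)vmid << VTTBR_VMID_SHIFT;

	return pgd_phys | vmid_field;	/* kvm_phys_to_vttbr() is a no-op here */
}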
diff --git a/arch/arm/include/asm/kvm_ras.h b/arch/arm/include/asm/kvm_ras.h
deleted file mode 100644
index e9577292dfe4..000000000000
--- a/arch/arm/include/asm/kvm_ras.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2018 - Arm Ltd */
-
-#ifndef __ARM_KVM_RAS_H__
-#define __ARM_KVM_RAS_H__
-
-#include <linux/types.h>
-
-static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr)
-{
- return -1;
-}
-
-#endif /* __ARM_KVM_RAS_H__ */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index ad55ab068dbf..36805f94939e 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,26 +104,6 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
-/*
- * 2nd stage PTE definitions for LPAE.
- */
-#define L_PTE_S2_MT_UNCACHED (_AT(pteval_t, 0x0) << 2) /* strongly ordered */
-#define L_PTE_S2_MT_WRITETHROUGH (_AT(pteval_t, 0xa) << 2) /* normal inner write-through */
-#define L_PTE_S2_MT_WRITEBACK (_AT(pteval_t, 0xf) << 2) /* normal inner write-back */
-#define L_PTE_S2_MT_DEV_SHARED (_AT(pteval_t, 0x1) << 2) /* device */
-#define L_PTE_S2_MT_MASK (_AT(pteval_t, 0xf) << 2)
-
-#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */
-#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
-
-#define L_PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[1] */
-#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
-
-/*
- * Hyp-mode PL2 PTE definitions for LPAE.
- */
-#define L_PTE_HYP L_PTE_USER
-
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index eabcb48a7840..0483cf413315 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -80,9 +80,6 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
-extern pgprot_t pgprot_hyp_device;
-extern pgprot_t pgprot_s2;
-extern pgprot_t pgprot_s2_device;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -95,12 +92,6 @@ extern pgprot_t pgprot_s2_device;
#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
-#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_XN)
-#define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY)
-#define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN)
-#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
-#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY | L_PTE_XN)
-#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY | L_PTE_XN)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/include/asm/sections.h b/arch/arm/include/asm/sections.h
index 4ceb4f757d4d..700b8bcdf9bd 100644
--- a/arch/arm/include/asm/sections.h
+++ b/arch/arm/include/asm/sections.h
@@ -10,8 +10,6 @@ extern char __idmap_text_start[];
extern char __idmap_text_end[];
extern char __entry_text_start[];
extern char __entry_text_end[];
-extern char __hyp_idmap_text_start[];
-extern char __hyp_idmap_text_end[];
static inline bool in_entry_text(unsigned long addr)
{
@@ -22,9 +20,7 @@ static inline bool in_entry_text(unsigned long addr)
static inline bool in_idmap_text(unsigned long addr)
{
void *a = (void *)addr;
- return memory_contains(__idmap_text_start, __idmap_text_end, a, 1) ||
- memory_contains(__hyp_idmap_text_start, __hyp_idmap_text_end,
- a, 1);
+ return memory_contains(__idmap_text_start, __idmap_text_end, a, 1);
}
#endif /* _ASM_ARM_SECTIONS_H */
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
deleted file mode 100644
index aaceec7855ec..000000000000
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * stage2 page table helpers
- */
-
-#ifndef __ARM_S2_PGTABLE_H_
-#define __ARM_S2_PGTABLE_H_
-
-/*
- * kvm_mmu_cache_min_pages() is the number of pages required
- * to install a stage-2 translation. We pre-allocate the entry
- * level table at VM creation. Since we have a 3 level page-table,
- * we need only two pages to add a new mapping.
- */
-#define kvm_mmu_cache_min_pages(kvm) 2
-
-#define stage2_pgd_none(kvm, pgd) pgd_none(pgd)
-#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd)
-#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
-#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address)
-#define stage2_pud_free(kvm, pud) do { } while (0)
-
-#define stage2_pud_none(kvm, pud) pud_none(pud)
-#define stage2_pud_clear(kvm, pud) pud_clear(pud)
-#define stage2_pud_present(kvm, pud) pud_present(pud)
-#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address)
-#define stage2_pmd_free(kvm, pmd) free_page((unsigned long)pmd)
-
-#define stage2_pud_huge(kvm, pud) pud_huge(pud)
-
-/* Open coded p*d_addr_end that can deal with 64bit addresses */
-static inline phys_addr_t
-stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
-{
- phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
-
- return (boundary - 1 < end - 1) ? boundary : end;
-}
-
-#define stage2_pud_addr_end(kvm, addr, end) (end)
-
-static inline phys_addr_t
-stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
-{
- phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
-
- return (boundary - 1 < end - 1) ? boundary : end;
-}
-
-#define stage2_pgd_index(kvm, addr) pgd_index(addr)
-
-#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
-#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
-#define stage2_pud_table_empty(kvm, pudp) false
-
-static inline bool kvm_stage2_has_pud(struct kvm *kvm)
-{
- return false;
-}
-
-#define S2_PMD_MASK PMD_MASK
-#define S2_PMD_SIZE PMD_SIZE
-#define S2_PUD_MASK PUD_MASK
-#define S2_PUD_SIZE PUD_SIZE
-
-static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
-{
- return true;
-}
-
-#endif /* __ARM_S2_PGTABLE_H_ */
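The only subtle part of the helpers removed above is the clamping in stage2_pgd_addr_end() and stage2_pmd_addr_end(): comparing boundary - 1 against end - 1 keeps the result correct even when the next table boundary wraps past the top of the 40-bit IPA space. A standalone sketch of the same comparison, with the size and mask passed explicitly rather than taken from the PGDIR_*/PMD_* constants as the kernel code does:

#include <stdint.h>

typedef uint64_t phys_addr_t;

/*
 * Clamp [addr, end) to the next table boundary. The "- 1" on both sides
 * makes the comparison wrap-safe: if (addr + size) overflows to 0, the
 * boundary still loses against any in-range 'end' and 'end' is returned.
 */
static phys_addr_t addr_range_end(phys_addr_t addr, phys_addr_t end,
				  phys_addr_t size, phys_addr_t mask)
{
	phys_addr_t boundary = (addr + size) & mask;

	return (boundary - 1 < end - 1) ? boundary : end;
}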
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index 17c26ccd126d..dd9697b2bde8 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -39,8 +39,6 @@ static inline void sync_boot_mode(void)
sync_cache_r(&__boot_cpu_mode);
}
-void __hyp_set_vectors(unsigned long phys_vector_base);
-void __hyp_reset_vectors(void);
#else
#define __boot_cpu_mode (SVC_MODE)
#define sync_boot_mode()
@@ -67,18 +65,6 @@ static inline bool is_kernel_in_hyp_mode(void)
return false;
}
-static inline bool has_vhe(void)
-{
- return false;
-}
-
-/* The section containing the hypervisor idmap text */
-extern char __hyp_idmap_text_start[];
-extern char __hyp_idmap_text_end[];
-
-/* The section containing the hypervisor text */
-extern char __hyp_text_start[];
-extern char __hyp_text_end[];
#endif
#else
@@ -87,9 +73,6 @@ extern char __hyp_text_end[];
#define HVC_SET_VECTORS 0
#define HVC_SOFT_RESTART 1
-#define HVC_RESET_VECTORS 2
-
-#define HVC_STUB_HCALL_NR 3
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
deleted file mode 100644
index 03cd7c19a683..000000000000
--- a/arch/arm/include/uapi/asm/kvm.h
+++ /dev/null
@@ -1,314 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef __ARM_KVM_H__
-#define __ARM_KVM_H__
-
-#include <linux/types.h>
-#include <linux/psci.h>
-#include <asm/ptrace.h>
-
-#define __KVM_HAVE_GUEST_DEBUG
-#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
-#define __KVM_HAVE_VCPU_EVENTS
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
-/* Valid for svc_regs, abt_regs, und_regs, irq_regs in struct kvm_regs */
-#define KVM_ARM_SVC_sp svc_regs[0]
-#define KVM_ARM_SVC_lr svc_regs[1]
-#define KVM_ARM_SVC_spsr svc_regs[2]
-#define KVM_ARM_ABT_sp abt_regs[0]
-#define KVM_ARM_ABT_lr abt_regs[1]
-#define KVM_ARM_ABT_spsr abt_regs[2]
-#define KVM_ARM_UND_sp und_regs[0]
-#define KVM_ARM_UND_lr und_regs[1]
-#define KVM_ARM_UND_spsr und_regs[2]
-#define KVM_ARM_IRQ_sp irq_regs[0]
-#define KVM_ARM_IRQ_lr irq_regs[1]
-#define KVM_ARM_IRQ_spsr irq_regs[2]
-
-/* Valid only for fiq_regs in struct kvm_regs */
-#define KVM_ARM_FIQ_r8 fiq_regs[0]
-#define KVM_ARM_FIQ_r9 fiq_regs[1]
-#define KVM_ARM_FIQ_r10 fiq_regs[2]
-#define KVM_ARM_FIQ_fp fiq_regs[3]
-#define KVM_ARM_FIQ_ip fiq_regs[4]
-#define KVM_ARM_FIQ_sp fiq_regs[5]
-#define KVM_ARM_FIQ_lr fiq_regs[6]
-#define KVM_ARM_FIQ_spsr fiq_regs[7]
-
-struct kvm_regs {
- struct pt_regs usr_regs; /* R0_usr - R14_usr, PC, CPSR */
- unsigned long svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */
- unsigned long abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */
- unsigned long und_regs[3]; /* SP_und, LR_und, SPSR_und */
- unsigned long irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */
- unsigned long fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */
-};
-
-/* Supported Processor Types */
-#define KVM_ARM_TARGET_CORTEX_A15 0
-#define KVM_ARM_TARGET_CORTEX_A7 1
-#define KVM_ARM_NUM_TARGETS 2
-
-/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
-#define KVM_ARM_DEVICE_TYPE_SHIFT 0
-#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
-#define KVM_ARM_DEVICE_ID_SHIFT 16
-#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
-
-/* Supported device IDs */
-#define KVM_ARM_DEVICE_VGIC_V2 0
-
-/* Supported VGIC address types */
-#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
-#define KVM_VGIC_V2_ADDR_TYPE_CPU 1
-
-#define KVM_VGIC_V2_DIST_SIZE 0x1000
-#define KVM_VGIC_V2_CPU_SIZE 0x2000
-
-/* Supported VGICv3 address types */
-#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
-#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
-#define KVM_VGIC_ITS_ADDR_TYPE 4
-#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5
-
-#define KVM_VGIC_V3_DIST_SIZE SZ_64K
-#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
-#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
-
-#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
-#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
-
-struct kvm_vcpu_init {
- __u32 target;
- __u32 features[7];
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-struct kvm_guest_debug_arch {
-};
-
-struct kvm_debug_exit_arch {
-};
-
-struct kvm_sync_regs {
- /* Used with KVM_CAP_ARM_USER_IRQ */
- __u64 device_irq_level;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-/* for KVM_GET/SET_VCPU_EVENTS */
-struct kvm_vcpu_events {
- struct {
- __u8 serror_pending;
- __u8 serror_has_esr;
- __u8 ext_dabt_pending;
- /* Align it to 8 bytes */
- __u8 pad[5];
- __u64 serror_esr;
- } exception;
- __u32 reserved[12];
-};
-
-/* If you need to interpret the index values, here is the key: */
-#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
-#define KVM_REG_ARM_COPROC_SHIFT 16
-#define KVM_REG_ARM_32_OPC2_MASK 0x0000000000000007
-#define KVM_REG_ARM_32_OPC2_SHIFT 0
-#define KVM_REG_ARM_OPC1_MASK 0x0000000000000078
-#define KVM_REG_ARM_OPC1_SHIFT 3
-#define KVM_REG_ARM_CRM_MASK 0x0000000000000780
-#define KVM_REG_ARM_CRM_SHIFT 7
-#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
-#define KVM_REG_ARM_32_CRN_SHIFT 11
-/*
- * For KVM currently all guest registers are nonsecure, but we reserve a bit
- * in the encoding to distinguish secure from nonsecure for AArch32 system
- * registers that are banked by security. This is 1 for the secure banked
- * register, and 0 for the nonsecure banked register or if the register is
- * not banked by security.
- */
-#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000
-#define KVM_REG_ARM_SECURE_SHIFT 28
-
-#define ARM_CP15_REG_SHIFT_MASK(x,n) \
- (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
-
-#define __ARM_CP15_REG(op1,crn,crm,op2) \
- (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
- ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
- ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
- ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
- ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
-
-#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
-
-#define __ARM_CP15_REG64(op1,crm) \
- (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
-#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
-
-/* PL1 Physical Timer Registers */
-#define KVM_REG_ARM_PTIMER_CTL ARM_CP15_REG32(0, 14, 2, 1)
-#define KVM_REG_ARM_PTIMER_CNT ARM_CP15_REG64(0, 14)
-#define KVM_REG_ARM_PTIMER_CVAL ARM_CP15_REG64(2, 14)
-
-/* Virtual Timer Registers */
-#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
-#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
-
-/* Normal registers are mapped as coprocessor 16. */
-#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
-
-/* Some registers need more space to represent values. */
-#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00
-#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
-#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
-#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF
-#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0
-
-/* VFP registers: we could overload CP10 like ARM does, but that's ugly. */
-#define KVM_REG_ARM_VFP (0x0012 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_VFP_MASK 0x000000000000FFFF
-#define KVM_REG_ARM_VFP_BASE_REG 0x0
-#define KVM_REG_ARM_VFP_FPSID 0x1000
-#define KVM_REG_ARM_VFP_FPSCR 0x1001
-#define KVM_REG_ARM_VFP_MVFR1 0x1006
-#define KVM_REG_ARM_VFP_MVFR0 0x1007
-#define KVM_REG_ARM_VFP_FPEXC 0x1008
-#define KVM_REG_ARM_VFP_FPINST 0x1009
-#define KVM_REG_ARM_VFP_FPINST2 0x100A
-
-/* KVM-as-firmware specific pseudo-registers */
-#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
- KVM_REG_ARM_FW | ((r) & 0xffff))
-#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1)
- /* Higher values mean better protection. */
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
- /* Higher values mean better protection. */
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
-
-/* Device Control API: ARM VGIC */
-#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
-#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
-#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
-#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
-#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
-#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
-#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
- (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
-#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
-#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
-#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
-#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
-#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
-#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
-#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
-#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
-#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
- (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
-#define VGIC_LEVEL_INFO_LINE_LEVEL 0
-
-/* Device Control API on vcpu fd */
-#define KVM_ARM_VCPU_PMU_V3_CTRL 0
-#define KVM_ARM_VCPU_PMU_V3_IRQ 0
-#define KVM_ARM_VCPU_PMU_V3_INIT 1
-#define KVM_ARM_VCPU_TIMER_CTRL 1
-#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
-#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
-
-#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
-#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
-#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
-#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
-#define KVM_DEV_ARM_ITS_CTRL_RESET 4
-
-/* KVM_IRQ_LINE irq field index values */
-#define KVM_ARM_IRQ_VCPU2_SHIFT 28
-#define KVM_ARM_IRQ_VCPU2_MASK 0xf
-#define KVM_ARM_IRQ_TYPE_SHIFT 24
-#define KVM_ARM_IRQ_TYPE_MASK 0xf
-#define KVM_ARM_IRQ_VCPU_SHIFT 16
-#define KVM_ARM_IRQ_VCPU_MASK 0xff
-#define KVM_ARM_IRQ_NUM_SHIFT 0
-#define KVM_ARM_IRQ_NUM_MASK 0xffff
-
-/* irq_type field */
-#define KVM_ARM_IRQ_TYPE_CPU 0
-#define KVM_ARM_IRQ_TYPE_SPI 1
-#define KVM_ARM_IRQ_TYPE_PPI 2
-
-/* out-of-kernel GIC cpu interrupt injection irq_number field */
-#define KVM_ARM_IRQ_CPU_IRQ 0
-#define KVM_ARM_IRQ_CPU_FIQ 1
-
-/*
- * This used to hold the highest supported SPI, but it is now obsolete
- * and only here to provide source code level compatibility with older
- * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
- */
-#ifndef __KERNEL__
-#define KVM_ARM_IRQ_GIC_MAX 127
-#endif
-
-/* One single KVM irqchip, ie. the VGIC */
-#define KVM_NR_IRQCHIPS 1
-
-/* PSCI interface */
-#define KVM_PSCI_FN_BASE 0x95c1ba5e
-#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
-
-#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0)
-#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1)
-#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
-#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-
-#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
-#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
-#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
-#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
-
-#endif /* __ARM_KVM_H__ */
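For context on how the constants near the end of the removed header were consumed, userspace built the KVM_IRQ_LINE 'irq' argument by packing the interrupt type, vcpu index and interrupt number with the shifts defined above. A small sketch using only those shift values; the surrounding ioctl plumbing is omitted:

#include <stdint.h>

#define KVM_ARM_IRQ_TYPE_SHIFT	24
#define KVM_ARM_IRQ_VCPU_SHIFT	16
#define KVM_ARM_IRQ_TYPE_SPI	1

/* Pack the KVM_IRQ_LINE 'irq' word as the removed header laid it out. */
static uint32_t pack_irq_line(uint32_t type, uint32_t vcpu_idx, uint32_t irq_num)
{
	return (type << KVM_ARM_IRQ_TYPE_SHIFT) |
	       (vcpu_idx << KVM_ARM_IRQ_VCPU_SHIFT) |
	       irq_num;
}

/* Example: SPI 32 for the in-kernel VGIC (vcpu index is unused for SPIs). */
/* uint32_t irq = pack_irq_line(KVM_ARM_IRQ_TYPE_SPI, 0, 32); */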
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index c773b829ee8e..c036a4a2f8e2 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -11,9 +11,6 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#ifdef CONFIG_KVM_ARM_HOST
-#include <linux/kvm_host.h>
-#endif
#include <asm/cacheflush.h>
#include <asm/glue-df.h>
#include <asm/glue-pf.h>
@@ -167,14 +164,6 @@ int main(void)
DEFINE(CACHE_WRITEBACK_ORDER, __CACHE_WRITEBACK_ORDER);
DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE);
BLANK();
-#ifdef CONFIG_KVM_ARM_HOST
- DEFINE(VCPU_GUEST_CTXT, offsetof(struct kvm_vcpu, arch.ctxt));
- DEFINE(VCPU_HOST_CTXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
- DEFINE(CPU_CTXT_VFP, offsetof(struct kvm_cpu_context, vfp));
- DEFINE(CPU_CTXT_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
- DEFINE(GP_REGS_USR, offsetof(struct kvm_regs, usr_regs));
-#endif
- BLANK();
#ifdef CONFIG_VDSO
DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store));
#endif
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 6607fa817bba..26d8e03b1dd3 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -189,19 +189,19 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE
ENDPROC(__hyp_stub_install_secondary)
__hyp_stub_do_trap:
+#ifdef ZIMAGE
teq r0, #HVC_SET_VECTORS
bne 1f
+ /* Only the ZIMAGE stubs can change the HYP vectors */
mcr p15, 4, r1, c12, c0, 0 @ set HVBAR
b __hyp_stub_exit
+#endif
1: teq r0, #HVC_SOFT_RESTART
- bne 1f
+ bne 2f
bx r1
-1: teq r0, #HVC_RESET_VECTORS
- beq __hyp_stub_exit
-
- ldr r0, =HVC_STUB_ERR
+2: ldr r0, =HVC_STUB_ERR
__ERET
__hyp_stub_exit:
@@ -210,26 +210,9 @@ __hyp_stub_exit:
ENDPROC(__hyp_stub_do_trap)
/*
- * __hyp_set_vectors: Call this after boot to set the initial hypervisor
- * vectors as part of hypervisor installation. On an SMP system, this should
- * be called on each CPU.
- *
- * r0 must be the physical address of the new vector table (which must lie in
- * the bottom 4GB of physical address space.
- *
- * r0 must be 32-byte aligned.
- *
- * Before calling this, you must check that the stub hypervisor is installed
- * everywhere, by waiting for any secondary CPUs to be brought up and then
- * checking that BOOT_CPU_MODE_HAVE_HYP(__boot_cpu_mode) is true.
- *
- * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or
- * something else went wrong... in such cases, trying to install a new
- * hypervisor is unlikely to work as desired.
- *
- * When you call into your shiny new hypervisor, sp_hyp will contain junk,
- * so you will need to set that to something sensible at the new hypervisor's
- * initialisation entry point.
+ * __hyp_set_vectors is only used when ZIMAGE must bounce between HYP
+ * and SVC. For the kernel itself, the vectors are set once and for
+ * all by the stubs.
*/
ENTRY(__hyp_set_vectors)
mov r1, r0
@@ -245,12 +228,6 @@ ENTRY(__hyp_soft_restart)
ret lr
ENDPROC(__hyp_soft_restart)
-ENTRY(__hyp_reset_vectors)
- mov r0, #HVC_RESET_VECTORS
- __HVC(0)
- ret lr
-ENDPROC(__hyp_reset_vectors)
-
#ifndef ZIMAGE
.align 2
.L__boot_cpu_mode_offset:
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 21b8b271c80d..6d2be994ae58 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -162,14 +162,6 @@ SECTIONS
ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
-/*
- * The HYP init code can't be more than a page long,
- * and should not cross a page boundary.
- * The above comment applies as well.
- */
-ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
- "HYP init code too big or misaligned")
-
#ifdef CONFIG_XIP_DEFLATED_DATA
/*
* The .bss is used as a stack area for __inflate_kernel_data() whose stack
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 319ccb10846a..88a720da443b 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -170,12 +170,4 @@ __start_rodata_section_aligned = ALIGN(__start_rodata, 1 << SECTION_SHIFT);
ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
-/*
- * The HYP init code can't be more than a page long,
- * and should not cross a page boundary.
- * The above comment applies as well.
- */
-ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
- "HYP init code too big or misaligned")
-
#endif /* CONFIG_XIP_KERNEL */
diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h
index 8247bc15addc..381a8e105fa5 100644
--- a/arch/arm/kernel/vmlinux.lds.h
+++ b/arch/arm/kernel/vmlinux.lds.h
@@ -31,20 +31,11 @@
*(.proc.info.init) \
__proc_info_end = .;
-#define HYPERVISOR_TEXT \
- __hyp_text_start = .; \
- *(.hyp.text) \
- __hyp_text_end = .;
-
#define IDMAP_TEXT \
ALIGN_FUNCTION(); \
__idmap_text_start = .; \
*(.idmap.text) \
__idmap_text_end = .; \
- . = ALIGN(PAGE_SIZE); \
- __hyp_idmap_text_start = .; \
- *(.hyp.idmap.text) \
- __hyp_idmap_text_end = .;
#define ARM_DISCARD \
*(.ARM.exidx.exit.text) \
@@ -72,7 +63,6 @@
SCHED_TEXT \
CPUIDLE_TEXT \
LOCK_TEXT \
- HYPERVISOR_TEXT \
KPROBES_TEXT \
*(.gnu.warning) \
*(.glue_7) \
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
deleted file mode 100644
index f591026347a5..000000000000
--- a/arch/arm/kvm/Kconfig
+++ /dev/null
@@ -1,59 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# KVM configuration
-#
-
-source "virt/kvm/Kconfig"
-source "virt/lib/Kconfig"
-
-menuconfig VIRTUALIZATION
- bool "Virtualization"
- ---help---
- Say Y here to get to see options for using your Linux host to run
- other operating systems inside virtual machines (guests).
- This option alone does not add any kernel code.
-
- If you say N, all options in this submenu will be skipped and
- disabled.
-
-if VIRTUALIZATION
-
-config KVM
- bool "Kernel-based Virtual Machine (KVM) support"
- depends on MMU && OF
- select PREEMPT_NOTIFIERS
- select ARM_GIC
- select ARM_GIC_V3
- select ARM_GIC_V3_ITS
- select HAVE_KVM_CPU_RELAX_INTERCEPT
- select HAVE_KVM_ARCH_TLB_FLUSH_ALL
- select KVM_MMIO
- select KVM_ARM_HOST
- select KVM_GENERIC_DIRTYLOG_READ_PROTECT
- select SRCU
- select MMU_NOTIFIER
- select KVM_VFIO
- select HAVE_KVM_EVENTFD
- select HAVE_KVM_IRQFD
- select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQ_ROUTING
- select HAVE_KVM_MSI
- select IRQ_BYPASS_MANAGER
- select HAVE_KVM_IRQ_BYPASS
- depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
- ---help---
- Support hosting virtualized guest machines.
-
- This module provides access to the hardware capabilities through
- a character device node named /dev/kvm.
-
- If unsure, say N.
-
-config KVM_ARM_HOST
- bool
- ---help---
- Provides host support for ARM processors.
-
-source "drivers/vhost/Kconfig"
-
-endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
deleted file mode 100644
index e442d82821df..000000000000
--- a/arch/arm/kvm/Makefile
+++ /dev/null
@@ -1,43 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-plus_virt := $(call as-instr,.arch_extension virt,+virt)
-ifeq ($(plus_virt),+virt)
- plus_virt_def := -DREQUIRES_VIRT=1
-endif
-
-KVM := ../../../virt/kvm
-
-ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
-CFLAGS_$(KVM)/arm/arm.o := $(plus_virt_def)
-
-AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
-AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
-
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
-
-obj-$(CONFIG_KVM_ARM_HOST) += hyp/
-
-obj-y += kvm-arm.o init.o interrupts.o
-obj-y += handle_exit.o guest.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
-obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o
-obj-y += $(KVM)/arm/aarch32.o
-
-obj-y += $(KVM)/arm/vgic/vgic.o
-obj-y += $(KVM)/arm/vgic/vgic-init.o
-obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
-obj-y += $(KVM)/arm/vgic/vgic-v2.o
-obj-y += $(KVM)/arm/vgic/vgic-v3.o
-obj-y += $(KVM)/arm/vgic/vgic-v4.o
-obj-y += $(KVM)/arm/vgic/vgic-mmio.o
-obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
-obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
-obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
-obj-y += $(KVM)/arm/vgic/vgic-its.o
-obj-y += $(KVM)/arm/vgic/vgic-debug.o
-obj-y += $(KVM)/irqchip.o
-obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
deleted file mode 100644
index 07745ee022a1..000000000000
--- a/arch/arm/kvm/coproc.c
+++ /dev/null
@@ -1,1455 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Authors: Rusty Russell <rusty@rustcorp.com.au>
- * Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/bsearch.h>
-#include <linux/mm.h>
-#include <linux/kvm_host.h>
-#include <linux/uaccess.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
-#include <asm/kvm_mmu.h>
-#include <asm/cacheflush.h>
-#include <asm/cputype.h>
-#include <trace/events/kvm.h>
-#include <asm/vfp.h>
-#include "../vfp/vfpinstr.h"
-
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-#include "coproc.h"
-
-
-/******************************************************************************
- * Co-processor emulation
- *****************************************************************************/
-
-static bool write_to_read_only(struct kvm_vcpu *vcpu,
- const struct coproc_params *params)
-{
- WARN_ONCE(1, "CP15 write to read-only register\n");
- print_cp_instr(params);
- kvm_inject_undefined(vcpu);
- return false;
-}
-
-static bool read_from_write_only(struct kvm_vcpu *vcpu,
- const struct coproc_params *params)
-{
- WARN_ONCE(1, "CP15 read to write-only register\n");
- print_cp_instr(params);
- kvm_inject_undefined(vcpu);
- return false;
-}
-
-/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
-static u32 cache_levels;
-
-/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
-#define CSSELR_MAX 12
-
-/*
- * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some
- * of cp15 registers can be viewed either as couple of two u32 registers
- * or one u64 register. Current u64 register encoding is that least
- * significant u32 word is followed by most significant u32 word.
- */
-static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
- const struct coproc_reg *r,
- u64 val)
-{
- vcpu_cp15(vcpu, r->reg) = val & 0xffffffff;
- vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
-}
-
-static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
- const struct coproc_reg *r)
-{
- u64 val;
-
- val = vcpu_cp15(vcpu, r->reg + 1);
- val = val << 32;
- val = val | vcpu_cp15(vcpu, r->reg);
- return val;
-}
-
-int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- kvm_inject_undefined(vcpu);
- return 1;
-}
-
-int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- /*
- * We can get here, if the host has been built without VFPv3 support,
- * but the guest attempted a floating point operation.
- */
- kvm_inject_undefined(vcpu);
- return 1;
-}
-
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- kvm_inject_undefined(vcpu);
- return 1;
-}
-
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- /*
- * Compute guest MPIDR. We build a virtual cluster out of the
- * vcpu_id, but we read the 'U' bit from the underlying
- * hardware directly.
- */
- vcpu_cp15(vcpu, c0_MPIDR) = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
- ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
- (vcpu->vcpu_id & 3));
-}
-
-/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c1_ACTLR);
- return true;
-}
-
-/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return write_to_read_only(vcpu, p);
- return read_zero(vcpu, p);
-}
-
-/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c9_L2CTLR);
- return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 l2ctlr, ncores;
-
- asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
- l2ctlr &= ~(3 << 24);
- ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
- /* How many cores in the current cluster and the next ones */
- ncores -= (vcpu->vcpu_id & ~3);
- /* Cap it to the maximum number of cores in a single cluster */
- ncores = min(ncores, 3U);
- l2ctlr |= (ncores & 3) << 24;
-
- vcpu_cp15(vcpu, c9_L2CTLR) = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 actlr;
-
- /* ACTLR contains SMP bit: make sure you create all cpus first! */
- asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
- /* Make the SMP bit consistent with the guest configuration */
- if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
- actlr |= 1U << 6;
- else
- actlr &= ~(1U << 6);
-
- vcpu_cp15(vcpu, c1_ACTLR) = actlr;
-}
-
-/*
- * TRM entries: A7:4.3.50, A15:4.3.49
- * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
- */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = 0;
- return true;
-}
-
-/*
- * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
- */
-static bool access_dcsw(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (!p->is_write)
- return read_from_write_only(vcpu, p);
-
- kvm_set_way_flush(vcpu);
- return true;
-}
-
-/*
- * Generic accessor for VM registers. Only called as long as HCR_TVM
- * is set. If the guest enables the MMU, we stop trapping the VM
- * sys_regs and leave it in complete control of the caches.
- *
- * Used by the cpu-specific code.
- */
-bool access_vm_reg(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- bool was_enabled = vcpu_has_cache_enabled(vcpu);
-
- BUG_ON(!p->is_write);
-
- vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt1);
- if (p->is_64bit)
- vcpu_cp15(vcpu, r->reg + 1) = *vcpu_reg(vcpu, p->Rt2);
-
- kvm_toggle_cache(vcpu, was_enabled);
- return true;
-}
-
-static bool access_gic_sgi(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- u64 reg;
- bool g1;
-
- if (!p->is_write)
- return read_from_write_only(vcpu, p);
-
- reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
- reg |= *vcpu_reg(vcpu, p->Rt1) ;
-
- /*
- * In a system where GICD_CTLR.DS=1, a ICC_SGI0R access generates
- * Group0 SGIs only, while ICC_SGI1R can generate either group,
- * depending on the SGI configuration. ICC_ASGI1R is effectively
- * equivalent to ICC_SGI0R, as there is no "alternative" secure
- * group.
- */
- switch (p->Op1) {
- default: /* Keep GCC quiet */
- case 0: /* ICC_SGI1R */
- g1 = true;
- break;
- case 1: /* ICC_ASGI1R */
- case 2: /* ICC_SGI0R */
- g1 = false;
- break;
- }
-
- vgic_v3_dispatch_sgi(vcpu, reg, g1);
-
- return true;
-}
-
-static bool access_gic_sre(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
-
- return true;
-}
-
-static bool access_cntp_tval(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- u32 val;
-
- if (p->is_write) {
- val = *vcpu_reg(vcpu, p->Rt1);
- kvm_arm_timer_write_sysreg(vcpu,
- TIMER_PTIMER, TIMER_REG_TVAL, val);
- } else {
- val = kvm_arm_timer_read_sysreg(vcpu,
- TIMER_PTIMER, TIMER_REG_TVAL);
- *vcpu_reg(vcpu, p->Rt1) = val;
- }
-
- return true;
-}
-
-static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- u32 val;
-
- if (p->is_write) {
- val = *vcpu_reg(vcpu, p->Rt1);
- kvm_arm_timer_write_sysreg(vcpu,
- TIMER_PTIMER, TIMER_REG_CTL, val);
- } else {
- val = kvm_arm_timer_read_sysreg(vcpu,
- TIMER_PTIMER, TIMER_REG_CTL);
- *vcpu_reg(vcpu, p->Rt1) = val;
- }
-
- return true;
-}
-
-static bool access_cntp_cval(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- u64 val;
-
- if (p->is_write) {
- val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
- val |= *vcpu_reg(vcpu, p->Rt1);
- kvm_arm_timer_write_sysreg(vcpu,
- TIMER_PTIMER, TIMER_REG_CVAL, val);
- } else {
- val = kvm_arm_timer_read_sysreg(vcpu,
- TIMER_PTIMER, TIMER_REG_CVAL);
- *vcpu_reg(vcpu, p->Rt1) = val;
- *vcpu_reg(vcpu, p->Rt2) = val >> 32;
- }
-
- return true;
-}
-
-/*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters. Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- */
-static bool trap_raz_wi(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
- else
- return read_zero(vcpu, p);
-}
-
-#define access_pmcr trap_raz_wi
-#define access_pmcntenset trap_raz_wi
-#define access_pmcntenclr trap_raz_wi
-#define access_pmovsr trap_raz_wi
-#define access_pmselr trap_raz_wi
-#define access_pmceid0 trap_raz_wi
-#define access_pmceid1 trap_raz_wi
-#define access_pmccntr trap_raz_wi
-#define access_pmxevtyper trap_raz_wi
-#define access_pmxevcntr trap_raz_wi
-#define access_pmuserenr trap_raz_wi
-#define access_pmintenset trap_raz_wi
-#define access_pmintenclr trap_raz_wi
-
-/* Architected CP15 registers.
- * CRn denotes the primary register number, but is copied to the CRm in the
- * user space API for 64-bit register access in line with the terminology used
- * in the ARM ARM.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
- * registers preceding 32-bit ones.
- */
-static const struct coproc_reg cp15_regs[] = {
- /* MPIDR: we use VMPIDR for guest access. */
- { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
- NULL, reset_mpidr, c0_MPIDR },
-
- /* CSSELR: swapped by interrupt.S. */
- { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
- NULL, reset_unknown, c0_CSSELR },
-
- /* ACTLR: trapped by HCR.TAC bit. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
- access_actlr, reset_actlr, c1_ACTLR },
-
- /* CPACR: swapped by interrupt.S. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_val, c1_CPACR, 0x00000000 },
-
- /* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
- { CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
- { CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_unknown, c2_TTBR0 },
- { CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
- access_vm_reg, reset_unknown, c2_TTBR1 },
- { CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
- access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
- { CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
-
-
- /* DACR: swapped by interrupt.S. */
- { CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_unknown, c3_DACR },
-
- /* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
- { CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_unknown, c5_DFSR },
- { CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
- access_vm_reg, reset_unknown, c5_IFSR },
- { CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_unknown, c5_ADFSR },
- { CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
- access_vm_reg, reset_unknown, c5_AIFSR },
-
- /* DFAR/IFAR: swapped by interrupt.S. */
- { CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_unknown, c6_DFAR },
- { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
- access_vm_reg, reset_unknown, c6_IFAR },
-
- /* PAR swapped by interrupt.S */
- { CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
-
- /*
- * DC{C,I,CI}SW operations:
- */
- { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw},
- { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
- { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
- /*
- * L2CTLR access (guest wants to know #CPUs).
- */
- { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
- access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
- { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
- /*
- * Dummy performance monitor implementation.
- */
- { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
- { CRn( 9), CRm(12), Op1( 0), Op2( 1), is32, access_pmcntenset},
- { CRn( 9), CRm(12), Op1( 0), Op2( 2), is32, access_pmcntenclr},
- { CRn( 9), CRm(12), Op1( 0), Op2( 3), is32, access_pmovsr},
- { CRn( 9), CRm(12), Op1( 0), Op2( 5), is32, access_pmselr},
- { CRn( 9), CRm(12), Op1( 0), Op2( 6), is32, access_pmceid0},
- { CRn( 9), CRm(12), Op1( 0), Op2( 7), is32, access_pmceid1},
- { CRn( 9), CRm(13), Op1( 0), Op2( 0), is32, access_pmccntr},
- { CRn( 9), CRm(13), Op1( 0), Op2( 1), is32, access_pmxevtyper},
- { CRn( 9), CRm(13), Op1( 0), Op2( 2), is32, access_pmxevcntr},
- { CRn( 9), CRm(14), Op1( 0), Op2( 0), is32, access_pmuserenr},
- { CRn( 9), CRm(14), Op1( 0), Op2( 1), is32, access_pmintenset},
- { CRn( 9), CRm(14), Op1( 0), Op2( 2), is32, access_pmintenclr},
-
- /* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
- { CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_unknown, c10_PRRR},
- { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
- access_vm_reg, reset_unknown, c10_NMRR},
-
- /* AMAIR0/AMAIR1: swapped by interrupt.S. */
- { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_unknown, c10_AMAIR0},
- { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
- access_vm_reg, reset_unknown, c10_AMAIR1},
-
- /* ICC_SGI1R */
- { CRm64(12), Op1( 0), is64, access_gic_sgi},
-
- /* VBAR: swapped by interrupt.S. */
- { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_val, c12_VBAR, 0x00000000 },
-
- /* ICC_ASGI1R */
- { CRm64(12), Op1( 1), is64, access_gic_sgi},
- /* ICC_SGI0R */
- { CRm64(12), Op1( 2), is64, access_gic_sgi},
- /* ICC_SRE */
- { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre },
-
- /* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
- { CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
- access_vm_reg, reset_val, c13_CID, 0x00000000 },
- { CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_unknown, c13_TID_URW },
- { CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
- NULL, reset_unknown, c13_TID_URO },
- { CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
- NULL, reset_unknown, c13_TID_PRIV },
-
- /* CNTP */
- { CRm64(14), Op1( 2), is64, access_cntp_cval},
-
- /* CNTKCTL: swapped by interrupt.S. */
- { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
- NULL, reset_val, c14_CNTKCTL, 0x00000000 },
-
- /* CNTP */
- { CRn(14), CRm( 2), Op1( 0), Op2( 0), is32, access_cntp_tval },
- { CRn(14), CRm( 2), Op1( 0), Op2( 1), is32, access_cntp_ctl },
-
- /* The Configuration Base Address Register. */
- { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
-};
-
-static int check_reg_table(const struct coproc_reg *table, unsigned int n)
-{
- unsigned int i;
-
- for (i = 1; i < n; i++) {
- if (cmp_reg(&table[i-1], &table[i]) >= 0) {
- kvm_err("reg table %p out of order (%d)\n", table, i - 1);
- return 1;
- }
- }
-
- return 0;
-}
-
-/* Target specific emulation tables */
-static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
-
-void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
-{
- BUG_ON(check_reg_table(table->table, table->num));
- target_tables[table->target] = table;
-}
-
-/* Get specific register table for this target. */
-static const struct coproc_reg *get_target_table(unsigned target, size_t *num)
-{
- struct kvm_coproc_target_table *table;
-
- table = target_tables[target];
- *num = table->num;
- return table->table;
-}
-
-#define reg_to_match_value(x) \
- ({ \
- unsigned long val; \
- val = (x)->CRn << 11; \
- val |= (x)->CRm << 7; \
- val |= (x)->Op1 << 4; \
- val |= (x)->Op2 << 1; \
- val |= !(x)->is_64bit; \
- val; \
- })
-
-static int match_reg(const void *key, const void *elt)
-{
- const unsigned long pval = (unsigned long)key;
- const struct coproc_reg *r = elt;
-
- return pval - reg_to_match_value(r);
-}
-
-static const struct coproc_reg *find_reg(const struct coproc_params *params,
- const struct coproc_reg table[],
- unsigned int num)
-{
- unsigned long pval = reg_to_match_value(params);
-
- return bsearch((void *)pval, table, num, sizeof(table[0]), match_reg);
-}
-
-static int emulate_cp15(struct kvm_vcpu *vcpu,
- const struct coproc_params *params)
-{
- size_t num;
- const struct coproc_reg *table, *r;
-
- trace_kvm_emulate_cp15_imp(params->Op1, params->Rt1, params->CRn,
- params->CRm, params->Op2, params->is_write);
-
- table = get_target_table(vcpu->arch.target, &num);
-
- /* Search target-specific then generic table. */
- r = find_reg(params, table, num);
- if (!r)
- r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
-
- if (likely(r)) {
- /* If we don't have an accessor, we should never get here! */
- BUG_ON(!r->access);
-
- if (likely(r->access(vcpu, params, r))) {
- /* Skip instruction, since it was emulated */
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- }
- } else {
- /* If access function fails, it should complain. */
- kvm_err("Unsupported guest CP15 access at: %08lx [%08lx]\n",
- *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
- print_cp_instr(params);
- kvm_inject_undefined(vcpu);
- }
-
- return 1;
-}
-
-static struct coproc_params decode_64bit_hsr(struct kvm_vcpu *vcpu)
-{
- struct coproc_params params;
-
- params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
- params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
- params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
- params.is_64bit = true;
-
- params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
- params.Op2 = 0;
- params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
- params.CRm = 0;
-
- return params;
-}
-
-/**
- * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
- * @vcpu: The VCPU pointer
- * @run: The kvm_run struct
- */
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- struct coproc_params params = decode_64bit_hsr(vcpu);
-
- return emulate_cp15(vcpu, &params);
-}
-
-/**
- * kvm_handle_cp14_64 -- handles a mrrc/mcrr trap on a guest CP14 access
- * @vcpu: The VCPU pointer
- * @run: The kvm_run struct
- */
-int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- struct coproc_params params = decode_64bit_hsr(vcpu);
-
- /* raz_wi cp14 */
- trap_raz_wi(vcpu, &params, NULL);
-
- /* handled */
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- return 1;
-}
-
-static void reset_coproc_regs(struct kvm_vcpu *vcpu,
- const struct coproc_reg *table, size_t num,
- unsigned long *bmap)
-{
- unsigned long i;
-
- for (i = 0; i < num; i++)
- if (table[i].reset) {
- int reg = table[i].reg;
-
- table[i].reset(vcpu, &table[i]);
- if (reg > 0 && reg < NR_CP15_REGS) {
- set_bit(reg, bmap);
- if (table[i].is_64bit)
- set_bit(reg + 1, bmap);
- }
- }
-}
-
-static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu)
-{
- struct coproc_params params;
-
- params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
- params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
- params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
- params.is_64bit = false;
-
- params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
- params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 14) & 0x7;
- params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7;
- params.Rt2 = 0;
-
- return params;
-}
-
-/**
- * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
- * @vcpu: The VCPU pointer
- * @run: The kvm_run struct
- */
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- struct coproc_params params = decode_32bit_hsr(vcpu);
- return emulate_cp15(vcpu, &params);
-}
-
-/**
- * kvm_handle_cp14_32 -- handles a mrc/mcr trap on a guest CP14 access
- * @vcpu: The VCPU pointer
- * @run: The kvm_run struct
- */
-int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- struct coproc_params params = decode_32bit_hsr(vcpu);
-
- /* raz_wi cp14 */
- trap_raz_wi(vcpu, &params, NULL);
-
- /* handled */
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- return 1;
-}
-
-/******************************************************************************
- * Userspace API
- *****************************************************************************/
-
-static bool index_to_params(u64 id, struct coproc_params *params)
-{
- switch (id & KVM_REG_SIZE_MASK) {
- case KVM_REG_SIZE_U32:
- /* Any unused index bits means it's not valid. */
- if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
- | KVM_REG_ARM_COPROC_MASK
- | KVM_REG_ARM_32_CRN_MASK
- | KVM_REG_ARM_CRM_MASK
- | KVM_REG_ARM_OPC1_MASK
- | KVM_REG_ARM_32_OPC2_MASK))
- return false;
-
- params->is_64bit = false;
- params->CRn = ((id & KVM_REG_ARM_32_CRN_MASK)
- >> KVM_REG_ARM_32_CRN_SHIFT);
- params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
- >> KVM_REG_ARM_CRM_SHIFT);
- params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
- >> KVM_REG_ARM_OPC1_SHIFT);
- params->Op2 = ((id & KVM_REG_ARM_32_OPC2_MASK)
- >> KVM_REG_ARM_32_OPC2_SHIFT);
- return true;
- case KVM_REG_SIZE_U64:
- /* Any unused index bits means it's not valid. */
- if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
- | KVM_REG_ARM_COPROC_MASK
- | KVM_REG_ARM_CRM_MASK
- | KVM_REG_ARM_OPC1_MASK))
- return false;
- params->is_64bit = true;
- /* CRm to CRn: see cp15_to_index for details */
- params->CRn = ((id & KVM_REG_ARM_CRM_MASK)
- >> KVM_REG_ARM_CRM_SHIFT);
- params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
- >> KVM_REG_ARM_OPC1_SHIFT);
- params->Op2 = 0;
- params->CRm = 0;
- return true;
- default:
- return false;
- }
-}
-
-/* Decode an index value, and find the cp15 coproc_reg entry. */
-static const struct coproc_reg *index_to_coproc_reg(struct kvm_vcpu *vcpu,
- u64 id)
-{
- size_t num;
- const struct coproc_reg *table, *r;
- struct coproc_params params;
-
- /* We only do cp15 for now. */
- if ((id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT != 15)
- return NULL;
-
- if (!index_to_params(id, &params))
- return NULL;
-
- table = get_target_table(vcpu->arch.target, &num);
- r = find_reg(&params, table, num);
- if (!r)
- r = find_reg(&params, cp15_regs, ARRAY_SIZE(cp15_regs));
-
- /* Not saved in the cp15 array? */
- if (r && !r->reg)
- r = NULL;
-
- return r;
-}
-
-/*
- * These are the invariant cp15 registers: we let the guest see the host
- * versions of these, so they're part of the guest state.
- *
- * A future CPU may provide a mechanism to present different values to
- * the guest, or a future kvm may trap them.
- */
-/* Unfortunately, there's no register-argument for mrc, so generate. */
-#define FUNCTION_FOR32(crn, crm, op1, op2, name) \
- static void get_##name(struct kvm_vcpu *v, \
- const struct coproc_reg *r) \
- { \
- u32 val; \
- \
- asm volatile("mrc p15, " __stringify(op1) \
- ", %0, c" __stringify(crn) \
- ", c" __stringify(crm) \
- ", " __stringify(op2) "\n" : "=r" (val)); \
- ((struct coproc_reg *)r)->val = val; \
- }
-
-FUNCTION_FOR32(0, 0, 0, 0, MIDR)
-FUNCTION_FOR32(0, 0, 0, 1, CTR)
-FUNCTION_FOR32(0, 0, 0, 2, TCMTR)
-FUNCTION_FOR32(0, 0, 0, 3, TLBTR)
-FUNCTION_FOR32(0, 0, 0, 6, REVIDR)
-FUNCTION_FOR32(0, 1, 0, 0, ID_PFR0)
-FUNCTION_FOR32(0, 1, 0, 1, ID_PFR1)
-FUNCTION_FOR32(0, 1, 0, 2, ID_DFR0)
-FUNCTION_FOR32(0, 1, 0, 3, ID_AFR0)
-FUNCTION_FOR32(0, 1, 0, 4, ID_MMFR0)
-FUNCTION_FOR32(0, 1, 0, 5, ID_MMFR1)
-FUNCTION_FOR32(0, 1, 0, 6, ID_MMFR2)
-FUNCTION_FOR32(0, 1, 0, 7, ID_MMFR3)
-FUNCTION_FOR32(0, 2, 0, 0, ID_ISAR0)
-FUNCTION_FOR32(0, 2, 0, 1, ID_ISAR1)
-FUNCTION_FOR32(0, 2, 0, 2, ID_ISAR2)
-FUNCTION_FOR32(0, 2, 0, 3, ID_ISAR3)
-FUNCTION_FOR32(0, 2, 0, 4, ID_ISAR4)
-FUNCTION_FOR32(0, 2, 0, 5, ID_ISAR5)
-FUNCTION_FOR32(0, 0, 1, 1, CLIDR)
-FUNCTION_FOR32(0, 0, 1, 7, AIDR)
-
-/* ->val is filled in by kvm_invariant_coproc_table_init() */
-static struct coproc_reg invariant_cp15[] = {
- { CRn( 0), CRm( 0), Op1( 0), Op2( 0), is32, NULL, get_MIDR },
- { CRn( 0), CRm( 0), Op1( 0), Op2( 1), is32, NULL, get_CTR },
- { CRn( 0), CRm( 0), Op1( 0), Op2( 2), is32, NULL, get_TCMTR },
- { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR },
- { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR },
-
- { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
- { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
-
- { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 },
- { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 },
- { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 },
- { CRn( 0), CRm( 1), Op1( 0), Op2( 3), is32, NULL, get_ID_AFR0 },
- { CRn( 0), CRm( 1), Op1( 0), Op2( 4), is32, NULL, get_ID_MMFR0 },
- { CRn( 0), CRm( 1), Op1( 0), Op2( 5), is32, NULL, get_ID_MMFR1 },
- { CRn( 0), CRm( 1), Op1( 0), Op2( 6), is32, NULL, get_ID_MMFR2 },
- { CRn( 0), CRm( 1), Op1( 0), Op2( 7), is32, NULL, get_ID_MMFR3 },
-
- { CRn( 0), CRm( 2), Op1( 0), Op2( 0), is32, NULL, get_ID_ISAR0 },
- { CRn( 0), CRm( 2), Op1( 0), Op2( 1), is32, NULL, get_ID_ISAR1 },
- { CRn( 0), CRm( 2), Op1( 0), Op2( 2), is32, NULL, get_ID_ISAR2 },
- { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 },
- { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 },
- { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 },
-};
-
-/*
- * Reads a register value from a userspace address to a kernel
- * variable. Make sure that register size matches sizeof(*__val).
- */
-static int reg_from_user(void *val, const void __user *uaddr, u64 id)
-{
- if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
- return -EFAULT;
- return 0;
-}
-
-/*
- * Writes a register value to a userspace address from a kernel variable.
- * The caller must ensure that the kernel variable is at least
- * KVM_REG_SIZE(id) bytes.
- */
-static int reg_to_user(void __user *uaddr, const void *val, u64 id)
-{
- if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
- return -EFAULT;
- return 0;
-}
-
-static int get_invariant_cp15(u64 id, void __user *uaddr)
-{
- struct coproc_params params;
- const struct coproc_reg *r;
- int ret;
-
- if (!index_to_params(id, &params))
- return -ENOENT;
-
- r = find_reg(&params, invariant_cp15, ARRAY_SIZE(invariant_cp15));
- if (!r)
- return -ENOENT;
-
- ret = -ENOENT;
- if (KVM_REG_SIZE(id) == 4) {
- u32 val = r->val;
-
- ret = reg_to_user(uaddr, &val, id);
- } else if (KVM_REG_SIZE(id) == 8) {
- ret = reg_to_user(uaddr, &r->val, id);
- }
- return ret;
-}
-
-static int set_invariant_cp15(u64 id, void __user *uaddr)
-{
- struct coproc_params params;
- const struct coproc_reg *r;
- int err;
- u64 val;
-
- if (!index_to_params(id, &params))
- return -ENOENT;
- r = find_reg(&params, invariant_cp15, ARRAY_SIZE(invariant_cp15));
- if (!r)
- return -ENOENT;
-
- err = -ENOENT;
- if (KVM_REG_SIZE(id) == 4) {
- u32 val32;
-
- err = reg_from_user(&val32, uaddr, id);
- if (!err)
- val = val32;
- } else if (KVM_REG_SIZE(id) == 8) {
- err = reg_from_user(&val, uaddr, id);
- }
- if (err)
- return err;
-
- /* This is what we mean by invariant: you can't change it. */
- if (r->val != val)
- return -EINVAL;
-
- return 0;
-}
-
-static bool is_valid_cache(u32 val)
-{
- u32 level, ctype;
-
- if (val >= CSSELR_MAX)
- return false;
-
- /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
- level = (val >> 1);
- ctype = (cache_levels >> (level * 3)) & 7;
-
- switch (ctype) {
- case 0: /* No cache */
- return false;
- case 1: /* Instruction cache only */
- return (val & 1);
- case 2: /* Data cache only */
- case 4: /* Unified cache */
- return !(val & 1);
- case 3: /* Separate instruction and data caches */
- return true;
- default: /* Reserved: we can't know instruction or data. */
- return false;
- }
-}
-
-/* Which cache CCSIDR represents depends on CSSELR value. */
-static u32 get_ccsidr(u32 csselr)
-{
- u32 ccsidr;
-
-	/* Make sure no one else changes CSSELR during this! */
- local_irq_disable();
- /* Put value into CSSELR */
- asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr));
- isb();
- /* Read result out of CCSIDR */
- asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr));
- local_irq_enable();
-
- return ccsidr;
-}
-
-static int demux_c15_get(u64 id, void __user *uaddr)
-{
- u32 val;
- u32 __user *uval = uaddr;
-
- /* Fail if we have unknown bits set. */
- if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
- | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
- return -ENOENT;
-
- switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
- case KVM_REG_ARM_DEMUX_ID_CCSIDR:
- if (KVM_REG_SIZE(id) != 4)
- return -ENOENT;
- val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
- >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
- return -ENOENT;
-
- return put_user(get_ccsidr(val), uval);
- default:
- return -ENOENT;
- }
-}
-
-static int demux_c15_set(u64 id, void __user *uaddr)
-{
- u32 val, newval;
- u32 __user *uval = uaddr;
-
- /* Fail if we have unknown bits set. */
- if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
- | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
- return -ENOENT;
-
- switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
- case KVM_REG_ARM_DEMUX_ID_CCSIDR:
- if (KVM_REG_SIZE(id) != 4)
- return -ENOENT;
- val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
- >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
- return -ENOENT;
-
- if (get_user(newval, uval))
- return -EFAULT;
-
- /* This is also invariant: you can't change it. */
- if (newval != get_ccsidr(val))
- return -EINVAL;
- return 0;
- default:
- return -ENOENT;
- }
-}
-
-#ifdef CONFIG_VFPv3
-static const int vfp_sysregs[] = { KVM_REG_ARM_VFP_FPEXC,
- KVM_REG_ARM_VFP_FPSCR,
- KVM_REG_ARM_VFP_FPINST,
- KVM_REG_ARM_VFP_FPINST2,
- KVM_REG_ARM_VFP_MVFR0,
- KVM_REG_ARM_VFP_MVFR1,
- KVM_REG_ARM_VFP_FPSID };
-
-static unsigned int num_fp_regs(void)
-{
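-	/* MVFR0.A_SIMD == 2 means 32 double-precision VFP registers. */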
- if (((fmrx(MVFR0) & MVFR0_A_SIMD_MASK) >> MVFR0_A_SIMD_BIT) == 2)
- return 32;
- else
- return 16;
-}
-
-static unsigned int num_vfp_regs(void)
-{
- /* Normal FP regs + control regs. */
- return num_fp_regs() + ARRAY_SIZE(vfp_sysregs);
-}
-
-static int copy_vfp_regids(u64 __user *uindices)
-{
- unsigned int i;
- const u64 u32reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP;
- const u64 u64reg = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
-
- for (i = 0; i < num_fp_regs(); i++) {
- if (put_user((u64reg | KVM_REG_ARM_VFP_BASE_REG) + i,
- uindices))
- return -EFAULT;
- uindices++;
- }
-
- for (i = 0; i < ARRAY_SIZE(vfp_sysregs); i++) {
- if (put_user(u32reg | vfp_sysregs[i], uindices))
- return -EFAULT;
- uindices++;
- }
-
- return num_vfp_regs();
-}
-
-static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
-{
- u32 vfpid = (id & KVM_REG_ARM_VFP_MASK);
- u32 val;
-
- /* Fail if we have unknown bits set. */
- if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
- | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
- return -ENOENT;
-
- if (vfpid < num_fp_regs()) {
- if (KVM_REG_SIZE(id) != 8)
- return -ENOENT;
- return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpregs[vfpid],
- id);
- }
-
- /* FP control registers are all 32 bit. */
- if (KVM_REG_SIZE(id) != 4)
- return -ENOENT;
-
- switch (vfpid) {
- case KVM_REG_ARM_VFP_FPEXC:
- return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpexc, id);
- case KVM_REG_ARM_VFP_FPSCR:
- return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpscr, id);
- case KVM_REG_ARM_VFP_FPINST:
- return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst, id);
- case KVM_REG_ARM_VFP_FPINST2:
- return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst2, id);
- case KVM_REG_ARM_VFP_MVFR0:
- val = fmrx(MVFR0);
- return reg_to_user(uaddr, &val, id);
- case KVM_REG_ARM_VFP_MVFR1:
- val = fmrx(MVFR1);
- return reg_to_user(uaddr, &val, id);
- case KVM_REG_ARM_VFP_FPSID:
- val = fmrx(FPSID);
- return reg_to_user(uaddr, &val, id);
- default:
- return -ENOENT;
- }
-}
-
-static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
-{
- u32 vfpid = (id & KVM_REG_ARM_VFP_MASK);
- u32 val;
-
- /* Fail if we have unknown bits set. */
- if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
- | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
- return -ENOENT;
-
- if (vfpid < num_fp_regs()) {
- if (KVM_REG_SIZE(id) != 8)
- return -ENOENT;
- return reg_from_user(&vcpu->arch.ctxt.vfp.fpregs[vfpid],
- uaddr, id);
- }
-
- /* FP control registers are all 32 bit. */
- if (KVM_REG_SIZE(id) != 4)
- return -ENOENT;
-
- switch (vfpid) {
- case KVM_REG_ARM_VFP_FPEXC:
- return reg_from_user(&vcpu->arch.ctxt.vfp.fpexc, uaddr, id);
- case KVM_REG_ARM_VFP_FPSCR:
- return reg_from_user(&vcpu->arch.ctxt.vfp.fpscr, uaddr, id);
- case KVM_REG_ARM_VFP_FPINST:
- return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst, uaddr, id);
- case KVM_REG_ARM_VFP_FPINST2:
- return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst2, uaddr, id);
- /* These are invariant. */
- case KVM_REG_ARM_VFP_MVFR0:
- if (reg_from_user(&val, uaddr, id))
- return -EFAULT;
- if (val != fmrx(MVFR0))
- return -EINVAL;
- return 0;
- case KVM_REG_ARM_VFP_MVFR1:
- if (reg_from_user(&val, uaddr, id))
- return -EFAULT;
- if (val != fmrx(MVFR1))
- return -EINVAL;
- return 0;
- case KVM_REG_ARM_VFP_FPSID:
- if (reg_from_user(&val, uaddr, id))
- return -EFAULT;
- if (val != fmrx(FPSID))
- return -EINVAL;
- return 0;
- default:
- return -ENOENT;
- }
-}
-#else /* !CONFIG_VFPv3 */
-static unsigned int num_vfp_regs(void)
-{
- return 0;
-}
-
-static int copy_vfp_regids(u64 __user *uindices)
-{
- return 0;
-}
-
-static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
-{
- return -ENOENT;
-}
-
-static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
-{
- return -ENOENT;
-}
-#endif /* !CONFIG_VFPv3 */
-
-int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- const struct coproc_reg *r;
- void __user *uaddr = (void __user *)(long)reg->addr;
- int ret;
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_get(reg->id, uaddr);
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_VFP)
- return vfp_get_reg(vcpu, reg->id, uaddr);
-
- r = index_to_coproc_reg(vcpu, reg->id);
- if (!r)
- return get_invariant_cp15(reg->id, uaddr);
-
- ret = -ENOENT;
- if (KVM_REG_SIZE(reg->id) == 8) {
- u64 val;
-
- val = vcpu_cp15_reg64_get(vcpu, r);
- ret = reg_to_user(uaddr, &val, reg->id);
- } else if (KVM_REG_SIZE(reg->id) == 4) {
- ret = reg_to_user(uaddr, &vcpu_cp15(vcpu, r->reg), reg->id);
- }
-
- return ret;
-}
-
-int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- const struct coproc_reg *r;
- void __user *uaddr = (void __user *)(long)reg->addr;
- int ret;
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_set(reg->id, uaddr);
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_VFP)
- return vfp_set_reg(vcpu, reg->id, uaddr);
-
- r = index_to_coproc_reg(vcpu, reg->id);
- if (!r)
- return set_invariant_cp15(reg->id, uaddr);
-
- ret = -ENOENT;
- if (KVM_REG_SIZE(reg->id) == 8) {
- u64 val;
-
- ret = reg_from_user(&val, uaddr, reg->id);
- if (!ret)
- vcpu_cp15_reg64_set(vcpu, r, val);
- } else if (KVM_REG_SIZE(reg->id) == 4) {
- ret = reg_from_user(&vcpu_cp15(vcpu, r->reg), uaddr, reg->id);
- }
-
- return ret;
-}
-
-static unsigned int num_demux_regs(void)
-{
- unsigned int i, count = 0;
-
- for (i = 0; i < CSSELR_MAX; i++)
- if (is_valid_cache(i))
- count++;
-
- return count;
-}
-
-static int write_demux_regids(u64 __user *uindices)
-{
- u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
- unsigned int i;
-
- val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
- for (i = 0; i < CSSELR_MAX; i++) {
- if (!is_valid_cache(i))
- continue;
- if (put_user(val | i, uindices))
- return -EFAULT;
- uindices++;
- }
- return 0;
-}
-
-static u64 cp15_to_index(const struct coproc_reg *reg)
-{
- u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT);
- if (reg->is_64bit) {
- val |= KVM_REG_SIZE_U64;
- val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
- /*
-		 * CRn always denotes the primary coprocessor register number
-		 * for the in-kernel representation, but the user space API
-		 * uses CRm for the encoding, because it is modelled after the
-		 * MRRC/MCRR instructions: see the ARM ARM rev. C,
-		 * page B3-1445.
- */
- val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT);
- } else {
- val |= KVM_REG_SIZE_U32;
- val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
- val |= (reg->Op2 << KVM_REG_ARM_32_OPC2_SHIFT);
- val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
- val |= (reg->CRn << KVM_REG_ARM_32_CRN_SHIFT);
- }
- return val;
-}
-
-static bool copy_reg_to_user(const struct coproc_reg *reg, u64 __user **uind)
-{
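-	/* A NULL destination means the caller only wants to count registers. */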
- if (!*uind)
- return true;
-
- if (put_user(cp15_to_index(reg), *uind))
- return false;
-
- (*uind)++;
- return true;
-}
-
-/* Assumed ordered tables, see kvm_coproc_table_init. */
-static int walk_cp15(struct kvm_vcpu *vcpu, u64 __user *uind)
-{
- const struct coproc_reg *i1, *i2, *end1, *end2;
- unsigned int total = 0;
- size_t num;
-
- /* We check for duplicates here, to allow arch-specific overrides. */
- i1 = get_target_table(vcpu->arch.target, &num);
- end1 = i1 + num;
- i2 = cp15_regs;
- end2 = cp15_regs + ARRAY_SIZE(cp15_regs);
-
- BUG_ON(i1 == end1 || i2 == end2);
-
- /* Walk carefully, as both tables may refer to the same register. */
- while (i1 || i2) {
- int cmp = cmp_reg(i1, i2);
- /* target-specific overrides generic entry. */
- if (cmp <= 0) {
- /* Ignore registers we trap but don't save. */
- if (i1->reg) {
- if (!copy_reg_to_user(i1, &uind))
- return -EFAULT;
- total++;
- }
- } else {
- /* Ignore registers we trap but don't save. */
- if (i2->reg) {
- if (!copy_reg_to_user(i2, &uind))
- return -EFAULT;
- total++;
- }
- }
-
- if (cmp <= 0 && ++i1 == end1)
- i1 = NULL;
- if (cmp >= 0 && ++i2 == end2)
- i2 = NULL;
- }
- return total;
-}
-
-unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu)
-{
- return ARRAY_SIZE(invariant_cp15)
- + num_demux_regs()
- + num_vfp_regs()
- + walk_cp15(vcpu, (u64 __user *)NULL);
-}
-
-int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- unsigned int i;
- int err;
-
- /* Then give them all the invariant registers' indices. */
- for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++) {
- if (put_user(cp15_to_index(&invariant_cp15[i]), uindices))
- return -EFAULT;
- uindices++;
- }
-
- err = walk_cp15(vcpu, uindices);
- if (err < 0)
- return err;
- uindices += err;
-
- err = copy_vfp_regids(uindices);
- if (err < 0)
- return err;
- uindices += err;
-
- return write_demux_regids(uindices);
-}
-
-void kvm_coproc_table_init(void)
-{
- unsigned int i;
-
- /* Make sure tables are unique and in order. */
- BUG_ON(check_reg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
- BUG_ON(check_reg_table(invariant_cp15, ARRAY_SIZE(invariant_cp15)));
-
- /* We abuse the reset function to overwrite the table itself. */
- for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++)
- invariant_cp15[i].reset(NULL, &invariant_cp15[i]);
-
- /*
- * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
- *
- * If software reads the Cache Type fields from Ctype1
- * upwards, once it has seen a value of 0b000, no caches
- * exist at further-out levels of the hierarchy. So, for
- * example, if Ctype3 is the first Cache Type field with a
- * value of 0b000, the values of Ctype4 to Ctype7 must be
- * ignored.
- */
- asm volatile("mrc p15, 1, %0, c0, c0, 1" : "=r" (cache_levels));
- for (i = 0; i < 7; i++)
- if (((cache_levels >> (i*3)) & 7) == 0)
- break;
- /* Clear all higher bits. */
- cache_levels &= (1 << (i*3))-1;
-}
-
-/**
- * kvm_reset_coprocs - sets cp15 registers to reset value
- * @vcpu: The VCPU pointer
- *
- * This function finds the right table above and sets the registers on the
- * virtual CPU struct to their architecturally defined reset values.
- */
-void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
-{
- size_t num;
- const struct coproc_reg *table;
- DECLARE_BITMAP(bmap, NR_CP15_REGS) = { 0, };
-
- /* Generic chip reset first (so target could override). */
- reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs), bmap);
-
- table = get_target_table(vcpu->arch.target, &num);
- reset_coproc_regs(vcpu, table, num, bmap);
-
- for (num = 1; num < NR_CP15_REGS; num++)
- WARN(!test_bit(num, bmap),
- "Didn't reset vcpu_cp15(vcpu, %zi)", num);
-}
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
deleted file mode 100644
index 637065b13012..000000000000
--- a/arch/arm/kvm/coproc.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM_KVM_COPROC_LOCAL_H__
-#define __ARM_KVM_COPROC_LOCAL_H__
-
-struct coproc_params {
- unsigned long CRn;
- unsigned long CRm;
- unsigned long Op1;
- unsigned long Op2;
- unsigned long Rt1;
- unsigned long Rt2;
- bool is_64bit;
- bool is_write;
-};
-
-struct coproc_reg {
- /* MRC/MCR/MRRC/MCRR instruction which accesses it. */
- unsigned long CRn;
- unsigned long CRm;
- unsigned long Op1;
- unsigned long Op2;
-
- bool is_64bit;
-
- /* Trapped access from guest, if non-NULL. */
- bool (*access)(struct kvm_vcpu *,
- const struct coproc_params *,
- const struct coproc_reg *);
-
- /* Initialization for vcpu. */
- void (*reset)(struct kvm_vcpu *, const struct coproc_reg *);
-
- /* Index into vcpu_cp15(vcpu, ...), or 0 if we don't need to save it. */
- unsigned long reg;
-
- /* Value (usually reset value) */
- u64 val;
-};
-
-static inline void print_cp_instr(const struct coproc_params *p)
-{
- /* Look, we even formatted it for you to paste into the table! */
- if (p->is_64bit) {
- kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
- p->CRn, p->Op1, p->is_write ? "write" : "read");
- } else {
- kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
- " func_%s },\n",
- p->CRn, p->CRm, p->Op1, p->Op2,
- p->is_write ? "write" : "read");
- }
-}
-
-static inline bool ignore_write(struct kvm_vcpu *vcpu,
- const struct coproc_params *p)
-{
- return true;
-}
-
-static inline bool read_zero(struct kvm_vcpu *vcpu,
- const struct coproc_params *p)
-{
- *vcpu_reg(vcpu, p->Rt1) = 0;
- return true;
-}
-
-/* Reset functions */
-static inline void reset_unknown(struct kvm_vcpu *vcpu,
- const struct coproc_reg *r)
-{
- BUG_ON(!r->reg);
- BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
- vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
-}
-
-static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- BUG_ON(!r->reg);
- BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
- vcpu_cp15(vcpu, r->reg) = r->val;
-}
-
-static inline void reset_unknown64(struct kvm_vcpu *vcpu,
- const struct coproc_reg *r)
-{
- BUG_ON(!r->reg);
- BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
-
- vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
- vcpu_cp15(vcpu, r->reg+1) = 0xd0c0ffee;
-}
-
-static inline int cmp_reg(const struct coproc_reg *i1,
- const struct coproc_reg *i2)
-{
- BUG_ON(i1 == i2);
- if (!i1)
- return 1;
- else if (!i2)
- return -1;
- if (i1->CRn != i2->CRn)
- return i1->CRn - i2->CRn;
- if (i1->CRm != i2->CRm)
- return i1->CRm - i2->CRm;
- if (i1->Op1 != i2->Op1)
- return i1->Op1 - i2->Op1;
- if (i1->Op2 != i2->Op2)
- return i1->Op2 - i2->Op2;
- return i2->is_64bit - i1->is_64bit;
-}
-
-
-#define CRn(_x) .CRn = _x
-#define CRm(_x) .CRm = _x
-#define CRm64(_x) .CRn = _x, .CRm = 0
-#define Op1(_x) .Op1 = _x
-#define Op2(_x) .Op2 = _x
-#define is64 .is_64bit = true
-#define is32 .is_64bit = false
-
-bool access_vm_reg(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r);
-
-#endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
deleted file mode 100644
index 36bf15421ae8..000000000000
--- a/arch/arm/kvm/coproc_a15.c
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Authors: Rusty Russell <rusty@rustcorp.au>
- * Christoffer Dall <c.dall@virtualopensystems.com>
- */
-#include <linux/kvm_host.h>
-#include <asm/kvm_coproc.h>
-#include <asm/kvm_emulate.h>
-#include <linux/init.h>
-
-#include "coproc.h"
-
-/*
- * A15-specific CP15 registers.
- * CRn denotes the primary register number, but is copied to the CRm in the
- * user space API for 64-bit register access in line with the terminology used
- * in the ARM ARM.
- * Important: Must be sorted ascending by CRn, CRm, Op1, Op2 and with 64-bit
- * registers preceding 32-bit ones.
- */
-static const struct coproc_reg a15_regs[] = {
- /* SCTLR: swapped by interrupt.S. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 },
-};
-
-static struct kvm_coproc_target_table a15_target_table = {
- .target = KVM_ARM_TARGET_CORTEX_A15,
- .table = a15_regs,
- .num = ARRAY_SIZE(a15_regs),
-};
-
-static int __init coproc_a15_init(void)
-{
- kvm_register_target_coproc_table(&a15_target_table);
- return 0;
-}
-late_initcall(coproc_a15_init);
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
deleted file mode 100644
index 40f643e1e05c..000000000000
--- a/arch/arm/kvm/coproc_a7.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Copyright (C) 2013 - ARM Ltd
- *
- * Authors: Rusty Russell <rusty@rustcorp.au>
- * Christoffer Dall <c.dall@virtualopensystems.com>
- * Jonathan Austin <jonathan.austin@arm.com>
- */
-#include <linux/kvm_host.h>
-#include <asm/kvm_coproc.h>
-#include <asm/kvm_emulate.h>
-#include <linux/init.h>
-
-#include "coproc.h"
-
-/*
- * Cortex-A7 specific CP15 registers.
- * CRn denotes the primary register number, but is copied to the CRm in the
- * user space API for 64-bit register access in line with the terminology used
- * in the ARM ARM.
- * Important: Must be sorted ascending by CRn, CRm, Op1, Op2 and with 64-bit
- * registers preceding 32-bit ones.
- */
-static const struct coproc_reg a7_regs[] = {
- /* SCTLR: swapped by interrupt.S. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
- access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 },
-};
-
-static struct kvm_coproc_target_table a7_target_table = {
- .target = KVM_ARM_TARGET_CORTEX_A7,
- .table = a7_regs,
- .num = ARRAY_SIZE(a7_regs),
-};
-
-static int __init coproc_a7_init(void)
-{
- kvm_register_target_coproc_table(&a7_target_table);
- return 0;
-}
-late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
deleted file mode 100644
index 29bb852140c5..000000000000
--- a/arch/arm/kvm/emulate.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/mm.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/opcodes.h>
-#include <trace/events/kvm.h>
-
-#include "trace.h"
-
-#define VCPU_NR_MODES 6
-#define VCPU_REG_OFFSET_USR 0
-#define VCPU_REG_OFFSET_FIQ 1
-#define VCPU_REG_OFFSET_IRQ 2
-#define VCPU_REG_OFFSET_SVC 3
-#define VCPU_REG_OFFSET_ABT 4
-#define VCPU_REG_OFFSET_UND 5
-#define REG_OFFSET(_reg) \
- (offsetof(struct kvm_regs, _reg) / sizeof(u32))
-
-#define USR_REG_OFFSET(_num) REG_OFFSET(usr_regs.uregs[_num])
-
-static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = {
- /* USR/SYS Registers */
- [VCPU_REG_OFFSET_USR] = {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14),
- },
-
- /* FIQ Registers */
- [VCPU_REG_OFFSET_FIQ] = {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7),
- REG_OFFSET(fiq_regs[0]), /* r8 */
- REG_OFFSET(fiq_regs[1]), /* r9 */
- REG_OFFSET(fiq_regs[2]), /* r10 */
- REG_OFFSET(fiq_regs[3]), /* r11 */
- REG_OFFSET(fiq_regs[4]), /* r12 */
- REG_OFFSET(fiq_regs[5]), /* r13 */
- REG_OFFSET(fiq_regs[6]), /* r14 */
- },
-
- /* IRQ Registers */
- [VCPU_REG_OFFSET_IRQ] = {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(irq_regs[0]), /* r13 */
- REG_OFFSET(irq_regs[1]), /* r14 */
- },
-
- /* SVC Registers */
- [VCPU_REG_OFFSET_SVC] = {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(svc_regs[0]), /* r13 */
- REG_OFFSET(svc_regs[1]), /* r14 */
- },
-
- /* ABT Registers */
- [VCPU_REG_OFFSET_ABT] = {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(abt_regs[0]), /* r13 */
- REG_OFFSET(abt_regs[1]), /* r14 */
- },
-
- /* UND Registers */
- [VCPU_REG_OFFSET_UND] = {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(und_regs[0]), /* r13 */
- REG_OFFSET(und_regs[1]), /* r14 */
- },
-};
-
-/*
- * Return a pointer to the register identified by reg_num, as banked
- * in the current mode of the virtual CPU.
- */
-unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
-{
- unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs;
- unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
-
- switch (mode) {
- case USR_MODE...SVC_MODE:
- mode &= ~MODE32_BIT; /* 0 ... 3 */
- break;
-
- case ABT_MODE:
- mode = VCPU_REG_OFFSET_ABT;
- break;
-
- case UND_MODE:
- mode = VCPU_REG_OFFSET_UND;
- break;
-
- case SYSTEM_MODE:
- mode = VCPU_REG_OFFSET_USR;
- break;
-
- default:
- BUG();
- }
-
- return reg_array + vcpu_reg_offsets[mode][reg_num];
-}
-
-/*
- * Return the SPSR for the current mode of the virtual CPU.
- */
-unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu)
-{
- unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
- switch (mode) {
- case SVC_MODE:
- return &vcpu->arch.ctxt.gp_regs.KVM_ARM_SVC_spsr;
- case ABT_MODE:
- return &vcpu->arch.ctxt.gp_regs.KVM_ARM_ABT_spsr;
- case UND_MODE:
- return &vcpu->arch.ctxt.gp_regs.KVM_ARM_UND_spsr;
- case IRQ_MODE:
- return &vcpu->arch.ctxt.gp_regs.KVM_ARM_IRQ_spsr;
- case FIQ_MODE:
- return &vcpu->arch.ctxt.gp_regs.KVM_ARM_FIQ_spsr;
- default:
- BUG();
- }
-}
-
-/******************************************************************************
- * Inject exceptions into the guest
- */
-
-/**
- * kvm_inject_vabt - inject an async abort / SError into the guest
- * @vcpu: The VCPU to receive the exception
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- */
-void kvm_inject_vabt(struct kvm_vcpu *vcpu)
-{
- *vcpu_hcr(vcpu) |= HCR_VA;
-}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
deleted file mode 100644
index 9f7ae0d8690f..000000000000
--- a/arch/arm/kvm/guest.c
+++ /dev/null
@@ -1,387 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <kvm/arm_psci.h>
-#include <asm/cputype.h>
-#include <linux/uaccess.h>
-#include <asm/kvm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
-
-#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
-#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- VCPU_STAT(halt_successful_poll),
- VCPU_STAT(halt_attempted_poll),
- VCPU_STAT(halt_poll_invalid),
- VCPU_STAT(halt_wakeup),
- VCPU_STAT(hvc_exit_stat),
- VCPU_STAT(wfe_exit_stat),
- VCPU_STAT(wfi_exit_stat),
- VCPU_STAT(mmio_exit_user),
- VCPU_STAT(mmio_exit_kernel),
- VCPU_STAT(exits),
- { NULL }
-};
-
-static u64 core_reg_offset_from_id(u64 id)
-{
- return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
-}
-
-static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- u32 __user *uaddr = (u32 __user *)(long)reg->addr;
- struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
- u64 off;
-
- if (KVM_REG_SIZE(reg->id) != 4)
- return -ENOENT;
-
- /* Our ID is an index into the kvm_regs struct. */
- off = core_reg_offset_from_id(reg->id);
- if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id))
- return -ENOENT;
-
- return put_user(((u32 *)regs)[off], uaddr);
-}
-
-static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- u32 __user *uaddr = (u32 __user *)(long)reg->addr;
- struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
- u64 off, val;
-
- if (KVM_REG_SIZE(reg->id) != 4)
- return -ENOENT;
-
- /* Our ID is an index into the kvm_regs struct. */
- off = core_reg_offset_from_id(reg->id);
- if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id))
- return -ENOENT;
-
- if (get_user(val, uaddr) != 0)
- return -EFAULT;
-
- if (off == KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr)) {
- unsigned long mode = val & MODE_MASK;
- switch (mode) {
- case USR_MODE:
- case FIQ_MODE:
- case IRQ_MODE:
- case SVC_MODE:
- case ABT_MODE:
- case UND_MODE:
- break;
- default:
- return -EINVAL;
- }
- }
-
- ((u32 *)regs)[off] = val;
- return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
- return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
- return -EINVAL;
-}
-
-#define NUM_TIMER_REGS 3
-
-static bool is_timer_reg(u64 index)
-{
- switch (index) {
- case KVM_REG_ARM_TIMER_CTL:
- case KVM_REG_ARM_TIMER_CNT:
- case KVM_REG_ARM_TIMER_CVAL:
- return true;
- }
- return false;
-}
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
- return -EFAULT;
- uindices++;
- if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
- return -EFAULT;
- uindices++;
- if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
- return -EFAULT;
-
- return 0;
-}
-
-static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
- int ret;
-
- ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
- if (ret != 0)
- return -EFAULT;
-
- return kvm_arm_timer_set_reg(vcpu, reg->id, val);
-}
-
-static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
-
- val = kvm_arm_timer_get_reg(vcpu, reg->id);
- return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
-}
-
-static unsigned long num_core_regs(void)
-{
- return sizeof(struct kvm_regs) / sizeof(u32);
-}
-
-/**
- * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
- *
- * This is for all registers.
- */
-unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
-{
- return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
- + kvm_arm_get_fw_num_regs(vcpu)
- + NUM_TIMER_REGS;
-}
-
-/**
- * kvm_arm_copy_reg_indices - get indices of all registers.
- *
- * We do core registers right here, then append the firmware, timer and
- * coproc register indices.
- */
-int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- unsigned int i;
- const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
- int ret;
-
- for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
- if (put_user(core_reg | i, uindices))
- return -EFAULT;
- uindices++;
- }
-
- ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
- if (ret)
- return ret;
- uindices += kvm_arm_get_fw_num_regs(vcpu);
-
- ret = copy_timer_indices(vcpu, uindices);
- if (ret)
- return ret;
- uindices += NUM_TIMER_REGS;
-
- return kvm_arm_copy_coproc_indices(vcpu, uindices);
-}
-
-int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- /* We currently use nothing arch-specific in upper 32 bits */
- if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32)
- return -EINVAL;
-
- /* Register group 16 means we want a core register. */
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
- return get_core_reg(vcpu, reg);
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
- return kvm_arm_get_fw_reg(vcpu, reg);
-
- if (is_timer_reg(reg->id))
- return get_timer_reg(vcpu, reg);
-
- return kvm_arm_coproc_get_reg(vcpu, reg);
-}
-
-int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- /* We currently use nothing arch-specific in upper 32 bits */
- if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32)
- return -EINVAL;
-
- /* Register group 16 means we set a core register. */
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
- return set_core_reg(vcpu, reg);
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
- return kvm_arm_set_fw_reg(vcpu, reg);
-
- if (is_timer_reg(reg->id))
- return set_timer_reg(vcpu, reg);
-
- return kvm_arm_coproc_set_reg(vcpu, reg);
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
-{
- return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
-{
- return -EINVAL;
-}
-
-
-int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
-{
- events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
-
- /*
- * We never return a pending ext_dabt here because we deliver it to
- * the virtual CPU directly when setting the event and it's no longer
- * 'pending' at this point.
- */
-
- return 0;
-}
-
-int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
-{
- bool serror_pending = events->exception.serror_pending;
- bool has_esr = events->exception.serror_has_esr;
- bool ext_dabt_pending = events->exception.ext_dabt_pending;
-
- if (serror_pending && has_esr)
- return -EINVAL;
- else if (serror_pending)
- kvm_inject_vabt(vcpu);
-
- if (ext_dabt_pending)
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-
- return 0;
-}
-
-int __attribute_const__ kvm_target_cpu(void)
-{
- switch (read_cpuid_part()) {
- case ARM_CPU_PART_CORTEX_A7:
- return KVM_ARM_TARGET_CORTEX_A7;
- case ARM_CPU_PART_CORTEX_A15:
- return KVM_ARM_TARGET_CORTEX_A15;
- default:
- return -EINVAL;
- }
-}
-
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
-{
- int target = kvm_target_cpu();
-
- if (target < 0)
- return -ENODEV;
-
- memset(init, 0, sizeof(*init));
-
- /*
- * For now, we don't return any features.
-	 * In the future, we might use features to return
-	 * target-specific features available for the preferred
-	 * target type.
- */
- init->target = (__u32)target;
-
- return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
- return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
- return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
- struct kvm_translation *tr)
-{
- return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
-{
- return -EINVAL;
-}
-
-int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- int ret;
-
- switch (attr->group) {
- case KVM_ARM_VCPU_TIMER_CTRL:
- ret = kvm_arm_timer_set_attr(vcpu, attr);
- break;
- default:
- ret = -ENXIO;
- break;
- }
-
- return ret;
-}
-
-int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- int ret;
-
- switch (attr->group) {
- case KVM_ARM_VCPU_TIMER_CTRL:
- ret = kvm_arm_timer_get_attr(vcpu, attr);
- break;
- default:
- ret = -ENXIO;
- break;
- }
-
- return ret;
-}
-
-int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- int ret;
-
- switch (attr->group) {
- case KVM_ARM_VCPU_TIMER_CTRL:
- ret = kvm_arm_timer_has_attr(vcpu, attr);
- break;
- default:
- ret = -ENXIO;
- break;
- }
-
- return ret;
-}
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
deleted file mode 100644
index e58a89d2f13f..000000000000
--- a/arch/arm/kvm/handle_exit.c
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
-#include <asm/kvm_mmu.h>
-#include <kvm/arm_hypercalls.h>
-#include <trace/events/kvm.h>
-
-#include "trace.h"
-
-typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
-
-static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- int ret;
-
- trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
- kvm_vcpu_hvc_get_imm(vcpu));
- vcpu->stat.hvc_exit_stat++;
-
- ret = kvm_hvc_call_handler(vcpu);
- if (ret < 0) {
- vcpu_set_reg(vcpu, 0, ~0UL);
- return 1;
- }
-
- return ret;
-}
-
-static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- /*
- * "If an SMC instruction executed at Non-secure EL1 is
- * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
- * Trap exception, not a Secure Monitor Call exception [...]"
- *
- * We need to advance the PC after the trap, as it would
- * otherwise return to the same address...
- */
- vcpu_set_reg(vcpu, 0, ~0UL);
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- return 1;
-}
-
-/**
- * kvm_handle_wfx - handle a WFI or WFE instruction trapped in guests
- * @vcpu: the vcpu pointer
- * @run: the kvm_run structure pointer
- *
- * WFE: Yield the CPU and come back to this vcpu when the scheduler
- * decides to.
- * WFI: Simply call kvm_vcpu_block(), which will halt execution of
- * world-switches and schedule other host processes until there is an
- * incoming IRQ or FIQ to the VM.
- */
-static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
- trace_kvm_wfx(*vcpu_pc(vcpu), true);
- vcpu->stat.wfe_exit_stat++;
- kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
- } else {
- trace_kvm_wfx(*vcpu_pc(vcpu), false);
- vcpu->stat.wfi_exit_stat++;
- kvm_vcpu_block(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
- }
-
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-
- return 1;
-}
-
-static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
-
- kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
- hsr);
-
- kvm_inject_undefined(vcpu);
- return 1;
-}
-
-static exit_handle_fn arm_exit_handlers[] = {
- [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec,
- [HSR_EC_WFI] = kvm_handle_wfx,
- [HSR_EC_CP15_32] = kvm_handle_cp15_32,
- [HSR_EC_CP15_64] = kvm_handle_cp15_64,
- [HSR_EC_CP14_MR] = kvm_handle_cp14_32,
- [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store,
- [HSR_EC_CP14_64] = kvm_handle_cp14_64,
- [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access,
- [HSR_EC_CP10_ID] = kvm_handle_cp10_id,
- [HSR_EC_HVC] = handle_hvc,
- [HSR_EC_SMC] = handle_smc,
- [HSR_EC_IABT] = kvm_handle_guest_abort,
- [HSR_EC_DABT] = kvm_handle_guest_abort,
-};
-
-static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
-{
- u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
-
- return arm_exit_handlers[hsr_ec];
-}
-
-/*
- * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
- * proper exit to userspace.
- */
-int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index)
-{
- exit_handle_fn exit_handler;
-
- if (ARM_ABORT_PENDING(exception_index)) {
- u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
-
- /*
-		 * HVC/SMC already have an adjusted PC, which we need
-		 * to undo so that we return to the right address after
-		 * having injected the abort.
- */
- if (hsr_ec == HSR_EC_HVC || hsr_ec == HSR_EC_SMC) {
- u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2;
- *vcpu_pc(vcpu) -= adj;
- }
-
- kvm_inject_vabt(vcpu);
- return 1;
- }
-
- exception_index = ARM_EXCEPTION_CODE(exception_index);
-
- switch (exception_index) {
- case ARM_EXCEPTION_IRQ:
- return 1;
- case ARM_EXCEPTION_HVC:
- /*
- * See ARM ARM B1.14.1: "Hyp traps on instructions
- * that fail their condition code check"
- */
- if (!kvm_condition_valid(vcpu)) {
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- return 1;
- }
-
- exit_handler = kvm_get_exit_handler(vcpu);
-
- return exit_handler(vcpu, run);
- case ARM_EXCEPTION_DATA_ABORT:
- kvm_inject_vabt(vcpu);
- return 1;
- case ARM_EXCEPTION_HYP_GONE:
- /*
- * HYP has been reset to the hyp-stub. This happens
- * when a guest is pre-empted by kvm_reboot()'s
- * shutdown call.
- */
- run->exit_reason = KVM_EXIT_FAIL_ENTRY;
- return 0;
- default:
- kvm_pr_unimpl("Unsupported exception type: %d",
- exception_index);
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- return 0;
- }
-}
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
deleted file mode 100644
index ba88b1eca93c..000000000000
--- a/arch/arm/kvm/hyp/Makefile
+++ /dev/null
@@ -1,34 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for Kernel-based Virtual Machine module, HYP part
-#
-
-ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
-
-KVM=../../../../virt/kvm
-
-CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
-
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
-
-obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
-obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
-obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
-CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE)
-
-obj-$(CONFIG_KVM_ARM_HOST) += entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += switch.o
-CFLAGS_switch.o += $(CFLAGS_ARMV7VE)
-obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
-
-# KVM code is run at a different exception level with a different memory map, so
-# compiler instrumentation that inserts callbacks or checks into the code may
-# cause crashes. Just disable it.
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c
deleted file mode 100644
index c4632ed9e819..000000000000
--- a/arch/arm/kvm/hyp/banked-sr.c
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Original code:
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <asm/kvm_hyp.h>
-
-/*
- * gcc before 4.9 doesn't understand -march=armv7ve, so we have to
- * trick the assembler.
- */
-__asm__(".arch_extension virt");
-
-void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->gp_regs.usr_regs.ARM_sp = read_special(SP_usr);
- ctxt->gp_regs.usr_regs.ARM_pc = read_special(ELR_hyp);
- ctxt->gp_regs.usr_regs.ARM_cpsr = read_special(SPSR);
- ctxt->gp_regs.KVM_ARM_SVC_sp = read_special(SP_svc);
- ctxt->gp_regs.KVM_ARM_SVC_lr = read_special(LR_svc);
- ctxt->gp_regs.KVM_ARM_SVC_spsr = read_special(SPSR_svc);
- ctxt->gp_regs.KVM_ARM_ABT_sp = read_special(SP_abt);
- ctxt->gp_regs.KVM_ARM_ABT_lr = read_special(LR_abt);
- ctxt->gp_regs.KVM_ARM_ABT_spsr = read_special(SPSR_abt);
- ctxt->gp_regs.KVM_ARM_UND_sp = read_special(SP_und);
- ctxt->gp_regs.KVM_ARM_UND_lr = read_special(LR_und);
- ctxt->gp_regs.KVM_ARM_UND_spsr = read_special(SPSR_und);
- ctxt->gp_regs.KVM_ARM_IRQ_sp = read_special(SP_irq);
- ctxt->gp_regs.KVM_ARM_IRQ_lr = read_special(LR_irq);
- ctxt->gp_regs.KVM_ARM_IRQ_spsr = read_special(SPSR_irq);
- ctxt->gp_regs.KVM_ARM_FIQ_r8 = read_special(R8_fiq);
- ctxt->gp_regs.KVM_ARM_FIQ_r9 = read_special(R9_fiq);
- ctxt->gp_regs.KVM_ARM_FIQ_r10 = read_special(R10_fiq);
- ctxt->gp_regs.KVM_ARM_FIQ_fp = read_special(R11_fiq);
- ctxt->gp_regs.KVM_ARM_FIQ_ip = read_special(R12_fiq);
- ctxt->gp_regs.KVM_ARM_FIQ_sp = read_special(SP_fiq);
- ctxt->gp_regs.KVM_ARM_FIQ_lr = read_special(LR_fiq);
- ctxt->gp_regs.KVM_ARM_FIQ_spsr = read_special(SPSR_fiq);
-}
-
-void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt)
-{
- write_special(ctxt->gp_regs.usr_regs.ARM_sp, SP_usr);
- write_special(ctxt->gp_regs.usr_regs.ARM_pc, ELR_hyp);
- write_special(ctxt->gp_regs.usr_regs.ARM_cpsr, SPSR_cxsf);
- write_special(ctxt->gp_regs.KVM_ARM_SVC_sp, SP_svc);
- write_special(ctxt->gp_regs.KVM_ARM_SVC_lr, LR_svc);
- write_special(ctxt->gp_regs.KVM_ARM_SVC_spsr, SPSR_svc);
- write_special(ctxt->gp_regs.KVM_ARM_ABT_sp, SP_abt);
- write_special(ctxt->gp_regs.KVM_ARM_ABT_lr, LR_abt);
- write_special(ctxt->gp_regs.KVM_ARM_ABT_spsr, SPSR_abt);
- write_special(ctxt->gp_regs.KVM_ARM_UND_sp, SP_und);
- write_special(ctxt->gp_regs.KVM_ARM_UND_lr, LR_und);
- write_special(ctxt->gp_regs.KVM_ARM_UND_spsr, SPSR_und);
- write_special(ctxt->gp_regs.KVM_ARM_IRQ_sp, SP_irq);
- write_special(ctxt->gp_regs.KVM_ARM_IRQ_lr, LR_irq);
- write_special(ctxt->gp_regs.KVM_ARM_IRQ_spsr, SPSR_irq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_r8, R8_fiq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_r9, R9_fiq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_r10, R10_fiq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_fp, R11_fiq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_ip, R12_fiq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_sp, SP_fiq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_lr, LR_fiq);
- write_special(ctxt->gp_regs.KVM_ARM_FIQ_spsr, SPSR_fiq);
-}
diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c
deleted file mode 100644
index e6923306f698..000000000000
--- a/arch/arm/kvm/hyp/cp15-sr.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Original code:
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <asm/kvm_hyp.h>
-
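-/* 64-bit cp15 registers occupy two consecutive 32-bit slots. */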
-static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx)
-{
- return (u64 *)(ctxt->cp15 + idx);
-}
-
-void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR);
- ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR);
- ctxt->cp15[c1_CPACR] = read_sysreg(CPACR);
- *cp15_64(ctxt, c2_TTBR0) = read_sysreg(TTBR0);
- *cp15_64(ctxt, c2_TTBR1) = read_sysreg(TTBR1);
- ctxt->cp15[c2_TTBCR] = read_sysreg(TTBCR);
- ctxt->cp15[c3_DACR] = read_sysreg(DACR);
- ctxt->cp15[c5_DFSR] = read_sysreg(DFSR);
- ctxt->cp15[c5_IFSR] = read_sysreg(IFSR);
- ctxt->cp15[c5_ADFSR] = read_sysreg(ADFSR);
- ctxt->cp15[c5_AIFSR] = read_sysreg(AIFSR);
- ctxt->cp15[c6_DFAR] = read_sysreg(DFAR);
- ctxt->cp15[c6_IFAR] = read_sysreg(IFAR);
- *cp15_64(ctxt, c7_PAR) = read_sysreg(PAR);
- ctxt->cp15[c10_PRRR] = read_sysreg(PRRR);
- ctxt->cp15[c10_NMRR] = read_sysreg(NMRR);
- ctxt->cp15[c10_AMAIR0] = read_sysreg(AMAIR0);
- ctxt->cp15[c10_AMAIR1] = read_sysreg(AMAIR1);
- ctxt->cp15[c12_VBAR] = read_sysreg(VBAR);
- ctxt->cp15[c13_CID] = read_sysreg(CID);
- ctxt->cp15[c13_TID_URW] = read_sysreg(TID_URW);
- ctxt->cp15[c13_TID_URO] = read_sysreg(TID_URO);
- ctxt->cp15[c13_TID_PRIV] = read_sysreg(TID_PRIV);
- ctxt->cp15[c14_CNTKCTL] = read_sysreg(CNTKCTL);
-}
-
-void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->cp15[c0_MPIDR], VMPIDR);
- write_sysreg(ctxt->cp15[c0_CSSELR], CSSELR);
- write_sysreg(ctxt->cp15[c1_SCTLR], SCTLR);
- write_sysreg(ctxt->cp15[c1_CPACR], CPACR);
- write_sysreg(*cp15_64(ctxt, c2_TTBR0), TTBR0);
- write_sysreg(*cp15_64(ctxt, c2_TTBR1), TTBR1);
- write_sysreg(ctxt->cp15[c2_TTBCR], TTBCR);
- write_sysreg(ctxt->cp15[c3_DACR], DACR);
- write_sysreg(ctxt->cp15[c5_DFSR], DFSR);
- write_sysreg(ctxt->cp15[c5_IFSR], IFSR);
- write_sysreg(ctxt->cp15[c5_ADFSR], ADFSR);
- write_sysreg(ctxt->cp15[c5_AIFSR], AIFSR);
- write_sysreg(ctxt->cp15[c6_DFAR], DFAR);
- write_sysreg(ctxt->cp15[c6_IFAR], IFAR);
- write_sysreg(*cp15_64(ctxt, c7_PAR), PAR);
- write_sysreg(ctxt->cp15[c10_PRRR], PRRR);
- write_sysreg(ctxt->cp15[c10_NMRR], NMRR);
- write_sysreg(ctxt->cp15[c10_AMAIR0], AMAIR0);
- write_sysreg(ctxt->cp15[c10_AMAIR1], AMAIR1);
- write_sysreg(ctxt->cp15[c12_VBAR], VBAR);
- write_sysreg(ctxt->cp15[c13_CID], CID);
- write_sysreg(ctxt->cp15[c13_TID_URW], TID_URW);
- write_sysreg(ctxt->cp15[c13_TID_URO], TID_URO);
- write_sysreg(ctxt->cp15[c13_TID_PRIV], TID_PRIV);
- write_sysreg(ctxt->cp15[c14_CNTKCTL], CNTKCTL);
-}
diff --git a/arch/arm/kvm/hyp/entry.S b/arch/arm/kvm/hyp/entry.S
deleted file mode 100644
index 4bd1f6a74180..000000000000
--- a/arch/arm/kvm/hyp/entry.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2016 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
-*/
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-
- .arch_extension virt
-
- .text
- .pushsection .hyp.text, "ax"
-
-#define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR)
-
-/* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */
-ENTRY(__guest_enter)
- @ Save host registers
- add r1, r1, #(USR_REGS_OFFSET + S_R4)
- stm r1!, {r4-r12}
- str lr, [r1, #4] @ Skip SP_usr (already saved)
-
- @ Restore guest registers
- add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
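-	@ Load the guest LR first, as the ldm below clobbers r0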
- ldr lr, [r0, #S_LR]
- ldm r0, {r0-r12}
-
- clrex
- eret
-ENDPROC(__guest_enter)
-
-ENTRY(__guest_exit)
- /*
- * return convention:
- * guest r0, r1, r2 saved on the stack
- * r0: vcpu pointer
- * r1: exception code
- */
-
- add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3)
- stm r2!, {r3-r12}
- str lr, [r2, #4]
- add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
- pop {r3, r4, r5} @ r0, r1, r2
- stm r2, {r3-r5}
-
- ldr r0, [r0, #VCPU_HOST_CTXT]
- add r0, r0, #(USR_REGS_OFFSET + S_R4)
- ldm r0!, {r4-r12}
- ldr lr, [r0, #4]
-
- mov r0, r1
- mrs r1, SPSR
- mrs r2, ELR_hyp
- mrc p15, 4, r3, c5, c2, 0 @ HSR
-
- /*
- * Force loads and stores to complete before unmasking aborts
- * and forcing the delivery of the exception. This gives us a
- * single instruction window, which the handler will try to
- * match.
- */
- dsb sy
- cpsie a
-
- .global abort_guest_exit_start
-abort_guest_exit_start:
-
- isb
-
- .global abort_guest_exit_end
-abort_guest_exit_end:
-
- /*
- * If we took an abort, r0[31] will be set, and cmp will set
- * the N bit in PSTATE.
- */
- cmp r0, #0
- msrmi SPSR_cxsf, r1
- msrmi ELR_hyp, r2
- mcrmi p15, 4, r3, c5, c2, 0 @ HSR
-
- bx lr
-ENDPROC(__guest_exit)
-
-/*
- * If VFPv3 support is not available, then we will not switch the VFP
- * registers; however cp10 and cp11 accesses will still trap and fall back
- * to the regular coprocessor emulation code, which currently will
- * inject an undefined exception to the guest.
- */
-#ifdef CONFIG_VFPv3
-ENTRY(__vfp_guest_restore)
- push {r3, r4, lr}
-
- @ NEON/VFP used. Turn on VFP access.
- mrc p15, 4, r1, c1, c1, 2 @ HCPTR
- bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11))
- mcr p15, 4, r1, c1, c1, 2 @ HCPTR
- isb
-
- @ Switch VFP/NEON hardware state to the guest's
- mov r4, r0
- ldr r0, [r0, #VCPU_HOST_CTXT]
- add r0, r0, #CPU_CTXT_VFP
- bl __vfp_save_state
- add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP)
- bl __vfp_restore_state
-
- pop {r3, r4, lr}
- pop {r0, r1, r2}
- clrex
- eret
-ENDPROC(__vfp_guest_restore)
-#endif
-
- .popsection
-
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S
deleted file mode 100644
index fe3d7811a908..000000000000
--- a/arch/arm/kvm/hyp/hyp-entry.S
+++ /dev/null
@@ -1,295 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/linkage.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-
- .arch_extension virt
-
- .text
- .pushsection .hyp.text, "ax"
-
-.macro load_vcpu reg
- mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR
-.endm
-
-/********************************************************************
- * Hypervisor exception vector and handlers
- *
- *
- * The KVM/ARM Hypervisor ABI is defined as follows:
- *
- * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
- * instruction is issued since all traps are disabled when running the host
- * kernel as per the Hyp-mode initialization at boot time.
- *
- * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
- * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
- * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
- * instructions are called from within Hyp-mode.
- *
- * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
- * Switching to Hyp mode is done through a simple HVC #0 instruction. The
- * exception vector code will check that the HVC comes from VMID==0.
- * - r0 contains a pointer to a HYP function
- * - r1, r2, and r3 contain arguments to the above function.
- * - The HYP function will be called with its arguments in r0, r1 and r2.
- * On HYP function return, we return directly to SVC.
- *
- * Note that the above is used to execute code in Hyp-mode from a host-kernel
- * point of view, and is a different concept from performing a world-switch and
- * executing guest code SVC mode (with a VMID != 0).
- */
-
- .align 5
-__kvm_hyp_vector:
- .global __kvm_hyp_vector
-
- @ Hyp-mode exception vector
- W(b) hyp_reset
- W(b) hyp_undef
- W(b) hyp_svc
- W(b) hyp_pabt
- W(b) hyp_dabt
- W(b) hyp_hvc
- W(b) hyp_irq
- W(b) hyp_fiq
-
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
- .align 5
-__kvm_hyp_vector_ic_inv:
- .global __kvm_hyp_vector_ic_inv
-
- /*
- * We encode the exception entry in the bottom 3 bits of
-	 * SP, and we have to guarantee that it is 8-byte aligned.
- */
- W(add) sp, sp, #1 /* Reset 7 */
- W(add) sp, sp, #1 /* Undef 6 */
- W(add) sp, sp, #1 /* Syscall 5 */
- W(add) sp, sp, #1 /* Prefetch abort 4 */
- W(add) sp, sp, #1 /* Data abort 3 */
- W(add) sp, sp, #1 /* HVC 2 */
- W(add) sp, sp, #1 /* IRQ 1 */
- W(nop) /* FIQ 0 */
-
- mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */
- isb
-
- b decode_vectors
-
- .align 5
-__kvm_hyp_vector_bp_inv:
- .global __kvm_hyp_vector_bp_inv
-
- /*
- * We encode the exception entry in the bottom 3 bits of
-	 * SP, and we have to guarantee that it is 8-byte aligned.
- */
- W(add) sp, sp, #1 /* Reset 7 */
- W(add) sp, sp, #1 /* Undef 6 */
- W(add) sp, sp, #1 /* Syscall 5 */
- W(add) sp, sp, #1 /* Prefetch abort 4 */
- W(add) sp, sp, #1 /* Data abort 3 */
- W(add) sp, sp, #1 /* HVC 2 */
- W(add) sp, sp, #1 /* IRQ 1 */
- W(nop) /* FIQ 0 */
-
- mcr p15, 0, r0, c7, c5, 6 /* BPIALL */
- isb
-
-decode_vectors:
-
-#ifdef CONFIG_THUMB2_KERNEL
- /*
- * Yet another silly hack: Use VPIDR as a temp register.
- * Thumb2 is really a pain, as SP cannot be used with most
- * of the bitwise instructions. The vect_br macro ensures
-	 * things get cleaned up.
- */
- mcr p15, 4, r0, c0, c0, 0 /* VPIDR */
- mov r0, sp
- and r0, r0, #7
- sub sp, sp, r0
- push {r1, r2}
- mov r1, r0
- mrc p15, 4, r0, c0, c0, 0 /* VPIDR */
- mrc p15, 0, r2, c0, c0, 0 /* MIDR */
- mcr p15, 4, r2, c0, c0, 0 /* VPIDR */
-#endif
-
-.macro vect_br val, targ
-ARM( eor sp, sp, #\val )
-ARM( tst sp, #7 )
-ARM( eorne sp, sp, #\val )
-
-THUMB( cmp r1, #\val )
-THUMB( popeq {r1, r2} )
-
- beq \targ
-.endm
-
- vect_br 0, hyp_fiq
- vect_br 1, hyp_irq
- vect_br 2, hyp_hvc
- vect_br 3, hyp_dabt
- vect_br 4, hyp_pabt
- vect_br 5, hyp_svc
- vect_br 6, hyp_undef
- vect_br 7, hyp_reset
-#endif
-
-.macro invalid_vector label, cause
- .align
-\label: mov r0, #\cause
- b __hyp_panic
-.endm
-
- invalid_vector hyp_reset ARM_EXCEPTION_RESET
- invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED
- invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE
- invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT
- invalid_vector hyp_fiq ARM_EXCEPTION_FIQ
-
-ENTRY(__hyp_do_panic)
- mrs lr, cpsr
- bic lr, lr, #MODE_MASK
- orr lr, lr, #SVC_MODE
-THUMB( orr lr, lr, #PSR_T_BIT )
- msr spsr_cxsf, lr
- ldr lr, =panic
- msr ELR_hyp, lr
- ldr lr, =__kvm_call_hyp
- clrex
- eret
-ENDPROC(__hyp_do_panic)
-
-hyp_hvc:
- /*
- * Getting here is either because of a trap from a guest,
- * or from executing HVC from the host kernel, which means
- * "do something in Hyp mode".
- */
- push {r0, r1, r2}
-
- @ Check syndrome register
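-	/* Only accept CPSR values that select a supported guest mode. */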
- mrc p15, 4, r1, c5, c2, 0 @ HSR
- lsr r0, r1, #HSR_EC_SHIFT
- cmp r0, #HSR_EC_HVC
- bne guest_trap @ Not HVC instr.
-
- /*
-	 * Let's check if the HVC came from VMID 0 and allow a simple
-	 * switch to Hyp mode.
- */
- mrrc p15, 6, r0, r2, c2
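-	@ r2:r0 = VTTBR; the VMID is in VTTBR[55:48]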
- lsr r2, r2, #16
- and r2, r2, #0xff
- cmp r2, #0
- bne guest_hvc_trap @ Guest called HVC
-
- /*
- * Getting here means host called HVC, we shift parameters and branch
- * to Hyp function.
- */
- pop {r0, r1, r2}
-
- /*
- * Check if we have a kernel function, which is guaranteed to be
- * bigger than the maximum hyp stub hypercall
- */
- cmp r0, #HVC_STUB_HCALL_NR
- bhs 1f
-
- /*
- * Not a kernel function, treat it as a stub hypercall.
- * Compute the physical address for __kvm_handle_stub_hvc
-	 * (as the code lives in the idmapped page) and branch there.
- * We hijack ip (r12) as a tmp register.
- */
- push {r1}
- ldr r1, =kimage_voffset
- ldr r1, [r1]
- ldr ip, =__kvm_handle_stub_hvc
- sub ip, ip, r1
- pop {r1}
-
- bx ip
-
-1:
- /*
- * Pushing r2 here is just a way of keeping the stack aligned to
- * 8 bytes on any path that can trigger a HYP exception. Here,
- * we may well be about to jump into the guest, and the guest
- * exit would otherwise be badly decoded by our fancy
- * "decode-exception-without-a-branch" code...
- */
- push {r2, lr}
-
- mov lr, r0
- mov r0, r1
- mov r1, r2
- mov r2, r3
-
-THUMB( orr lr, #1)
- blx lr @ Call the HYP function
-
- pop {r2, lr}
- eret
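For orientation, the hyp_hvc flow above can be restated in C. This is a hedged sketch only: read_vttbr(), handle_guest_hvc() and handle_stub_hvc() are placeholders, while the VMID extraction (bits [55:48] of VTTBR) and the HVC_STUB_HCALL_NR comparison mirror the assembly.

/* Hedged sketch of the dispatch above; the helper names are placeholders. */
static void hyp_hvc_dispatch(unsigned long r0, unsigned long r1,
			     unsigned long r2, unsigned long r3)
{
	unsigned long long vttbr = read_vttbr();	/* mrrc p15, 6, ..., c2 */
	unsigned int vmid = (vttbr >> 48) & 0xff;	/* VTTBR.VMID */

	if (vmid != 0) {
		handle_guest_hvc();		/* HVC trapped from a guest */
		return;
	}

	if (r0 < HVC_STUB_HCALL_NR) {
		handle_stub_hvc(r0, r1, r2);	/* hyp stub hypercall */
		return;
	}

	/* Host HVC: r0 is a HYP function pointer, r1-r3 are its arguments. */
	((unsigned long (*)(unsigned long, unsigned long, unsigned long))r0)
		(r1, r2, r3);
}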
-
-guest_hvc_trap:
- movw r2, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
- movt r2, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
- ldr r0, [sp] @ Guest's r0
- teq r0, r2
- bne guest_trap
- add sp, sp, #12
- @ Returns:
- @ r0 = 0
- @ r1 = HSR value (perfectly predictable)
- @ r2 = ARM_SMCCC_ARCH_WORKAROUND_1
- mov r0, #0
- eret
-
-guest_trap:
- load_vcpu r0 @ Load VCPU pointer to r0
-
-#ifdef CONFIG_VFPv3
- @ Check for a VFP access
- lsr r1, r1, #HSR_EC_SHIFT
- cmp r1, #HSR_EC_CP_0_13
- beq __vfp_guest_restore
-#endif
-
- mov r1, #ARM_EXCEPTION_HVC
- b __guest_exit
-
-hyp_irq:
- push {r0, r1, r2}
- mov r1, #ARM_EXCEPTION_IRQ
- load_vcpu r0 @ Load VCPU pointer to r0
- b __guest_exit
-
-hyp_dabt:
- push {r0, r1}
- mrs r0, ELR_hyp
- ldr r1, =abort_guest_exit_start
-THUMB( add r1, r1, #1)
- cmp r0, r1
- ldrne r1, =abort_guest_exit_end
-THUMB( addne r1, r1, #1)
- cmpne r0, r1
- pop {r0, r1}
- bne __hyp_panic
-
- orr r0, r0, #(1 << ARM_EXIT_WITH_ABORT_BIT)
- eret
-
- .ltorg
-
- .popsection
diff --git a/arch/arm/kvm/hyp/s2-setup.c b/arch/arm/kvm/hyp/s2-setup.c
deleted file mode 100644
index 5dfbea5adf65..000000000000
--- a/arch/arm/kvm/hyp/s2-setup.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2016 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/types.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_hyp.h>
-
-void __hyp_text __init_stage2_translation(void)
-{
- u64 val;
-
- val = read_sysreg(VTCR) & ~VTCR_MASK;
-
- val |= read_sysreg(HTCR) & VTCR_HTCR_SH;
- val |= KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S;
-
- write_sysreg(val, VTCR);
-}
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
deleted file mode 100644
index 1efeef3fd0ee..000000000000
--- a/arch/arm/kvm/hyp/switch.c
+++ /dev/null
@@ -1,242 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-#include <linux/jump_label.h>
-
-#include <asm/kvm_asm.h>
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-
-__asm__(".arch_extension virt");
-
-/*
- * Activate the traps, saving the host's fpexc register before
- * overwriting it. We'll restore it on VM exit.
- */
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
-{
- u32 val;
-
- /*
- * We are about to set HCPTR.TCP10/11 to trap all floating point
- * register accesses to HYP; however, the ARM ARM clearly states that
- * traps are only taken to HYP if the operation would not otherwise
- * trap to SVC. Therefore, always make sure that for 32-bit guests,
- * we set FPEXC.EN to prevent traps to SVC, when setting the TCP bits.
- */
- val = read_sysreg(VFP_FPEXC);
- *fpexc_host = val;
- if (!(val & FPEXC_EN)) {
- write_sysreg(val | FPEXC_EN, VFP_FPEXC);
- isb();
- }
-
- write_sysreg(vcpu->arch.hcr, HCR);
- /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
- write_sysreg(HSTR_T(15), HSTR);
- write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
- val = read_sysreg(HDCR);
- val |= HDCR_TPM | HDCR_TPMCR; /* trap performance monitors */
- val |= HDCR_TDRA | HDCR_TDOSA | HDCR_TDA; /* trap debug regs */
- write_sysreg(val, HDCR);
-}
-
-static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
-{
- u32 val;
-
- /*
- * If we pended a virtual abort, preserve it until it gets
- * cleared. See B1.9.9 (Virtual Abort exception) for details,
- * but the crucial bit is the zeroing of HCR.VA in the
- * pseudocode.
- */
- if (vcpu->arch.hcr & HCR_VA)
- vcpu->arch.hcr = read_sysreg(HCR);
-
- write_sysreg(0, HCR);
- write_sysreg(0, HSTR);
- val = read_sysreg(HDCR);
- write_sysreg(val & ~(HDCR_TPM | HDCR_TPMCR), HDCR);
- write_sysreg(0, HCPTR);
-}
-
-static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- write_sysreg(kvm_get_vttbr(kvm), VTTBR);
- write_sysreg(vcpu->arch.midr, VPIDR);
-}
-
-static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
-{
- write_sysreg(0, VTTBR);
- write_sysreg(read_sysreg(MIDR), VPIDR);
-}
-
-
-static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_save_state(vcpu);
- __vgic_v3_deactivate_traps(vcpu);
- }
-}
-
-static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_activate_traps(vcpu);
- __vgic_v3_restore_state(vcpu);
- }
-}
-
-static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
-{
- u32 hsr = read_sysreg(HSR);
- u8 ec = hsr >> HSR_EC_SHIFT;
- u32 hpfar, far;
-
- vcpu->arch.fault.hsr = hsr;
-
- if (ec == HSR_EC_IABT)
- far = read_sysreg(HIFAR);
- else if (ec == HSR_EC_DABT)
- far = read_sysreg(HDFAR);
- else
- return true;
-
- /*
- * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
- *
- * Abort on the stage 2 translation for a memory access from a
- * Non-secure PL1 or PL0 mode:
- *
- * For any Access flag fault or Translation fault, and also for any
- * Permission fault on the stage 2 translation of a memory access
- * made as part of a translation table walk for a stage 1 translation,
- * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
- * is UNKNOWN.
- */
- if (!(hsr & HSR_DABT_S1PTW) && (hsr & HSR_FSC_TYPE) == FSC_PERM) {
- u64 par, tmp;
-
- par = read_sysreg(PAR);
- write_sysreg(far, ATS1CPR);
- isb();
-
- tmp = read_sysreg(PAR);
- write_sysreg(par, PAR);
-
- if (unlikely(tmp & 1))
- return false; /* Translation failed, back to guest */
-
- hpfar = ((tmp >> 12) & ((1UL << 28) - 1)) << 4;
- } else {
- hpfar = read_sysreg(HPFAR);
- }
-
- vcpu->arch.fault.hxfar = far;
- vcpu->arch.fault.hpfar = hpfar;
- return true;
-}
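The PAR-to-HPFAR repacking above is terse; the same bit arithmetic, restated as a hedged standalone helper (par_to_hpfar() is not a kernel function):

/* LPAE PAR holds PA[39:12] in bits [39:12]; HPFAR wants FIPA[39:12] in
 * bits [31:4]. Same conversion as in __populate_fault_info() above.
 */
static unsigned long long par_to_hpfar(unsigned long long par)
{
	return ((par >> 12) & ((1ULL << 28) - 1)) << 4;
}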
-
-int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
- u64 exit_code;
- u32 fpexc;
-
- vcpu = kern_hyp_va(vcpu);
- write_sysreg(vcpu, HTPIDR);
-
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
- guest_ctxt = &vcpu->arch.ctxt;
-
- __sysreg_save_state(host_ctxt);
- __banked_save_state(host_ctxt);
-
- __activate_traps(vcpu, &fpexc);
- __activate_vm(vcpu);
-
- __vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
- __sysreg_restore_state(guest_ctxt);
- __banked_restore_state(guest_ctxt);
-
- /* Jump in the fire! */
-again:
- exit_code = __guest_enter(vcpu, host_ctxt);
- /* And we're baaack! */
-
- if (exit_code == ARM_EXCEPTION_HVC && !__populate_fault_info(vcpu))
- goto again;
-
- fp_enabled = __vfp_enabled();
-
- __banked_save_state(guest_ctxt);
- __sysreg_save_state(guest_ctxt);
- __timer_disable_traps(vcpu);
-
- __vgic_save_state(vcpu);
-
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
-
- __banked_restore_state(host_ctxt);
- __sysreg_restore_state(host_ctxt);
-
- if (fp_enabled) {
- __vfp_save_state(&guest_ctxt->vfp);
- __vfp_restore_state(&host_ctxt->vfp);
- }
-
- write_sysreg(fpexc, VFP_FPEXC);
-
- return exit_code;
-}
-
-static const char * const __hyp_panic_string[] = {
- [ARM_EXCEPTION_RESET] = "\nHYP panic: RST PC:%08x CPSR:%08x",
- [ARM_EXCEPTION_UNDEFINED] = "\nHYP panic: UNDEF PC:%08x CPSR:%08x",
- [ARM_EXCEPTION_SOFTWARE] = "\nHYP panic: SVC PC:%08x CPSR:%08x",
- [ARM_EXCEPTION_PREF_ABORT] = "\nHYP panic: PABRT PC:%08x CPSR:%08x",
- [ARM_EXCEPTION_DATA_ABORT] = "\nHYP panic: DABRT PC:%08x ADDR:%08x",
- [ARM_EXCEPTION_IRQ] = "\nHYP panic: IRQ PC:%08x CPSR:%08x",
- [ARM_EXCEPTION_FIQ] = "\nHYP panic: FIQ PC:%08x CPSR:%08x",
- [ARM_EXCEPTION_HVC] = "\nHYP panic: HVC PC:%08x CPSR:%08x",
-};
-
-void __hyp_text __noreturn __hyp_panic(int cause)
-{
- u32 elr = read_special(ELR_hyp);
- u32 val;
-
- if (cause == ARM_EXCEPTION_DATA_ABORT)
- val = read_sysreg(HDFAR);
- else
- val = read_special(SPSR);
-
- if (read_sysreg(VTTBR)) {
- struct kvm_vcpu *vcpu;
- struct kvm_cpu_context *host_ctxt;
-
- vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR);
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
- __timer_disable_traps(vcpu);
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
- __banked_restore_state(host_ctxt);
- __sysreg_restore_state(host_ctxt);
- }
-
- /* Call panic for real */
- __hyp_do_panic(__hyp_panic_string[cause], elr, val);
-
- unreachable();
-}
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
deleted file mode 100644
index 848f27bbad9d..000000000000
--- a/arch/arm/kvm/hyp/tlb.c
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Original code:
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-
-/**
- * Flush per-VMID TLBs
- *
- * __kvm_tlb_flush_vmid(struct kvm *kvm);
- *
- * We rely on the hardware to broadcast the TLB invalidation to all CPUs
- * inside the inner-shareable domain (which is the case for all v7
- * implementations). If we come across a non-IS SMP implementation, we'll
- * have to use an IPI based mechanism. Until then, we stick to the simple
- * hardware assisted version.
- *
- * As v7 does not support flushing per IPA, just nuke the whole TLB
- * instead, ignoring the ipa value.
- */
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
-{
- dsb(ishst);
-
- /* Switch to requested VMID */
- kvm = kern_hyp_va(kvm);
- write_sysreg(kvm_get_vttbr(kvm), VTTBR);
- isb();
-
- write_sysreg(0, TLBIALLIS);
- dsb(ish);
- isb();
-
- write_sysreg(0, VTTBR);
-}
-
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
-{
- __kvm_tlb_flush_vmid(kvm);
-}
-
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
-
- /* Switch to requested VMID */
- write_sysreg(kvm_get_vttbr(kvm), VTTBR);
- isb();
-
- write_sysreg(0, TLBIALL);
- dsb(nsh);
- isb();
-
- write_sysreg(0, VTTBR);
-}
-
-void __hyp_text __kvm_flush_vm_context(void)
-{
- write_sysreg(0, TLBIALLNSNHIS);
- write_sysreg(0, ICIALLUIS);
- dsb(ish);
-}
diff --git a/arch/arm/kvm/hyp/vfp.S b/arch/arm/kvm/hyp/vfp.S
deleted file mode 100644
index 675a52348d8d..000000000000
--- a/arch/arm/kvm/hyp/vfp.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/vfpmacros.h>
-
- .text
- .pushsection .hyp.text, "ax"
-
-/* void __vfp_save_state(struct vfp_hard_struct *vfp); */
-ENTRY(__vfp_save_state)
- push {r4, r5}
- VFPFMRX r1, FPEXC
-
- @ Make sure VFP is *really* enabled so we can touch the registers.
- orr r5, r1, #FPEXC_EN
- tst r5, #FPEXC_EX @ Check for VFP Subarchitecture
- bic r5, r5, #FPEXC_EX @ FPEXC_EX disable
- VFPFMXR FPEXC, r5
- isb
-
- VFPFMRX r2, FPSCR
- beq 1f
-
- @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so
- @ we only need to save them if FPEXC_EX is set.
- VFPFMRX r3, FPINST
- tst r5, #FPEXC_FP2V
- VFPFMRX r4, FPINST2, ne @ vmrsne
-1:
- VFPFSTMIA r0, r5 @ Save VFP registers
- stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2
- pop {r4, r5}
- bx lr
-ENDPROC(__vfp_save_state)
-
-/* void __vfp_restore_state(struct vfp_hard_struct *vfp);
- * Assume FPEXC_EN is on and FPEXC_EX is off */
-ENTRY(__vfp_restore_state)
- VFPFLDMIA r0, r1 @ Load VFP registers
- ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2
-
- VFPFMXR FPSCR, r1
- tst r0, #FPEXC_EX @ Check for VFP Subarchitecture
- beq 1f
- VFPFMXR FPINST, r2
- tst r0, #FPEXC_FP2V
- VFPFMXR FPINST2, r3, ne
-1:
- VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN)
- bx lr
-ENDPROC(__vfp_restore_state)
-
- .popsection
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
deleted file mode 100644
index 33e34b6d24b2..000000000000
--- a/arch/arm/kvm/init.S
+++ /dev/null
@@ -1,157 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/unified.h>
-#include <asm/asm-offsets.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-#include <asm/virt.h>
-
-/********************************************************************
- * Hypervisor initialization
- * - should be called with:
- * r0 = top of Hyp stack (kernel VA)
- * r1 = pointer to hyp vectors
- * r2,r3 = Hypervisor pgd pointer
- *
- * The init scenario is:
- * - We jump in HYP with 3 parameters: runtime HYP pgd, runtime stack,
- * runtime vectors
- * - Invalidate TLBs
- * - Set stack and vectors
- * - Setup the page tables
- * - Enable the MMU
- * - Profit! (or eret, if you only care about the code).
- *
- * Another possibility is to get a HYP stub hypercall.
- * We discriminate between the two by checking if r0 contains a value
- * that is less than HVC_STUB_HCALL_NR.
- */
-
- .text
- .pushsection .hyp.idmap.text,"ax"
- .align 5
-__kvm_hyp_init:
- .globl __kvm_hyp_init
-
- @ Hyp-mode exception vector
- W(b) .
- W(b) .
- W(b) .
- W(b) .
- W(b) .
- W(b) __do_hyp_init
- W(b) .
- W(b) .
-
-__do_hyp_init:
- @ Check for a stub hypercall
- cmp r0, #HVC_STUB_HCALL_NR
- blo __kvm_handle_stub_hvc
-
- @ Set stack pointer
- mov sp, r0
-
- @ Set HVBAR to point to the HYP vectors
- mcr p15, 4, r1, c12, c0, 0 @ HVBAR
-
- @ Set the HTTBR to point to the hypervisor PGD pointer passed
- mcrr p15, 4, rr_lo_hi(r2, r3), c2
-
- @ Set the HTCR and VTCR to the same shareability and cacheability
- @ settings as the non-secure TTBCR and with T0SZ == 0.
- mrc p15, 4, r0, c2, c0, 2 @ HTCR
- ldr r2, =HTCR_MASK
- bic r0, r0, r2
- mrc p15, 0, r1, c2, c0, 2 @ TTBCR
- and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
- orr r0, r0, r1
- mcr p15, 4, r0, c2, c0, 2 @ HTCR
-
- @ Use the same memory attributes for hyp. accesses as the kernel
- @ (copy MAIRx to HMAIRx).
- mrc p15, 0, r0, c10, c2, 0
- mcr p15, 4, r0, c10, c2, 0
- mrc p15, 0, r0, c10, c2, 1
- mcr p15, 4, r0, c10, c2, 1
-
- @ Invalidate the stale TLBs from Bootloader
- mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
- dsb ish
-
- @ Set the HSCTLR to:
- @ - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel)
- @ - Endianness: Kernel config
- @ - Fast Interrupt Features: Kernel config
- @ - Write permission implies XN: disabled
- @ - Instruction cache: enabled
- @ - Data/Unified cache: enabled
- @ - MMU: enabled (this code must be run from an identity mapping)
- mrc p15, 4, r0, c1, c0, 0 @ HSCTLR
- ldr r2, =HSCTLR_MASK
- bic r0, r0, r2
- mrc p15, 0, r1, c1, c0, 0 @ SCTLR
- ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
- and r1, r1, r2
- ARM( ldr r2, =(HSCTLR_M) )
- THUMB( ldr r2, =(HSCTLR_M | HSCTLR_TE) )
- orr r1, r1, r2
- orr r0, r0, r1
- mcr p15, 4, r0, c1, c0, 0 @ HSCTLR
- isb
-
- eret
-
-ENTRY(__kvm_handle_stub_hvc)
- cmp r0, #HVC_SOFT_RESTART
- bne 1f
-
- /* The target is expected in r1 */
- msr ELR_hyp, r1
- mrs r0, cpsr
- bic r0, r0, #MODE_MASK
- orr r0, r0, #HYP_MODE
-THUMB( orr r0, r0, #PSR_T_BIT )
- msr spsr_cxsf, r0
- b reset
-
-1: cmp r0, #HVC_RESET_VECTORS
- bne 1f
-
-reset:
- /* We're now in idmap, disable MMU */
- mrc p15, 4, r1, c1, c0, 0 @ HSCTLR
- ldr r0, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I)
- bic r1, r1, r0
- mcr p15, 4, r1, c1, c0, 0 @ HSCTLR
-
- /*
- * Install stub vectors, using ardb's VA->PA trick.
- */
-0: adr r0, 0b @ PA(0)
- movw r1, #:lower16:__hyp_stub_vectors - 0b @ VA(stub) - VA(0)
- movt r1, #:upper16:__hyp_stub_vectors - 0b
- add r1, r1, r0 @ PA(stub)
- mcr p15, 4, r1, c12, c0, 0 @ HVBAR
- b exit
-
-1: ldr r0, =HVC_STUB_ERR
- eret
-
-exit:
- mov r0, #0
- eret
-ENDPROC(__kvm_handle_stub_hvc)
-
- .ltorg
-
- .globl __kvm_hyp_init_end
-__kvm_hyp_init_end:
-
- .popsection
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
deleted file mode 100644
index 064f4f118ca7..000000000000
--- a/arch/arm/kvm/interrupts.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/linkage.h>
-
- .text
-
-/********************************************************************
- * Call function in Hyp mode
- *
- *
- * unsigned long kvm_call_hyp(void *hypfn, ...);
- *
- * This is not really a variadic function in the classic C-way and care must
- * be taken when calling this to ensure parameters are passed in registers
- * only, since the stack will change between the caller and the callee.
- *
- * Call the function with the first argument containing a pointer to the
- * function you wish to call in Hyp mode, and subsequent arguments will be
- * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the
- * function pointer can be passed). The function being called must be mapped
- * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
- * passed in r0 (strictly 32bit).
- *
- * The calling convention follows the standard AAPCS:
- * r0 - r3: caller save
- * r12: caller save
- * rest: callee save
- */
-ENTRY(__kvm_call_hyp)
- hvc #0
- bx lr
-ENDPROC(__kvm_call_hyp)
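As a usage illustration of the calling convention documented above (a hedged fragment, not code from this patch), callers pass the HYP function pointer first and at most three register arguments:

	/* One argument: flush the TLBs for a VM's VMID (see tlb.c above). */
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);

	/* No arguments. */
	kvm_call_hyp(__kvm_flush_vm_context);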
diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h
deleted file mode 100644
index 0d257de42c10..000000000000
--- a/arch/arm/kvm/irq.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * irq.h: in kernel interrupt controller related definitions
- * Copyright (c) 2016 Red Hat, Inc.
- *
- * This header is included by irqchip.c. However, on ARM, interrupt
- * controller declarations are located in include/kvm/arm_vgic.h since
- * they are mostly shared between arm and arm64.
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include <kvm/arm_vgic.h>
-
-#endif
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
deleted file mode 100644
index eb4174f6ebbd..000000000000
--- a/arch/arm/kvm/reset.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-#include <linux/compiler.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-
-#include <asm/unified.h>
-#include <asm/ptrace.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_coproc.h>
-#include <asm/kvm_emulate.h>
-
-#include <kvm/arm_arch_timer.h>
-
-/******************************************************************************
- * Cortex-A15 and Cortex-A7 Reset Values
- */
-
-static struct kvm_regs cortexa_regs_reset = {
- .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
-};
-
-
-/*******************************************************************************
- * Exported reset function
- */
-
-/**
- * kvm_reset_vcpu - sets core registers and cp15 registers to reset value
- * @vcpu: The VCPU pointer
- *
- * This function finds the right table above and sets the registers on the
- * virtual CPU struct to their architecturally defined reset values.
- */
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
-{
- struct kvm_regs *reset_regs;
-
- switch (vcpu->arch.target) {
- case KVM_ARM_TARGET_CORTEX_A7:
- case KVM_ARM_TARGET_CORTEX_A15:
- reset_regs = &cortexa_regs_reset;
- vcpu->arch.midr = read_cpuid_id();
- break;
- default:
- return -ENODEV;
- }
-
- /* Reset core registers */
- memcpy(&vcpu->arch.ctxt.gp_regs, reset_regs, sizeof(vcpu->arch.ctxt.gp_regs));
-
- /* Reset CP15 registers */
- kvm_reset_coprocs(vcpu);
-
- /*
- * Additional reset state handling that PSCI may have imposed on us.
- * Must be done after all the sys_reg reset.
- */
- if (READ_ONCE(vcpu->arch.reset_state.reset)) {
- unsigned long target_pc = vcpu->arch.reset_state.pc;
-
- /* Gracefully handle Thumb2 entry point */
- if (target_pc & 1) {
- target_pc &= ~1UL;
- vcpu_set_thumb(vcpu);
- }
-
- /* Propagate caller endianness */
- if (vcpu->arch.reset_state.be)
- kvm_vcpu_set_be(vcpu);
-
- *vcpu_pc(vcpu) = target_pc;
- vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);
-
- vcpu->arch.reset_state.reset = false;
- }
-
- /* Reset arch_timer context */
- return kvm_timer_vcpu_reset(vcpu);
-}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
deleted file mode 100644
index 69a9d62a0ac6..000000000000
--- a/arch/arm/kvm/trace.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(_TRACE_ARM_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_ARM_KVM_H
-
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kvm
-
-/* Architecturally implementation defined CP15 register access */
-TRACE_EVENT(kvm_emulate_cp15_imp,
- TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
- unsigned long CRm, unsigned long Op2, bool is_write),
- TP_ARGS(Op1, Rt1, CRn, CRm, Op2, is_write),
-
- TP_STRUCT__entry(
- __field( unsigned int, Op1 )
- __field( unsigned int, Rt1 )
- __field( unsigned int, CRn )
- __field( unsigned int, CRm )
- __field( unsigned int, Op2 )
- __field( bool, is_write )
- ),
-
- TP_fast_assign(
- __entry->is_write = is_write;
- __entry->Op1 = Op1;
- __entry->Rt1 = Rt1;
- __entry->CRn = CRn;
- __entry->CRm = CRm;
- __entry->Op2 = Op2;
- ),
-
- TP_printk("Implementation defined CP15: %s\tp15, %u, r%u, c%u, c%u, %u",
- (__entry->is_write) ? "mcr" : "mrc",
- __entry->Op1, __entry->Rt1, __entry->CRn,
- __entry->CRm, __entry->Op2)
-);
-
-TRACE_EVENT(kvm_wfx,
- TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
- TP_ARGS(vcpu_pc, is_wfe),
-
- TP_STRUCT__entry(
- __field( unsigned long, vcpu_pc )
- __field( bool, is_wfe )
- ),
-
- TP_fast_assign(
- __entry->vcpu_pc = vcpu_pc;
- __entry->is_wfe = is_wfe;
- ),
-
- TP_printk("guest executed wf%c at: 0x%08lx",
- __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_hvc,
- TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
- TP_ARGS(vcpu_pc, r0, imm),
-
- TP_STRUCT__entry(
- __field( unsigned long, vcpu_pc )
- __field( unsigned long, r0 )
- __field( unsigned long, imm )
- ),
-
- TP_fast_assign(
- __entry->vcpu_pc = vcpu_pc;
- __entry->r0 = r0;
- __entry->imm = imm;
- ),
-
- TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx",
- __entry->vcpu_pc, __entry->r0, __entry->imm)
-);
-
-#endif /* _TRACE_ARM_KVM_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE trace
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
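Each TRACE_EVENT above expands into a trace_<name>() helper; a hedged illustration of how call sites elsewhere in the 32-bit KVM code would fire these events (argument expressions shown for context only):

	trace_kvm_wfx(*vcpu_pc(vcpu), is_wfe);
	trace_kvm_hvc(*vcpu_pc(vcpu), r0, imm);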
diff --git a/arch/arm/kvm/vgic-v3-coproc.c b/arch/arm/kvm/vgic-v3-coproc.c
deleted file mode 100644
index ed3b2e4759ce..000000000000
--- a/arch/arm/kvm/vgic-v3-coproc.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VGIC system registers handling functions for AArch32 mode
- */
-
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include "vgic.h"
-
-int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
- u64 *reg)
-{
- /*
- * TODO: Implement for AArch32
- */
- return -ENXIO;
-}
-
-int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
- u64 *reg)
-{
- /*
- * TODO: Implement for AArch32
- */
- return -ENXIO;
-}
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index cbbe03e96de8..76838255b5fa 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -21,7 +21,7 @@ menuconfig ARCH_EXYNOS
select EXYNOS_SROM
select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS
select GPIOLIB
- select HAVE_ARM_ARCH_TIMER if ARCH_EXYNOS5 && VIRTUALIZATION
+ select HAVE_ARM_ARCH_TIMER if ARCH_EXYNOS5
select HAVE_ARM_SCU if SMP
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C2410_WATCHDOG if WATCHDOG
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 5d0d0f86e790..69a337df619f 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -63,9 +63,6 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
-pgprot_t pgprot_hyp_device;
-pgprot_t pgprot_s2;
-pgprot_t pgprot_s2_device;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
@@ -75,15 +72,8 @@ struct cachepolicy {
unsigned int cr_mask;
pmdval_t pmd;
pteval_t pte;
- pteval_t pte_s2;
};
-#ifdef CONFIG_ARM_LPAE
-#define s2_policy(policy) policy
-#else
-#define s2_policy(policy) 0
-#endif
-
unsigned long kimage_voffset __ro_after_init;
static struct cachepolicy cache_policies[] __initdata = {
@@ -92,31 +82,26 @@ static struct cachepolicy cache_policies[] __initdata = {
.cr_mask = CR_W|CR_C,
.pmd = PMD_SECT_UNCACHED,
.pte = L_PTE_MT_UNCACHED,
- .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED),
}, {
.policy = "buffered",
.cr_mask = CR_C,
.pmd = PMD_SECT_BUFFERED,
.pte = L_PTE_MT_BUFFERABLE,
- .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED),
}, {
.policy = "writethrough",
.cr_mask = 0,
.pmd = PMD_SECT_WT,
.pte = L_PTE_MT_WRITETHROUGH,
- .pte_s2 = s2_policy(L_PTE_S2_MT_WRITETHROUGH),
}, {
.policy = "writeback",
.cr_mask = 0,
.pmd = PMD_SECT_WB,
.pte = L_PTE_MT_WRITEBACK,
- .pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK),
}, {
.policy = "writealloc",
.cr_mask = 0,
.pmd = PMD_SECT_WBWA,
.pte = L_PTE_MT_WRITEALLOC,
- .pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK),
}
};
@@ -246,9 +231,6 @@ static struct mem_type mem_types[] __ro_after_init = {
[MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
L_PTE_SHARED,
- .prot_pte_s2 = s2_policy(PROT_PTE_S2_DEVICE) |
- s2_policy(L_PTE_S2_MT_DEV_SHARED) |
- L_PTE_SHARED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
.domain = DOMAIN_IO,
@@ -434,7 +416,6 @@ static void __init build_mem_type_table(void)
struct cachepolicy *cp;
unsigned int cr = get_cr();
pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
- pteval_t hyp_device_pgprot, s2_pgprot, s2_device_pgprot;
int cpu_arch = cpu_architecture();
int i;
@@ -558,9 +539,6 @@ static void __init build_mem_type_table(void)
*/
cp = &cache_policies[cachepolicy];
vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
- s2_pgprot = cp->pte_s2;
- hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte;
- s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;
#ifndef CONFIG_ARM_LPAE
/*
@@ -604,7 +582,6 @@ static void __init build_mem_type_table(void)
user_pgprot |= L_PTE_SHARED;
kern_pgprot |= L_PTE_SHARED;
vecs_pgprot |= L_PTE_SHARED;
- s2_pgprot |= L_PTE_SHARED;
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
@@ -666,9 +643,6 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | kern_pgprot);
- pgprot_s2 = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | s2_pgprot);
- pgprot_s2_device = __pgprot(s2_device_pgprot);
- pgprot_hyp_device = __pgprot(hyp_device_pgprot);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f658dda12364..a30b4eec7cb4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -89,7 +89,8 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 &= ~HCR_TWE;
- if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count))
+ if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
+ vcpu->kvm->arch.vgic.nassgireq)
vcpu->arch.hcr_el2 &= ~HCR_TWI;
else
vcpu->arch.hcr_el2 |= HCR_TWI;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 57fd46acd058..32c8a675e5a4 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
+#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 525010504f9d..e329a36b2bee 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -11,7 +11,6 @@
#include <linux/kvm_host.h>
#include <asm/fpsimd.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2bd92301d32f..23ebe51410f0 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -25,7 +25,6 @@
#include <asm/kvm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
-#include <asm/kvm_host.h>
#include <asm/sigcontext.h>
#include "trace.h"
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index eaa05c3c7235..8a1e81a400e0 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -17,7 +17,6 @@
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 090f46d3add1..51db934702b6 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -22,7 +22,6 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/perf_event.h>
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 2b4a3e2d1b89..9cb6b4c8355a 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -12,7 +12,6 @@
#include <asm/cputype.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/sysreg.h>
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 41204a49cf95..2c343c346b79 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -1133,7 +1133,7 @@ extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
- struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+ struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 71244bf87c3a..8f05dd0a0f4e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -118,12 +118,12 @@ void kvm_arch_hardware_disable(void)
kvm_mips_callbacks->hardware_disable();
}
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return 0;
}
@@ -188,12 +188,6 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl,
return -ENOIOCTLCMD;
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
/* Flush whole GPA */
@@ -230,7 +224,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -984,69 +978,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
return r;
}
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * Steps 1-4 below provide general overview of dirty page logging. See
- * kvm_get_dirty_log_protect() function description for additional details.
- *
- * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
- * always flush the TLB (step 4) even if previous step failed and the dirty
- * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
- * does not preclude user space subsequent dirty log read. Flushing TLB ensures
- * writes will be marked dirty for next log read.
- *
- * 1. Take a snapshot of the bit and clear it if needed.
- * 2. Write protect the corresponding page.
- * 3. Copy the snapshot to the userspace.
- * 4. Flush TLB's if needed.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
- bool flush = false;
- int r;
-
- mutex_lock(&kvm->slots_lock);
-
- r = kvm_get_dirty_log_protect(kvm, log, &flush);
-
- if (flush) {
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
- /* Let implementation handle TLB/GVA invalidation */
- kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
- }
-
- mutex_unlock(&kvm->slots_lock);
- return r;
}
-int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
{
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
- bool flush = false;
- int r;
-
- mutex_lock(&kvm->slots_lock);
-
- r = kvm_clear_dirty_log_protect(kvm, log, &flush);
-
- if (flush) {
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
-
- /* Let implementation handle TLB/GVA invalidation */
- kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
- }
-
- mutex_unlock(&kvm->slots_lock);
- return r;
+ /* Let implementation handle TLB/GVA invalidation */
+ kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
}
long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 635fb154b33f..a3633560493b 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -150,4 +150,7 @@
#define KVM_INST_FETCH_FAILED -1
+/* Extract PO and XOP opcode fields */
+#define PO_XOP_OPCODE_MASK 0xfc0007fe
+
#endif /* __POWERPC_KVM_ASM_H__ */
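PO_XOP_OPCODE_MASK keeps the primary opcode (bits 0-5) and the extended opcode (bits 21-30) while clearing bit 31, so two encodings that differ only in that bit compare equal. A small standalone check (illustrative, with made-up instruction words):

#include <stdint.h>
#include <stdio.h>

#define PO_XOP_OPCODE_MASK 0xfc0007fe

int main(void)
{
	/* Hypothetical instruction words differing only in bit 31
	 * (the least-significant bit in PowerPC bit numbering).
	 */
	uint32_t form_a = 0x7c0005dd;
	uint32_t form_b = 0x7c0005dc;

	/* Both hit the same case label once masked; prints 1. */
	printf("%d\n", (form_a & PO_XOP_OPCODE_MASK) ==
		       (form_b & PO_XOP_OPCODE_MASK));
	return 0;
}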
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index 5a9834e0e2d1..9cb7d8be2366 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -5,6 +5,7 @@
#ifdef CONFIG_PPC_UV
int kvmppc_uvmem_init(void);
void kvmppc_uvmem_free(void);
+bool kvmppc_uvmem_available(void);
int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot);
void kvmppc_uvmem_slot_free(struct kvm *kvm,
const struct kvm_memory_slot *slot);
@@ -30,6 +31,11 @@ static inline int kvmppc_uvmem_init(void)
static inline void kvmppc_uvmem_free(void) { }
+static inline bool kvmppc_uvmem_available(void)
+{
+ return false;
+}
+
static inline int
kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6e8b8ffd06ad..f99b4333dfba 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -303,6 +303,7 @@ struct kvm_arch {
u8 radix;
u8 fwnmi_enabled;
u8 secure_guest;
+ u8 svm_enabled;
bool threads_indep;
bool nested_enable;
pgd_t *pgtable;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bc2494e5710a..94f5a32acaf1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -107,8 +107,6 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
unsigned int gtlb_idx);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
-extern int kvmppc_mmu_init(struct kvm_vcpu *vcpu);
extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
@@ -200,14 +198,11 @@ extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern void kvmppc_core_free_memslot(struct kvm *kvm,
- struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- unsigned long npages);
+ struct kvm_memory_slot *slot);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
@@ -280,7 +275,8 @@ struct kvmppc_ops {
void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
int (*prepare_memory_region)(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change);
void (*commit_memory_region)(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
@@ -291,11 +287,7 @@ struct kvmppc_ops {
int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
- void (*mmu_destroy)(struct kvm_vcpu *vcpu);
- void (*free_memslot)(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont);
- int (*create_memslot)(struct kvm_memory_slot *slot,
- unsigned long npages);
+ void (*free_memslot)(struct kvm_memory_slot *slot);
int (*init_vm)(struct kvm *kvm);
void (*destroy_vm)(struct kvm *kvm);
int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
@@ -321,6 +313,7 @@ struct kvmppc_ops {
int size);
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
int size);
+ int (*enable_svm)(struct kvm *kvm);
int (*svm_off)(struct kvm *kvm);
};
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index d07a8e12fa15..5690a1f9b976 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -799,21 +799,19 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
}
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
- return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
+
}
-void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
- kvm->arch.kvm_ops->free_memslot(free, dont);
+ return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
}
-int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- return kvm->arch.kvm_ops->create_memslot(slot, npages);
+ kvm->arch.kvm_ops->free_memslot(slot);
}
void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
@@ -823,9 +821,11 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
- return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
+ return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem,
+ change);
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
@@ -858,11 +858,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
return 0;
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
-}
-
int kvmppc_core_init_vm(struct kvm *kvm)
{
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 3a4613985949..eae259ee49af 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -16,6 +16,7 @@ extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
+extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index f21e73492ce3..3fbd570f9c1e 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -234,7 +234,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
case 2:
case 6:
pte->may_write = true;
- /* fall through */
+ fallthrough;
case 3:
case 5:
case 7:
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index d4cb3bcf41b6..e8e7b2c530d1 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -356,7 +356,7 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
/* From mm/mmu_context_hash32.c */
#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
-int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 599133256a95..26b8b27a3755 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -311,7 +311,7 @@ do_second:
case 2:
case 6:
gpte->may_write = true;
- /* fall through */
+ fallthrough;
case 3:
case 5:
case 7:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 044dd49eeb9d..e452158a18d7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -384,7 +384,7 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
__destroy_context(to_book3s(vcpu)->context_id[0]);
}
-int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 6c372f5c61b6..3aecec890d6f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
- unsigned long gpa, gfn, hva, pfn;
+ unsigned long gpa, gfn, hva, pfn, hpa;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
- struct page *page, *pages[1];
- long index, ret, npages;
+ struct page *page;
+ long index, ret;
bool is_ci;
- unsigned int writing, write_ok;
- struct vm_area_struct *vma;
+ bool writing, write_ok;
+ unsigned int shift;
unsigned long rcbits;
long mmio_update;
- struct mm_struct *mm;
+ pte_t pte, *ptep;
if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
@@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
smp_rmb();
ret = -EFAULT;
- is_ci = false;
- pfn = 0;
page = NULL;
- mm = kvm->mm;
- pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
- if (npages < 1) {
- /* Check if it's an I/O mapping */
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, hva);
- if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
- (vma->vm_flags & VM_PFNMAP)) {
- pfn = vma->vm_pgoff +
- ((hva - vma->vm_start) >> PAGE_SHIFT);
- pte_size = psize;
- is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
- write_ok = vma->vm_flags & VM_WRITE;
- }
- up_read(&mm->mmap_sem);
- if (!pfn)
- goto out_put;
+
+ /*
+ * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+ * do it with !atomic && !async, which is how we call it.
+ * We always ask for write permission since the common case
+ * is that the page is writable.
+ */
+ if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ write_ok = true;
} else {
- page = pages[0];
- pfn = page_to_pfn(page);
- if (PageHuge(page)) {
- page = compound_head(page);
- pte_size <<= compound_order(page);
- }
- /* if the guest wants write access, see if that is OK */
- if (!writing && hpte_is_writable(r)) {
- pte_t *ptep, pte;
- unsigned long flags;
- /*
- * We need to protect against page table destruction
- * hugepage split and collapse.
- */
- local_irq_save(flags);
- ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
- if (ptep) {
- pte = kvmppc_read_update_linux_pte(ptep, 1);
- if (__pte_write(pte))
- write_ok = 1;
- }
- local_irq_restore(flags);
+ /* Call KVM generic code to do the slow-path check */
+ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+ writing, &write_ok);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+ page = NULL;
+ if (pfn_valid(pfn)) {
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ page = NULL;
}
}
+ /*
+ * Read the PTE from the process' radix tree and use that
+ * so we get the shift and attribute bits.
+ */
+ local_irq_disable();
+ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!ptep) {
+ local_irq_enable();
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
+ }
+ pte = *ptep;
+ local_irq_enable();
+ hpa = pte_pfn(pte) << PAGE_SHIFT;
+ pte_size = PAGE_SIZE;
+ if (shift)
+ pte_size = 1ul << shift;
+ is_ci = pte_ci(pte);
+
if (psize > pte_size)
goto out_put;
+ if (pte_size > psize)
+ hpa |= hva & (pte_size - psize);
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_ci)) {
@@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/*
- * Set the HPTE to point to pfn.
- * Since the pfn is at PAGE_SIZE granularity, make sure we
+ * Set the HPTE to point to hpa.
+ * Since the hpa is at PAGE_SIZE granularity, make sure we
* don't mask out lower-order bits if psize < PAGE_SIZE.
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
- r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
- ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
@@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
asm volatile("ptesync" : : : "memory");
preempt_enable();
if (page && hpte_is_writable(r))
- SetPageDirty(page);
+ set_page_dirty_lock(page);
out_put:
trace_kvm_page_fault_exit(vcpu, hpte, ret);
- if (page) {
- /*
- * We drop pages[0] here, not page because page might
- * have been set to the head page of a compound, but
- * we have to drop the reference on the correct tail
- * page to match the get inside gup()
- */
- put_page(pages[0]);
- }
+ if (page)
+ put_page(page);
return ret;
out_unlock:
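The reworked fault path above resolves the host pfn with a lockless fast path and a generic slow path. A hedged sketch of that pattern (resolve_pfn() is a hypothetical wrapper; the two calls are the ones used in the hunk):

static kvm_pfn_t resolve_pfn(struct kvm_memory_slot *memslot, gfn_t gfn,
			     unsigned long hva, bool writing, bool *write_ok)
{
	struct page *page;

	/* Lockless fast path, always asking for write permission. */
	if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
		*write_ok = true;
		return page_to_pfn(page);
	}

	/* Fall back to the generic KVM slow path. */
	return __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
				    writing, write_ok);
}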
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 803940d79b73..134fbc1f029f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -425,7 +425,7 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
unsigned int lpid)
{
if (full) {
- memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
+ memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE);
} else {
pte_t *p = pte;
unsigned long it;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index ee6c103bb7d5..50555ad1db93 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -27,7 +27,6 @@
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index ab6eeb8e753e..6fcaf1fa8e02 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -24,7 +24,6 @@
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2cefd071b848..fa6e4fc7d0e4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -72,7 +72,6 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/hw_breakpoint.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_book3s_uvmem.h>
#include <asm/ultravisor.h>
@@ -1074,25 +1073,35 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_SVM_PAGE_IN:
- ret = kvmppc_h_svm_page_in(vcpu->kvm,
- kvmppc_get_gpr(vcpu, 4),
- kvmppc_get_gpr(vcpu, 5),
- kvmppc_get_gpr(vcpu, 6));
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
break;
case H_SVM_PAGE_OUT:
- ret = kvmppc_h_svm_page_out(vcpu->kvm,
- kvmppc_get_gpr(vcpu, 4),
- kvmppc_get_gpr(vcpu, 5),
- kvmppc_get_gpr(vcpu, 6));
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
break;
case H_SVM_INIT_START:
- ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_init_start(vcpu->kvm);
break;
case H_SVM_INIT_DONE:
- ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_init_done(vcpu->kvm);
break;
case H_SVM_INIT_ABORT:
- ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_init_abort(vcpu->kvm);
break;
default:
@@ -3616,6 +3625,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
kvmppc_nested_cede(vcpu);
+ kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
@@ -4400,7 +4410,7 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
slots = kvm_memslots(kvm);
memslot = id_to_memslot(slots, log->slot);
r = -ENOENT;
- if (!memslot->dirty_bitmap)
+ if (!memslot || !memslot->dirty_bitmap)
goto out;
/*
@@ -4447,29 +4457,26 @@ out:
return r;
}
-static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot)
{
- if (!dont || free->arch.rmap != dont->arch.rmap) {
- vfree(free->arch.rmap);
- free->arch.rmap = NULL;
- }
+ vfree(slot->arch.rmap);
+ slot->arch.rmap = NULL;
}
-static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
- unsigned long npages)
+static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
- slot->arch.rmap = vzalloc(array_size(npages, sizeof(*slot->arch.rmap)));
- if (!slot->arch.rmap)
- return -ENOMEM;
+ unsigned long npages = mem->memory_size >> PAGE_SHIFT;
- return 0;
-}
+ if (change == KVM_MR_CREATE) {
+ slot->arch.rmap = vzalloc(array_size(npages,
+ sizeof(*slot->arch.rmap)));
+ if (!slot->arch.rmap)
+ return -ENOMEM;
+ }
-static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
-{
return 0;
}
@@ -4558,11 +4565,6 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
}
}
-static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
-{
- return;
-}
-
void kvmppc_setup_partition_table(struct kvm *kvm)
{
unsigned long dw0, dw1;
@@ -5427,6 +5429,21 @@ static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
}
/*
+ * Enable a guest to become a secure VM, or test whether
+ * that could be enabled.
+ * Called when the KVM_CAP_PPC_SECURE_GUEST capability is
+ * tested (kvm == NULL) or enabled (kvm != NULL).
+ */
+static int kvmhv_enable_svm(struct kvm *kvm)
+{
+ if (!kvmppc_uvmem_available())
+ return -EINVAL;
+ if (kvm)
+ kvm->arch.svm_enabled = 1;
+ return 0;
+}
+
+/*
* IOCTL handler to turn off secure mode of guest
*
* - Release all device pages
@@ -5526,9 +5543,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.age_hva = kvm_age_hva_hv,
.test_age_hva = kvm_test_age_hva_hv,
.set_spte_hva = kvm_set_spte_hva_hv,
- .mmu_destroy = kvmppc_mmu_destroy_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
- .create_memslot = kvmppc_core_create_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,
.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
@@ -5548,6 +5563,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.enable_nested = kvmhv_enable_nested,
.load_from_eaddr = kvmhv_load_from_eaddr,
.store_to_eaddr = kvmhv_store_to_eaddr,
+ .enable_svm = kvmhv_enable_svm,
.svm_off = kvmhv_svm_off,
};
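With .enable_svm wired into kvm_ops_hv, userspace enables the feature through the generic capability mechanism. A hedged userspace sketch (error handling trimmed; assumes a VM fd obtained via KVM_CREATE_VM):

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_PPC_SECURE_GUEST,
	};

	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
		perror("KVM_ENABLE_CAP");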
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index 0db937497169..cc90b8b82329 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -3,6 +3,8 @@
* Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
@@ -44,7 +46,18 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
u64 newmsr, bescr;
int ra, rs;
- switch (instr & 0xfc0007ff) {
+ /*
+ * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+ * in these instructions, so masking bit 31 out doesn't change these
+ * instructions. For the treclaim., tsr., and trechkpt. instructions, if bit
+ * 31 = 0 they are invalid forms per the ISA; however, the P9 UM, in section
+ * 4.6.10 Book II Invalid Forms, states specifically that ignoring bit
+ * 31 is an acceptable way to handle these invalid forms that have
+ * bit 31 = 0. Moreover, for emulation purposes both forms (w/ and wo/
+ * bit 31 set) can generate a softpatch interrupt. Hence both forms
+ * are handled below for these instructions so they behave the same way.
+ */
+ switch (instr & PO_XOP_OPCODE_MASK) {
case PPC_INST_RFID:
/* XXX do we need to check for PR=0 here? */
newmsr = vcpu->arch.shregs.srr1;
@@ -105,7 +118,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr = newmsr;
return RESUME_GUEST;
- case PPC_INST_TSR:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
/* check for PR=1 and arch 2.06 bit set in PCR */
if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
/* generate an illegal instruction interrupt */
@@ -140,7 +154,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr = msr;
return RESUME_GUEST;
- case PPC_INST_TRECLAIM:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
/* generate an illegal instruction interrupt */
@@ -176,7 +191,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST;
- case PPC_INST_TRECHKPT:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TRECHKPT & PO_XOP_OPCODE_MASK):
/* XXX do we need to check for PR=0 here? */
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
@@ -208,6 +224,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
}
/* What should we do here? We didn't recognize the instruction */
- WARN_ON_ONCE(1);
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ pr_warn_ratelimited("Unrecognized TM-related instruction %#x for emulation", instr);
+
return RESUME_GUEST;
}
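
The masking change above is easier to see with concrete numbers. The sketch below is illustration only and not part of the patch; it assumes PO_XOP_OPCODE_MASK is defined elsewhere in this series as 0xfc0007fe (the old literal with the bit-31 position cleared) and uses a made-up instruction word rather than a real tsr. encoding.

    /* Illustration only: why masking out bit 31 lets one case label catch
     * both the valid and the "invalid form" encodings. */
    #include <stdio.h>

    #define OLD_MASK           0xfc0007ffu  /* previous switch mask */
    #define PO_XOP_OPCODE_MASK 0xfc0007feu  /* assumed value, bit 31 cleared */

    int main(void)
    {
        unsigned int insn    = 0x7c0005dc;  /* made-up word with bit 31 = 0 */
        unsigned int insn_rc = insn | 1;    /* same word with bit 31 = 1 */

        /* Old mask: the two encodings select different case labels. */
        printf("old mask: %#x vs %#x\n", insn & OLD_MASK, insn_rc & OLD_MASK);
        /* New mask: both encodings reduce to the same value. */
        printf("new mask: %#x vs %#x\n", insn & PO_XOP_OPCODE_MASK,
               insn_rc & PO_XOP_OPCODE_MASK);
        return 0;
    }
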
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
index 217246279dfa..fad931f224ef 100644
--- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -23,7 +23,18 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
u64 newmsr, msr, bescr;
int rs;
- switch (instr & 0xfc0007ff) {
+ /*
+ * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+ * in these instructions, so masking bit 31 out doesn't change these
+ * instructions. For the tsr. instruction, bit 31 = 0 is an invalid form
+ * per the ISA; however, the P9 UM, section 4.6.10 "Book II Invalid
+ * Forms", specifically states that ignoring bit 31 is an acceptable way
+ * to handle TM-related invalid forms that have bit 31 = 0. Moreover,
+ * for emulation purposes both forms (with and without bit 31 set) can
+ * generate a softpatch interrupt. Hence both forms are handled below
+ * for tsr. to make them behave the same way.
+ */
+ switch (instr & PO_XOP_OPCODE_MASK) {
case PPC_INST_RFID:
/* XXX do we need to check for PR=0 here? */
newmsr = vcpu->arch.shregs.srr1;
@@ -73,7 +84,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr = newmsr;
return 1;
- case PPC_INST_TSR:
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
/* we know the MSR has the TS field = S (0b01) here */
msr = vcpu->arch.shregs.msr;
/* check for PR=1 and arch 2.06 bit set in PCR */
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index f44f6b27950f..76d05c71fb1f 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -113,6 +113,15 @@ struct kvmppc_uvmem_page_pvt {
bool skip_page_out;
};
+bool kvmppc_uvmem_available(void)
+{
+ /*
+ * If kvmppc_uvmem_bitmap != NULL, then there is an ultravisor
+ * and our data structures have been initialized successfully.
+ */
+ return !!kvmppc_uvmem_bitmap;
+}
+
int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
struct kvmppc_uvmem_slot *p;
@@ -209,6 +218,8 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
int ret = H_SUCCESS;
int srcu_idx;
+ kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;
+
if (!kvmppc_uvmem_bitmap)
return H_UNSUPPORTED;
@@ -216,6 +227,10 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
if (!kvm_is_radix(kvm))
return H_UNSUPPORTED;
+ /* NAK the transition to secure if not enabled */
+ if (!kvm->arch.svm_enabled)
+ return H_AUTHORITY;
+
srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
@@ -233,7 +248,6 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
goto out;
}
}
- kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
out:
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
@@ -809,6 +823,9 @@ out:
void kvmppc_uvmem_free(void)
{
+ if (!kvmppc_uvmem_bitmap)
+ return;
+
memunmap_pages(&kvmppc_uvmem_pgmap);
release_mem_region(kvmppc_uvmem_pgmap.res.start,
resource_size(&kvmppc_uvmem_pgmap.res));
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index db3a87319642..a0f6813f4560 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -740,7 +740,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
(vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
pte.raddr &= ~SPLIT_HACK_MASK;
- /* fall through */
+ fallthrough;
case MSR_IR:
vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -1795,7 +1795,7 @@ static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
- err = kvmppc_mmu_init(vcpu);
+ err = kvmppc_mmu_init_pr(vcpu);
if (err < 0)
goto free_shared_page;
@@ -1885,7 +1885,6 @@ out:
static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
struct kvm_vcpu *vcpu;
ulong ga, ga_end;
@@ -1895,15 +1894,12 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
mutex_lock(&kvm->slots_lock);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
-
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -1928,7 +1924,8 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
return 0;
}
@@ -1942,19 +1939,11 @@ static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
return;
}
-static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *slot)
{
return;
}
-static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
-
#ifdef CONFIG_PPC64
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
struct kvm_ppc_smmu_info *info)
@@ -2098,9 +2087,7 @@ static struct kvmppc_ops kvm_ops_pr = {
.age_hva = kvm_age_hva_pr,
.test_age_hva = kvm_test_age_hva_pr,
.set_spte_hva = kvm_set_spte_hva_pr,
- .mmu_destroy = kvmppc_mmu_destroy_pr,
.free_memslot = kvmppc_core_free_memslot_pr,
- .create_memslot = kvmppc_core_create_memslot_pr,
.init_vm = kvmppc_core_init_vm_pr,
.destroy_vm = kvmppc_core_destroy_vm_pr,
.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7b27604adadf..6c18ea88fd25 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -421,11 +421,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_DATA_STORAGE:
case BOOKE_IRQPRIO_ALIGNMENT:
update_dear = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_PROGRAM:
update_esr = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
@@ -459,7 +459,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
keep_irq = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_EXTERNAL:
case BOOKE_IRQPRIO_DBELL:
allowed = vcpu->arch.shared->msr & MSR_EE;
@@ -1766,25 +1766,24 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return r;
}
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
- return -ENOTSUPP;
+
}
-void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
+ return -ENOTSUPP;
}
-int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- return 0;
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
return 0;
}
@@ -2074,11 +2073,6 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
kvmppc_clear_dbsr();
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
-}
-
int kvmppc_core_init_vm(struct kvm *kvm)
{
return kvm->arch.kvm_ops->init_vm(kvm);
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 9d3169fbce55..65b4d337d337 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -94,7 +94,6 @@ enum int_class {
void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
-extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
@@ -102,7 +101,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong spr_val);
extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong *spr_val);
-extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index f2b4feaff6d2..7e8b69015d20 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -490,7 +490,6 @@ static struct kvmppc_ops kvm_ops_e500 = {
.vcpu_put = kvmppc_core_vcpu_put_e500,
.vcpu_create = kvmppc_core_vcpu_create_e500,
.vcpu_free = kvmppc_core_vcpu_free_e500,
- .mmu_destroy = kvmppc_mmu_destroy_e500,
.init_vm = kvmppc_core_init_vm_e500,
.destroy_vm = kvmppc_core_destroy_vm_e500,
.emulate_op = kvmppc_core_emulate_op_e500,
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 2d910b87e441..e131fbecdcc4 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -533,10 +533,6 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
}
-void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
-{
-}
-
/*****************************************/
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index e6b06cb2b92c..1c189b5aadcc 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -376,7 +376,6 @@ static struct kvmppc_ops kvm_ops_e500mc = {
.vcpu_put = kvmppc_core_vcpu_put_e500mc,
.vcpu_create = kvmppc_core_vcpu_create_e500mc,
.vcpu_free = kvmppc_core_vcpu_free_e500mc,
- .mmu_destroy = kvmppc_mmu_destroy_e500,
.init_vm = kvmppc_core_init_vm_e500mc,
.destroy_vm = kvmppc_core_destroy_vm_e500mc,
.emulate_op = kvmppc_core_emulate_op_e500,
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index fe312c160d97..23e9c2bd9f27 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -32,7 +32,6 @@
#include <linux/uaccess.h>
#include <asm/mpic.h>
#include <asm/kvm_para.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_ppc.h>
#include <kvm/iodev.h>
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 302e9dccdd6d..e15166b0a16d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -32,7 +32,6 @@
#include <asm/plpar_wrappers.h>
#endif
#include <asm/ultravisor.h>
-#include <asm/kvm_host.h>
#include "timing.h"
#include "irq.h"
@@ -416,12 +415,12 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return kvmppc_core_check_processor_compat();
}
@@ -525,7 +524,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
- /* fall through */
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
@@ -671,6 +669,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
(hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
break;
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ case KVM_CAP_PPC_SECURE_GUEST:
+ r = hv_enabled && kvmppc_hv_ops->enable_svm &&
+ !kvmppc_hv_ops->enable_svm(NULL);
+ break;
+#endif
default:
r = 0;
break;
@@ -685,16 +689,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
- kvmppc_core_free_memslot(kvm, free, dont);
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- return kvmppc_core_create_memslot(kvm, slot, npages);
+ kvmppc_core_free_memslot(kvm, slot);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -702,12 +699,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
+ return kvmppc_core_prepare_memory_region(kvm, memslot, mem, change);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -2176,6 +2173,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = kvm->arch.kvm_ops->enable_nested(kvm);
break;
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ case KVM_CAP_PPC_SECURE_GUEST:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_svm)
+ break;
+ r = kvm->arch.kvm_ops->enable_svm(kvm);
+ break;
+#endif
default:
r = -EINVAL;
break;
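
For context, this is how the new capability is meant to be consumed from userspace: KVM_CHECK_EXTENSION on the VM reaches the check_extension hunk above (which probes enable_svm(NULL)), and KVM_ENABLE_CAP reaches the enable_cap hunk (enable_svm(kvm)). A minimal sketch, not part of the patch; VM setup and error handling are elided.

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Probe and then opt a VM in to KVM_CAP_PPC_SECURE_GUEST. */
    static int enable_secure_guest(int vm_fd)
    {
        struct kvm_enable_cap cap;

        /* Returns 0 when HV is disabled or no ultravisor is present. */
        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SECURE_GUEST) <= 0)
            return -1;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_PPC_SECURE_GUEST;
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }
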
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index ace65f9fed30..feef7885ba82 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -10,7 +10,6 @@
#define __POWERPC_KVM_EXITTIMING_H__
#include <linux/kvm_host.h>
-#include <asm/kvm_host.h>
#ifdef CONFIG_KVM_EXIT_TIMING
void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 0ff9261c915e..45b33b83de08 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,7 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o text_dma.o
-obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 3f501159ee9f..8fde561f1d07 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -3,7 +3,13 @@
#include <asm/facility.h>
#include <asm/sections.h>
+/* will be used in arch/s390/kernel/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+struct uv_info __bootdata_preserved(uv_info);
+#endif
void uv_query_info(void)
{
@@ -19,7 +25,21 @@ void uv_query_info(void)
if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;
+ if (IS_ENABLED(CONFIG_KVM)) {
+ memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list));
+ uv_info.uv_base_stor_len = uvcb.uv_base_stor_len;
+ uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len;
+ uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len;
+ uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len;
+ uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len;
+ uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
+ uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
+ uv_info.max_guest_cpus = uvcb.max_guest_cpus;
+ }
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
+#endif
}
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 37f96b6f0e61..a816fb4734b8 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,7 @@
#ifndef _ASM_S390_GMAP_H
#define _ASM_S390_GMAP_H
+#include <linux/radix-tree.h>
#include <linux/refcount.h>
/* Generic bits for GMAP notification on DAT table entry changes. */
@@ -31,6 +32,7 @@
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @pfault_enabled: defines if pfaults are applicable for the guest
+ * @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
* @pt_list: list of all page tables used in the shadow guest address space
@@ -54,6 +56,8 @@ struct gmap {
unsigned long asce_end;
void *private;
bool pfault_enabled;
+ /* only set for protected virtual machines */
+ unsigned long guest_handle;
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
@@ -144,4 +148,6 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
+int gmap_mark_unmergeable(void);
+void s390_reset_acc(struct mm_struct *mm);
#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1726224e7772..d6bcd34f3ec3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -127,6 +127,12 @@ struct mcck_volatile_info {
#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
CR14_EXTERNAL_DAMAGE_SUBMASK)
+#define SIDAD_SIZE_MASK 0xff
+#define sida_origin(sie_block) \
+ ((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -160,7 +166,13 @@ struct kvm_s390_sie_block {
__u8 reserved08[4]; /* 0x0008 */
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
- __u8 reserved10[16]; /* 0x0010 */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
#define PROG_BLOCK_SIE (1<<0)
#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
@@ -209,10 +221,23 @@ struct kvm_s390_sie_block {
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
__u8 icptcode; /* 0x0050 */
__u8 icptstatus; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
- __u8 reserved54[2]; /* 0x0054 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
@@ -233,7 +258,7 @@ struct kvm_s390_sie_block {
#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
- __u8 reserved68; /* 0x0068 */
+ __u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u32 todpr; /* 0x006c */
@@ -249,31 +274,58 @@ struct kvm_s390_sie_block {
#define HPID_KVM 0x4
#define HPID_VSIE 0x5
__u8 hpid; /* 0x00b8 */
- __u8 reservedb9[11]; /* 0x00b9 */
- __u16 extcpuaddr; /* 0x00c4 */
- __u16 eic; /* 0x00c6 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
__u32 reservedc8; /* 0x00c8 */
- __u16 pgmilc; /* 0x00cc */
- __u16 iprcc; /* 0x00ce */
- __u32 dxc; /* 0x00d0 */
- __u16 mcn; /* 0x00d4 */
- __u8 perc; /* 0x00d6 */
- __u8 peratmid; /* 0x00d7 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
__u64 peraddr; /* 0x00d8 */
__u8 eai; /* 0x00e0 */
__u8 peraid; /* 0x00e1 */
__u8 oai; /* 0x00e2 */
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
- __u64 tecmc; /* 0x00e8 */
- __u8 reservedf0[12]; /* 0x00f0 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
#define CRYCB_FORMAT_MASK 0x00000003
#define CRYCB_FORMAT0 0x00000000
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
- __u64 gbea; /* 0x0180 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
__u8 reserved188[8]; /* 0x0188 */
__u64 sdnxo; /* 0x0190 */
__u8 reserved198[8]; /* 0x0198 */
@@ -292,7 +344,7 @@ struct kvm_s390_sie_block {
__u64 itdba; /* 0x01e8 */
__u64 riccbd; /* 0x01f0 */
__u64 gvrd; /* 0x01f8 */
-} __attribute__((packed));
+} __packed __aligned(512);
struct kvm_s390_itdb {
__u8 data[256];
@@ -301,7 +353,9 @@ struct kvm_s390_itdb {
struct sie_page {
struct kvm_s390_sie_block sie_block;
struct mcck_volatile_info mcck_info; /* 0x0200 */
- __u8 reserved218[1000]; /* 0x0218 */
+ __u8 reserved218[360]; /* 0x0218 */
+ __u64 pv_grregs[16]; /* 0x0380 */
+ __u8 reserved400[512]; /* 0x0400 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
};
@@ -476,6 +530,7 @@ enum irq_types {
IRQ_PEND_PFAULT_INIT,
IRQ_PEND_EXT_HOST,
IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_SERVICE_EV,
IRQ_PEND_EXT_TIMING,
IRQ_PEND_EXT_CPU_TIMER,
IRQ_PEND_EXT_CLOCK_COMP,
@@ -520,6 +575,7 @@ enum irq_types {
(1UL << IRQ_PEND_EXT_TIMING) | \
(1UL << IRQ_PEND_EXT_HOST) | \
(1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV) | \
(1UL << IRQ_PEND_VIRTIO) | \
(1UL << IRQ_PEND_PFAULT_INIT) | \
(1UL << IRQ_PEND_PFAULT_DONE))
@@ -536,6 +592,13 @@ enum irq_types {
#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
(1UL << IRQ_PEND_MCHK_EX))
+#define IRQ_PEND_EXT_II_MASK ((1UL << IRQ_PEND_EXT_CPU_TIMER) | \
+ (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+ (1UL << IRQ_PEND_EXT_EMERGENCY) | \
+ (1UL << IRQ_PEND_EXT_EXTERNAL) | \
+ (1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV))
+
struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
@@ -594,6 +657,7 @@ struct kvm_s390_local_interrupt {
struct kvm_s390_float_interrupt {
unsigned long pending_irqs;
+ unsigned long masked_irqs;
spinlock_t lock;
struct list_head lists[FIRQ_LIST_COUNT];
int counters[FIRQ_MAX_COUNT];
@@ -645,6 +709,11 @@ struct kvm_guestdbg_info_arch {
unsigned long last_bp;
};
+struct kvm_s390_pv_vcpu {
+ u64 handle;
+ unsigned long stor_base;
+};
+
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
/* if vsie is active, currently executed shadow sie control block */
@@ -673,6 +742,7 @@ struct kvm_vcpu_arch {
__u64 cputm_start;
bool gs_enabled;
bool skey_enabled;
+ struct kvm_s390_pv_vcpu pv;
};
struct kvm_vm_stat {
@@ -701,9 +771,6 @@ struct s390_io_adapter {
bool masked;
bool swap;
bool suppressible;
- struct rw_semaphore maps_lock;
- struct list_head maps;
- atomic_t nr_maps;
};
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
@@ -846,6 +913,13 @@ struct kvm_s390_gisa_interrupt {
DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
};
+struct kvm_s390_pv {
+ u64 handle;
+ u64 guest_len;
+ unsigned long stor_base;
+ void *stor_var;
+};
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -881,6 +955,7 @@ struct kvm_arch{
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
+ struct kvm_s390_pv pv;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -921,7 +996,7 @@ static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
- struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+ struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
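
The new sida_origin()/sida_size() helpers above pack both values into the sidad field: the low byte holds the number of 4 KB pages minus one, the rest is the page-aligned origin. A worked example with a made-up value (illustration only, not part of the patch):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))
    #define SIDAD_SIZE_MASK 0xff

    int main(void)
    {
        unsigned long sidad = 0x12345000UL | 0x03;  /* made-up sidad value */

        /* origin: 0x12345000, size: (3 + 1) * 4096 = 16384 bytes */
        printf("origin %#lx, size %lu\n", sidad & PAGE_MASK,
               ((sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE);
        return 0;
    }
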
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bcfb6371086f..e21b618ad432 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@ typedef struct {
unsigned long asce;
unsigned long asce_limit;
unsigned long vdso_base;
+ /* The mmu context belongs to a secure guest. */
+ atomic_t is_protected;
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8d04e6f3f796..afa836014076 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
+ atomic_set(&mm->context.is_protected, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
mm->context.compat_mm = test_thread_flag(TIF_31BIT);
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 1019efd85b9d..62440a82731a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -153,6 +153,11 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
+#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_page_accessible(struct page *page);
+#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+#endif
+
#endif /* !__ASSEMBLY__ */
#define __PAGE_OFFSET 0x0UL
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6d7c3b7e9281..6076c8c912d2 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -19,6 +19,7 @@
#include <linux/atomic.h>
#include <asm/bug.h>
#include <asm/page.h>
+#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
@@ -520,6 +521,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
+static inline int mm_is_protected(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(atomic_read(&mm->context.is_protected)))
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_alloc_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1067,7 +1077,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -1079,7 +1094,12 @@ void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(vma->vm_mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
/*
@@ -1094,12 +1114,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, int full)
{
+ pte_t res;
+
if (full) {
- pte_t pte = *ptep;
+ res = *ptep;
*ptep = __pte(_PAGE_INVALID);
- return pte;
+ } else {
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
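
The same convert-on-invalidate pattern is now repeated in ptep_get_and_clear(), ptep_clear_flush() and ptep_get_and_clear_full(): once a protected guest's page has been unmapped, it is exported back from secure storage, presumably so the host can safely reuse the frame. A sketch of how the repetition could be factored out; the helper name is hypothetical and not part of the patch.

    /* Hypothetical helper, sketch only: export the old page of a protected
     * guest's mm after its PTE has been invalidated. */
    static inline pte_t pte_export_if_secure(struct mm_struct *mm, pte_t res)
    {
        if (mm_is_protected(mm) && pte_present(res))
            uv_convert_from_secure(pte_val(res) & PAGE_MASK);
        return res;
    }
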
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 4093a2856929..cff4b4c99b75 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -14,23 +14,62 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/bug.h>
+#include <linux/sched.h>
#include <asm/page.h>
+#include <asm/gmap.h>
#define UVC_RC_EXECUTED 0x0001
#define UVC_RC_INV_CMD 0x0002
#define UVC_RC_INV_STATE 0x0003
#define UVC_RC_INV_LEN 0x0005
#define UVC_RC_NO_RESUME 0x0007
+#define UVC_RC_NEED_DESTROY 0x8000
#define UVC_CMD_QUI 0x0001
+#define UVC_CMD_INIT_UV 0x000f
+#define UVC_CMD_CREATE_SEC_CONF 0x0100
+#define UVC_CMD_DESTROY_SEC_CONF 0x0101
+#define UVC_CMD_CREATE_SEC_CPU 0x0120
+#define UVC_CMD_DESTROY_SEC_CPU 0x0121
+#define UVC_CMD_CONV_TO_SEC_STOR 0x0200
+#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201
+#define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300
+#define UVC_CMD_UNPACK_IMG 0x0301
+#define UVC_CMD_VERIFY_IMG 0x0302
+#define UVC_CMD_CPU_RESET 0x0310
+#define UVC_CMD_CPU_RESET_INITIAL 0x0311
+#define UVC_CMD_PREPARE_RESET 0x0320
+#define UVC_CMD_CPU_RESET_CLEAR 0x0321
+#define UVC_CMD_CPU_SET_STATE 0x0330
+#define UVC_CMD_SET_UNSHARE_ALL 0x0340
+#define UVC_CMD_PIN_PAGE_SHARED 0x0341
+#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
/* Bits in installed uv calls */
enum uv_cmds_inst {
BIT_UVC_CMD_QUI = 0,
+ BIT_UVC_CMD_INIT_UV = 1,
+ BIT_UVC_CMD_CREATE_SEC_CONF = 2,
+ BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
+ BIT_UVC_CMD_CREATE_SEC_CPU = 4,
+ BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
+ BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
+ BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+ BIT_UVC_CMD_SET_SEC_PARMS = 11,
+ BIT_UVC_CMD_UNPACK_IMG = 13,
+ BIT_UVC_CMD_VERIFY_IMG = 14,
+ BIT_UVC_CMD_CPU_RESET = 15,
+ BIT_UVC_CMD_CPU_RESET_INITIAL = 16,
+ BIT_UVC_CMD_CPU_SET_STATE = 17,
+ BIT_UVC_CMD_PREPARE_RESET = 18,
+ BIT_UVC_CMD_CPU_PERFORM_CLEAR_RESET = 19,
+ BIT_UVC_CMD_UNSHARE_ALL = 20,
+ BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
+ BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
};
struct uv_cb_header {
@@ -40,13 +79,127 @@ struct uv_cb_header {
u16 rrc; /* Return Reason Code */
} __packed __aligned(8);
+/* Query Ultravisor Information */
struct uv_cb_qui {
struct uv_cb_header header;
u64 reserved08;
u64 inst_calls_list[4];
- u64 reserved30[15];
+ u64 reserved30[2];
+ u64 uv_base_stor_len;
+ u64 reserved48;
+ u64 conf_base_phys_stor_len;
+ u64 conf_base_virt_stor_len;
+ u64 conf_virt_var_stor_len;
+ u64 cpu_stor_len;
+ u32 reserved70[3];
+ u32 max_num_sec_conf;
+ u64 max_guest_stor_addr;
+ u8 reserved88[158 - 136];
+ u16 max_guest_cpus;
+ u8 reserveda0[200 - 160];
} __packed __aligned(8);
+/* Initialize Ultravisor */
+struct uv_cb_init {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 stor_origin;
+ u64 stor_len;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+/* Create Guest Configuration */
+struct uv_cb_cgc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 conf_base_stor_origin;
+ u64 conf_virt_stor_origin;
+ u64 reserved30;
+ u64 guest_stor_origin;
+ u64 guest_stor_len;
+ u64 guest_sca;
+ u64 guest_asce;
+ u64 reserved58[5];
+} __packed __aligned(8);
+
+/* Create Secure CPU */
+struct uv_cb_csc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u64 guest_handle;
+ u64 stor_origin;
+ u8 reserved30[6];
+ u16 num;
+ u64 state_origin;
+ u64 reserved40[4];
+} __packed __aligned(8);
+
+/* Convert to Secure */
+struct uv_cb_cts {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+} __packed __aligned(8);
+
+/* Convert from Secure / Pin Page Shared */
+struct uv_cb_cfs {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 paddr;
+} __packed __aligned(8);
+
+/* Set Secure Config Parameter */
+struct uv_cb_ssc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 sec_header_origin;
+ u32 sec_header_len;
+ u32 reserved2c;
+ u64 reserved30[4];
+} __packed __aligned(8);
+
+/* Unpack */
+struct uv_cb_unp {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+ u64 tweak[2];
+ u64 reserved38[3];
+} __packed __aligned(8);
+
+#define PV_CPU_STATE_OPR 1
+#define PV_CPU_STATE_STP 2
+#define PV_CPU_STATE_CHKSTP 3
+#define PV_CPU_STATE_OPR_LOAD 5
+
+struct uv_cb_cpu_set_state {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u8 reserved20[7];
+ u8 state;
+ u64 reserved28[5];
+};
+
+/*
+ * A common UV call struct for calls that take no payload
+ * Examples:
+ * Destroy cpu/config
+ * Verify
+ */
+struct uv_cb_nodata {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 handle;
+ u64 reserved20[4];
+} __packed __aligned(8);
+
+/* Set Shared Access */
struct uv_cb_share {
struct uv_cb_header header;
u64 reserved08[3];
@@ -54,21 +207,76 @@ struct uv_cb_share {
u64 reserved28;
} __packed __aligned(8);
-static inline int uv_call(unsigned long r1, unsigned long r2)
+static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
asm volatile(
- "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
- " brc 3,0b\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
+ " .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
: [cc] "=d" (cc)
: [r1] "a" (r1), [r2] "a" (r2)
: "memory", "cc");
return cc;
}
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ } while (cc > 1);
+ return cc;
+}
+
+/* Low level uv_call that avoids stalls for long running busy conditions */
+static inline int uv_call_sched(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ cond_resched();
+ } while (cc > 1);
+ return cc;
+}
+
+/*
+ * special variant of uv_call that only transports the cpu or guest
+ * handle and the command, like destroy or verify.
+ */
+static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_nodata uvcb = {
+ .header.cmd = cmd,
+ .header.len = sizeof(uvcb),
+ .handle = handle,
+ };
+ int cc;
+
+ WARN(!handle, "No handle provided to Ultravisor call cmd %x\n", cmd);
+ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ return cc ? -EINVAL : 0;
+}
+
+struct uv_info {
+ unsigned long inst_calls_list[4];
+ unsigned long uv_base_stor_len;
+ unsigned long guest_base_stor_len;
+ unsigned long guest_virt_base_stor_len;
+ unsigned long guest_virt_var_stor_len;
+ unsigned long guest_cpu_stor_len;
+ unsigned long max_sec_stor_addr;
+ unsigned int max_num_sec_conf;
+ unsigned short max_guest_cpus;
+};
+
+extern struct uv_info uv_info;
+
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
@@ -121,11 +329,40 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-void uv_query_info(void);
#else
#define is_prot_virt_guest() 0
static inline int uv_set_shared(unsigned long addr) { return 0; }
static inline int uv_remove_shared(unsigned long addr) { return 0; }
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+extern int prot_virt_host;
+
+static inline int is_prot_virt_host(void)
+{
+ return prot_virt_host;
+}
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+
+void setup_uv(void);
+void adjust_to_uv_max(unsigned long *vmax);
+#else
+#define is_prot_virt_host() 0
+static inline void setup_uv(void) {}
+static inline void adjust_to_uv_max(unsigned long *vmax) {}
+
+static inline int uv_convert_from_secure(unsigned long paddr)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+void uv_query_info(void);
+#else
static inline void uv_query_info(void) {}
#endif
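
uv_cmd_nodata() is the helper for the "no payload" commands listed above (destroy, verify, the resets). A usage sketch, not taken from the patch; the wrapper name is made up and the rc/rrc values are simply handed back for logging.

    /* Sketch: tear down a secure configuration identified by its handle. */
    static int destroy_secure_config(u64 config_handle, u16 *rc, u16 *rrc)
    {
        /* UVC_CMD_DESTROY_SEC_CONF only needs the handle, hence "nodata". */
        return uv_cmd_nodata(config_handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
    }
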
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 578a6fa82ea4..33d4de233c5b 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 1d3927e01a5f..faca269d5f27 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -24,6 +24,8 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
+void do_secure_storage_access(struct pt_regs *regs);
+void do_non_secure_storage_access(struct pt_regs *regs);
void addressing_exception(struct pt_regs *regs);
void data_exception(struct pt_regs *regs);
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index eee3a482195a..2c27907a5ffc 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -78,8 +78,8 @@ PGM_CHECK(do_dat_exception) /* 39 */
PGM_CHECK(do_dat_exception) /* 3a */
PGM_CHECK(do_dat_exception) /* 3b */
PGM_CHECK_DEFAULT /* 3c */
-PGM_CHECK_DEFAULT /* 3d */
-PGM_CHECK_DEFAULT /* 3e */
+PGM_CHECK(do_secure_storage_access) /* 3d */
+PGM_CHECK(do_non_secure_storage_access) /* 3e */
PGM_CHECK_DEFAULT /* 3f */
PGM_CHECK(monitor_event_exception) /* 40 */
PGM_CHECK_DEFAULT /* 41 */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b2c2f75860e8..1423090a2259 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -92,10 +92,6 @@ char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0;
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
-int __bootdata_preserved(prot_virt_guest);
-#endif
-
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
@@ -564,6 +560,9 @@ static void __init setup_memory_end(void)
vmax = _REGION1_SIZE; /* 4-level kernel page table */
}
+ if (is_prot_virt_host())
+ adjust_to_uv_max(&vmax);
+
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -1138,6 +1137,8 @@ void __init setup_arch(char **cmdline_p)
*/
memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+ if (is_prot_virt_host())
+ setup_uv();
setup_memory_end();
setup_memory();
dma_contiguous_reserve(memory_end);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
new file mode 100644
index 000000000000..c86d654351d1
--- /dev/null
+++ b/arch/s390/kernel/uv.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Ultravisor functions and initialization
+ *
+ * Copyright IBM Corp. 2019, 2020
+ */
+#define KMSG_COMPONENT "prot_virt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/uv.h>
+
+/* the bootdata_preserved fields come from the ones in arch/s390/boot/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+int prot_virt_host;
+EXPORT_SYMBOL(prot_virt_host);
+struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
+
+static int __init prot_virt_setup(char *val)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(val, &enabled);
+ if (!rc && enabled)
+ prot_virt_host = 1;
+
+ if (is_prot_virt_guest() && prot_virt_host) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not available in protected guests.");
+ }
+
+ if (prot_virt_host && !test_facility(158)) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not supported by the hardware.");
+ }
+
+ return rc;
+}
+early_param("prot_virt", prot_virt_setup);
+
+static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
+{
+ struct uv_cb_init uvcb = {
+ .header.cmd = UVC_CMD_INIT_UV,
+ .header.len = sizeof(uvcb),
+ .stor_origin = stor_base,
+ .stor_len = stor_len,
+ };
+
+ if (uv_call(0, (uint64_t)&uvcb)) {
+ pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",
+ uvcb.header.rc, uvcb.header.rrc);
+ return -1;
+ }
+ return 0;
+}
+
+void __init setup_uv(void)
+{
+ unsigned long uv_stor_base;
+
+ uv_stor_base = (unsigned long)memblock_alloc_try_nid(
+ uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
+ MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+ if (!uv_stor_base) {
+ pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
+ uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
+ memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ pr_info("Reserving %luMB as ultravisor base storage\n",
+ uv_info.uv_base_stor_len >> 20);
+ return;
+fail:
+ pr_info("Disabling support for protected virtualization");
+ prot_virt_host = 0;
+}
+
+void adjust_to_uv_max(unsigned long *vmax)
+{
+ *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+static int uv_pin_shared(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr,
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Calculate the expected ref_count for a page that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * page cannot be a huge page, for example.
+ */
+static int expected_page_refs(struct page *page)
+{
+ int res;
+
+ res = page_mapcount(page);
+ if (PageSwapCache(page)) {
+ res++;
+ } else if (page_mapping(page)) {
+ res++;
+ if (page_has_private(page))
+ res++;
+ }
+ return res;
+}
+
+static int make_secure_pte(pte_t *ptep, unsigned long addr,
+ struct page *exp_page, struct uv_cb_header *uvcb)
+{
+ pte_t entry = READ_ONCE(*ptep);
+ struct page *page;
+ int expected, rc = 0;
+
+ if (!pte_present(entry))
+ return -ENXIO;
+ if (pte_val(entry) & _PAGE_INVALID)
+ return -ENXIO;
+
+ page = pte_page(entry);
+ if (page != exp_page)
+ return -ENXIO;
+ if (PageWriteback(page))
+ return -EAGAIN;
+ expected = expected_page_refs(page);
+ if (!page_ref_freeze(page, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &page->flags);
+ rc = uv_call(0, (u64)uvcb);
+ page_ref_unfreeze(page, expected);
+ /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
+ if (rc)
+ rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+ return rc;
+}
+
+/*
+ * Requests the Ultravisor to make a page accessible to a guest.
+ * If it's brought in for the first time, it will be cleared. If
+ * it has been exported before, it will be decrypted and integrity
+ * checked.
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct vm_area_struct *vma;
+ bool local_drain = false;
+ spinlock_t *ptelock;
+ unsigned long uaddr;
+ struct page *page;
+ pte_t *ptep;
+ int rc;
+
+again:
+ rc = -EFAULT;
+ down_read(&gmap->mm->mmap_sem);
+
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+ vma = find_vma(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest thus never reaches secure mode. If userspace
+ * plays dirty tricks with mapping huge pages later on, this will
+ * result in a segmentation fault.
+ */
+ if (is_vm_hugetlb_page(vma))
+ goto out;
+
+ rc = -ENXIO;
+ page = follow_page(vma, uaddr, FOLL_WRITE);
+ if (IS_ERR_OR_NULL(page))
+ goto out;
+
+ lock_page(page);
+ ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ rc = make_secure_pte(ptep, uaddr, page, uvcb);
+ pte_unmap_unlock(ptep, ptelock);
+ unlock_page(page);
+out:
+ up_read(&gmap->mm->mmap_sem);
+
+ if (rc == -EAGAIN) {
+ wait_on_page_writeback(page);
+ } else if (rc == -EBUSY) {
+ /*
+ * If we have tried a local drain and the page refcount
+ * still does not match our expected safe value, try with a
+ * system wide drain. This is needed if the pagevecs holding
+ * the page are on a different CPU.
+ */
+ if (local_drain) {
+ lru_add_drain_all();
+ /* We give up here, and let the caller try again */
+ return -EAGAIN;
+ }
+ /*
+ * We are here if the page refcount does not match the
+ * expected safe value. The main culprits are usually
+ * pagevecs. With lru_add_drain() we drain the pagevecs
+ * on the local CPU so that hopefully the refcount will
+ * reach the expected safe value.
+ */
+ lru_add_drain();
+ local_drain = true;
+ /* And now we try again immediately after draining */
+ goto again;
+ } else if (rc == -ENXIO) {
+ if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
+ return -EFAULT;
+ return -EAGAIN;
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_make_secure);
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+
+/*
+ * To be called with the page locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the page concurrently. Having 2
+ * parallel make_page_accessible calls is fine, as the UV calls will become a
+ * no-op if the page is already exported.
+ */
+int arch_make_page_accessible(struct page *page)
+{
+ int rc = 0;
+
+ /* Hugepage cannot be protected, so nothing to do */
+ if (PageHuge(page))
+ return 0;
+
+ /*
+ * PG_arch_1 is used in 3 places:
+ * 1. for kernel page tables during early boot
+ * 2. for storage keys of huge pages and KVM
+ * 3. As an indication that this page might be secure. This can
+ * overindicate, e.g. we set the bit before calling
+ * convert_to_secure.
+ * As secure pages are never huge, all 3 variants can co-exist.
+ */
+ if (!test_bit(PG_arch_1, &page->flags))
+ return 0;
+
+ rc = uv_pin_shared(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ rc = uv_convert_from_secure(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+static ssize_t uv_query_facilities(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
+ uv_info.inst_calls_list[0],
+ uv_info.inst_calls_list[1],
+ uv_info.inst_calls_list[2],
+ uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+ __ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_guest_cpus);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+ __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+ __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+ __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+ &uv_query_facilities_attr.attr,
+ &uv_query_max_guest_cpus_attr.attr,
+ &uv_query_max_guest_vms_attr.attr,
+ &uv_query_max_guest_addr_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_query_attr_group = {
+ .attrs = uv_query_attrs,
+};
+
+static struct kset *uv_query_kset;
+static struct kobject *uv_kobj;
+
+static int __init uv_info_init(void)
+{
+ int rc = -ENOMEM;
+
+ if (!test_facility(158))
+ return 0;
+
+ uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+ if (!uv_kobj)
+ return -ENOMEM;
+
+ uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
+ if (!uv_query_kset)
+ goto out_kobj;
+
+ rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
+ if (!rc)
+ return 0;
+
+ kset_unregister(uv_query_kset);
+out_kobj:
+ kobject_del(uv_kobj);
+ kobject_put(uv_kobj);
+ return rc;
+}
+device_initcall(uv_info_init);
+#endif
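
gmap_make_secure() deliberately gives up with -EAGAIN once a local pagevec drain was not enough (the system-wide drain case above), so callers are expected to retry; the real retry loop lives in KVM's protected-VM code, which is outside this excerpt. A caller-side sketch under that assumption; the retry bound is this sketch's own choice. None of this code is active unless the host was booted with prot_virt=1, which is what the early_param() at the top of the file wires up.

    /* Sketch only: retry the secure-storage conversion a bounded number of
     * times, since -EAGAIN means "writeback or extra references, try again". */
    static int convert_with_retry(struct gmap *gmap, unsigned long gaddr)
    {
        int i, rc = -EAGAIN;

        for (i = 0; i < 10 && rc == -EAGAIN; i++)
            rc = gmap_convert_to_secure(gmap, gaddr);
        return rc;
    }
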
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 05ee90a5ea08..12decca22e7c 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,6 +9,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 3fb54ec2cf3e..563429dece03 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -2,7 +2,7 @@
/*
* handling diagnose instructions
*
- * Copyright IBM Corp. 2008, 2011
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -201,6 +201,10 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only fail
+ * for protvirt, but protvirt implies user cpu state control, so this
+ * branch is never taken for a protected guest
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 07d30ffcfa41..47a67a958107 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -505,7 +505,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (prot) {
case PROT_TYPE_IEP:
tec->b61 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_LA:
tec->b56 = 1;
break;
@@ -514,12 +514,12 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
break;
case PROT_TYPE_ALC:
tec->b60 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_DAT:
tec->b61 = 1;
break;
}
- /* FALL THROUGH */
+ fallthrough;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
@@ -534,7 +534,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
tec->addr = gva >> PAGE_SHIFT;
tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
- /* FALL THROUGH */
+ fallthrough;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_VALIDITY:
@@ -677,7 +677,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rfte.p;
ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -695,7 +695,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rste.p;
ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -723,7 +723,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rtte.fc0.p;
ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
@@ -1050,7 +1050,8 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -1076,7 +1077,8 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -1111,7 +1113,8 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a389fa85cca2..e7a7c499a73f 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -2,7 +2,7 @@
/*
* in-kernel handling for sie intercepts
*
- * Copyright IBM Corp. 2008, 2014
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -12,10 +12,10 @@
#include <linux/errno.h>
#include <linux/pagemap.h>
-#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include <asm/irq.h>
#include <asm/sysinfo.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -79,6 +79,10 @@ static int handle_stop(struct kvm_vcpu *vcpu)
return rc;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only fail
+ * for protvirt, but protvirt implies user cpu state control, so this
+ * branch is never taken for a protected guest
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
return -EOPNOTSUPP;
@@ -231,6 +235,13 @@ static int handle_prog(struct kvm_vcpu *vcpu)
vcpu->stat.exit_program_interruption++;
+ /*
+ * Intercept 8 indicates a loop of specification exceptions
+ * for protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EOPNOTSUPP;
+
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
rc = kvm_s390_handle_per_event(vcpu);
if (rc)
@@ -384,7 +395,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
goto out;
}
- if (addr & ~PAGE_MASK)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
sctns = (void *)get_zeroed_page(GFP_KERNEL);
@@ -395,10 +406,15 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
out:
if (!cc) {
- r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
- if (r) {
- free_page((unsigned long)sctns);
- return kvm_s390_inject_prog_cond(vcpu, r);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
+ sctns, PAGE_SIZE);
+ } else {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+ if (r) {
+ free_page((unsigned long)sctns);
+ return kvm_s390_inject_prog_cond(vcpu, r);
+ }
}
}
@@ -444,6 +460,77 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
}
+static int handle_pv_spx(struct kvm_vcpu *vcpu)
+{
+ u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+
+ kvm_s390_set_prefix(vcpu, pref);
+ trace_kvm_s390_handle_prefix(vcpu, 1, pref);
+ return 0;
+}
+
+static int handle_pv_sclp(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ /*
+ * 2 cases:
+ * a: an sccb answering interrupt was already pending or in flight.
+ * As the sccb value is not known we can simply set some value to
+ * trigger delivery of a saved SCCB. UV will then use its saved
+ * copy of the SCCB value.
+ * b: an error SCCB interrupt needs to be injected so we also inject
+ * a fake SCCB address. Firmware will use the proper one.
+ * This makes sure, that both errors and real sccb returns will only
+ * be delivered after a notification intercept (instruction has
+ * finished) but not after others.
+ */
+ fi->srv_signal.ext_params |= 0x43000;
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+{
+ struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(vcpu->kvm),
+ .gaddr = guest_uvcb->paddr,
+ };
+ int rc;
+
+ if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) {
+ WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n",
+ guest_uvcb->header.cmd);
+ return 0;
+ }
+ rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+ /*
+ * If the unpin did not succeed, the guest will exit again for the UVC
+ * and we will retry the unpin.
+ */
+ if (rc == -EINVAL)
+ return 0;
+ return rc;
+}
+
+static int handle_pv_notification(struct kvm_vcpu *vcpu)
+{
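+	/*
+	 * Dispatch on the opcode (IPA) of the intercepted instruction:
+	 * 0xb210 is SET PREFIX (SPX), 0xb220 is SERVICE CALL (SERVC) and
+	 * 0xb9a4 is the Ultravisor Call (handled here for page unsharing).
+	 */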
+ if (vcpu->arch.sie_block->ipa == 0xb210)
+ return handle_pv_spx(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb220)
+ return handle_pv_sclp(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb9a4)
+ return handle_pv_uvc(vcpu);
+
+ return handle_instruction(vcpu);
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
int rc, per_rc = 0;
@@ -480,6 +567,28 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
case ICPT_KSS:
rc = kvm_s390_skey_check_enable(vcpu);
break;
+ case ICPT_MCHKREQ:
+ case ICPT_INT_ENABLE:
+ /*
+ * PSW bit 13 or a CR (0, 6, 14) changed and we might
+ * now be able to deliver interrupts. The pre-run code
+ * will take care of this.
+ */
+ rc = 0;
+ break;
+ case ICPT_PV_INSTR:
+ rc = handle_instruction(vcpu);
+ break;
+ case ICPT_PV_NOTIFY:
+ rc = handle_pv_notification(vcpu);
+ break;
+ case ICPT_PV_PREF:
+ rc = 0;
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu));
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c06c89d370a7..8191106bf7b9 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2,7 +2,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008, 2015
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
@@ -324,8 +324,11 @@ static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
{
- return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs;
+ unsigned long pending = vcpu->kvm->arch.float_int.pending_irqs |
+ vcpu->arch.local_int.pending_irqs;
+
+ pending &= ~vcpu->kvm->arch.float_int.masked_irqs;
+ return pending;
}
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
@@ -383,10 +386,18 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
- if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+ __clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
+ }
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /* PV guest cpus can have a single interruption injected at a time. */
+ if (kvm_s390_pv_cpu_is_protected(vcpu) &&
+ vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
+ active_mask &= ~(IRQ_PEND_EXT_II_MASK |
+ IRQ_PEND_IO_MASK |
+ IRQ_PEND_MCHK_MASK);
/*
* Check both floating and local interrupt's cr14 because
* bit IRQ_PEND_MCHK_REP could be set in both cases.
@@ -479,19 +490,23 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_cputm++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
- (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CPU_TIMER;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -499,19 +514,23 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_ckc++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
- (u16 __user *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CLK_COMP;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+ (u16 __user *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -553,6 +572,20 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
union mci mci;
int rc;
+ /*
+ * All other possible payload for a machine check (e.g. the register
+ * contents in the save area) will be handled by the ultravisor, as
+	 * the hypervisor does not have the needed information for
+ * protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
+ vcpu->arch.sie_block->mcic = mchk->mcic;
+ vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
+ vcpu->arch.sie_block->edc = mchk->ext_damage_code;
+ return 0;
+ }
+
mci.val = mchk->mcic;
/* take care of lazy register loading */
save_fpu_regs();
@@ -696,17 +729,21 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
- rc = write_guest_lc(vcpu,
- offsetof(struct lowcore, restart_old_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_RESTART;
+ } else {
+ rc = write_guest_lc(vcpu,
+ offsetof(struct lowcore, restart_old_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -748,6 +785,12 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
cpu_addr, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EMERGENCY_SIG;
+ vcpu->arch.sie_block->extcpuaddr = cpu_addr;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
(u16 *)__LC_EXT_INT_CODE);
@@ -776,6 +819,12 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_INT_EXTERNAL_CALL,
extcall.code, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EXTERNAL_CALL;
+ vcpu->arch.sie_block->extcpuaddr = extcall.code;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
(u16 *)__LC_EXT_INT_CODE);
@@ -787,6 +836,21 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+static int __deliver_prog_pv(struct kvm_vcpu *vcpu, u16 code)
+{
+ switch (code) {
+ case PGM_SPECIFICATION:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_SPECIFICATION;
+ break;
+ case PGM_OPERAND:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_OPERAND;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -807,6 +871,10 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
+ /* PER is handled by the ultravisor */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
+
switch (pgm_info.code & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
case PGM_ASX_TRANSLATION:
@@ -818,7 +886,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
nullifying = true;
- /* fall through */
+ fallthrough;
case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
@@ -902,20 +970,49 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int write_sclp(struct kvm_vcpu *vcpu, u32 parm)
+{
+ int rc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_SERVICE_SIG;
+ vcpu->arch.sie_block->eiparams = parm;
+ return 0;
+ }
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, parm,
+ (u32 *)__LC_EXT_PARAMS);
+
+ return rc ? -EFAULT : 0;
+}
+
static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_ext_info ext;
- int rc = 0;
spin_lock(&fi->lock);
- if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs) ||
+ !(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
spin_unlock(&fi->lock);
return 0;
}
ext = fi->srv_signal;
memset(&fi->srv_signal, 0, sizeof(ext));
clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
@@ -924,16 +1021,31 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
ext.ext_params, 0);
- rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= put_guest_lc(vcpu, ext.ext_params,
- (u32 *)__LC_EXT_PARAMS);
+ return write_sclp(vcpu, ext.ext_params);
+}
- return rc ? -EFAULT : 0;
+static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_ext_info ext;
+
+ spin_lock(&fi->lock);
+ if (!(test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs))) {
+ spin_unlock(&fi->lock);
+ return 0;
+ }
+ ext = fi->srv_signal;
+ /* only clear the event bit */
+ fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sclp parameter event");
+ vcpu->stat.deliver_service_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+ ext.ext_params, 0);
+
+ return write_sclp(vcpu, SCCB_EVENT_PENDING);
}
static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
@@ -1028,6 +1140,15 @@ static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
{
int rc;
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_IO;
+ vcpu->arch.sie_block->subchannel_id = io->subchannel_id;
+ vcpu->arch.sie_block->subchannel_nr = io->subchannel_nr;
+ vcpu->arch.sie_block->io_int_parm = io->io_int_parm;
+ vcpu->arch.sie_block->io_int_word = io->io_int_word;
+ return 0;
+ }
+
rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
@@ -1329,6 +1450,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
case IRQ_PEND_EXT_SERVICE:
rc = __deliver_service(vcpu);
break;
+ case IRQ_PEND_EXT_SERVICE_EV:
+ rc = __deliver_service_ev(vcpu);
+ break;
case IRQ_PEND_PFAULT_DONE:
rc = __deliver_pfault_done(vcpu);
break;
@@ -1421,7 +1545,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
- if (sclp.has_sigpif)
+ if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
@@ -1681,9 +1805,6 @@ out:
return inti;
}
-#define SCCB_MASK 0xFFFFFFF8
-#define SCCB_EVENT_PENDING 0x3
-
static int __inject_service(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti)
{
@@ -1692,6 +1813,11 @@ static int __inject_service(struct kvm *kvm,
kvm->stat.inject_service_signal++;
spin_lock(&fi->lock);
fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+
+ /* We always allow events, track them separately from the sccb ints */
+ if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
+ set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+
/*
* Early versions of the QEMU s390 bios will inject several
* service interrupts after another without handling a
@@ -1773,7 +1899,14 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
kvm->stat.inject_io++;
isc = int_word_to_isc(inti->io.io_int_word);
- if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ /*
+ * Do not make use of gisa in protected mode. We do not use the lock
+ * checking variant as this is just a performance optimization and we
+ * do not hold the lock here. This is ok as the code will pick
+ * interrupts from both "lists" for delivery.
+ */
+ if (!kvm_s390_pv_get_handle(kvm) &&
+ gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
gisa_set_ipm_gisc(gi->origin, isc);
kfree(inti);
@@ -1834,7 +1967,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
if (!(type & KVM_S390_INT_IO_AI_MASK &&
- kvm->arch.gisa_int.origin))
+ kvm->arch.gisa_int.origin) ||
+ kvm_s390_pv_cpu_get_handle(dst_vcpu))
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
@@ -2080,6 +2214,10 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
int i;
+ mutex_lock(&kvm->lock);
+ if (!kvm_s390_pv_is_protected(kvm))
+ fi->masked_irqs = 0;
+ mutex_unlock(&kvm->lock);
spin_lock(&fi->lock);
fi->pending_irqs = 0;
memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
@@ -2146,7 +2284,8 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
n++;
}
}
- if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
+ test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) {
if (n == max_irqs) {
/* signal userspace to try again */
ret = -ENOMEM;
@@ -2327,9 +2466,6 @@ static int register_io_adapter(struct kvm_device *dev,
if (!adapter)
return -ENOMEM;
- INIT_LIST_HEAD(&adapter->maps);
- init_rwsem(&adapter->maps_lock);
- atomic_set(&adapter->nr_maps, 0);
adapter->id = adapter_info.id;
adapter->isc = adapter_info.isc;
adapter->maskable = adapter_info.maskable;
@@ -2354,87 +2490,12 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
return ret;
}
-static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map;
- int ret;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- map = kzalloc(sizeof(*map), GFP_KERNEL);
- if (!map) {
- ret = -ENOMEM;
- goto out;
- }
- INIT_LIST_HEAD(&map->list);
- map->guest_addr = addr;
- map->addr = gmap_translate(kvm->arch.gmap, addr);
- if (map->addr == -EFAULT) {
- ret = -EFAULT;
- goto out;
- }
- ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
- if (ret < 0)
- goto out;
- BUG_ON(ret != 1);
- down_write(&adapter->maps_lock);
- if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
- list_add_tail(&map->list, &adapter->maps);
- ret = 0;
- } else {
- put_page(map->page);
- ret = -EINVAL;
- }
- up_write(&adapter->maps_lock);
-out:
- if (ret)
- kfree(map);
- return ret;
-}
-
-static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map, *tmp;
- int found = 0;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- down_write(&adapter->maps_lock);
- list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
- if (map->guest_addr == addr) {
- found = 1;
- atomic_dec(&adapter->nr_maps);
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- break;
- }
- }
- up_write(&adapter->maps_lock);
-
- return found ? 0 : -EINVAL;
-}
-
void kvm_s390_destroy_adapters(struct kvm *kvm)
{
int i;
- struct s390_map_info *map, *tmp;
- for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
- if (!kvm->arch.adapters[i])
- continue;
- list_for_each_entry_safe(map, tmp,
- &kvm->arch.adapters[i]->maps, list) {
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- }
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
kfree(kvm->arch.adapters[i]);
- }
}
static int modify_io_adapter(struct kvm_device *dev,
@@ -2456,11 +2517,14 @@ static int modify_io_adapter(struct kvm_device *dev,
if (ret > 0)
ret = 0;
break;
+ /*
+ * The following operations are no longer needed and therefore no-ops.
+ * The gpa to hva translation is done when an IRQ route is set up. The
+ * set_irq code uses get_user_pages_remote() to do the actual write.
+ */
case KVM_S390_IO_ADAPTER_MAP:
- ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
- break;
case KVM_S390_IO_ADAPTER_UNMAP:
- ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ ret = 0;
break;
default:
ret = -EINVAL;
@@ -2699,19 +2763,15 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
}
-static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
- u64 addr)
+static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
{
- struct s390_map_info *map;
+ struct page *page = NULL;
- if (!adapter)
- return NULL;
-
- list_for_each_entry(map, &adapter->maps, list) {
- if (map->guest_addr == addr)
- return map;
- }
- return NULL;
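+	/*
+	 * Pin the userspace page backing uaddr for writing; the caller is
+	 * responsible for the matching put_page().
+	 */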
+ down_read(&kvm->mm->mmap_sem);
+ get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
+ &page, NULL, NULL);
+ up_read(&kvm->mm->mmap_sem);
+ return page;
}
static int adapter_indicators_set(struct kvm *kvm,
@@ -2720,30 +2780,35 @@ static int adapter_indicators_set(struct kvm *kvm,
{
unsigned long bit;
int summary_set, idx;
- struct s390_map_info *info;
+ struct page *ind_page, *summary_page;
void *map;
- info = get_map_info(adapter, adapter_int->ind_addr);
- if (!info)
+ ind_page = get_map_page(kvm, adapter_int->ind_addr);
+ if (!ind_page)
return -1;
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
- set_bit(bit, map);
- idx = srcu_read_lock(&kvm->srcu);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
- info = get_map_info(adapter, adapter_int->summary_addr);
- if (!info) {
- srcu_read_unlock(&kvm->srcu, idx);
+ summary_page = get_map_page(kvm, adapter_int->summary_addr);
+ if (!summary_page) {
+ put_page(ind_page);
return -1;
}
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->summary_offset,
- adapter->swap);
+
+ idx = srcu_read_lock(&kvm->srcu);
+ map = page_address(ind_page);
+ bit = get_ind_bit(adapter_int->ind_addr,
+ adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(ind_page);
+ map = page_address(summary_page);
+ bit = get_ind_bit(adapter_int->summary_addr,
+ adapter_int->summary_offset, adapter->swap);
summary_set = test_and_set_bit(bit, map);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
+ mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(summary_page);
srcu_read_unlock(&kvm->srcu, idx);
+
+ put_page(ind_page);
+ put_page(summary_page);
return summary_set ? 0 : 1;
}
@@ -2765,9 +2830,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
adapter = get_io_adapter(kvm, e->adapter.adapter_id);
if (!adapter)
return -1;
- down_read(&adapter->maps_lock);
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
- up_read(&adapter->maps_lock);
if ((ret > 0) && !adapter->masked) {
ret = kvm_s390_inject_airq(kvm, adapter);
if (ret == 0)
@@ -2818,23 +2881,27 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int ret;
+ u64 uaddr;
switch (ue->type) {
+ /* we store the userspace addresses instead of the guest addresses */
case KVM_IRQ_ROUTING_S390_ADAPTER:
e->set = set_adapter_int;
- e->adapter.summary_addr = ue->u.adapter.summary_addr;
- e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.summary_addr = uaddr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.ind_addr = uaddr;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
- ret = 0;
- break;
+ return 0;
default:
- ret = -EINVAL;
+ return -EINVAL;
}
-
- return ret;
}
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c2e6d4ba4e23..19a81024fe16 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -44,6 +44,7 @@
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -184,6 +185,11 @@ static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
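+/* Tunable via /sys/module/kvm/parameters/use_gisa; evaluated at VM creation. */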
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -220,6 +226,7 @@ static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
@@ -228,13 +235,15 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return 0;
}
+/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
@@ -293,7 +302,7 @@ static struct notifier_block kvm_clock_notifier = {
.notifier_call = kvm_clock_sync,
};
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_pte_notifier(&gmap_notifier);
@@ -460,7 +469,12 @@ int kvm_arch_init(void *opaque)
if (!kvm_s390_dbf)
return -ENOMEM;
- if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+ kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+ if (!kvm_s390_dbf_uv)
+ goto out;
+
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+ debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
goto out;
kvm_s390_cpu_feat_init();
@@ -487,6 +501,7 @@ void kvm_arch_exit(void)
{
kvm_s390_gib_destroy();
debug_unregister(kvm_s390_dbf);
+ debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
@@ -564,14 +579,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_BPB:
r = test_facility(82);
break;
+ case KVM_CAP_S390_PROTECTED:
+ r = is_prot_virt_host();
+ break;
default:
r = 0;
}
return r;
}
-static void kvm_s390_sync_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
int i;
gfn_t cur_gfn, last_gfn;
@@ -612,9 +629,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
{
int r;
unsigned long n;
- struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int is_dirty = 0;
+ int is_dirty;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -625,14 +641,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
- r = -ENOENT;
- if (!memslot->dirty_bitmap)
- goto out;
-
- kvm_s390_sync_dirty_log(kvm, memslot);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
@@ -1993,6 +2002,9 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
+ if (unlikely(!slots->used_slots))
+ return 0;
+
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
ms = gfn_to_memslot(kvm, cur_gfn);
args->count = 0;
@@ -2158,6 +2170,194 @@ out:
return r;
}
+static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+{
+ struct kvm_vcpu *vcpu;
+ u16 rc, rrc;
+ int ret = 0;
+ int i;
+
+ /*
+ * We ignore failures and try to destroy as many CPUs as possible.
+ * At the same time we must not free the assigned resources when
+	 * this fails, as the ultravisor still has access to that memory.
+ * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+ * behind.
+ * We want to return the first failure rc and rrc, though.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
+ *rcp = rc;
+ *rrcp = rrc;
+ ret = -EIO;
+ }
+ mutex_unlock(&vcpu->mutex);
+ }
+ return ret;
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int i, r = 0;
+ u16 dummy;
+
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+ mutex_unlock(&vcpu->mutex);
+ if (r)
+ break;
+ }
+ if (r)
+ kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+ return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+ int r = 0;
+ u16 dummy;
+ void __user *argp = (void __user *)cmd->data;
+
+ switch (cmd->cmd) {
+ case KVM_PV_ENABLE: {
+ r = -EINVAL;
+ if (kvm_s390_pv_is_protected(kvm))
+ break;
+
+ /*
+ * FMT 4 SIE needs esca. As we never switch back to bsca from
+ * esca, we need no cleanup in the error cases below
+ */
+ r = sca_switch_to_extended(kvm);
+ if (r)
+ break;
+
+ down_write(&current->mm->mmap_sem);
+ r = gmap_mark_unmergeable();
+ up_write(&current->mm->mmap_sem);
+ if (r)
+ break;
+
+ r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ break;
+
+ r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+ /* we need to block service interrupts from now on */
+ set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_DISABLE: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+ /*
+ * If a CPU could not be destroyed, destroy VM will also fail.
+ * There is no point in trying to destroy it. Instead return
+ * the rc and rrc from the first CPU that failed destroying.
+ */
+ if (r)
+ break;
+ r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+
+ /* no need to block service interrupts any more */
+ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_SET_SEC_PARMS: {
+ struct kvm_s390_pv_sec_parm parms = {};
+ void *hdr;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&parms, argp, sizeof(parms)))
+ break;
+
+ /* Currently restricted to 8KB */
+ r = -EINVAL;
+ if (parms.length > PAGE_SIZE * 2)
+ break;
+
+ r = -ENOMEM;
+ hdr = vmalloc(parms.length);
+ if (!hdr)
+ break;
+
+ r = -EFAULT;
+ if (!copy_from_user(hdr, (void __user *)parms.origin,
+ parms.length))
+ r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+ &cmd->rc, &cmd->rrc);
+
+ vfree(hdr);
+ break;
+ }
+ case KVM_PV_UNPACK: {
+ struct kvm_s390_pv_unp unp = {};
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&unp, argp, sizeof(unp)))
+ break;
+
+ r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_VERIFY: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+ cmd->rrc);
+ break;
+ }
+ case KVM_PV_PREP_RESET: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ case KVM_PV_UNSHARE_ALL: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2255,6 +2455,33 @@ long kvm_arch_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->slots_lock);
break;
}
+ case KVM_S390_PV_COMMAND: {
+ struct kvm_pv_cmd args;
+
+ /* protvirt means user sigp */
+ kvm->arch.user_cpu_state_ctrl = 1;
+ r = 0;
+ if (!is_prot_virt_host()) {
+ r = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ if (args.flags) {
+ r = -EINVAL;
+ break;
+ }
+ mutex_lock(&kvm->lock);
+ r = kvm_s390_handle_pv(kvm, &args);
+ mutex_unlock(&kvm->lock);
+ if (copy_to_user(argp, &args, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
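A minimal userspace sketch of driving the new KVM_S390_PV_COMMAND VM ioctl, assuming the kvm_pv_cmd layout and the KVM_PV_* command constants added by the uapi part of this series (not shown in this hunk), plus a vm_fd obtained via KVM_CREATE_VM:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical VMM helper: convert an existing VM into a protected (PV) VM. */
static int enable_protvirt(int vm_fd)
{
	struct kvm_pv_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));		/* flags must be 0, or KVM returns -EINVAL */
	cmd.cmd = KVM_PV_ENABLE;		/* creates the UV config and converts all vcpus */
	if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd) < 0)
		return -1;			/* cmd.rc/cmd.rrc carry the UV return/reason codes */
	return 0;
}

Note that issuing KVM_S390_PV_COMMAND also switches the VM to user-controlled cpu state ("protvirt means user sigp"), so the VMM has to drive SIGP start/stop itself.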
@@ -2504,7 +2731,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.use_skf = sclp.has_skey;
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
- kvm_s390_gisa_init(kvm);
+ if (use_gisa)
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2518,6 +2746,8 @@ out_err:
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
+ u16 rc, rrc;
+
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
@@ -2530,6 +2760,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
+	/* We cannot hold the vcpu mutex here, we are already dying */
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
free_page((unsigned long)(vcpu->arch.sie_block));
}
@@ -2551,10 +2784,20 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ u16 rc, rrc;
+
kvm_free_vcpus(kvm);
sca_dispose(kvm);
- debug_unregister(kvm->arch.dbf);
kvm_s390_gisa_destroy(kvm);
+ /*
+ * We are already at the end of life and kvm->lock is not taken.
+ * This is ok as the file descriptor is closed by now and nobody
+ * can mess with the pv state. To avoid lockdep_assert_held from
+ * complaining we do not use kvm_s390_pv_is_protected.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2650,6 +2893,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
unsigned int vcpu_idx;
u32 scaol, scaoh;
+ if (kvm->arch.use_esca)
+ return 0;
+
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
@@ -2901,6 +3147,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ u16 uvrc, uvrrc;
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
@@ -2968,6 +3215,14 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_crypto_setup(vcpu);
+ mutex_lock(&vcpu->kvm->lock);
+ if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+ rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+ if (rc)
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
+ }
+ mutex_unlock(&vcpu->kvm->lock);
+
return rc;
}
@@ -3277,7 +3532,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0;
- vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
@@ -3295,9 +3549,17 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
vcpu->run->s.regs.pp = 0;
vcpu->run->s.regs.gbea = 1;
vcpu->run->s.regs.fpc = 0;
- vcpu->arch.sie_block->gbea = 1;
- vcpu->arch.sie_block->pp = 0;
- vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ /*
+ * Do not reset these registers in the protected case, as some of
+	 * them are overlaid and they are not accessible in this case
+ * anyway.
+ */
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->todpr = 0;
+ }
}
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
@@ -3487,14 +3749,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
- kvm_s390_vcpu_stop(vcpu);
+ rc = kvm_s390_vcpu_stop(vcpu);
break;
case KVM_MP_STATE_OPERATING:
- kvm_s390_vcpu_start(vcpu);
+ rc = kvm_s390_vcpu_start(vcpu);
break;
case KVM_MP_STATE_LOAD:
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ rc = -ENXIO;
+ break;
+ }
+ rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+ break;
case KVM_MP_STATE_CHECK_STOP:
- /* fall through - CHECK_STOP and LOAD are not supported yet */
+ fallthrough; /* CHECK_STOP and LOAD are not supported yet */
default:
rc = -ENXIO;
}
@@ -3844,9 +4112,11 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return vcpu_post_run_fault_in_sie(vcpu);
}
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
+ struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
/*
* We try to hold kvm->srcu during most of vcpu_run (except when run-
@@ -3868,8 +4138,28 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
guest_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(sie_page->pv_grregs,
+ vcpu->run->s.regs.gprs,
+ sizeof(sie_page->pv_grregs));
+ }
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(vcpu->run->s.regs.gprs,
+ sie_page->pv_grregs,
+ sizeof(sie_page->pv_grregs));
+ /*
+ * We're not allowed to inject interrupts on intercepts
+ * that leave the guest state in an "in-between" state
+ * where the next SIE entry will do a continuation.
+ * Fence interrupts in our "internal" PSW.
+ */
+ if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+ vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ }
+ }
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
@@ -3883,7 +4173,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
@@ -3892,16 +4182,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
- memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
- /* some control register changes require a tlb flush */
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
- vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
@@ -3942,6 +4223,36 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (current->thread.gs_cb) {
+ vcpu->arch.host_gscb = current->thread.gs_cb;
+ save_gs_cb(vcpu->arch.host_gscb);
+ }
+ if (vcpu->arch.gs_enabled) {
+ current->thread.gs_cb = (struct gs_cb *)
+ &vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ }
+ preempt_enable();
+ }
+ /* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ }
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
/* save host (userspace) fprs/vrs */
@@ -3956,23 +4267,47 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
+
+ /* Sync fmt2 only data */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+ sync_regs_fmt2(vcpu, kvm_run);
+ } else {
+ /*
+ * In several places we have to modify our internal view to
+ * not do things that are disallowed by the ultravisor. For
+ * example we must not inject interrupts after specific exits
+ * (e.g. 112 prefix page not secure). We do this by turning
+ * off the machine check, external and I/O interrupt bits
+ * of our PSW copy. To avoid getting validity intercepts, we
+	 * only accept the condition code from userspace.
+ */
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+ vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+ PSW_MASK_CC;
+ }
+
+ kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+ kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+ kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
if (MACHINE_HAS_GS) {
- preempt_disable();
__ctl_set_bit(2, 4);
- if (current->thread.gs_cb) {
- vcpu->arch.host_gscb = current->thread.gs_cb;
- save_gs_cb(vcpu->arch.host_gscb);
- }
- if (vcpu->arch.gs_enabled) {
- current->thread.gs_cb = (struct gs_cb *)
- &vcpu->run->s.regs.gscb;
- restore_gs_cb(current->thread.gs_cb);
- }
+ if (vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+ preempt_disable();
+ current->thread.gs_cb = vcpu->arch.host_gscb;
+ restore_gs_cb(vcpu->arch.host_gscb);
preempt_enable();
+ if (!vcpu->arch.host_gscb)
+ __ctl_clear_bit(2, 4);
+ vcpu->arch.host_gscb = NULL;
}
- /* SIE will load etoken directly from SDNX and therefore kvm_run */
-
- kvm_run->kvm_dirty_regs = 0;
+ /* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3983,13 +4318,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
- kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
- kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
- kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
- kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
/* Save guest register state */
@@ -3998,19 +4329,8 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Restore will be done lazily at return */
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
- if (MACHINE_HAS_GS) {
- __ctl_set_bit(2, 4);
- if (vcpu->arch.gs_enabled)
- save_gs_cb(current->thread.gs_cb);
- preempt_disable();
- current->thread.gs_cb = vcpu->arch.host_gscb;
- restore_gs_cb(vcpu->arch.host_gscb);
- preempt_enable();
- if (!vcpu->arch.host_gscb)
- __ctl_clear_bit(2, 4);
- vcpu->arch.host_gscb = NULL;
- }
- /* SIE will save etoken directly into SDNX and therefore kvm_run */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+ store_regs_fmt2(vcpu, kvm_run);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -4034,6 +4354,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_activate(vcpu);
+ /*
+	 * No need to check the return value of kvm_s390_vcpu_start(): it can only
+	 * fail for protvirt, but protvirt implies user-controlled cpu state, so
+	 * this call is never reached for protected guests.
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
@@ -4171,18 +4495,27 @@ static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
if (!is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the operating state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
for (i = 0; i < online_vcpus; i++) {
if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
started_vcpus++;
@@ -4202,27 +4535,43 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
+ * The real PSW might have changed due to a RESTART interpreted by the
+ * ultravisor. We block all interrupts and let the next sie exit
+ * refresh our view.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ /*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
struct kvm_vcpu *started_vcpu = NULL;
if (is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the stopped state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
@@ -4245,7 +4594,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
}
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
@@ -4272,12 +4621,40 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ int r = 0;
+
+ if (mop->flags || !mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset < mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+ return -E2BIG;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_SIDA_READ:
+ if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), mop->size))
+ r = -EFAULT;
+
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), uaddr, mop->size))
+ r = -EFAULT;
+ break;
+ }
+ return r;
+}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
- int r, srcu_idx;
+ int r = 0;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
| KVM_S390_MEMOP_F_CHECK_ONLY;
@@ -4287,14 +4664,15 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
@@ -4320,12 +4698,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
}
r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
break;
- default:
- r = -EINVAL;
}
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
@@ -4333,6 +4707,31 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ int r, srcu_idx;
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ r = kvm_s390_guest_mem_op(vcpu, mop);
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ case KVM_S390_MEMOP_SIDA_WRITE:
+		/* the vcpu->mutex protects us against the sida going away */
+ r = kvm_s390_guest_sida_op(vcpu, mop);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ return r;
+}
+
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
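A minimal userspace sketch of the new SIDA memop, assuming the KVM_S390_MEMOP_SIDA_READ constant and the sida_offset field that the uapi part of this series adds to struct kvm_s390_mem_op (not shown in this hunk); vcpu_fd is the fd of a protected vcpu:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical VMM helper: read 'len' bytes at 'offset' from the vcpu's SIDA. */
static int read_sida(int vcpu_fd, void *buf, __u32 len, __u32 offset)
{
	struct kvm_s390_mem_op op;

	memset(&op, 0, sizeof(op));		/* flags must be 0 for SIDA operations */
	op.op = KVM_S390_MEMOP_SIDA_READ;
	op.buf = (__u64)(unsigned long)buf;
	op.size = len;				/* must be non-zero */
	op.sida_offset = offset;		/* bounds-checked against sida_size() by KVM */
	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}

For protected vcpus the ordinary LOGICAL_READ/WRITE memops are rejected with -EINVAL, so the SIDA variants are the only memop path in that case.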
@@ -4368,6 +4767,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int idx;
long r;
+ u16 rc, rrc;
vcpu_load(vcpu);
@@ -4389,18 +4789,40 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_S390_CLEAR_RESET:
r = 0;
kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_INITIAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_INITIAL,
+ &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_NORMAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -EINVAL;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ break;
r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
break;
@@ -4463,7 +4885,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
- r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+ r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
@@ -4523,12 +4945,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -4549,12 +4965,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
return -EINVAL;
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -4570,7 +4989,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
old->npages * PAGE_SIZE);
if (rc)
break;
- /* FALLTHROUGH */
+ fallthrough;
case KVM_MR_CREATE:
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6d9448dbd052..79dcd647b378 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -2,7 +2,7 @@
/*
* definition for kvm on s390
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -15,6 +15,7 @@
#include <linux/hrtimer.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/lockdep.h>
#include <asm/facility.h>
#include <asm/processor.h>
#include <asm/sclp.h>
@@ -25,6 +26,17 @@
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
extern debug_info_t *kvm_s390_dbf;
+extern debug_info_t *kvm_s390_dbf_uv;
+
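+/*
+ * KVM_UV_EVENT logs to both the per-VM debug feature and the global "kvm-uv"
+ * debug area registered in kvm_arch_init(), prefixing the latter with the
+ * VM's userspace pid.
+ */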
+#define KVM_UV_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \
+ d_args); \
+ debug_sprintf_event(kvm_s390_dbf_uv, d_loglevel, \
+ "%d: " d_string "\n", (d_kvm)->userspace_pid, \
+ d_args); \
+} while (0)
+
#define KVM_EVENT(d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
@@ -196,6 +208,39 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
return kvm->arch.user_cpu_state_ctrl != 0;
}
+/* implemented in pv.c */
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+
+static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
+{
+ return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pv.handle;
+}
+
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+ return !!kvm_s390_pv_get_handle(kvm);
+}
+
+static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+ lockdep_assert_held(&vcpu->mutex);
+ return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -286,8 +331,8 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ed52ffa8d5d4..69a824f9ef0b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -872,7 +872,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
- if (operand2 & 0xfff)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (operand2 & 0xfff))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
switch (fc) {
@@ -893,8 +893,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
}
-
- rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
+ PAGE_SIZE);
+ rc = 0;
+ } else {
+ rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ }
if (rc) {
rc = kvm_s390_inject_prog_cond(vcpu, rc);
goto out;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 000000000000..63e330109b63
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Protected Virtual Machines
+ *
+ * Copyright IBM Corp. 2019, 2020
+ * Author(s): Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/mman.h>
+#include "kvm-s390.h"
+
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ int cc = 0;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+ cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+ KVM_UV_EVENT(vcpu->kvm, 3,
+ "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+ vcpu->vcpu_id, *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
+ *rc, *rrc);
+ }
+ /* Intended memory leak for something that should never happen. */
+ if (!cc)
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+
+ free_page(sida_origin(vcpu->arch.sie_block));
+ vcpu->arch.sie_block->pv_handle_cpu = 0;
+ vcpu->arch.sie_block->pv_handle_config = 0;
+ memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+ vcpu->arch.sie_block->sdf = 0;
+ /*
+ * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
+ * Use the reset value of gbea to avoid leaking the kernel pointer of
+ * the just freed sida.
+ */
+ vcpu->arch.sie_block->gbea = 1;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+ return cc ? -EIO : 0;
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_csc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CPU,
+ .header.len = sizeof(uvcb),
+ };
+ int cc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ return -EINVAL;
+
+ vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+ get_order(uv_info.guest_cpu_stor_len));
+ if (!vcpu->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /* Input */
+ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
+ uvcb.num = vcpu->arch.sie_block->icpua;
+ uvcb.state_origin = (u64)vcpu->arch.sie_block;
+ uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+
+ /* Alloc Secure Instruction Data Area Designation */
+ vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vcpu->arch.sie_block->sidad) {
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+ return -ENOMEM;
+ }
+
+ cc = uv_call(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(vcpu->kvm, 3,
+ "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+ vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+ uvcb.header.rrc);
+
+ if (cc) {
+ u16 dummy;
+
+ kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
+ return -EIO;
+ }
+
+ /* Output */
+ vcpu->arch.pv.handle = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
+ vcpu->arch.sie_block->sdf = 2;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return 0;
+}
+
+/* only free resources when the destroy was successful */
+static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+ vfree(kvm->arch.pv.stor_var);
+ free_pages(kvm->arch.pv.stor_base,
+ get_order(uv_info.guest_base_stor_len));
+ memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+}
+
+static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+ unsigned long base = uv_info.guest_base_stor_len;
+ unsigned long virt = uv_info.guest_virt_var_stor_len;
+ unsigned long npages = 0, vlen = 0;
+ struct kvm_memory_slot *memslot;
+
+ kvm->arch.pv.stor_var = NULL;
+ kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+ if (!kvm->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /*
+ * Calculate current guest storage for allocation of the
+ * variable storage, which is based on the length in MB.
+ *
+ * Slots are sorted by GFN
+ */
+ mutex_lock(&kvm->slots_lock);
+ memslot = kvm_memslots(kvm)->memslots;
+ npages = memslot->base_gfn + memslot->npages;
+ mutex_unlock(&kvm->slots_lock);
+
+ kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+ /* Allocate variable storage */
+ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
+ vlen += uv_info.guest_virt_base_stor_len;
+ kvm->arch.pv.stor_var = vzalloc(vlen);
+ if (!kvm->arch.pv.stor_var)
+ goto out_err;
+ return 0;
+
+out_err:
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -ENOMEM;
+}
+
+/* this should not fail, but if it does, we must not free the donated memory */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int cc;
+
+ /* make all pages accessible before destroying the guest */
+ s390_reset_acc(kvm->mm);
+
+ cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+ atomic_set(&kvm->mm->context.is_protected, 0);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+ /* Intended memory leak on "impossible" error */
+ if (!cc)
+ kvm_s390_pv_dealloc_vm(kvm);
+ return cc ? -EIO : 0;
+}
+
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_cgc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CONF,
+ .header.len = sizeof(uvcb)
+ };
+ int cc, ret;
+ u16 dummy;
+
+ ret = kvm_s390_pv_alloc_vm(kvm);
+ if (ret)
+ return ret;
+
+ /* Inputs */
+ uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+ uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+ uvcb.guest_asce = kvm->arch.gmap->asce;
+ uvcb.guest_sca = (unsigned long)kvm->arch.sca;
+ uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+ uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+ cc = uv_call(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+ uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+
+ /* Outputs */
+ kvm->arch.pv.handle = uvcb.guest_handle;
+
+ if (cc) {
+ if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+ else
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -EIO;
+ }
+ kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+ atomic_set(&kvm->mm->context.is_protected, 1);
+ return 0;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc)
+{
+ struct uv_cb_ssc uvcb = {
+ .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+ .header.len = sizeof(uvcb),
+ .sec_header_origin = (u64)hdr,
+ .sec_header_len = length,
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ };
+ int cc = uv_call(0, (u64)&uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+ *rc, *rrc);
+ return cc ? -EINVAL : 0;
+}
+
+static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
+ u64 offset, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_unp uvcb = {
+ .header.cmd = UVC_CMD_UNPACK_IMG,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ .gaddr = addr,
+ .tweak[0] = tweak,
+ .tweak[1] = offset,
+ };
+ int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+
+ if (ret && ret != -EAGAIN)
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
+ uvcb.gaddr, *rc, *rrc);
+ return ret;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc)
+{
+ u64 offset = 0;
+ int ret = 0;
+
+ if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
+ return -EINVAL;
+
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+ addr, size);
+
+ while (offset < size) {
+ ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
+ if (ret == -EAGAIN) {
+ cond_resched();
+ if (fatal_signal_pending(current))
+ break;
+ continue;
+ }
+ if (ret)
+ break;
+ addr += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+ if (!ret)
+ KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
+ return ret;
+}
+
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
+{
+ struct uv_cb_cpu_set_state uvcb = {
+ .header.cmd = UVC_CMD_CPU_SET_STATE,
+ .header.len = sizeof(uvcb),
+ .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
+ .state = state,
+ };
+ int cc;
+
+ cc = uv_call(0, (u64)&uvcb);
+ KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
+ vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
+ if (cc)
+ return -EINVAL;
+ return 0;
+}
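kvm_s390_pv_unpack() above feeds the encrypted image to the ultravisor one page at a time and treats -EAGAIN as "retry this page after yielding", while any other error ends the loop. A standalone sketch of that per-page retry pattern follows; unpack_one() here is a hypothetical stub standing in for the gmap_make_secure()/UV call, and the addresses in main() are arbitrary.

#include <errno.h>
#include <sched.h>
#include <stdint.h>

#define PAGE_SZ 4096UL

/* hypothetical stand-in for the per-page UV unpack call */
static int unpack_one(uint64_t addr, uint64_t tweak, uint64_t offset)
{
    (void)addr; (void)tweak; (void)offset;
    return 0;
}

static int unpack_range(uint64_t addr, uint64_t size, uint64_t tweak)
{
    uint64_t offset = 0;
    int ret = 0;

    if (addr % PAGE_SZ || !size || size % PAGE_SZ)
        return -EINVAL;

    while (offset < size) {
        ret = unpack_one(addr, tweak, offset);
        if (ret == -EAGAIN) {   /* transient: yield and retry the same page */
            sched_yield();
            continue;
        }
        if (ret)                /* hard error: stop */
            break;
        addr += PAGE_SZ;        /* success: advance to the next page */
        offset += PAGE_SZ;
    }
    return ret;
}

int main(void)
{
    return unpack_range(0x100000, 16 * PAGE_SZ, 0x42) ? 1 : 0;
}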
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index aeccdb30899a..09bf7f2121ac 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -38,6 +38,7 @@
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
+#include <asm/uv.h>
#include "../kernel/entry.h"
#define __FAIL_ADDR_MASK -4096L
@@ -812,3 +813,80 @@ out_extint:
early_initcall(pfault_irq_init);
#endif /* CONFIG_PFAULT */
+
+#if IS_ENABLED(CONFIG_PGSTE)
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ struct page *page;
+ int rc;
+
+ switch (get_fault_type(regs)) {
+ case USER_FAULT:
+ mm = current->mm;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, addr);
+ if (!vma) {
+ up_read(&mm->mmap_sem);
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ break;
+ }
+ page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
+ if (IS_ERR_OR_NULL(page)) {
+ up_read(&mm->mmap_sem);
+ break;
+ }
+ if (arch_make_page_accessible(page))
+ send_sig(SIGSEGV, current, 0);
+ put_page(page);
+ up_read(&mm->mmap_sem);
+ break;
+ case KERNEL_FAULT:
+ page = phys_to_page(addr);
+ if (unlikely(!try_get_page(page)))
+ break;
+ rc = arch_make_page_accessible(page);
+ put_page(page);
+ if (rc)
+ BUG();
+ break;
+ case VDSO_FAULT:
+ /* fallthrough */
+ case GMAP_FAULT:
+ /* fallthrough */
+ default:
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ }
+}
+NOKPROBE_SYMBOL(do_secure_storage_access);
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+ if (get_fault_type(regs) != GMAP_FAULT) {
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+}
+NOKPROBE_SYMBOL(do_non_secure_storage_access);
+
+#else
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+#endif
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index edcdca97e85e..2fbece47ef6f 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -804,7 +804,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION2:
table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if (level == 3)
@@ -812,7 +812,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION3:
table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if (level == 2)
@@ -820,7 +820,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (level == 1)
@@ -2548,6 +2548,23 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
+int gmap_mark_unmergeable(void)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int ret;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags);
+ if (ret)
+ return ret;
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
@@ -2593,7 +2610,6 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
int s390_enable_skey(void)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
int rc = 0;
down_write(&mm->mmap_sem);
@@ -2601,16 +2617,11 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.uses_skeys = 0;
- rc = -ENOMEM;
- goto out_up;
- }
+ rc = gmap_mark_unmergeable();
+ if (rc) {
+ mm->context.uses_skeys = 0;
+ goto out_up;
}
- mm->def_flags &= ~VM_MERGEABLE;
-
walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
out_up:
@@ -2640,3 +2651,38 @@ void s390_reset_cmma(struct mm_struct *mm)
up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+/*
+ * make inaccessible pages accessible again
+ */
+static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t pte = READ_ONCE(*ptep);
+
+ if (pte_present(pte))
+ WARN_ON_ONCE(uv_convert_from_secure(pte_val(pte) & PAGE_MASK));
+ return 0;
+}
+
+static const struct mm_walk_ops reset_acc_walk_ops = {
+ .pte_entry = __s390_reset_acc,
+};
+
+#include <linux/sched/mm.h>
+void s390_reset_acc(struct mm_struct *mm)
+{
+ /*
+ * we might be called during
+ * reset: we walk the pages and clear
+ * close of all kvm file descriptors: we walk the pages and clear
+ * exit of process on fd closure: vma already gone, do nothing
+ */
+ if (!mmget_not_zero(mm))
+ return;
+ down_read(&mm->mmap_sem);
+ walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+}
+EXPORT_SYMBOL_GPL(s390_reset_acc);
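gmap_mark_unmergeable() above walks every VMA of the host process and applies the KSM "unmergeable" policy, since pages handed to the ultravisor must not be merged or modified behind its back. The userspace-visible face of that policy is madvise(); the small demo below opts an anonymous mapping into KSM merging and then reverts it, roughly what the gmap code does kernel-side for the whole address space (Linux only; the madvise calls simply fail if KSM is not built in).

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(void)
{
    size_t len = 16 * 4096;
    void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* opt the range into KSM merging ... */
    if (madvise(p, len, MADV_MERGEABLE))
        perror("MADV_MERGEABLE");       /* e.g. kernel built without KSM */

    /* ... and take it back out, as the gmap code does for every VMA */
    if (madvise(p, len, MADV_UNMERGEABLE))
        perror("MADV_UNMERGEABLE");

    munmap(p, len);
    return 0;
}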
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 98959e8cd448..42a2d0d3984a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -49,13 +49,16 @@
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
+
/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
-#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
+#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
@@ -182,7 +185,10 @@ enum exit_fastpath_completion {
EXIT_FASTPATH_SKIP_EMUL_INS,
};
-#include <asm/kvm_emulate.h>
+struct x86_emulate_ctxt;
+struct x86_exception;
+enum x86_intercept;
+enum x86_intercept_stage;
#define KVM_NR_MEM_OBJS 40
@@ -297,7 +303,6 @@ union kvm_mmu_extended_role {
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
- unsigned int cr4_la57:1;
unsigned int maxphyaddr:6;
};
};
@@ -382,8 +387,7 @@ struct kvm_mmu_root_info {
* current mmu mode.
*/
struct kvm_mmu {
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
- unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
+ unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
bool prefault);
@@ -678,7 +682,7 @@ struct kvm_vcpu_arch {
/* emulate context */
- struct x86_emulate_ctxt emulate_ctxt;
+ struct x86_emulate_ctxt *emulate_ctxt;
bool emulate_regs_need_sync_to_vcpu;
bool emulate_regs_need_sync_from_vcpu;
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -808,10 +812,6 @@ struct kvm_vcpu_arch {
int pending_ioapic_eoi;
int pending_external_vector;
- /* GPA available */
- bool gpa_available;
- gpa_t gpa_val;
-
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
@@ -890,6 +890,7 @@ enum kvm_irqchip_mode {
#define APICV_INHIBIT_REASON_NESTED 2
#define APICV_INHIBIT_REASON_IRQWIN 3
#define APICV_INHIBIT_REASON_PIT_REINJ 4
+#define APICV_INHIBIT_REASON_X2APIC 5
struct kvm_arch {
unsigned long n_used_mmu_pages;
@@ -920,6 +921,7 @@ struct kvm_arch {
atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock;
struct kvm_apic_map *apic_map;
+ bool apic_map_dirty;
bool apic_access_page_done;
unsigned long apicv_inhibit_reasons;
@@ -1052,19 +1054,14 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
}
struct kvm_x86_ops {
- int (*cpu_has_kvm_support)(void); /* __init */
- int (*disabled_by_bios)(void); /* __init */
int (*hardware_enable)(void);
void (*hardware_disable)(void);
- int (*check_processor_compatibility)(void);/* __init */
- int (*hardware_setup)(void); /* __init */
- void (*hardware_unsetup)(void); /* __exit */
+ void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
bool (*has_emulated_msr)(int index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
- struct kvm *(*vm_alloc)(void);
- void (*vm_free)(struct kvm *);
+ unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
@@ -1090,7 +1087,6 @@ struct kvm_x86_ops {
void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
@@ -1153,13 +1149,8 @@ struct kvm_x86_ops {
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
- int (*get_lpage_level)(void);
- bool (*rdtscp_supported)(void);
- bool (*invpcid_supported)(void);
-
- void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
- void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
+ void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long cr3);
bool (*has_wbinvd_exit)(void);
@@ -1171,16 +1162,12 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
- enum x86_intercept_stage stage);
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion *exit_fastpath);
- bool (*mpx_supported)(void);
- bool (*xsaves_supported)(void);
- bool (*umip_emulated)(void);
- bool (*pt_supported)(void);
- bool (*pku_supported)(void);
- int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+ int (*check_nested_events)(struct kvm_vcpu *vcpu);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1269,6 +1256,15 @@ struct kvm_x86_ops {
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
};
+struct kvm_x86_init_ops {
+ int (*cpu_has_kvm_support)(void);
+ int (*disabled_by_bios)(void);
+ int (*check_processor_compatibility)(void);
+ int (*hardware_setup)(void);
+
+ struct kvm_x86_ops *runtime_ops;
+};
+
struct kvm_arch_async_pf {
u32 token;
gfn_t gfn;
@@ -1276,25 +1272,24 @@ struct kvm_arch_async_pf {
bool direct_map;
};
-extern struct kvm_x86_ops *kvm_x86_ops;
+extern u64 __read_mostly host_efer;
+
+extern struct kvm_x86_ops kvm_x86_ops;
extern struct kmem_cache *x86_fpu_cache;
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
- return kvm_x86_ops->vm_alloc();
-}
-
-static inline void kvm_arch_free_vm(struct kvm *kvm)
-{
- return kvm_x86_ops->vm_free(kvm);
+ return __vmalloc(kvm_x86_ops.vm_size,
+ GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL);
}
+void kvm_arch_free_vm(struct kvm *kvm);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
{
- if (kvm_x86_ops->tlb_remote_flush &&
- !kvm_x86_ops->tlb_remote_flush(kvm))
+ if (kvm_x86_ops.tlb_remote_flush &&
+ !kvm_x86_ops.tlb_remote_flush(kvm))
return 0;
else
return -ENOTSUPP;
@@ -1313,7 +1308,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot,
+ int start_level);
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
@@ -1379,10 +1375,11 @@ extern u64 kvm_mce_cap_supported;
*
* EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
* decode the instruction length. For use *only* by
- * kvm_x86_ops->skip_emulated_instruction() implementations.
+ * kvm_x86_ops.skip_emulated_instruction() implementations.
*
- * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
- * retry native execution under certain conditions.
+ * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
+ * retry native execution under certain conditions.
+ * Can only be set in conjunction with EMULTYPE_PF.
*
* EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
* triggered by KVM's magic "force emulation" prefix,
@@ -1395,13 +1392,18 @@ extern u64 kvm_mce_cap_supported;
* backdoor emulation, which is opt in via module param.
* VMware backdoor emulation handles select instructions
* and reinjects the #GP for all other cases.
+ *
+ * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
+ * case the CR2/GPA value passed on the stack is valid.
*/
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_ALLOW_RETRY (1 << 3)
+#define EMULTYPE_ALLOW_RETRY_PF (1 << 3)
#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
#define EMULTYPE_VMWARE_GP (1 << 5)
+#define EMULTYPE_PF (1 << 6)
+
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
@@ -1414,8 +1416,6 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
-struct x86_emulate_ctxt;
-
int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
@@ -1512,8 +1512,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
-void kvm_enable_tdp(void);
-void kvm_disable_tdp(void);
+void kvm_configure_mmu(bool enable_tdp, int tdp_page_level);
static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception)
@@ -1670,14 +1669,14 @@ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->vcpu_blocking)
- kvm_x86_ops->vcpu_blocking(vcpu);
+ if (kvm_x86_ops.vcpu_blocking)
+ kvm_x86_ops.vcpu_blocking(vcpu);
}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->vcpu_unblocking)
- kvm_x86_ops->vcpu_unblocking(vcpu);
+ if (kvm_x86_ops.vcpu_unblocking)
+ kvm_x86_ops.vcpu_unblocking(vcpu);
}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
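The kvm_host.h changes above turn kvm_x86_ops from a vendor-installed pointer into a plain struct that is filled by copying kvm_x86_init_ops->runtime_ops during hardware setup, with the __init-only hooks split off into kvm_x86_init_ops. A minimal sketch of that "init ops carry a pointer to the runtime ops, which are copied into a global" pattern follows; the struct and function names are illustrative stand-ins, not the real KVM symbols.

#include <stdio.h>
#include <string.h>

struct runtime_ops {
    int (*hardware_enable)(void);
    unsigned int vm_size;
};

struct init_ops {
    int (*hardware_setup)(void);        /* used only once, at module init */
    struct runtime_ops *runtime_ops;
};

static struct runtime_ops ops;          /* global by-value copy, like kvm_x86_ops */

static int vendor_hw_enable(void) { return 0; }
static int vendor_hw_setup(void)  { return 0; }

static struct runtime_ops vendor_runtime = {
    .hardware_enable = vendor_hw_enable,
    .vm_size = 4096,
};

static struct init_ops vendor_init = {
    .hardware_setup = vendor_hw_setup,
    .runtime_ops = &vendor_runtime,
};

static int arch_init(struct init_ops *i)
{
    int r = i->hardware_setup();

    if (r)
        return r;
    memcpy(&ops, i->runtime_ops, sizeof(ops));  /* copy, then call ops.* directly */
    return 0;
}

int main(void)
{
    if (arch_init(&vendor_init))
        return 1;
    printf("vm_size=%u enable=%d\n", ops.vm_size, ops.hardware_enable());
    return 0;
}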
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 172f9749dbb2..87bd6025d91d 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -49,8 +49,7 @@ struct kvm_page_track_notifier_node {
void kvm_page_track_init(struct kvm *kvm);
void kvm_page_track_cleanup(struct kvm *kvm);
-void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont);
+void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
unsigned long npages);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 8521af3fef27..5e090d1f03f8 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -500,6 +500,18 @@ enum vmcs_field {
VMX_EPT_EXECUTABLE_MASK)
#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)
+static inline u8 vmx_eptp_page_walk_level(u64 eptp)
+{
+ u64 encoded_level = eptp & VMX_EPTP_PWL_MASK;
+
+ if (encoded_level == VMX_EPTP_PWL_5)
+ return 5;
+
+ /* @eptp must be pre-validated by the caller. */
+ WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4);
+ return 4;
+}
+
/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
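vmx_eptp_page_walk_level() recovers the number of EPT page-walk levels from the EPTP, where bits 5:3 encode the walk length minus one (3 for a 4-level walk, 4 for a 5-level walk). The userspace check below mirrors that decoding; the mask and encodings are written out locally and assumed to match the kernel's VMX_EPTP_PWL_* definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* assumed to mirror VMX_EPTP_PWL_MASK / _PWL_4 / _PWL_5: EPTP bits 5:3 */
#define EPTP_PWL_MASK   0x38ULL
#define EPTP_PWL_4      0x18ULL         /* (4 - 1) << 3 */
#define EPTP_PWL_5      0x20ULL         /* (5 - 1) << 3 */

static uint8_t eptp_page_walk_level(uint64_t eptp)
{
    uint64_t encoded = eptp & EPTP_PWL_MASK;

    if (encoded == EPTP_PWL_5)
        return 5;
    assert(encoded == EPTP_PWL_4);      /* caller must pre-validate eptp */
    return 4;
}

int main(void)
{
    printf("%d %d\n",
           eptp_page_walk_level(0x1000 | EPTP_PWL_4),
           eptp_page_walk_level(0x2000 | EPTP_PWL_5));
    return 0;
}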
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b1c469446b07..901cd1fdecd9 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -24,6 +24,13 @@
#include "trace.h"
#include "pmu.h"
+/*
+ * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
+ * aligned to sizeof(unsigned long) because it's not accessed via bitops.
+ */
+u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+EXPORT_SYMBOL_GPL(kvm_cpu_caps);
+
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
int feature_bit = 0;
@@ -45,23 +52,6 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}
-bool kvm_mpx_supported(void)
-{
- return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
- && kvm_x86_ops->mpx_supported());
-}
-EXPORT_SYMBOL_GPL(kvm_mpx_supported);
-
-u64 kvm_supported_xcr0(void)
-{
- u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
-
- if (!kvm_mpx_supported())
- xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-
- return xcr0;
-}
-
#define F feature_bit
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
@@ -74,32 +64,24 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
return 0;
/* Update OSXSAVE bit */
- if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) {
- best->ecx &= ~F(OSXSAVE);
- if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
- best->ecx |= F(OSXSAVE);
- }
+ if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1)
+ cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
+ kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
- best->edx &= ~F(APIC);
- if (vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)
- best->edx |= F(APIC);
+ cpuid_entry_change(best, X86_FEATURE_APIC,
+ vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
if (apic) {
- if (best->ecx & F(TSC_DEADLINE_TIMER))
+ if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
else
apic->lapic_timer.timer_mode_mask = 1 << 17;
}
best = kvm_find_cpuid_entry(vcpu, 7, 0);
- if (best) {
- /* Update OSPKE bit */
- if (boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7) {
- best->ecx &= ~F(OSPKE);
- if (kvm_read_cr4_bits(vcpu, X86_CR4_PKE))
- best->ecx |= F(OSPKE);
- }
- }
+ if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
+ cpuid_entry_change(best, X86_FEATURE_OSPKE,
+ kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (!best) {
@@ -107,14 +89,14 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
} else {
vcpu->arch.guest_supported_xcr0 =
- (best->eax | ((u64)best->edx << 32)) &
- kvm_supported_xcr0();
+ (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
vcpu->arch.guest_xstate_size = best->ebx =
xstate_required_size(vcpu->arch.xcr0, false);
}
best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
- if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
+ if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
+ cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
/*
@@ -136,12 +118,10 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
- if (best) {
- if (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT)
- best->ecx |= F(MWAIT);
- else
- best->ecx &= ~F(MWAIT);
- }
+ if (best)
+ cpuid_entry_change(best, X86_FEATURE_MWAIT,
+ vcpu->arch.ia32_misc_enable_msr &
+ MSR_IA32_MISC_ENABLE_MWAIT);
}
/* Update physical-address width */
@@ -154,10 +134,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
static int is_efer_nx(void)
{
- unsigned long long efer = 0;
-
- rdmsrl_safe(MSR_EFER, &efer);
- return efer & EFER_NX;
+ return host_efer & EFER_NX;
}
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
@@ -173,8 +150,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
break;
}
}
- if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
- entry->edx &= ~F(NX);
+ if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
+ cpuid_entry_clear(entry, X86_FEATURE_NX);
printk(KERN_INFO "kvm: guest NX capability removed\n");
}
}
@@ -232,7 +209,7 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_nent = cpuid->nent;
cpuid_fix_nx_cap(vcpu);
kvm_apic_set_version(vcpu);
- kvm_x86_ops->cpuid_update(vcpu);
+ kvm_x86_ops.cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
out:
@@ -255,7 +232,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
goto out;
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
- kvm_x86_ops->cpuid_update(vcpu);
+ kvm_x86_ops.cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
out:
return r;
@@ -281,15 +258,189 @@ out:
return r;
}
-static __always_inline void cpuid_mask(u32 *word, int wordnum)
+static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
{
- reverse_cpuid_check(wordnum);
- *word &= boot_cpu_data.x86_capability[wordnum];
+ const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32);
+ struct kvm_cpuid_entry2 entry;
+
+ reverse_cpuid_check(leaf);
+ kvm_cpu_caps[leaf] &= mask;
+
+ cpuid_count(cpuid.function, cpuid.index,
+ &entry.eax, &entry.ebx, &entry.ecx, &entry.edx);
+
+ kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg);
+}
+
+void kvm_set_cpu_caps(void)
+{
+ unsigned int f_nx = is_efer_nx() ? F(NX) : 0;
+#ifdef CONFIG_X86_64
+ unsigned int f_gbpages = F(GBPAGES);
+ unsigned int f_lm = F(LM);
+#else
+ unsigned int f_gbpages = 0;
+ unsigned int f_lm = 0;
+#endif
+
+ BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
+ sizeof(boot_cpu_data.x86_capability));
+
+ memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
+ sizeof(kvm_cpu_caps));
+
+ kvm_cpu_cap_mask(CPUID_1_ECX,
+ /*
+ * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not*
+ * advertised to guests via CPUID!
+ */
+ F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
+ 0 /* DS-CPL, VMX, SMX, EST */ |
+ 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
+ F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
+ F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
+ F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
+ 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
+ F(F16C) | F(RDRAND)
+ );
+ /* KVM emulates x2apic in software irrespective of host support. */
+ kvm_cpu_cap_set(X86_FEATURE_X2APIC);
+
+ kvm_cpu_cap_mask(CPUID_1_EDX,
+ F(FPU) | F(VME) | F(DE) | F(PSE) |
+ F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+ F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
+ F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+ F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
+ 0 /* Reserved, DS, ACPI */ | F(MMX) |
+ F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
+ 0 /* HTT, TM, Reserved, PBE */
+ );
+
+ kvm_cpu_cap_mask(CPUID_7_0_EBX,
+ F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+ F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
+ F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+ F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+ F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
+ );
+
+ kvm_cpu_cap_mask(CPUID_7_ECX,
+ F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
+ F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+ F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
+ );
+ /* Set LA57 based on hardware capability. */
+ if (cpuid_ecx(7) & F(LA57))
+ kvm_cpu_cap_set(X86_FEATURE_LA57);
+
+ kvm_cpu_cap_mask(CPUID_7_EDX,
+ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+ F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
+ F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM)
+ );
+
+ /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
+ kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST);
+ kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES);
+
+ if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
+ kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL);
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
+ if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
+
+ kvm_cpu_cap_mask(CPUID_7_1_EAX,
+ F(AVX512_BF16)
+ );
+
+ kvm_cpu_cap_mask(CPUID_D_1_EAX,
+ F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
+ );
+
+ kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
+ F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
+ F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
+ F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
+ 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
+ F(TOPOEXT) | F(PERFCTR_CORE)
+ );
+
+ kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
+ F(FPU) | F(VME) | F(DE) | F(PSE) |
+ F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+ F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
+ F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+ F(PAT) | F(PSE36) | 0 /* Reserved */ |
+ f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+ F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
+ 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
+ );
+
+ if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64))
+ kvm_cpu_cap_set(X86_FEATURE_GBPAGES);
+
+ kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
+ F(CLZERO) | F(XSAVEERPTR) |
+ F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
+ F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON)
+ );
+
+ /*
+ * AMD has separate bits for each SPEC_CTRL bit.
+ * arch/x86/kernel/cpu/bugs.c is kind enough to
+ * record that in cpufeatures so use them.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB);
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ kvm_cpu_cap_set(X86_FEATURE_AMD_IBRS);
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ kvm_cpu_cap_set(X86_FEATURE_AMD_STIBP);
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+ kvm_cpu_cap_set(X86_FEATURE_AMD_SSBD);
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ kvm_cpu_cap_set(X86_FEATURE_AMD_SSB_NO);
+ /*
+ * The preference is to use SPEC CTRL MSR instead of the
+ * VIRT_SPEC MSR.
+ */
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+ !boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+ /*
+ * Hide all SVM features by default, SVM will set the cap bits for
+ * features it emulates and/or exposes for L1.
+ */
+ kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0);
+
+ kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
+ F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+ F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
+ F(PMM) | F(PMM_EN)
+ );
}
+EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
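kvm_cpu_cap_mask() starts from the host's boot_cpu_data capabilities, ANDs in KVM's per-leaf allow-list, and finally ANDs in what cpuid_count() reports on the running machine. The same "raw CPUID masked by a software policy" idea can be exercised from userspace with the compiler's <cpuid.h> helper; the allow-list below is an arbitrary example, not KVM's.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    /* arbitrary policy: only ever report SSE3 (ECX bit 0) and AES (ECX bit 25) */
    unsigned int ecx_allowed = (1u << 0) | (1u << 25);

    if (!__get_cpuid_count(1, 0, &eax, &ebx, &ecx, &edx)) {
        fprintf(stderr, "CPUID leaf 1 not supported\n");
        return 1;
    }

    /* host capability ANDed with the software allow-list */
    printf("raw ecx=%#x, masked ecx=%#x\n", ecx, ecx & ecx_allowed);
    return 0;
}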
-static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index)
+struct kvm_cpuid_array {
+ struct kvm_cpuid_entry2 *entries;
+ const int maxnent;
+ int nent;
+};
+
+static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
+ u32 function, u32 index)
{
+ struct kvm_cpuid_entry2 *entry;
+
+ if (array->nent >= array->maxnent)
+ return NULL;
+
+ entry = &array->entries[array->nent++];
+
entry->function = function;
entry->index = index;
entry->flags = 0;
@@ -298,9 +449,6 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
switch (function) {
- case 2:
- entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
- break;
case 4:
case 7:
case 0xb:
@@ -316,11 +464,18 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
break;
}
+
+ return entry;
}
-static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
- u32 func, int *nent, int maxnent)
+static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
{
+ struct kvm_cpuid_entry2 *entry;
+
+ if (array->nent >= array->maxnent)
+ return -E2BIG;
+
+ entry = &array->entries[array->nent];
entry->function = func;
entry->index = 0;
entry->flags = 0;
@@ -328,17 +483,17 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
switch (func) {
case 0:
entry->eax = 7;
- ++*nent;
+ ++array->nent;
break;
case 1:
entry->ecx = F(MOVBE);
- ++*nent;
+ ++array->nent;
break;
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
entry->eax = 0;
entry->ecx = F(RDPID);
- ++*nent;
+ ++array->nent;
default:
break;
}
@@ -346,223 +501,60 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
return 0;
}
-static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
+static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
{
- unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
- unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
- unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
- unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
- unsigned f_la57;
- unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0;
-
- /* cpuid 7.0.ebx */
- const u32 kvm_cpuid_7_0_ebx_x86_features =
- F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
- F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
- F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
-
- /* cpuid 7.0.ecx*/
- const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
- F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
- F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
- F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
-
- /* cpuid 7.0.edx*/
- const u32 kvm_cpuid_7_0_edx_x86_features =
- F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
- F(MD_CLEAR);
-
- /* cpuid 7.1.eax */
- const u32 kvm_cpuid_7_1_eax_x86_features =
- F(AVX512_BF16);
-
- switch (index) {
- case 0:
- entry->eax = min(entry->eax, 1u);
- entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
- cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
- /* TSC_ADJUST is emulated */
- entry->ebx |= F(TSC_ADJUST);
-
- entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
- f_la57 = entry->ecx & F(LA57);
- cpuid_mask(&entry->ecx, CPUID_7_ECX);
- /* Set LA57 based on hardware capability. */
- entry->ecx |= f_la57;
- entry->ecx |= f_umip;
- entry->ecx |= f_pku;
- /* PKU is not yet implemented for shadow paging. */
- if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
- entry->ecx &= ~F(PKU);
-
- entry->edx &= kvm_cpuid_7_0_edx_x86_features;
- cpuid_mask(&entry->edx, CPUID_7_EDX);
- if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
- entry->edx |= F(SPEC_CTRL);
- if (boot_cpu_has(X86_FEATURE_STIBP))
- entry->edx |= F(INTEL_STIBP);
- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- boot_cpu_has(X86_FEATURE_AMD_SSBD))
- entry->edx |= F(SPEC_CTRL_SSBD);
- /*
- * We emulate ARCH_CAPABILITIES in software even
- * if the host doesn't support it.
- */
- entry->edx |= F(ARCH_CAPABILITIES);
- break;
- case 1:
- entry->eax &= kvm_cpuid_7_1_eax_x86_features;
- entry->ebx = 0;
- entry->ecx = 0;
- entry->edx = 0;
- break;
- default:
- WARN_ON_ONCE(1);
- entry->eax = 0;
- entry->ebx = 0;
- entry->ecx = 0;
- entry->edx = 0;
- break;
- }
-}
-
-static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
- int *nent, int maxnent)
-{
- int r;
- unsigned f_nx = is_efer_nx() ? F(NX) : 0;
-#ifdef CONFIG_X86_64
- unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
- ? F(GBPAGES) : 0;
- unsigned f_lm = F(LM);
-#else
- unsigned f_gbpages = 0;
- unsigned f_lm = 0;
-#endif
- unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
- unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
- unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
-
- /* cpuid 1.edx */
- const u32 kvm_cpuid_1_edx_x86_features =
- F(FPU) | F(VME) | F(DE) | F(PSE) |
- F(TSC) | F(MSR) | F(PAE) | F(MCE) |
- F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
- F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
- F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
- 0 /* Reserved, DS, ACPI */ | F(MMX) |
- F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
- 0 /* HTT, TM, Reserved, PBE */;
- /* cpuid 0x80000001.edx */
- const u32 kvm_cpuid_8000_0001_edx_x86_features =
- F(FPU) | F(VME) | F(DE) | F(PSE) |
- F(TSC) | F(MSR) | F(PAE) | F(MCE) |
- F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
- F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
- F(PAT) | F(PSE36) | 0 /* Reserved */ |
- f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
- F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
- 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
- /* cpuid 1.ecx */
- const u32 kvm_cpuid_1_ecx_x86_features =
- /* NOTE: MONITOR (and MWAIT) are emulated as NOP,
- * but *not* advertised to guests via CPUID ! */
- F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
- 0 /* DS-CPL, VMX, SMX, EST */ |
- 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
- F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
- F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
- F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
- 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
- F(F16C) | F(RDRAND);
- /* cpuid 0x80000001.ecx */
- const u32 kvm_cpuid_8000_0001_ecx_x86_features =
- F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
- F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
- F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
- 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
- F(TOPOEXT) | F(PERFCTR_CORE);
-
- /* cpuid 0x80000008.ebx */
- const u32 kvm_cpuid_8000_0008_ebx_x86_features =
- F(CLZERO) | F(XSAVEERPTR) |
- F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
- F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
-
- /* cpuid 0xC0000001.edx */
- const u32 kvm_cpuid_C000_0001_edx_x86_features =
- F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
- F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
- F(PMM) | F(PMM_EN);
-
- /* cpuid 0xD.1.eax */
- const u32 kvm_cpuid_D_1_eax_x86_features =
- F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
+ struct kvm_cpuid_entry2 *entry;
+ int r, i, max_idx;
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
r = -E2BIG;
- if (WARN_ON(*nent >= maxnent))
+ entry = do_host_cpuid(array, function, 0);
+ if (!entry)
goto out;
- do_host_cpuid(entry, function, 0);
- ++*nent;
-
switch (function) {
case 0:
/* Limited to the highest leaf implemented in KVM. */
entry->eax = min(entry->eax, 0x1fU);
break;
case 1:
- entry->edx &= kvm_cpuid_1_edx_x86_features;
- cpuid_mask(&entry->edx, CPUID_1_EDX);
- entry->ecx &= kvm_cpuid_1_ecx_x86_features;
- cpuid_mask(&entry->ecx, CPUID_1_ECX);
- /* we support x2apic emulation even if host does not support
- * it since we emulate x2apic in software */
- entry->ecx |= F(X2APIC);
+ cpuid_entry_override(entry, CPUID_1_EDX);
+ cpuid_entry_override(entry, CPUID_1_ECX);
break;
- /* function 2 entries are STATEFUL. That is, repeated cpuid commands
- * may return different values. This forces us to get_cpu() before
- * issuing the first command, and also to emulate this annoying behavior
- * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
- case 2: {
- int t, times = entry->eax & 0xff;
-
- entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
- for (t = 1; t < times; ++t) {
- if (*nent >= maxnent)
- goto out;
-
- do_host_cpuid(&entry[t], function, 0);
- ++*nent;
- }
+ case 2:
+ /*
+ * On ancient CPUs, function 2 entries are STATEFUL. That is,
+ * CPUID(function=2, index=0) may return different results each
+ * time, with the least-significant byte in EAX enumerating the
+ * number of times software should do CPUID(2, 0).
+ *
+ * Modern CPUs, i.e. every CPU KVM has *ever* run on are less
+ * idiotic. Intel's SDM states that EAX & 0xff "will always
+ * return 01H. Software should ignore this value and not
+ * interpret it as an informational descriptor", while AMD's
+ * APM states that CPUID(2) is reserved.
+ *
+ * WARN if a frankenstein CPU that supports virtualization and
+ * a stateful CPUID.0x2 is encountered.
+ */
+ WARN_ON_ONCE((entry->eax & 0xff) > 1);
break;
- }
/* functions 4 and 0x8000001d have additional index. */
case 4:
- case 0x8000001d: {
- int i, cache_type;
-
- /* read more entries until cache_type is zero */
- for (i = 1; ; ++i) {
- if (*nent >= maxnent)
+ case 0x8000001d:
+ /*
+ * Read entries until the cache type in the previous entry is
+ * zero, i.e. indicates an invalid entry.
+ */
+ for (i = 1; entry->eax & 0x1f; ++i) {
+ entry = do_host_cpuid(array, function, i);
+ if (!entry)
goto out;
-
- cache_type = entry[i - 1].eax & 0x1f;
- if (!cache_type)
- break;
- do_host_cpuid(&entry[i], function, i);
- ++*nent;
}
break;
- }
case 6: /* Thermal management */
entry->eax = 0x4; /* allow ARAT */
entry->ebx = 0;
@@ -570,22 +562,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = 0;
break;
/* function 7 has additional index. */
- case 7: {
- int i;
-
- for (i = 0; ; ) {
- do_cpuid_7_mask(&entry[i], i);
- if (i == entry->eax)
- break;
- if (*nent >= maxnent)
+ case 7:
+ entry->eax = min(entry->eax, 1u);
+ cpuid_entry_override(entry, CPUID_7_0_EBX);
+ cpuid_entry_override(entry, CPUID_7_ECX);
+ cpuid_entry_override(entry, CPUID_7_EDX);
+
+ /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
+ if (entry->eax == 1) {
+ entry = do_host_cpuid(array, function, 1);
+ if (!entry)
goto out;
- ++i;
- do_host_cpuid(&entry[i], function, i);
- ++*nent;
+ cpuid_entry_override(entry, CPUID_7_1_EAX);
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
}
break;
- }
case 9:
break;
case 0xa: { /* Architectural Performance Monitoring */
@@ -622,79 +616,81 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
* thus they can be handled by common code.
*/
case 0x1f:
- case 0xb: {
- int i;
-
+ case 0xb:
/*
- * We filled in entry[0] for CPUID(EAX=<function>,
- * ECX=00H) above. If its level type (ECX[15:8]) is
- * zero, then the leaf is unimplemented, and we're
- * done. Otherwise, continue to populate entries
- * until the level type (ECX[15:8]) of the previously
- * added entry is zero.
+ * Populate entries until the level type (ECX[15:8]) of the
+ * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is
+ * the starting entry, filled by the primary do_host_cpuid().
*/
- for (i = 1; entry[i - 1].ecx & 0xff00; ++i) {
- if (*nent >= maxnent)
+ for (i = 1; entry->ecx & 0xff00; ++i) {
+ entry = do_host_cpuid(array, function, i);
+ if (!entry)
goto out;
-
- do_host_cpuid(&entry[i], function, i);
- ++*nent;
}
break;
- }
- case 0xd: {
- int idx, i;
- u64 supported = kvm_supported_xcr0();
-
- entry->eax &= supported;
- entry->ebx = xstate_required_size(supported, false);
+ case 0xd:
+ entry->eax &= supported_xcr0;
+ entry->ebx = xstate_required_size(supported_xcr0, false);
entry->ecx = entry->ebx;
- entry->edx &= supported >> 32;
- if (!supported)
+ entry->edx &= supported_xcr0 >> 32;
+ if (!supported_xcr0)
break;
- for (idx = 1, i = 1; idx < 64; ++idx) {
- u64 mask = ((u64)1 << idx);
- if (*nent >= maxnent)
+ entry = do_host_cpuid(array, function, 1);
+ if (!entry)
+ goto out;
+
+ cpuid_entry_override(entry, CPUID_D_1_EAX);
+ if (entry->eax & (F(XSAVES)|F(XSAVEC)))
+ entry->ebx = xstate_required_size(supported_xcr0 | supported_xss,
+ true);
+ else {
+ WARN_ON_ONCE(supported_xss != 0);
+ entry->ebx = 0;
+ }
+ entry->ecx &= supported_xss;
+ entry->edx &= supported_xss >> 32;
+
+ for (i = 2; i < 64; ++i) {
+ bool s_state;
+ if (supported_xcr0 & BIT_ULL(i))
+ s_state = false;
+ else if (supported_xss & BIT_ULL(i))
+ s_state = true;
+ else
+ continue;
+
+ entry = do_host_cpuid(array, function, i);
+ if (!entry)
goto out;
- do_host_cpuid(&entry[i], function, idx);
- if (idx == 1) {
- entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
- cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
- entry[i].ebx = 0;
- if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
- entry[i].ebx =
- xstate_required_size(supported,
- true);
- } else {
- if (entry[i].eax == 0 || !(supported & mask))
- continue;
- if (WARN_ON_ONCE(entry[i].ecx & 1))
- continue;
+ /*
+ * The supported check above should have filtered out
+ * invalid sub-leafs. Only valid sub-leafs should
+ * reach this point, and they should have a non-zero
+ * save state size. Furthermore, check whether the
+ * processor agrees with supported_xcr0/supported_xss
+ * on whether this is an XCR0- or IA32_XSS-managed area.
+ */
+ if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) {
+ --array->nent;
+ continue;
}
- entry[i].ecx = 0;
- entry[i].edx = 0;
- ++*nent;
- ++i;
+ entry->edx = 0;
}
break;
- }
/* Intel PT */
- case 0x14: {
- int t, times = entry->eax;
-
- if (!f_intel_pt)
+ case 0x14:
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
+ }
- for (t = 1; t <= times; ++t) {
- if (*nent >= maxnent)
+ for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
+ if (!do_host_cpuid(array, function, i))
goto out;
- do_host_cpuid(&entry[t], function, t);
- ++*nent;
}
break;
- }
case KVM_CPUID_SIGNATURE: {
static const char signature[12] = "KVMKVMKVM\0\0";
const u32 *sigptr = (const u32 *)signature;
@@ -729,10 +725,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
entry->eax = min(entry->eax, 0x8000001f);
break;
case 0x80000001:
- entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
- cpuid_mask(&entry->edx, CPUID_8000_0001_EDX);
- entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
- cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX);
+ cpuid_entry_override(entry, CPUID_8000_0001_EDX);
+ cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
case 0x80000007: /* Advanced power management */
/* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -750,33 +744,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
- entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
- cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
- /*
- * AMD has separate bits for each SPEC_CTRL bit.
- * arch/x86/kernel/cpu/bugs.c is kind enough to
- * record that in cpufeatures so use them.
- */
- if (boot_cpu_has(X86_FEATURE_IBPB))
- entry->ebx |= F(AMD_IBPB);
- if (boot_cpu_has(X86_FEATURE_IBRS))
- entry->ebx |= F(AMD_IBRS);
- if (boot_cpu_has(X86_FEATURE_STIBP))
- entry->ebx |= F(AMD_STIBP);
- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- boot_cpu_has(X86_FEATURE_AMD_SSBD))
- entry->ebx |= F(AMD_SSBD);
- if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
- entry->ebx |= F(AMD_SSB_NO);
- /*
- * The preference is to use SPEC CTRL MSR instead of the
- * VIRT_SPEC MSR.
- */
- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
- !boot_cpu_has(X86_FEATURE_AMD_SSBD))
- entry->ebx |= F(VIRT_SSBD);
+ cpuid_entry_override(entry, CPUID_8000_0008_EBX);
break;
}
+ case 0x8000000A:
+ if (!kvm_cpu_cap_has(X86_FEATURE_SVM)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ break;
+ }
+ entry->eax = 1; /* SVM revision 1 */
+ entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
+ ASID emulation to nested SVM */
+ entry->ecx = 0; /* Reserved */
+ cpuid_entry_override(entry, CPUID_8000_000A_EDX);
+ break;
case 0x80000019:
entry->ecx = entry->edx = 0;
break;
@@ -794,8 +775,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
entry->eax = min(entry->eax, 0xC0000004);
break;
case 0xC0000001:
- entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
- cpuid_mask(&entry->edx, CPUID_C000_0001_EDX);
+ cpuid_entry_override(entry, CPUID_C000_0001_EDX);
break;
case 3: /* Processor serial number */
case 5: /* MONITOR/MWAIT */
@@ -807,8 +787,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
break;
}
- kvm_x86_ops->set_supported_cpuid(function, entry);
-
r = 0;
out:
@@ -817,26 +795,39 @@ out:
return r;
}
-static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
- int *nent, int maxnent, unsigned int type)
+static int do_cpuid_func(struct kvm_cpuid_array *array, u32 func,
+ unsigned int type)
{
- if (*nent >= maxnent)
- return -E2BIG;
-
if (type == KVM_GET_EMULATED_CPUID)
- return __do_cpuid_func_emulated(entry, func, nent, maxnent);
+ return __do_cpuid_func_emulated(array, func);
- return __do_cpuid_func(entry, func, nent, maxnent);
+ return __do_cpuid_func(array, func);
}
-struct kvm_cpuid_param {
- u32 func;
- bool (*qualifier)(const struct kvm_cpuid_param *param);
-};
+#define CENTAUR_CPUID_SIGNATURE 0xC0000000
-static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
+static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func,
+ unsigned int type)
{
- return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
+ u32 limit;
+ int r;
+
+ if (func == CENTAUR_CPUID_SIGNATURE &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
+ return 0;
+
+ r = do_cpuid_func(array, func, type);
+ if (r)
+ return r;
+
+ limit = array->entries[array->nent - 1].eax;
+ for (func = func + 1; func <= limit; ++func) {
+ r = do_cpuid_func(array, func, type);
+ if (r)
+ break;
+ }
+
+ return r;
}
static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
@@ -870,157 +861,145 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries,
unsigned int type)
{
- struct kvm_cpuid_entry2 *cpuid_entries;
- int limit, nent = 0, r = -E2BIG, i;
- u32 func;
- static const struct kvm_cpuid_param param[] = {
- { .func = 0 },
- { .func = 0x80000000 },
- { .func = 0xC0000000, .qualifier = is_centaur_cpu },
- { .func = KVM_CPUID_SIGNATURE },
+ static const u32 funcs[] = {
+ 0, 0x80000000, CENTAUR_CPUID_SIGNATURE, KVM_CPUID_SIGNATURE,
+ };
+
+ struct kvm_cpuid_array array = {
+ .nent = 0,
+ .maxnent = cpuid->nent,
};
+ int r, i;
if (cpuid->nent < 1)
- goto out;
+ return -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
cpuid->nent = KVM_MAX_CPUID_ENTRIES;
if (sanity_check_entries(entries, cpuid->nent, type))
return -EINVAL;
- r = -ENOMEM;
- cpuid_entries = vzalloc(array_size(sizeof(struct kvm_cpuid_entry2),
+ array.entries = vzalloc(array_size(sizeof(struct kvm_cpuid_entry2),
cpuid->nent));
- if (!cpuid_entries)
- goto out;
-
- r = 0;
- for (i = 0; i < ARRAY_SIZE(param); i++) {
- const struct kvm_cpuid_param *ent = &param[i];
-
- if (ent->qualifier && !ent->qualifier(ent))
- continue;
-
- r = do_cpuid_func(&cpuid_entries[nent], ent->func,
- &nent, cpuid->nent, type);
-
- if (r)
- goto out_free;
-
- limit = cpuid_entries[nent - 1].eax;
- for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
- r = do_cpuid_func(&cpuid_entries[nent], func,
- &nent, cpuid->nent, type);
+ if (!array.entries)
+ return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(funcs); i++) {
+ r = get_cpuid_func(&array, funcs[i], type);
if (r)
goto out_free;
}
+ cpuid->nent = array.nent;
- r = -EFAULT;
- if (copy_to_user(entries, cpuid_entries,
- nent * sizeof(struct kvm_cpuid_entry2)))
- goto out_free;
- cpuid->nent = nent;
- r = 0;
+ if (copy_to_user(entries, array.entries,
+ array.nent * sizeof(struct kvm_cpuid_entry2)))
+ r = -EFAULT;
out_free:
- vfree(cpuid_entries);
-out:
+ vfree(array.entries);
return r;
}
-static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
-{
- struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
- struct kvm_cpuid_entry2 *ej;
- int j = i;
- int nent = vcpu->arch.cpuid_nent;
-
- e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
- /* when no next entry is found, the current entry[i] is reselected */
- do {
- j = (j + 1) % nent;
- ej = &vcpu->arch.cpuid_entries[j];
- } while (ej->function != e->function);
-
- ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-
- return j;
-}
-
-/* find an entry with matching function, matching index (if needed), and that
- * should be read next (if it's stateful) */
-static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
- u32 function, u32 index)
-{
- if (e->function != function)
- return 0;
- if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
- return 0;
- if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
- !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
- return 0;
- return 1;
-}
-
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index)
{
+ struct kvm_cpuid_entry2 *e;
int i;
- struct kvm_cpuid_entry2 *best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- struct kvm_cpuid_entry2 *e;
-
e = &vcpu->arch.cpuid_entries[i];
- if (is_matching_cpuid_entry(e, function, index)) {
- if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
- move_to_next_stateful_cpuid_entry(vcpu, i);
- best = e;
- break;
- }
+
+ if (e->function == function && (e->index == index ||
+ !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)))
+ return e;
}
- return best;
+ return NULL;
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
/*
- * If the basic or extended CPUID leaf requested is higher than the
- * maximum supported basic or extended leaf, respectively, then it is
- * out of range.
+ * Intel CPUID semantics treats any query for an out-of-range leaf as if the
+ * highest basic leaf (i.e. CPUID.0H:EAX) were requested. AMD CPUID semantics
+ * returns all zeroes for any undefined leaf, whether or not the leaf is in
+ * range. Centaur/VIA follows Intel semantics.
+ *
+ * A leaf is considered out-of-range if its function is higher than the maximum
+ * supported leaf of its associated class or if its associated class does not
+ * exist.
+ *
+ * There are four primary classes to be considered, with their respective
+ * ranges described as "<base> - <top>[,<base2> - <top2>]" inclusive. A primary
+ * class exists if a guest CPUID entry for its <base> leaf exists. For a given
+ * class, CPUID.<base>.EAX contains the max supported leaf for the class.
+ *
+ * - Basic: 0x00000000 - 0x3fffffff, 0x50000000 - 0x7fffffff
+ * - Hypervisor: 0x40000000 - 0x4fffffff
+ * - Extended: 0x80000000 - 0xbfffffff
+ * - Centaur: 0xc0000000 - 0xcfffffff
+ *
+ * The Hypervisor class is further subdivided into sub-classes that each act as
+ * their own independent class associated with a 0x100-leaf range. E.g. if QEMU
+ * is advertising support for both HyperV and KVM, the resulting Hypervisor
+ * CPUID sub-classes are:
+ *
+ * - HyperV: 0x40000000 - 0x400000ff
+ * - KVM: 0x40000100 - 0x400001ff
*/
-static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
+static struct kvm_cpuid_entry2 *
+get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
{
- struct kvm_cpuid_entry2 *max;
+ struct kvm_cpuid_entry2 *basic, *class;
+ u32 function = *fn_ptr;
+
+ basic = kvm_find_cpuid_entry(vcpu, 0, 0);
+ if (!basic)
+ return NULL;
+
+ if (is_guest_vendor_amd(basic->ebx, basic->ecx, basic->edx) ||
+ is_guest_vendor_hygon(basic->ebx, basic->ecx, basic->edx))
+ return NULL;
- max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
- return max && function <= max->eax;
+ if (function >= 0x40000000 && function <= 0x4fffffff)
+ class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00, 0);
+ else if (function >= 0xc0000000)
+ class = kvm_find_cpuid_entry(vcpu, 0xc0000000, 0);
+ else
+ class = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
+
+ if (class && function <= class->eax)
+ return NULL;
+
+ /*
+ * Leaf specific adjustments are also applied when redirecting to the
+ * max basic entry, e.g. if the max basic leaf is 0xb but there is no
+ * entry for CPUID.0xb.index (see below), then the output value for EDX
+ * needs to be pulled from CPUID.0xb.1.
+ */
+ *fn_ptr = basic->eax;
+
+ /*
+ * The class does not exist or the requested function is out of range;
+ * the effective CPUID entry is the max basic leaf. Note, the index of
+ * the originally requested leaf is observed!
+ */
+ return kvm_find_cpuid_entry(vcpu, basic->eax, index);
}
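As an aside on the class layout documented above: the base-leaf selection in get_out_of_range_cpuid_entry() can be exercised with a small stand-alone sketch. The helper below is hypothetical (it is not part of this patch) and only mirrors the range checks, assuming the class boundaries listed in the comment.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: maps a CPUID function to the base leaf of its class. */
static uint32_t cpuid_class_base(uint32_t function)
{
	if (function >= 0x40000000 && function <= 0x4fffffff)
		return function & 0xffffff00;	/* hypervisor sub-class, 0x100 leaves each */
	if (function >= 0xc0000000)
		return 0xc0000000;		/* Centaur class */
	return function & 0x80000000;		/* basic (0x0) or extended (0x80000000) */
}

int main(void)
{
	printf("0x40000123 -> 0x%x\n", cpuid_class_base(0x40000123));	/* 0x40000100 */
	printf("0x8000001f -> 0x%x\n", cpuid_class_base(0x8000001f));	/* 0x80000000 */
	printf("0x0000000d -> 0x%x\n", cpuid_class_base(0x0000000d));	/* 0x0 */
	return 0;
}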
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
- u32 *ecx, u32 *edx, bool check_limit)
+ u32 *ecx, u32 *edx, bool exact_only)
{
- u32 function = *eax, index = *ecx;
+ u32 orig_function = *eax, function = *eax, index = *ecx;
struct kvm_cpuid_entry2 *entry;
- struct kvm_cpuid_entry2 *max;
- bool found;
+ bool exact, used_max_basic = false;
entry = kvm_find_cpuid_entry(vcpu, function, index);
- found = entry;
- /*
- * Intel CPUID semantics treats any query for an out-of-range
- * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were
- * requested. AMD CPUID semantics returns all zeroes for any
- * undefined leaf, whether or not the leaf is in range.
- */
- if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
- !cpuid_function_in_range(vcpu, function)) {
- max = kvm_find_cpuid_entry(vcpu, 0, 0);
- if (max) {
- function = max->eax;
- entry = kvm_find_cpuid_entry(vcpu, function, index);
- }
+ exact = !!entry;
+
+ if (!entry && !exact_only) {
+ entry = get_out_of_range_cpuid_entry(vcpu, &function, index);
+ used_max_basic = !!entry;
}
+
if (entry) {
*eax = entry->eax;
*ebx = entry->ebx;
@@ -1049,8 +1028,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
}
}
}
- trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found);
- return found;
+ trace_kvm_cpuid(orig_function, index, *eax, *ebx, *ecx, *edx, exact,
+ used_max_basic);
+ return exact;
}
EXPORT_SYMBOL_GPL(kvm_cpuid);
@@ -1063,7 +1043,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
eax = kvm_rax_read(vcpu);
ecx = kvm_rcx_read(vcpu);
- kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
+ kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, false);
kvm_rax_write(vcpu, eax);
kvm_rbx_write(vcpu, ebx);
kvm_rcx_write(vcpu, ecx);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 7366c618aa04..63a70f6a3df3 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,10 @@
#include <asm/cpu.h>
#include <asm/processor.h>
+extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+void kvm_set_cpu_caps(void);
+
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
-bool kvm_mpx_supported(void);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
@@ -23,7 +25,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
- u32 *ecx, u32 *edx, bool check_limit);
+ u32 *ecx, u32 *edx, bool exact_only);
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -64,7 +66,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
* and can't be used by KVM to query/control guest capabilities. And obviously
* the leaf being queried must have an entry in the lookup table.
*/
-static __always_inline void reverse_cpuid_check(unsigned x86_leaf)
+static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
@@ -88,24 +90,18 @@ static __always_inline u32 __feature_bit(int x86_feature)
#define feature_bit(name) __feature_bit(X86_FEATURE_##name)
-static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
+static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_feature)
{
- unsigned x86_leaf = x86_feature / 32;
+ unsigned int x86_leaf = x86_feature / 32;
reverse_cpuid_check(x86_leaf);
return reverse_cpuid[x86_leaf];
}
-static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature)
+static __always_inline u32 *__cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry,
+ u32 reg)
{
- struct kvm_cpuid_entry2 *entry;
- const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
-
- entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index);
- if (!entry)
- return NULL;
-
- switch (cpuid.reg) {
+ switch (reg) {
case CPUID_EAX:
return &entry->eax;
case CPUID_EBX:
@@ -120,9 +116,86 @@ static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsi
}
}
-static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature)
+static __always_inline u32 *cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature)
+{
+ const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
+
+ return __cpuid_entry_get_reg(entry, cpuid.reg);
+}
+
+static __always_inline u32 cpuid_entry_get(struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature)
+{
+ u32 *reg = cpuid_entry_get_reg(entry, x86_feature);
+
+ return *reg & __feature_bit(x86_feature);
+}
+
+static __always_inline bool cpuid_entry_has(struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature)
+{
+ return cpuid_entry_get(entry, x86_feature);
+}
+
+static __always_inline void cpuid_entry_clear(struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature)
+{
+ u32 *reg = cpuid_entry_get_reg(entry, x86_feature);
+
+ *reg &= ~__feature_bit(x86_feature);
+}
+
+static __always_inline void cpuid_entry_set(struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature)
+{
+ u32 *reg = cpuid_entry_get_reg(entry, x86_feature);
+
+ *reg |= __feature_bit(x86_feature);
+}
+
+static __always_inline void cpuid_entry_change(struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature,
+ bool set)
+{
+ u32 *reg = cpuid_entry_get_reg(entry, x86_feature);
+
+ /*
+ * Open coded instead of using cpuid_entry_{clear,set}() to coerce the
+ * compiler into using CMOV instead of Jcc when possible.
+ */
+ if (set)
+ *reg |= __feature_bit(x86_feature);
+ else
+ *reg &= ~__feature_bit(x86_feature);
+}
+
+static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry,
+ enum cpuid_leafs leaf)
+{
+ u32 *reg = cpuid_entry_get_reg(entry, leaf * 32);
+
+ BUILD_BUG_ON(leaf >= ARRAY_SIZE(kvm_cpu_caps));
+ *reg = kvm_cpu_caps[leaf];
+}
+
+static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
{
- int *reg;
+ const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index);
+ if (!entry)
+ return NULL;
+
+ return __cpuid_entry_get_reg(entry, cpuid.reg);
+}
+
+static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
+{
+ u32 *reg;
reg = guest_cpuid_get_register(vcpu, x86_feature);
if (!reg)
@@ -131,21 +204,24 @@ static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_
return *reg & __feature_bit(x86_feature);
}
-static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature)
+static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
{
- int *reg;
+ u32 *reg;
reg = guest_cpuid_get_register(vcpu, x86_feature);
if (reg)
*reg &= ~__feature_bit(x86_feature);
}
-static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
+static inline bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 0, 0);
- return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
+ return best &&
+ (is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
+ is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
}
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
@@ -192,4 +268,39 @@ static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
}
+static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
+{
+ unsigned int x86_leaf = x86_feature / 32;
+
+ reverse_cpuid_check(x86_leaf);
+ kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
+}
+
+static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
+{
+ unsigned int x86_leaf = x86_feature / 32;
+
+ reverse_cpuid_check(x86_leaf);
+ kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
+}
+
+static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature)
+{
+ unsigned int x86_leaf = x86_feature / 32;
+
+ reverse_cpuid_check(x86_leaf);
+ return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature);
+}
+
+static __always_inline bool kvm_cpu_cap_has(unsigned int x86_feature)
+{
+ return !!kvm_cpu_cap_get(x86_feature);
+}
+
+static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature)
+{
+ if (boot_cpu_has(x86_feature))
+ kvm_cpu_cap_set(x86_feature);
+}
+
#endif
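For orientation, the kvm_cpu_cap_* helpers above reduce to per-leaf bitmask bookkeeping keyed by a feature number encoded as (leaf * 32 + bit). A minimal user-space sketch of that pattern, with hypothetical leaf and feature numbers rather than the kernel's reverse_cpuid layout:

#include <stdint.h>
#include <stdio.h>

#define NLEAFS 4			/* stand-in for NCAPINTS */
static uint32_t cpu_caps[NLEAFS];	/* stand-in for kvm_cpu_caps[] */

static void cap_set(unsigned int feature)
{
	cpu_caps[feature / 32] |= 1u << (feature % 32);
}

static void cap_clear(unsigned int feature)
{
	cpu_caps[feature / 32] &= ~(1u << (feature % 32));
}

static int cap_has(unsigned int feature)
{
	return !!(cpu_caps[feature / 32] & (1u << (feature % 32)));
}

int main(void)
{
	unsigned int fake_feature = 1 * 32 + 5;	/* leaf 1, bit 5: purely illustrative */

	cap_set(fake_feature);
	printf("has: %d\n", cap_has(fake_feature));	/* 1 */
	cap_clear(fake_feature);
	printf("has: %d\n", cap_has(fake_feature));	/* 0 */
	return 0;
}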
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index bc00642e5d3b..bddaba9c68dd 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -20,7 +20,7 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
-#include <asm/kvm_emulate.h>
+#include "kvm_emulate.h"
#include <linux/stringify.h>
#include <asm/fpu/api.h>
#include <asm/debugreg.h>
@@ -665,6 +665,17 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}
+static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
+{
+ return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
+}
+
+static inline bool emul_is_noncanonical_address(u64 la,
+ struct x86_emulate_ctxt *ctxt)
+{
+ return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
+}
+
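The two helpers added here encode the usual x86 canonical-address rule: bits above the virtual-address width (48, or 57 when CR4.LA57 is set) must be sign-extended copies of the top implemented bit. A stand-alone illustration, with get_canonical() re-implemented locally for the sketch:

#include <stdint.h>
#include <stdio.h>

/* Sign-extend a linear address from vaddr_bits, the canonical form hardware expects. */
static uint64_t get_canonical(uint64_t la, uint8_t vaddr_bits)
{
	return (uint64_t)((int64_t)(la << (64 - vaddr_bits)) >> (64 - vaddr_bits));
}

static int is_noncanonical(uint64_t la, uint8_t vaddr_bits)
{
	return get_canonical(la, vaddr_bits) != la;
}

int main(void)
{
	printf("%d\n", is_noncanonical(0x00007fffffffffffULL, 48));	/* 0: canonical */
	printf("%d\n", is_noncanonical(0x0000800000000000ULL, 48));	/* 1: in the hole */
	printf("%d\n", is_noncanonical(0xffff800000000000ULL, 48));	/* 0: canonical */
	return 0;
}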
/*
* x86 defines three classes of vector instructions: explicitly
* aligned, explicitly unaligned, and the rest, which change behaviour
@@ -2711,10 +2722,8 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
u32 eax, ebx, ecx, edx;
eax = ecx = 0;
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
- return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
- && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
- && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
+ return is_guest_vendor_intel(ebx, ecx, edx);
}
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
@@ -2731,36 +2740,18 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
eax = 0x00000000;
ecx = 0x00000000;
- ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
+ ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
/*
- * Intel ("GenuineIntel")
- * remark: Intel CPUs only support "syscall" in 64bit
- * longmode. Also an 64bit guest with a
- * 32bit compat-app running will #UD !! While this
- * behaviour can be fixed (by emulating) into AMD
- * response - CPUs of AMD can't behave like Intel.
+ * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
+ * 64bit guest with a 32bit compat-app running will #UD !! While this
+ * behaviour can be fixed (by emulating) into AMD response - CPUs of
+ * AMD can't behave like Intel.
*/
- if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
- ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
- edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
+ if (is_guest_vendor_intel(ebx, ecx, edx))
return false;
- /* AMD ("AuthenticAMD") */
- if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
- ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
- edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
- return true;
-
- /* AMD ("AMDisbetter!") */
- if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
- ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
- edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
- return true;
-
- /* Hygon ("HygonGenuine") */
- if (ebx == X86EMUL_CPUID_VENDOR_HygonGenuine_ebx &&
- ecx == X86EMUL_CPUID_VENDOR_HygonGenuine_ecx &&
- edx == X86EMUL_CPUID_VENDOR_HygonGenuine_edx)
+ if (is_guest_vendor_amd(ebx, ecx, edx) ||
+ is_guest_vendor_hygon(ebx, ecx, edx))
return true;
/*
@@ -3980,7 +3971,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
eax = reg_read(ctxt, VCPU_REGS_RAX);
ecx = reg_read(ctxt, VCPU_REGS_RCX);
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
*reg_write(ctxt, VCPU_REGS_RAX) = eax;
*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
@@ -4250,7 +4241,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
eax = 0x80000008;
ecx = 0;
if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
- &edx, false))
+ &edx, true))
maxphyaddr = eax & 0xff;
else
maxphyaddr = 36;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a86fda7a1d03..bcefa9d4e57e 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1022,7 +1022,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return 1;
- kvm_x86_ops->patch_hypercall(vcpu, instructions);
+ kvm_x86_ops.patch_hypercall(vcpu, instructions);
((unsigned char *)instructions)[3] = 0xc3; /* ret */
if (__copy_to_user((void __user *)addr, instructions, 4))
return 1;
@@ -1607,7 +1607,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
* hypercall generates UD from non zero cpl and real mode
* per HYPER-V spec
*/
- if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
+ if (kvm_x86_ops.get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -1800,8 +1800,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
};
int i, nent = ARRAY_SIZE(cpuid_entries);
- if (kvm_x86_ops->nested_get_evmcs_version)
- evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu);
+ if (kvm_x86_ops.nested_get_evmcs_version)
+ evmcs_ver = kvm_x86_ops.nested_get_evmcs_version(vcpu);
/* Skip NESTED_FEATURES if eVMCS is not supported */
if (!evmcs_ver)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b24c606ac04b..febca334c320 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -367,7 +367,7 @@ static void pit_load_count(struct kvm_pit *pit, int channel, u32 val)
{
struct kvm_kpit_state *ps = &pit->pit_state;
- pr_debug("load_count val is %d, channel is %d\n", val, channel);
+ pr_debug("load_count val is %u, channel is %d\n", val, channel);
/*
* The largest possible initial count is 0; this is equivalent
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 58767020de41..62558b9bdda7 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -68,7 +68,7 @@ static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
return 0;
if (!kvm_register_is_available(vcpu, reg))
- kvm_x86_ops->cache_reg(vcpu, reg);
+ kvm_x86_ops.cache_reg(vcpu, reg);
return vcpu->arch.regs[reg];
}
@@ -108,7 +108,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
might_sleep(); /* on svm */
if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
- kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
+ kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_PDPTR);
return vcpu->arch.walk_mmu->pdptrs[index];
}
@@ -117,7 +117,7 @@ static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
if (tmask & vcpu->arch.cr0_guest_owned_bits)
- kvm_x86_ops->decache_cr0_guest_bits(vcpu);
+ kvm_x86_ops.decache_cr0_guest_bits(vcpu);
return vcpu->arch.cr0 & mask;
}
@@ -130,14 +130,14 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
if (tmask & vcpu->arch.cr4_guest_owned_bits)
- kvm_x86_ops->decache_cr4_guest_bits(vcpu);
+ kvm_x86_ops.decache_cr4_guest_bits(vcpu);
return vcpu->arch.cr4 & mask;
}
static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
- kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_CR3);
+ kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR3);
return vcpu->arch.cr3;
}
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index c06e8353efd3..43c93ffa76ed 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -221,7 +221,7 @@ struct x86_emulate_ops {
enum x86_intercept_stage stage);
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
- u32 *ecx, u32 *edx, bool check_limit);
+ u32 *ecx, u32 *edx, bool exact_only);
bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
@@ -301,6 +301,7 @@ struct fastop;
typedef void (*fastop_t)(struct fastop *);
struct x86_emulate_ctxt {
+ void *vcpu;
const struct x86_emulate_ops *ops;
/* Register state before/after emulation. */
@@ -319,6 +320,10 @@ struct x86_emulate_ctxt {
bool have_exception;
struct x86_exception exception;
+ /* GPA available */
+ bool gpa_available;
+ gpa_t gpa_val;
+
/*
* decode cache
*/
@@ -329,9 +334,6 @@ struct x86_emulate_ctxt {
u8 intercept;
u8 op_bytes;
u8 ad_bytes;
- struct operand src;
- struct operand src2;
- struct operand dst;
union {
int (*execute)(struct x86_emulate_ctxt *ctxt);
fastop_t fop;
@@ -359,6 +361,11 @@ struct x86_emulate_ctxt {
u8 seg_override;
u64 d;
unsigned long _eip;
+
+ /* Here begins the usercopy section. */
+ struct operand src;
+ struct operand src2;
+ struct operand dst;
struct operand memop;
unsigned long _regs[NR_VCPU_REGS];
struct operand *memopp;
@@ -388,6 +395,34 @@ struct x86_emulate_ctxt {
#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
+#define X86EMUL_CPUID_VENDOR_CentaurHauls_ebx 0x746e6543
+#define X86EMUL_CPUID_VENDOR_CentaurHauls_ecx 0x736c7561
+#define X86EMUL_CPUID_VENDOR_CentaurHauls_edx 0x48727561
+
+static inline bool is_guest_vendor_intel(u32 ebx, u32 ecx, u32 edx)
+{
+ return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
+}
+
+static inline bool is_guest_vendor_amd(u32 ebx, u32 ecx, u32 edx)
+{
+ return (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) ||
+ (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx);
+}
+
+static inline bool is_guest_vendor_hygon(u32 ebx, u32 ecx, u32 edx)
+{
+ return ebx == X86EMUL_CPUID_VENDOR_HygonGenuine_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_HygonGenuine_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_HygonGenuine_edx;
+}
+
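The X86EMUL_CPUID_VENDOR_CentaurHauls_* constants are the 12-byte vendor string as CPUID reports it, split across EBX, EDX and ECX in that order. A quick stand-alone check of the packing (illustrative only; assumes a little-endian host, as on x86):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Pack four ASCII bytes into a register value the way CPUID reports them. */
static uint32_t pack4(const char *s)
{
	uint32_t v;

	memcpy(&v, s, 4);	/* little-endian host assumed */
	return v;
}

int main(void)
{
	const char *vendor = "CentaurHauls";	/* EBX, EDX, ECX order on real hardware */

	printf("ebx=0x%08x ecx=0x%08x edx=0x%08x\n",
	       pack4(vendor), pack4(vendor + 8), pack4(vendor + 4));
	/* expected: ebx=0x746e6543 ecx=0x736c7561 edx=0x48727561 */
	return 0;
}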
enum x86_intercept_stage {
X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */
X86_ICPT_PRE_EXCEPT,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 7356a56e6282..ca80daf8f878 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -164,14 +164,28 @@ static void kvm_apic_map_free(struct rcu_head *rcu)
kvfree(map);
}
-static void recalculate_apic_map(struct kvm *kvm)
+void kvm_recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
struct kvm_vcpu *vcpu;
int i;
u32 max_id = 255; /* enough space for any xAPIC ID */
+ if (!kvm->arch.apic_map_dirty) {
+ /*
+ * Read kvm->arch.apic_map_dirty before
+ * kvm->arch.apic_map
+ */
+ smp_rmb();
+ return;
+ }
+
mutex_lock(&kvm->arch.apic_map_lock);
+ if (!kvm->arch.apic_map_dirty) {
+ /* Someone else has updated the map. */
+ mutex_unlock(&kvm->arch.apic_map_lock);
+ return;
+ }
kvm_for_each_vcpu(i, vcpu, kvm)
if (kvm_apic_present(vcpu))
@@ -236,6 +250,12 @@ out:
old = rcu_dereference_protected(kvm->arch.apic_map,
lockdep_is_held(&kvm->arch.apic_map_lock));
rcu_assign_pointer(kvm->arch.apic_map, new);
+ /*
+ * Write kvm->arch.apic_map before
+ * clearing kvm->arch.apic_map_dirty
+ */
+ smp_wmb();
+ kvm->arch.apic_map_dirty = false;
mutex_unlock(&kvm->arch.apic_map_lock);
if (old)
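The apic_map_dirty handling above is a dirty-flag publication pattern: the smp_rmb() on the read side pairs with the smp_wmb() issued before the flag is cleared, so a reader that observes the flag as clear also observes the newly published map. A rough user-space analogue using C11 acquire/release atomics; this is illustrative only and not the kernel's barrier API:

#include <stdatomic.h>
#include <stdbool.h>

struct map { int generation; };

static struct map *live_map;
static atomic_bool map_dirty;

/* Writer side: after touching the source data, mark the cached map stale. */
static void mark_dirty(void)
{
	atomic_store_explicit(&map_dirty, true, memory_order_release);
}

/* Reader side: rebuild only when stale; publish the map before clearing the flag. */
static void recalculate(struct map *fresh)
{
	if (!atomic_load_explicit(&map_dirty, memory_order_acquire))
		return;		/* pairs with the release store in mark_dirty() */

	live_map = fresh;	/* the kernel uses rcu_assign_pointer() + smp_wmb() here */
	atomic_store_explicit(&map_dirty, false, memory_order_release);
}

int main(void)
{
	static struct map m = { .generation = 1 };

	mark_dirty();
	recalculate(&m);
	return 0;
}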
@@ -257,20 +277,20 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
else
static_key_slow_inc(&apic_sw_disabled.key);
- recalculate_apic_map(apic->vcpu->kvm);
+ apic->vcpu->kvm->arch.apic_map_dirty = true;
}
}
static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
kvm_lapic_set_reg(apic, APIC_ID, id << 24);
- recalculate_apic_map(apic->vcpu->kvm);
+ apic->vcpu->kvm->arch.apic_map_dirty = true;
}
static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
kvm_lapic_set_reg(apic, APIC_LDR, id);
- recalculate_apic_map(apic->vcpu->kvm);
+ apic->vcpu->kvm->arch.apic_map_dirty = true;
}
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
@@ -286,7 +306,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
kvm_lapic_set_reg(apic, APIC_ID, id);
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
- recalculate_apic_map(apic->vcpu->kvm);
+ apic->vcpu->kvm->arch.apic_map_dirty = true;
}
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
@@ -294,11 +314,6 @@ static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}
-static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
-{
- return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
-}
-
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
@@ -448,7 +463,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
if (unlikely(vcpu->arch.apicv_active)) {
/* need to update RVI */
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
- kvm_x86_ops->hwapic_irr_update(vcpu,
+ kvm_x86_ops.hwapic_irr_update(vcpu,
apic_find_highest_irr(apic));
} else {
apic->irr_pending = false;
@@ -473,7 +488,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
* just set SVI.
*/
if (unlikely(vcpu->arch.apicv_active))
- kvm_x86_ops->hwapic_isr_update(vcpu, vec);
+ kvm_x86_ops.hwapic_isr_update(vcpu, vec);
else {
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -521,7 +536,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
* and must be left alone.
*/
if (unlikely(vcpu->arch.apicv_active))
- kvm_x86_ops->hwapic_isr_update(vcpu,
+ kvm_x86_ops.hwapic_isr_update(vcpu,
apic_find_highest_isr(apic));
else {
--apic->isr_count;
@@ -659,7 +674,7 @@ static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
if (apic->vcpu->arch.apicv_active)
- highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
+ highest_irr = kvm_x86_ops.sync_pir_to_irr(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);
if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
@@ -1048,7 +1063,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
+ if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
@@ -1226,7 +1241,7 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
-static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
+void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
{
struct kvm_lapic_irq irq;
@@ -1737,7 +1752,7 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
{
WARN_ON(preemptible());
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
- kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+ kvm_x86_ops.cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
}
@@ -1748,13 +1763,13 @@ static bool start_hv_timer(struct kvm_lapic *apic)
bool expired;
WARN_ON(preemptible());
- if (!kvm_x86_ops->set_hv_timer)
+ if (!kvm_x86_ops.set_hv_timer)
return false;
if (!ktimer->tscdeadline)
return false;
- if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
+ if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
return false;
ktimer->hv_timer_in_use = true;
@@ -1917,7 +1932,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_DFR:
if (!apic_x2apic_mode(apic)) {
kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
- recalculate_apic_map(apic->vcpu->kvm);
+ apic->vcpu->kvm->arch.apic_map_dirty = true;
} else
ret = 1;
break;
@@ -1946,7 +1961,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_ICR:
/* No delay here, so we always clear the pending bit */
val &= ~(1 << 12);
- apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
+ kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
kvm_lapic_set_reg(apic, APIC_ICR, val);
break;
@@ -2023,6 +2038,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
}
+ kvm_recalculate_apic_map(apic->vcpu->kvm);
+
return ret;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
@@ -2171,7 +2188,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
static_key_slow_dec_deferred(&apic_hw_disabled);
} else {
static_key_slow_inc(&apic_hw_disabled.key);
- recalculate_apic_map(vcpu->kvm);
+ vcpu->kvm->arch.apic_map_dirty = true;
}
}
@@ -2179,7 +2196,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
- kvm_x86_ops->set_virtual_apic_mode(vcpu);
+ kvm_x86_ops.set_virtual_apic_mode(vcpu);
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
@@ -2212,6 +2229,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!apic)
return;
+ vcpu->kvm->arch.apic_map_dirty = false;
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2256,13 +2274,15 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.pv_eoi.msr_val = 0;
apic_update_ppr(apic);
if (vcpu->arch.apicv_active) {
- kvm_x86_ops->apicv_post_state_restore(vcpu);
- kvm_x86_ops->hwapic_irr_update(vcpu, -1);
- kvm_x86_ops->hwapic_isr_update(vcpu, -1);
+ kvm_x86_ops.apicv_post_state_restore(vcpu);
+ kvm_x86_ops.hwapic_irr_update(vcpu, -1);
+ kvm_x86_ops.hwapic_isr_update(vcpu, -1);
}
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
+
+ kvm_recalculate_apic_map(vcpu->kvm);
}
/*
@@ -2484,17 +2504,18 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
struct kvm_lapic *apic = vcpu->arch.apic;
int r;
-
kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
/* set SPIV separately to get count of SW disabled APICs right */
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
r = kvm_apic_state_fixup(vcpu, s, true);
- if (r)
+ if (r) {
+ kvm_recalculate_apic_map(vcpu->kvm);
return r;
+ }
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
- recalculate_apic_map(vcpu->kvm);
+ kvm_recalculate_apic_map(vcpu->kvm);
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
@@ -2506,10 +2527,10 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_apic_update_apicv(vcpu);
apic->highest_isr_cache = -1;
if (vcpu->arch.apicv_active) {
- kvm_x86_ops->apicv_post_state_restore(vcpu);
- kvm_x86_ops->hwapic_irr_update(vcpu,
+ kvm_x86_ops.apicv_post_state_restore(vcpu);
+ kvm_x86_ops.hwapic_irr_update(vcpu,
apic_find_highest_irr(apic));
- kvm_x86_ops->hwapic_isr_update(vcpu,
+ kvm_x86_ops.hwapic_isr_update(vcpu,
apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ec6fbfe325cf..40ed6ed22751 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -78,6 +78,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
+void kvm_recalculate_apic_map(struct kvm *kvm);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val);
int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
@@ -95,6 +96,7 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
+void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a647601c9e1c..8a3b1bce722a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -95,11 +95,11 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
}
-static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
+static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
{
if (VALID_PAGE(vcpu->arch.mmu->root_hpa))
- vcpu->arch.mmu->set_cr3(vcpu, vcpu->arch.mmu->root_hpa |
- kvm_get_active_pcid(vcpu));
+ kvm_x86_ops.load_mmu_pgd(vcpu, vcpu->arch.mmu->root_hpa |
+ kvm_get_active_pcid(vcpu));
}
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
@@ -170,8 +170,8 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
unsigned pte_access, unsigned pte_pkey,
unsigned pfec)
{
- int cpl = kvm_x86_ops->get_cpl(vcpu);
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ int cpl = kvm_x86_ops.get_cpl(vcpu);
+ unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
/*
* If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 87e9ba27ada1..8071952e9cf2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -19,6 +19,7 @@
#include "mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
+#include "kvm_emulate.h"
#include "cpuid.h"
#include <linux/kvm_host.h>
@@ -86,6 +87,8 @@ __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
*/
bool tdp_enabled = false;
+static int max_page_level __read_mostly;
+
enum {
AUDIT_PRE_PAGE_FAULT,
AUDIT_POST_PAGE_FAULT,
@@ -215,17 +218,6 @@ struct kvm_shadow_walk_iterator {
unsigned index;
};
-static const union kvm_mmu_page_role mmu_base_role_mask = {
- .cr0_wp = 1,
- .gpte_is_8_bytes = 1,
- .nxe = 1,
- .smep_andnot_wp = 1,
- .smap_andnot_wp = 1,
- .smm = 1,
- .guest_mode = 1,
- .ad_disabled = 1,
-};
-
#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker) \
for (shadow_walk_init_using_root(&(_walker), (_vcpu), \
(_root), (_addr)); \
@@ -313,7 +305,7 @@ kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
static inline bool kvm_available_flush_tlb_with_range(void)
{
- return kvm_x86_ops->tlb_remote_flush_with_range;
+ return kvm_x86_ops.tlb_remote_flush_with_range;
}
static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
@@ -321,8 +313,8 @@ static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
{
int ret = -ENOTSUPP;
- if (range && kvm_x86_ops->tlb_remote_flush_with_range)
- ret = kvm_x86_ops->tlb_remote_flush_with_range(kvm, range);
+ if (range && kvm_x86_ops.tlb_remote_flush_with_range)
+ ret = kvm_x86_ops.tlb_remote_flush_with_range(kvm, range);
if (ret)
kvm_flush_remote_tlbs(kvm);
@@ -1650,7 +1642,7 @@ static bool spte_set_dirty(u64 *sptep)
rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep);
/*
- * Similar to the !kvm_x86_ops->slot_disable_log_dirty case,
+ * Similar to the !kvm_x86_ops.slot_disable_log_dirty case,
* do not bother adding back write access to pages marked
* SPTE_AD_WRPROT_ONLY_MASK.
*/
@@ -1739,8 +1731,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
- if (kvm_x86_ops->enable_log_dirty_pt_masked)
- kvm_x86_ops->enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
+ if (kvm_x86_ops.enable_log_dirty_pt_masked)
+ kvm_x86_ops.enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
mask);
else
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
@@ -1755,8 +1747,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
*/
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->write_log_dirty)
- return kvm_x86_ops->write_log_dirty(vcpu);
+ if (kvm_x86_ops.write_log_dirty)
+ return kvm_x86_ops.write_log_dirty(vcpu);
return 0;
}
@@ -3044,7 +3036,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (level > PT_PAGE_TABLE_LEVEL)
spte |= PT_PAGE_SIZE_MASK;
if (tdp_enabled)
- spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+ spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
if (host_writable)
@@ -3292,7 +3284,7 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
if (!slot)
return PT_PAGE_TABLE_LEVEL;
- max_level = min(max_level, kvm_x86_ops->get_lpage_level());
+ max_level = min(max_level, max_page_level);
for ( ; max_level > PT_PAGE_TABLE_LEVEL; max_level--) {
linfo = lpage_info_slot(gfn, slot, max_level);
if (!linfo->disallow_lpage)
@@ -3568,8 +3560,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* write-protected for dirty-logging or access tracking.
*/
if ((error_code & PFERR_WRITE_MASK) &&
- spte_can_locklessly_be_made_writable(spte))
- {
+ spte_can_locklessly_be_made_writable(spte)) {
new_spte |= PT_WRITABLE_MASK;
/*
@@ -3731,7 +3722,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
} else
BUG();
- vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
+
+ /* root_cr3 is ignored for direct MMUs. */
+ vcpu->arch.mmu->root_cr3 = 0;
return 0;
}
@@ -3743,7 +3736,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
gfn_t root_gfn, root_cr3;
int i;
- root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
+ root_cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
root_gfn = root_cr3 >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
@@ -4080,7 +4073,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->direct_map;
- arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
+ arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
return kvm_setup_async_pf(vcpu, cr2_or_gpa,
kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
@@ -4252,6 +4245,14 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->nx = false;
}
+static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t cr3,
+ union kvm_mmu_page_role role)
+{
+ return (role.direct || cr3 == root->cr3) &&
+ VALID_PAGE(root->hpa) && page_header(root->hpa) &&
+ role.word == page_header(root->hpa)->role.word;
+}
+
/*
* Find out if a previously cached root matching the new CR3/role is available.
* The current root is also inserted into the cache.
@@ -4270,12 +4271,13 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
root.cr3 = mmu->root_cr3;
root.hpa = mmu->root_hpa;
+ if (is_root_usable(&root, new_cr3, new_role))
+ return true;
+
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
swap(root, mmu->prev_roots[i]);
- if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
- page_header(root.hpa) != NULL &&
- new_role.word == page_header(root.hpa)->role.word)
+ if (is_root_usable(&root, new_cr3, new_role))
break;
}
@@ -4309,7 +4311,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
* accompanied by KVM_REQ_MMU_RELOAD, which will free
* the root set here and allocate a new one.
*/
- kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
+ kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -4508,7 +4510,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
cpuid_maxphyaddr(vcpu), context->root_level,
context->nx,
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
- is_pse(vcpu), guest_cpuid_is_amd(vcpu));
+ is_pse(vcpu),
+ guest_cpuid_is_amd_or_hygon(vcpu));
}
static void
@@ -4874,7 +4877,6 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
ext.cr4_pse = !!is_pse(vcpu);
ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
- ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
ext.maxphyaddr = cpuid_maxphyaddr(vcpu);
ext.valid = 1;
@@ -4907,7 +4909,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
role.base.ad_disabled = (shadow_accessed_mask == 0);
- role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
+ role.base.level = kvm_x86_ops.get_tdp_level(vcpu);
role.base.direct = true;
role.base.gpte_is_8_bytes = true;
@@ -4920,7 +4922,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
union kvm_mmu_role new_role =
kvm_calc_tdp_mmu_root_page_role(vcpu, false);
- new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
@@ -4929,10 +4930,9 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
- context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
+ context->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
context->direct_map = true;
- context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
- context->get_cr3 = get_cr3;
+ context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
@@ -4992,7 +4992,6 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
union kvm_mmu_role new_role =
kvm_calc_shadow_mmu_root_page_role(vcpu, false);
- new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
@@ -5012,14 +5011,14 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
static union kvm_mmu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
- bool execonly)
+ bool execonly, u8 level)
{
union kvm_mmu_role role = {0};
/* SMM flag is inherited from root_mmu */
role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
- role.base.level = PT64_ROOT_4LEVEL;
+ role.base.level = level;
role.base.gpte_is_8_bytes = true;
role.base.direct = false;
role.base.ad_disabled = !accessed_dirty;
@@ -5043,17 +5042,17 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp)
{
struct kvm_mmu *context = vcpu->arch.mmu;
+ u8 level = vmx_eptp_page_walk_level(new_eptp);
union kvm_mmu_role new_role =
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
- execonly);
+ execonly, level);
__kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);
- new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
- context->shadow_root_level = PT64_ROOT_4LEVEL;
+ context->shadow_root_level = level;
context->nx = true;
context->ept_ad = accessed_dirty;
@@ -5062,7 +5061,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->sync_page = ept_sync_page;
context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte;
- context->root_level = PT64_ROOT_4LEVEL;
+ context->root_level = level;
context->direct_map = false;
context->mmu_role.as_u64 = new_role.as_u64;
@@ -5079,8 +5078,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
struct kvm_mmu *context = vcpu->arch.mmu;
kvm_init_shadow_mmu(vcpu);
- context->set_cr3 = kvm_x86_ops->set_cr3;
- context->get_cr3 = get_cr3;
+ context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
}
@@ -5090,12 +5088,11 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
- new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == g_context->mmu_role.as_u64)
return;
g_context->mmu_role.as_u64 = new_role.as_u64;
- g_context->get_cr3 = get_cr3;
+ g_context->get_guest_pgd = get_cr3;
g_context->get_pdptr = kvm_pdptr_read;
g_context->inject_page_fault = kvm_inject_page_fault;
@@ -5185,8 +5182,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
kvm_mmu_sync_roots(vcpu);
if (r)
goto out;
- kvm_mmu_load_cr3(vcpu);
- kvm_x86_ops->tlb_flush(vcpu, true);
+ kvm_mmu_load_pgd(vcpu);
+ kvm_x86_ops.tlb_flush(vcpu, true);
out:
return r;
}
@@ -5329,6 +5326,22 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
return spte;
}
+/*
+ * Ignore various flags when determining if a SPTE can be immediately
+ * overwritten for the current MMU.
+ * - level: explicitly checked in mmu_pte_write_new_pte(), and will never
+ * match the current MMU role, as the MMU's level tracks the root level.
+ * - access: updated based on the new guest PTE
+ * - quadrant: handled by get_written_sptes()
+ * - invalid: always false (loop only walks valid shadow pages)
+ */
+static const union kvm_mmu_page_role role_ign = {
+ .level = 0xf,
+ .access = 0x7,
+ .quadrant = 0x3,
+ .invalid = 0x1,
+};
+
static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes,
struct kvm_page_track_notifier_node *node)
@@ -5384,8 +5397,8 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
entry = *spte;
mmu_page_zap_pte(vcpu->kvm, sp, spte);
if (gentry &&
- !((sp->role.word ^ base_role)
- & mmu_base_role_mask.word) && rmap_can_add(vcpu))
+ !((sp->role.word ^ base_role) & ~role_ign.word) &&
+ rmap_can_add(vcpu))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (need_remote_flush(entry, *spte))
remote_flush = true;
@@ -5416,18 +5429,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len)
{
- int r, emulation_type = 0;
+ int r, emulation_type = EMULTYPE_PF;
bool direct = vcpu->arch.mmu->direct_map;
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
return RET_PF_RETRY;
- /* With shadow page tables, fault_address contains a GVA or nGPA. */
- if (vcpu->arch.mmu->direct_map) {
- vcpu->arch.gpa_available = true;
- vcpu->arch.gpa_val = cr2_or_gpa;
- }
-
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
@@ -5471,7 +5478,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
* for L1 isn't going to magically fix whatever issue cause L2 to fail.
*/
if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
- emulation_type = EMULTYPE_ALLOW_RETRY;
+ emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
emulate:
/*
* On AMD platforms, under certain conditions insn_len may be zero on #NPF.
@@ -5481,7 +5488,7 @@ emulate:
* guest, with the exception of AMD Erratum 1096 which is unrecoverable.
*/
if (unlikely(insn && !insn_len)) {
- if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu))
+ if (!kvm_x86_ops.need_emulation_on_page_fault(vcpu))
return 1;
}
@@ -5516,7 +5523,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
if (VALID_PAGE(mmu->prev_roots[i].hpa))
mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
- kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+ kvm_x86_ops.tlb_flush_gva(vcpu, gva);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@ -5541,7 +5548,7 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
}
if (tlb_flush)
- kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+ kvm_x86_ops.tlb_flush_gva(vcpu, gva);
++vcpu->stat.invlpg;
@@ -5553,18 +5560,25 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
}
EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
-void kvm_enable_tdp(void)
+void kvm_configure_mmu(bool enable_tdp, int tdp_page_level)
{
- tdp_enabled = true;
-}
-EXPORT_SYMBOL_GPL(kvm_enable_tdp);
+ tdp_enabled = enable_tdp;
-void kvm_disable_tdp(void)
-{
- tdp_enabled = false;
+ /*
+ * max_page_level reflects the capabilities of KVM's MMU irrespective
+ * of kernel support, e.g. KVM may be capable of using 1GB pages when
+ * the kernel is not. But, KVM never creates a page size greater than
+ * what is used by the kernel for any given HVA, i.e. the kernel's
+ * capabilities are ultimately consulted by kvm_mmu_hugepage_adjust().
+ */
+ if (tdp_enabled)
+ max_page_level = tdp_page_level;
+ else if (boot_cpu_has(X86_FEATURE_GBPAGES))
+ max_page_level = PT_PDPE_LEVEL;
+ else
+ max_page_level = PT_DIRECTORY_LEVEL;
}
-EXPORT_SYMBOL_GPL(kvm_disable_tdp);
-
+EXPORT_SYMBOL_GPL(kvm_configure_mmu);
/* The return value indicates if tlb flush on all vcpus is needed. */
typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
@@ -5658,7 +5672,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
* SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
* skip allocating the PDP table.
*/
- if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
+ if (tdp_enabled && kvm_x86_ops.get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
return 0;
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
@@ -5860,23 +5874,17 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
}
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+ struct kvm_memory_slot *memslot,
+ int start_level)
{
bool flush;
spin_lock(&kvm->mmu_lock);
- flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
- false);
+ flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+ start_level, PT_MAX_HUGEPAGE_LEVEL, false);
spin_unlock(&kvm->mmu_lock);
/*
- * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log()
- * which do tlb flush out of mmu-lock should be serialized by
- * kvm->slots_lock otherwise tlb flush would be missed.
- */
- lockdep_assert_held(&kvm->slots_lock);
-
- /*
* We can flush all the TLBs out of the mmu lock without TLB
* corruption since we just change the spte from writable to
* readonly so that we only need to care the case of changing
@@ -5888,8 +5896,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
* on PT_WRITABLE_MASK anymore.
*/
if (flush)
- kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
- memslot->npages);
+ kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
}
static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
@@ -5941,6 +5948,21 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
spin_unlock(&kvm->mmu_lock);
}
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ /*
+ * All current use cases for flushing the TLBs for a specific memslot
+ * are related to dirty logging, and do the TLB flush out of mmu_lock.
+ * The interaction between the various operations on the memslot must be
+ * serialized by slots_lock to ensure the TLB flush from one operation
+ * is observed by any other operation on the same memslot.
+ */
+ lockdep_assert_held(&kvm->slots_lock);
+ kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
+ memslot->npages);
+}
+
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
@@ -5950,8 +5972,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
spin_unlock(&kvm->mmu_lock);
- lockdep_assert_held(&kvm->slots_lock);
-
/*
* It's also safe to flush TLBs out of mmu lock here as currently this
* function is only used for dirty logging, in which case flushing TLB
@@ -5959,8 +5979,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
* dirty_bitmap.
*/
if (flush)
- kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
- memslot->npages);
+ kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
}
EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
@@ -5974,12 +5993,8 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
false);
spin_unlock(&kvm->mmu_lock);
- /* see kvm_mmu_slot_remove_write_access */
- lockdep_assert_held(&kvm->slots_lock);
-
if (flush)
- kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
- memslot->npages);
+ kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
}
EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
@@ -5992,12 +6007,8 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
spin_unlock(&kvm->mmu_lock);
- lockdep_assert_held(&kvm->slots_lock);
-
- /* see kvm_mmu_slot_leaf_clear_dirty */
if (flush)
- kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
- memslot->npages);
+ kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
}
EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 3521e2d176f2..ddc1ec3bdacd 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -14,22 +14,18 @@
#include <linux/kvm_host.h>
#include <linux/rculist.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_page_track.h>
#include "mmu.h"
-void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
int i;
- for (i = 0; i < KVM_PAGE_TRACK_MAX; i++)
- if (!dont || free->arch.gfn_track[i] !=
- dont->arch.gfn_track[i]) {
- kvfree(free->arch.gfn_track[i]);
- free->arch.gfn_track[i] = NULL;
- }
+ for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
+ kvfree(slot->arch.gfn_track[i]);
+ slot->arch.gfn_track[i] = NULL;
+ }
}
int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
@@ -48,7 +44,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
return 0;
track_free:
- kvm_page_track_free_memslot(slot, NULL);
+ kvm_page_track_free_memslot(slot);
return -ENOMEM;
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 21a3320f166a..9bdf9b7d9a96 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -66,7 +66,7 @@
#define PT_GUEST_ACCESSED_SHIFT 8
#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
#define CMPXCHG cmpxchg64
- #define PT_MAX_FULL_LEVELS 4
+ #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#else
#error Invalid PTTYPE value
#endif
@@ -333,7 +333,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
trace_kvm_mmu_pagetable_walk(addr, access);
retry_walk:
walker->level = mmu->root_level;
- pte = mmu->get_cr3(vcpu);
+ pte = mmu->get_guest_pgd(vcpu);
have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
#if PTTYPE == 64
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index bcc6a73d6628..a5078841bdac 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -111,7 +111,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.config = config,
};
- attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
+ attr.sample_period = get_sample_period(pmc, pmc->counter);
if (in_tx)
attr.config |= HSW_IN_TX;
@@ -158,7 +158,7 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
/* recalibrate sample period and check if it's accepted by perf core */
if (perf_event_period(pmc->perf_event,
- (-pmc->counter) & pmc_bitmask(pmc)))
+ get_sample_period(pmc, pmc->counter)))
return false;
/* reuse perf_event to serve as pmc_reprogram_counter() does*/
@@ -211,7 +211,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
ARCH_PERFMON_EVENTSEL_CMASK |
HSW_IN_TX |
HSW_IN_TX_CHECKPOINTED))) {
- config = kvm_x86_ops->pmu_ops->find_arch_event(pmc_to_pmu(pmc),
+ config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
event_select,
unit_mask);
if (config != PERF_COUNT_HW_MAX)
@@ -265,7 +265,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
pmc->current_config = (u64)ctrl;
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
- kvm_x86_ops->pmu_ops->find_fixed_event(idx),
+ kvm_x86_ops.pmu_ops->find_fixed_event(idx),
!(en_field & 0x2), /* exclude user */
!(en_field & 0x1), /* exclude kernel */
pmi, false, false);
@@ -274,7 +274,7 @@ EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
- struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);
+ struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);
if (!pmc)
return;
@@ -296,7 +296,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
int bit;
for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
- struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);
+ struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
if (unlikely(!pmc || !pmc->perf_event)) {
clear_bit(bit, pmu->reprogram_pmi);
@@ -318,7 +318,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
/* check if idx is a valid index to access PMU */
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
- return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
+ return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}
bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@ -368,7 +368,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
- pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
+ pmc = kvm_x86_ops.pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
if (!pmc)
return 1;
@@ -384,14 +384,14 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
- return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
- kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
+ return kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
+ kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, msr);
}
static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr);
+ struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr);
if (pmc)
__set_bit(pmc->idx, pmu->pmc_in_use);
@@ -399,13 +399,13 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
- return kvm_x86_ops->pmu_ops->get_msr(vcpu, msr, data);
+ return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr, data);
}
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
- return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info);
+ return kvm_x86_ops.pmu_ops->set_msr(vcpu, msr_info);
}
/* refresh PMU settings. This function generally is called when underlying
@@ -414,7 +414,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->pmu_ops->refresh(vcpu);
+ kvm_x86_ops.pmu_ops->refresh(vcpu);
}
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
@@ -422,7 +422,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
irq_work_sync(&pmu->irq_work);
- kvm_x86_ops->pmu_ops->reset(vcpu);
+ kvm_x86_ops.pmu_ops->reset(vcpu);
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
@@ -430,7 +430,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
memset(pmu, 0, sizeof(*pmu));
- kvm_x86_ops->pmu_ops->init(vcpu);
+ kvm_x86_ops.pmu_ops->init(vcpu);
init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0;
pmu->need_cleanup = false;
@@ -462,7 +462,7 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
pmu->pmc_in_use, X86_PMC_IDX_MAX);
for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
- pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, i);
+ pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);
if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 13332984b6d5..a6c78a797cb1 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -88,7 +88,7 @@ static inline bool pmc_is_fixed(struct kvm_pmc *pmc)
static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
{
- return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc);
+ return kvm_x86_ops.pmu_ops->pmc_is_enabled(pmc);
}
static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
@@ -129,6 +129,15 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
return NULL;
}
+static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
+{
+ u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
+
+ if (!sample_period)
+ sample_period = pmc_bitmask(pmc) + 1;
+ return sample_period;
+}
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
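The get_sample_period() helper added above turns the current counter value into a perf sample period: the two's-complement distance to overflow, masked to the counter width, with a full-width period substituted when that distance would be zero. A standalone sketch of the arithmetic, assuming a hypothetical 48-bit counter purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 48-bit counter width -- an assumption for this example only. */
#define PMC_MASK	((1ULL << 48) - 1)

/* Counts remaining until the counter overflows; never zero. */
static uint64_t sample_period(uint64_t counter)
{
	uint64_t period = (-counter) & PMC_MASK;

	if (!period)			/* counter == 0 would yield 0 */
		period = PMC_MASK + 1;	/* use the full counter range */
	return period;
}

int main(void)
{
	/* 10 counts below overflow -> a period of 10. */
	printf("%llu\n", (unsigned long long)sample_period(PMC_MASK - 9));
	/* counter at 0 -> 2^48 instead of a zero period. */
	printf("%llu\n", (unsigned long long)sample_period(0));
	return 0;
}

This is why both pmc_reprogram_counter() and pmc_resume_counter() in the pmu.c hunks above call get_sample_period() instead of open-coding the mask.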
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 216364cb65a3..851e9cc79930 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -522,10 +522,31 @@ static void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested;
- c->intercept_cr = h->intercept_cr | g->intercept_cr;
- c->intercept_dr = h->intercept_dr | g->intercept_dr;
- c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
- c->intercept = h->intercept | g->intercept;
+ c->intercept_cr = h->intercept_cr;
+ c->intercept_dr = h->intercept_dr;
+ c->intercept_exceptions = h->intercept_exceptions;
+ c->intercept = h->intercept;
+
+ if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
+ /* We only want the cr8 intercept bits of L1 */
+ c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
+ c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);
+
+ /*
+ * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
+ * affect any interrupt we may want to inject; therefore,
+ * interrupt window vmexits are irrelevant to L0.
+ */
+ c->intercept &= ~(1ULL << INTERCEPT_VINTR);
+ }
+
+ /* We don't want to see VMMCALLs from a nested guest */
+ c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);
+
+ c->intercept_cr |= g->intercept_cr;
+ c->intercept_dr |= g->intercept_dr;
+ c->intercept_exceptions |= g->intercept_exceptions;
+ c->intercept |= g->intercept;
}
static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
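Note the shape of the rewritten recalc_intercepts() above: start from L1's (host) intercept bits, strip the CR8 and VINTR intercepts that L0 does not need while L2 runs with a pending virtual interrupt, drop VMMCALL, and only then OR in the nested guest's own intercepts. A toy illustration of that compose-then-merge pattern, using made-up bit names rather than the real VMCB encoding:

#include <stdint.h>
#include <stdio.h>

/* Illustrative bits only -- not the real SVM intercept layout. */
#define ICPT_CR8_READ	(1ULL << 0)
#define ICPT_CR8_WRITE	(1ULL << 1)
#define ICPT_VINTR	(1ULL << 2)
#define ICPT_VMMCALL	(1ULL << 3)
#define ICPT_IOIO	(1ULL << 4)

static uint64_t merge_intercepts(uint64_t host, uint64_t guest, int vintr_pending)
{
	uint64_t c = host;			/* L1's baseline          */

	if (vintr_pending)			/* V_IRQ already set      */
		c &= ~(ICPT_CR8_READ | ICPT_CR8_WRITE | ICPT_VINTR);

	c &= ~ICPT_VMMCALL;			/* drop L0-only VMMCALL   */

	return c | guest;			/* add L2's own intercepts */
}

int main(void)
{
	uint64_t host = ICPT_CR8_READ | ICPT_VINTR | ICPT_VMMCALL | ICPT_IOIO;

	printf("%#llx\n", (unsigned long long)
	       merge_intercepts(host, ICPT_VMMCALL, 1));	/* 0x18 */
	return 0;
}

The ordering matters: the guest's bits are OR'd in last, so clearing a bit from the host set can never silence an intercept the nested guest explicitly asked for.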
@@ -630,6 +651,11 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
+static inline bool is_intercept(struct vcpu_svm *svm, int bit)
+{
+ return (svm->vmcb->control.intercept & (1ULL << bit)) != 0;
+}
+
static inline bool vgif_enabled(struct vcpu_svm *svm)
{
return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
@@ -1209,6 +1235,7 @@ static int avic_ga_log_notifier(u32 ga_tag)
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
+ trace_kvm_avic_ga_log(vm_id, vcpu_id);
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
@@ -1370,6 +1397,29 @@ static void svm_hardware_teardown(void)
iopm_base = 0;
}
+static __init void svm_set_cpu_caps(void)
+{
+ kvm_set_cpu_caps();
+
+ supported_xss = 0;
+
+ /* CPUID 0x80000001 and 0x8000000A (SVM features) */
+ if (nested) {
+ kvm_cpu_cap_set(X86_FEATURE_SVM);
+
+ if (nrips)
+ kvm_cpu_cap_set(X86_FEATURE_NRIPS);
+
+ if (npt_enabled)
+ kvm_cpu_cap_set(X86_FEATURE_NPT);
+ }
+
+ /* CPUID 0x80000008 */
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
+ boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1388,6 +1438,8 @@ static __init int svm_hardware_setup(void)
init_msrpm_offsets();
+ supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
@@ -1435,16 +1487,11 @@ static __init int svm_hardware_setup(void)
if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;
- if (npt_enabled && !npt) {
- printk(KERN_INFO "kvm: Nested Paging disabled\n");
+ if (npt_enabled && !npt)
npt_enabled = false;
- }
- if (npt_enabled) {
- printk(KERN_INFO "kvm: Nested Paging enabled\n");
- kvm_enable_tdp();
- } else
- kvm_disable_tdp();
+ kvm_configure_mmu(npt_enabled, PT_PDPE_LEVEL);
+ pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
if (nrips) {
if (!boot_cpu_has(X86_FEATURE_NRIPS))
@@ -1480,6 +1527,8 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n");
}
+ svm_set_cpu_caps();
+
return 0;
err:
@@ -1939,19 +1988,6 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
kfree(region);
}
-static struct kvm *svm_vm_alloc(void)
-{
- struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm),
- GFP_KERNEL_ACCOUNT | __GFP_ZERO,
- PAGE_KERNEL);
- return &kvm_svm->kvm;
-}
-
-static void svm_vm_free(struct kvm *kvm)
-{
- vfree(to_kvm_svm(kvm));
-}
-
static void sev_vm_destroy(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -2186,7 +2222,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
}
init_vmcb(svm);
- kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
+ kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
kvm_rdx_write(vcpu, eax);
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
@@ -2420,14 +2456,38 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
+static inline void svm_enable_vintr(struct vcpu_svm *svm)
+{
+ struct vmcb_control_area *control;
+
+ /* The following fields are ignored when AVIC is enabled */
+ WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+
+ /*
+ * This is just a dummy VINTR to actually cause a vmexit to happen.
+ * Actual injection of virtual interrupts happens through EVENTINJ.
+ */
+ control = &svm->vmcb->control;
+ control->int_vector = 0x0;
+ control->int_ctl &= ~V_INTR_PRIO_MASK;
+ control->int_ctl |= V_IRQ_MASK |
+ ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
+ mark_dirty(svm->vmcb, VMCB_INTR);
+}
+
static void svm_set_vintr(struct vcpu_svm *svm)
{
set_intercept(svm, INTERCEPT_VINTR);
+ if (is_intercept(svm, INTERCEPT_VINTR))
+ svm_enable_vintr(svm);
}
static void svm_clear_vintr(struct vcpu_svm *svm)
{
clr_intercept(svm, INTERCEPT_VINTR);
+
+ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+ mark_dirty(svm->vmcb, VMCB_INTR);
}
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
@@ -2983,15 +3043,6 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
return pdpte;
}
-static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
- unsigned long root)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- svm->vmcb->control.nested_cr3 = __sme_set(root);
- mark_dirty(svm->vmcb, VMCB_NPT);
-}
-
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
@@ -3027,8 +3078,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
kvm_init_shadow_mmu(vcpu);
- vcpu->arch.mmu->set_cr3 = nested_svm_set_tdp_cr3;
- vcpu->arch.mmu->get_cr3 = nested_svm_get_tdp_cr3;
+ vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
@@ -3089,43 +3139,36 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
return vmexit;
}
-/* This function returns true if it is save to enable the irq window */
-static inline bool nested_svm_intr(struct vcpu_svm *svm)
+static void nested_svm_intr(struct vcpu_svm *svm)
{
- if (!is_guest_mode(&svm->vcpu))
- return true;
-
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- return true;
-
- if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
- return false;
-
- /*
- * if vmexit was already requested (by intercepted exception
- * for instance) do not overwrite it with "external interrupt"
- * vmexit.
- */
- if (svm->nested.exit_required)
- return false;
-
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
- if (svm->nested.intercept & 1ULL) {
- /*
- * The #vmexit can't be emulated here directly because this
- * code path runs with irqs and preemption disabled. A
- * #vmexit emulation might sleep. Only signal request for
- * the #vmexit here.
- */
- svm->nested.exit_required = true;
- trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
- return false;
+	/* nested_svm_vmexit() is called afterwards from handle_exit */
+ svm->nested.exit_required = true;
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+}
+
+static bool nested_exit_on_intr(struct vcpu_svm *svm)
+{
+ return (svm->nested.intercept & 1ULL);
+}
+
+static int svm_check_nested_events(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ bool block_nested_events =
+ kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;
+
+ if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_svm_intr(svm);
+ return 0;
}
- return true;
+ return 0;
}
/* This function returns true if it is safe to enable the nmi window */
@@ -3244,9 +3287,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
return NESTED_EXIT_CONTINUE;
}
-/*
- * If this function returns true, this #vmexit was already handled
- */
static int nested_svm_intercept(struct vcpu_svm *svm)
{
u32 exit_code = svm->vmcb->control.exit_code;
@@ -3521,6 +3561,9 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
static bool nested_vmcb_checks(struct vmcb *vmcb)
{
+ if ((vmcb->save.efer & EFER_SVME) == 0)
+ return false;
+
if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
return false;
@@ -3537,6 +3580,10 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
+ bool evaluate_pending_interrupts =
+ is_intercept(svm, INTERCEPT_VINTR) ||
+ is_intercept(svm, INTERCEPT_IRET);
+
if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
svm->vcpu.arch.hflags |= HF_HIF_MASK;
else
@@ -3596,15 +3643,6 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
- if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
- /* We only want the cr8 intercept bits of the guest */
- clr_cr_intercept(svm, INTERCEPT_CR8_READ);
- clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
- }
-
- /* We don't want to see VMMCALLs from a nested guest */
- clr_intercept(svm, INTERCEPT_VMMCALL);
-
svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
@@ -3632,7 +3670,21 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->nested.vmcb = vmcb_gpa;
+ /*
+ * If L1 had a pending IRQ/NMI before executing VMRUN,
+ * which wasn't delivered because it was disallowed (e.g.
+ * interrupts disabled), L0 needs to evaluate if this pending
+ * event should cause an exit from L2 to L1 or be delivered
+ * directly to L2.
+ *
+ * Usually this would be handled by the processor noticing an
+ * IRQ/NMI window request. However, VMRUN can unblock interrupts
+ * by implicitly setting GIF, so force L0 to perform pending event
+ * evaluation by requesting a KVM_REQ_EVENT.
+ */
enable_gif(svm);
+ if (unlikely(evaluate_pending_interrupts))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
mark_all_dirty(svm->vmcb);
}
@@ -3834,11 +3886,8 @@ static int clgi_interception(struct vcpu_svm *svm)
disable_gif(svm);
/* After a CLGI no interrupts should come */
- if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
+ if (!kvm_vcpu_apicv_active(&svm->vcpu))
svm_clear_vintr(svm);
- svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
- mark_dirty(svm->vmcb, VMCB_INTR);
- }
return ret;
}
@@ -5124,19 +5173,6 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
++vcpu->stat.nmi_injections;
}
-static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
-{
- struct vmcb_control_area *control;
-
- /* The following fields are ignored when AVIC is enabled */
- control = &svm->vmcb->control;
- control->int_vector = irq;
- control->int_ctl &= ~V_INTR_PRIO_MASK;
- control->int_ctl |= V_IRQ_MASK |
- ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
- mark_dirty(svm->vmcb, VMCB_INTR);
-}
-
static void svm_set_irq(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5525,18 +5561,15 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- int ret;
if (!gif_set(svm) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
return 0;
- ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
-
- if (is_guest_mode(vcpu))
- return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
-
- return ret;
+ if (is_guest_mode(vcpu) && (svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ return !!(svm->vcpu.arch.hflags & HF_HIF_MASK);
+ else
+ return !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -5551,7 +5584,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
* enabled, the STGI interception will not occur. Enable the irq
* window under the assumption that the hardware will set the GIF.
*/
- if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
+ if (vgif_enabled(svm) || gif_set(svm)) {
/*
* IRQ window is not needed when AVIC is enabled,
* unless we have pending ExtINT since it cannot be injected
@@ -5560,7 +5593,6 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
*/
svm_toggle_avic_for_irq_window(vcpu, false);
svm_set_vintr(svm);
- svm_inject_irq(svm, 0x0);
}
}
@@ -5946,24 +5978,30 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
}
STACK_FRAME_NON_STANDARD(svm_vcpu_run);
-static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
+static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ bool update_guest_cr3 = true;
+ unsigned long cr3;
- svm->vmcb->save.cr3 = __sme_set(root);
- mark_dirty(svm->vmcb, VMCB_CR);
-}
-
-static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
+ cr3 = __sme_set(root);
+ if (npt_enabled) {
+ svm->vmcb->control.nested_cr3 = cr3;
+ mark_dirty(svm->vmcb, VMCB_NPT);
- svm->vmcb->control.nested_cr3 = __sme_set(root);
- mark_dirty(svm->vmcb, VMCB_NPT);
+ /* Loading L2's CR3 is handled by enter_svm_guest_mode. */
+ if (is_guest_mode(vcpu))
+ update_guest_cr3 = false;
+ else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+ cr3 = vcpu->arch.cr3;
+ else /* CR3 is already up-to-date. */
+ update_guest_cr3 = false;
+ }
- /* Also sync guest cr3 here in case we live migrate */
- svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
- mark_dirty(svm->vmcb, VMCB_CR);
+ if (update_guest_cr3) {
+ svm->vmcb->save.cr3 = cr3;
+ mark_dirty(svm->vmcb, VMCB_CR);
+ }
}
static int is_disabled(void)
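The svm_load_mmu_pgd() hunk above folds the former svm_set_cr3()/set_tdp_cr3() pair into one hook: with NPT enabled it always programs nested_cr3, and it only rewrites the guest-visible save.cr3 when the vCPU is not in guest mode and a cached CR3 value is actually available. A condensed sketch of that decision flow; the struct, field, and helper names are placeholders, not the real VMCB layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_vmcb {
	uint64_t nested_cr3;	/* NPT root seen by hardware */
	uint64_t save_cr3;	/* guest-visible CR3         */
};

static void load_mmu_pgd(struct fake_vmcb *vmcb, uint64_t root,
			 bool npt, bool in_guest_mode,
			 bool cr3_available, uint64_t guest_cr3)
{
	bool update_guest_cr3 = true;
	uint64_t cr3 = root;

	if (npt) {
		vmcb->nested_cr3 = cr3;		/* always program the NPT root */

		if (in_guest_mode)		/* L2 CR3 handled at VMRUN     */
			update_guest_cr3 = false;
		else if (cr3_available)		/* sync the cached guest CR3   */
			cr3 = guest_cr3;
		else				/* save.cr3 already current    */
			update_guest_cr3 = false;
	}

	if (update_guest_cr3)
		vmcb->save_cr3 = cr3;
}

int main(void)
{
	struct fake_vmcb vmcb = { 0 };

	load_mmu_pgd(&vmcb, 0x1000, true, false, true, 0x2000);
	printf("npt=%#llx cr3=%#llx\n",
	       (unsigned long long)vmcb.nested_cr3,
	       (unsigned long long)vmcb.save_cr3);
	return 0;
}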
@@ -6025,12 +6063,19 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
boot_cpu_has(X86_FEATURE_XSAVES);
/* Update nrips enabled cache */
- svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
+ svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
+ guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
if (!kvm_vcpu_apicv_active(vcpu))
return;
- guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
+ /*
+ * AVIC does not work with an x2APIC mode guest. If the X2APIC feature
+ * is exposed to the guest, disable AVIC.
+ */
+ if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
+ kvm_request_apicv_update(vcpu->kvm, false,
+ APICV_INHIBIT_REASON_X2APIC);
/*
* Currently, AVIC does not work with nested virtualization.
@@ -6041,88 +6086,11 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
APICV_INHIBIT_REASON_NESTED);
}
-#define F feature_bit
-
-static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
-{
- switch (func) {
- case 0x1:
- if (avic)
- entry->ecx &= ~F(X2APIC);
- break;
- case 0x80000001:
- if (nested)
- entry->ecx |= (1 << 2); /* Set SVM bit */
- break;
- case 0x80000008:
- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
- boot_cpu_has(X86_FEATURE_AMD_SSBD))
- entry->ebx |= F(VIRT_SSBD);
- break;
- case 0x8000000A:
- entry->eax = 1; /* SVM revision 1 */
- entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
- ASID emulation to nested SVM */
- entry->ecx = 0; /* Reserved */
- entry->edx = 0; /* Per default do not support any
- additional features */
-
- /* Support next_rip if host supports it */
- if (boot_cpu_has(X86_FEATURE_NRIPS))
- entry->edx |= F(NRIPS);
-
- /* Support NPT for the guest if enabled */
- if (npt_enabled)
- entry->edx |= F(NPT);
-
- }
-}
-
-static int svm_get_lpage_level(void)
-{
- return PT_PDPE_LEVEL;
-}
-
-static bool svm_rdtscp_supported(void)
-{
- return boot_cpu_has(X86_FEATURE_RDTSCP);
-}
-
-static bool svm_invpcid_supported(void)
-{
- return false;
-}
-
-static bool svm_mpx_supported(void)
-{
- return false;
-}
-
-static bool svm_xsaves_supported(void)
-{
- return boot_cpu_has(X86_FEATURE_XSAVES);
-}
-
-static bool svm_umip_emulated(void)
-{
- return false;
-}
-
-static bool svm_pt_supported(void)
-{
- return false;
-}
-
static bool svm_has_wbinvd_exit(void)
{
return true;
}
-static bool svm_pku_supported(void)
-{
- return false;
-}
-
#define PRE_EX(exit) { .exit_code = (exit), \
.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
@@ -6189,7 +6157,8 @@ static const struct __x86_intercept {
static int svm_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
- enum x86_intercept_stage stage)
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception)
{
struct vcpu_svm *svm = to_svm(vcpu);
int vmexit, ret = X86EMUL_CONTINUE;
@@ -7371,7 +7340,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
* TODO: Last condition latch INIT signals on vCPU when
* vCPU is in guest-mode and vmcb12 defines intercept on INIT.
* To properly emulate the INIT intercept, SVM should implement
- * kvm_x86_ops->check_nested_events() and call nested_svm_vmexit()
+ * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit()
* there if an INIT signal is pending.
*/
return !gif_set(svm) ||
@@ -7384,7 +7353,8 @@ static bool svm_check_apicv_inhibit_reasons(ulong bit)
BIT(APICV_INHIBIT_REASON_HYPERV) |
BIT(APICV_INHIBIT_REASON_NESTED) |
BIT(APICV_INHIBIT_REASON_IRQWIN) |
- BIT(APICV_INHIBIT_REASON_PIT_REINJ);
+ BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
+ BIT(APICV_INHIBIT_REASON_X2APIC);
return supported & BIT(bit);
}
@@ -7394,12 +7364,8 @@ static void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
avic_update_access_page(kvm, activate);
}
-static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
- .cpu_has_kvm_support = has_svm,
- .disabled_by_bios = is_disabled,
- .hardware_setup = svm_hardware_setup,
+static struct kvm_x86_ops svm_x86_ops __initdata = {
.hardware_unsetup = svm_hardware_teardown,
- .check_processor_compatibility = svm_check_processor_compat,
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
@@ -7409,8 +7375,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_free = svm_free_vcpu,
.vcpu_reset = svm_vcpu_reset,
- .vm_alloc = svm_vm_alloc,
- .vm_free = svm_vm_free,
+ .vm_size = sizeof(struct kvm_svm),
.vm_init = svm_vm_init,
.vm_destroy = svm_vm_destroy,
@@ -7432,7 +7397,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
- .set_cr3 = svm_set_cr3,
.set_cr4 = svm_set_cr4,
.set_efer = svm_set_efer,
.get_idt = svm_get_idt,
@@ -7485,26 +7449,14 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.get_exit_info = svm_get_exit_info,
- .get_lpage_level = svm_get_lpage_level,
-
.cpuid_update = svm_cpuid_update,
- .rdtscp_supported = svm_rdtscp_supported,
- .invpcid_supported = svm_invpcid_supported,
- .mpx_supported = svm_mpx_supported,
- .xsaves_supported = svm_xsaves_supported,
- .umip_emulated = svm_umip_emulated,
- .pt_supported = svm_pt_supported,
- .pku_supported = svm_pku_supported,
-
- .set_supported_cpuid = svm_set_supported_cpuid,
-
.has_wbinvd_exit = svm_has_wbinvd_exit,
.read_l1_tsc_offset = svm_read_l1_tsc_offset,
.write_l1_tsc_offset = svm_write_l1_tsc_offset,
- .set_tdp_cr3 = set_tdp_cr3,
+ .load_mmu_pgd = svm_load_mmu_pgd,
.check_intercept = svm_check_intercept,
.handle_exit_irqoff = svm_handle_exit_irqoff,
@@ -7534,11 +7486,22 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+ .check_nested_events = svm_check_nested_events,
+};
+
+static struct kvm_x86_init_ops svm_init_ops __initdata = {
+ .cpu_has_kvm_support = has_svm,
+ .disabled_by_bios = is_disabled,
+ .hardware_setup = svm_hardware_setup,
+ .check_processor_compatibility = svm_check_processor_compat,
+
+ .runtime_ops = &svm_x86_ops,
};
static int __init svm_init(void)
{
- return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
+ return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm),
__alignof__(struct vcpu_svm), THIS_MODULE);
}
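Taken together, the svm_x86_ops/svm_init_ops changes above (and the matching '->' to '.' conversions in pmu.c and trace.h) reflect the ops table becoming a by-value structure that common code fills in from a vendor-provided, setup-only descriptor. A rough, hedged sketch of that split in plain C; every name below is invented for illustration and is not the kernel's:

#include <stdio.h>

struct runtime_ops {			/* lives for the module's lifetime */
	void (*hardware_enable)(void);
	void (*vcpu_run)(int vcpu_id);
};

struct init_ops {			/* setup-time only (__initdata)    */
	int (*hardware_setup)(void);
	const struct runtime_ops *runtime_ops;
};

static struct runtime_ops core_ops;	/* analogue of the copied ops table */

static int core_init(const struct init_ops *ops)
{
	int r = ops->hardware_setup();

	if (r)
		return r;
	core_ops = *ops->runtime_ops;	/* copy by value, drop the init table */
	return 0;
}

/* Vendor side. */
static void fake_hw_enable(void)	{ puts("hw enable"); }
static void fake_run(int id)		{ printf("run vcpu %d\n", id); }
static int  fake_setup(void)		{ return 0; }

static const struct runtime_ops fake_runtime = {
	.hardware_enable	= fake_hw_enable,
	.vcpu_run		= fake_run,
};

static const struct init_ops fake_init = {
	.hardware_setup	= fake_setup,
	.runtime_ops	= &fake_runtime,
};

int main(void)
{
	if (!core_init(&fake_init)) {
		core_ops.hardware_enable();	/* callers use '.', not '->' */
		core_ops.vcpu_run(0);
	}
	return 0;
}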
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index cef5a344fedb..249062f24b94 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -151,32 +151,38 @@ TRACE_EVENT(kvm_fast_mmio,
* Tracepoint for cpuid.
*/
TRACE_EVENT(kvm_cpuid,
- TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
- unsigned long rcx, unsigned long rdx, bool found),
- TP_ARGS(function, rax, rbx, rcx, rdx, found),
+ TP_PROTO(unsigned int function, unsigned int index, unsigned long rax,
+ unsigned long rbx, unsigned long rcx, unsigned long rdx,
+ bool found, bool used_max_basic),
+ TP_ARGS(function, index, rax, rbx, rcx, rdx, found, used_max_basic),
TP_STRUCT__entry(
__field( unsigned int, function )
+ __field( unsigned int, index )
__field( unsigned long, rax )
__field( unsigned long, rbx )
__field( unsigned long, rcx )
__field( unsigned long, rdx )
__field( bool, found )
+ __field( bool, used_max_basic )
),
TP_fast_assign(
__entry->function = function;
+ __entry->index = index;
__entry->rax = rax;
__entry->rbx = rbx;
__entry->rcx = rcx;
__entry->rdx = rdx;
__entry->found = found;
+ __entry->used_max_basic = used_max_basic;
),
- TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s",
- __entry->function, __entry->rax,
+ TP_printk("func %x idx %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s%s",
+ __entry->function, __entry->index, __entry->rax,
__entry->rbx, __entry->rcx, __entry->rdx,
- __entry->found ? "found" : "not found")
+ __entry->found ? "found" : "not found",
+ __entry->used_max_basic ? ", used max basic" : "")
);
#define AREG(x) { APIC_##x, "APIC_" #x }
@@ -240,7 +246,7 @@ TRACE_EVENT(kvm_exit,
__entry->guest_rip = kvm_rip_read(vcpu);
__entry->isa = isa;
__entry->vcpu_id = vcpu->vcpu_id;
- kvm_x86_ops->get_exit_info(vcpu, &__entry->info1,
+ kvm_x86_ops.get_exit_info(vcpu, &__entry->info1,
&__entry->info2);
),
@@ -744,14 +750,14 @@ TRACE_EVENT(kvm_emulate_insn,
),
TP_fast_assign(
- __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS);
- __entry->len = vcpu->arch.emulate_ctxt.fetch.ptr
- - vcpu->arch.emulate_ctxt.fetch.data;
- __entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len;
+ __entry->csbase = kvm_x86_ops.get_segment_base(vcpu, VCPU_SREG_CS);
+ __entry->len = vcpu->arch.emulate_ctxt->fetch.ptr
+ - vcpu->arch.emulate_ctxt->fetch.data;
+ __entry->rip = vcpu->arch.emulate_ctxt->_eip - __entry->len;
memcpy(__entry->insn,
- vcpu->arch.emulate_ctxt.fetch.data,
+ vcpu->arch.emulate_ctxt->fetch.data,
15);
- __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode);
+ __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt->mode);
__entry->failed = failed;
),
@@ -1367,6 +1373,24 @@ TRACE_EVENT(kvm_avic_unaccelerated_access,
__entry->vec)
);
+TRACE_EVENT(kvm_avic_ga_log,
+ TP_PROTO(u32 vmid, u32 vcpuid),
+ TP_ARGS(vmid, vcpuid),
+
+ TP_STRUCT__entry(
+ __field(u32, vmid)
+ __field(u32, vcpuid)
+ ),
+
+ TP_fast_assign(
+ __entry->vmid = vmid;
+ __entry->vcpuid = vcpuid;
+ ),
+
+ TP_printk("vmid=%u, vcpuid=%u",
+ __entry->vmid, __entry->vcpuid)
+);
+
TRACE_EVENT(kvm_hv_timer_state,
TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use),
TP_ARGS(vcpu_id, hv_timer_in_use),
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index f486e2606247..8903475f751e 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -101,7 +101,7 @@ static inline bool cpu_has_load_perf_global_ctrl(void)
(vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
}
-static inline bool vmx_mpx_supported(void)
+static inline bool cpu_has_vmx_mpx(void)
{
return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
@@ -146,11 +146,6 @@ static inline bool vmx_umip_emulated(void)
SECONDARY_EXEC_DESC;
}
-static inline bool vmx_pku_supported(void)
-{
- return boot_cpu_has(X86_FEATURE_PKU);
-}
-
static inline bool cpu_has_vmx_rdtscp(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -354,4 +349,22 @@ static inline bool cpu_has_vmx_intel_pt(void)
(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
}
+/*
+ * Processor Trace can operate in one of three modes:
+ * a. system-wide: trace both host/guest and output to host buffer
+ * b. host-only: only trace host and output to host buffer
+ * c. host-guest: trace host and guest simultaneously and output to their
+ * respective buffer
+ *
+ * KVM currently only supports (a) and (c).
+ */
+static inline bool vmx_pt_mode_is_system(void)
+{
+ return pt_mode == PT_MODE_SYSTEM;
+}
+static inline bool vmx_pt_mode_is_host_guest(void)
+{
+ return pt_mode == PT_MODE_HOST_GUEST;
+}
+
#endif /* __KVM_X86_VMX_CAPS_H */
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 6de47f2569c9..e5f7a7ebf27d 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -198,6 +198,13 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
+enum nested_evmptrld_status {
+ EVMPTRLD_DISABLED,
+ EVMPTRLD_SUCCEEDED,
+ EVMPTRLD_VMFAIL,
+ EVMPTRLD_ERROR,
+};
+
bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
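The nested_evmptrld_status enum introduced above replaces the previous 0/1 return convention of the enlightened-VMPTRLD helper, letting callers tell "eVMCS not in use" apart from a host-side mapping error and from a guest-visible VM-fail. A hedged sketch of how a caller might branch on such an enum (names and call flow simplified, not the kernel's actual chain):

#include <stdio.h>

enum evmptrld_status {
	EVMPTRLD_DISABLED,	/* enlightened VMCS not in use: fall back     */
	EVMPTRLD_SUCCEEDED,	/* eVMCS mapped and synced                    */
	EVMPTRLD_VMFAIL,	/* guest-visible failure: reflect VMfailInvalid */
	EVMPTRLD_ERROR,		/* host-side mapping error: abort / inject #UD */
};

static enum evmptrld_status try_evmptrld(int enabled, int map_ok, int rev_ok)
{
	if (!enabled)
		return EVMPTRLD_DISABLED;
	if (!map_ok)
		return EVMPTRLD_ERROR;
	if (!rev_ok)
		return EVMPTRLD_VMFAIL;
	return EVMPTRLD_SUCCEEDED;
}

int main(void)
{
	switch (try_evmptrld(1, 1, 0)) {
	case EVMPTRLD_DISABLED:		puts("use regular vmcs12");	break;
	case EVMPTRLD_SUCCEEDED:	puts("enter with eVMCS");	break;
	case EVMPTRLD_VMFAIL:		puts("VMfailInvalid to L1");	break;
	case EVMPTRLD_ERROR:		puts("internal error / #UD");	break;
	}
	return 0;
}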
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 9750e590c89d..de232306561a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -353,9 +353,8 @@ static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->nested.msrs.ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
nested_ept_ad_enabled(vcpu),
- nested_ept_get_cr3(vcpu));
- vcpu->arch.mmu->set_cr3 = vmx_set_cr3;
- vcpu->arch.mmu->get_cr3 = nested_ept_get_cr3;
+ nested_ept_get_eptp(vcpu));
+ vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp;
vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
@@ -1910,18 +1909,18 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
* This is an equivalent of the nested hypervisor executing the vmptrld
* instruction.
*/
-static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
- bool from_launch)
+static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
+ struct kvm_vcpu *vcpu, bool from_launch)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool evmcs_gpa_changed = false;
u64 evmcs_gpa;
if (likely(!vmx->nested.enlightened_vmcs_enabled))
- return 1;
+ return EVMPTRLD_DISABLED;
if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
- return 1;
+ return EVMPTRLD_DISABLED;
if (unlikely(!vmx->nested.hv_evmcs ||
evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
@@ -1932,7 +1931,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
&vmx->nested.hv_evmcs_map))
- return 0;
+ return EVMPTRLD_ERROR;
vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
@@ -1961,7 +1960,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
(vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
nested_release_evmcs(vcpu);
- return 0;
+ return EVMPTRLD_VMFAIL;
}
vmx->nested.dirty_vmcs12 = true;
@@ -1990,21 +1989,13 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
vmx->nested.hv_evmcs->hv_clean_fields &=
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- return 1;
+ return EVMPTRLD_SUCCEEDED;
}
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- /*
- * hv_evmcs may end up being not mapped after migration (when
- * L2 was running), map it here to make sure vmcs12 changes are
- * properly reflected.
- */
- if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs)
- nested_vmx_handle_enlightened_vmptrld(vcpu, false);
-
if (vmx->nested.hv_evmcs) {
copy_vmcs12_to_enlightened(vmx);
/* All fields are clean */
@@ -2475,9 +2466,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* If L1 use EPT, then L0 needs to execute INVEPT on
* EPTP02 instead of EPTP01. Therefore, delay TLB
* flush until vmcs02->eptp is fully updated by
- * KVM_REQ_LOAD_CR3. Note that this assumes
+ * KVM_REQ_LOAD_MMU_PGD. Note that this assumes
* KVM_REQ_TLB_FLUSH is evaluated after
- * KVM_REQ_LOAD_CR3 in vcpu_enter_guest().
+ * KVM_REQ_LOAD_MMU_PGD in vcpu_enter_guest().
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
@@ -2522,7 +2513,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/*
* Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12
* on nested VM-Exit, which can occur without actually running L2 and
- * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with
+ * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with
* vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
* transition to HLT instead of running L2.
*/
@@ -2564,13 +2555,13 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
return 0;
}
-static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
+static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int maxphyaddr = cpuid_maxphyaddr(vcpu);
/* Check for memory type validity */
- switch (address & VMX_EPTP_MT_MASK) {
+ switch (new_eptp & VMX_EPTP_MT_MASK) {
case VMX_EPTP_MT_UC:
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
return false;
@@ -2583,16 +2574,26 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
return false;
}
- /* only 4 levels page-walk length are valid */
- if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4))
+ /* Page-walk levels validity. */
+ switch (new_eptp & VMX_EPTP_PWL_MASK) {
+ case VMX_EPTP_PWL_5:
+ if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
+ return false;
+ break;
+ case VMX_EPTP_PWL_4:
+ if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
+ return false;
+ break;
+ default:
return false;
+ }
/* Reserved bits should not be set */
- if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
+ if (CC(new_eptp >> maxphyaddr || ((new_eptp >> 7) & 0x1f)))
return false;
/* AD, if set, should be supported */
- if (address & VMX_EPTP_AD_ENABLE_BIT) {
+ if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
return false;
}
@@ -2641,7 +2642,7 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
return -EINVAL;
if (nested_cpu_has_ept(vmcs12) &&
- CC(!valid_ept_address(vcpu, vmcs12->ept_pointer)))
+ CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
return -EINVAL;
if (nested_cpu_has_vmfunc(vmcs12)) {
@@ -2961,7 +2962,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
/*
* Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
* which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
- * be written (by preparve_vmcs02()) before the "real" VMEnter, i.e.
+ * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
* there is no need to preserve other bits or save/restore the field.
*/
vmcs_writel(GUEST_RFLAGS, 0);
@@ -3053,6 +3054,27 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
struct page *page;
u64 hpa;
+ /*
+ * hv_evmcs may end up being not mapped after migration (when
+ * L2 was running), map it here to make sure vmcs12 changes are
+ * properly reflected.
+ */
+ if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs) {
+ enum nested_evmptrld_status evmptrld_status =
+ nested_vmx_handle_enlightened_vmptrld(vcpu, false);
+
+ if (evmptrld_status == EVMPTRLD_VMFAIL ||
+ evmptrld_status == EVMPTRLD_ERROR) {
+ pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+ __func__);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return false;
+ }
+ }
+
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
/*
* Translate L1 physical address to host physical
@@ -3316,12 +3338,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
enum nvmx_vmentry_status status;
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
+ enum nested_evmptrld_status evmptrld_status;
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_handle_enlightened_vmptrld(vcpu, launch))
+ evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
+ if (evmptrld_status == EVMPTRLD_ERROR) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
+ } else if (evmptrld_status == EVMPTRLD_VMFAIL) {
+ return nested_vmx_failInvalid(vcpu);
+ }
if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
return nested_vmx_failInvalid(vcpu);
@@ -3499,7 +3527,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
}
-static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
+void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
gfn_t gfn;
@@ -3604,7 +3632,7 @@ static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
vcpu->arch.exception.payload);
}
-static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qual;
@@ -3680,8 +3708,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
- if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
- nested_exit_on_intr(vcpu)) {
+ if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
@@ -4024,7 +4051,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
*
* If vmcs12 uses EPT, we need to execute this flush on EPTP01
* and therefore we request the TLB flush to happen only after VMCS EPTP
- * has been set by KVM_REQ_LOAD_CR3.
+ * has been set by KVM_REQ_LOAD_MMU_PGD.
*/
if (enable_vpid &&
(!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
@@ -4329,17 +4356,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (likely(!vmx->fail)) {
- /*
- * TODO: SDM says that with acknowledge interrupt on
- * exit, bit 31 of the VM-exit interrupt information
- * (valid interrupt) is always set to 1 on
- * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
- * need kvm_cpu_has_interrupt(). See the commit
- * message for details.
- */
- if (nested_exit_intr_ack_set(vcpu) &&
- exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
- kvm_cpu_has_interrupt(vcpu)) {
+ if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+ nested_exit_intr_ack_set(vcpu)) {
int irq = kvm_cpu_get_interrupt(vcpu);
WARN_ON(irq < 0);
vmcs12->vm_exit_intr_info = irq |
@@ -4383,7 +4401,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
* Decode the memory-address operand of a vmx instruction, as recorded on an
* exit caused by such an instruction (run by a guest hypervisor).
* On success, returns 0. When the operand is invalid, returns 1 and throws
- * #UD or #GP.
+ * #UD, #GP, or #SS.
*/
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
@@ -4424,7 +4442,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
if (base_is_valid)
off += kvm_register_read(vcpu, base_reg);
if (index_is_valid)
- off += kvm_register_read(vcpu, index_reg)<<scaling;
+ off += kvm_register_read(vcpu, index_reg) << scaling;
vmx_get_segment(vcpu, &s, seg_reg);
/*
@@ -4517,7 +4535,7 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
return;
vmx = to_vmx(vcpu);
- if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
+ if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
vmx->nested.msrs.entry_ctls_high |=
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
vmx->nested.msrs.exit_ctls_high |=
@@ -4603,7 +4621,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
vmx->nested.vmcs02_initialized = false;
vmx->nested.vmxon = true;
- if (pt_mode == PT_MODE_HOST_GUEST) {
+ if (vmx_pt_mode_is_host_guest()) {
vmx->pt_desc.guest.ctl = 0;
pt_update_intercept_for_msr(vmx);
}
@@ -5235,7 +5253,7 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
u32 index = kvm_rcx_read(vcpu);
- u64 address;
+ u64 new_eptp;
bool accessed_dirty;
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
@@ -5248,23 +5266,23 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
- &address, index * 8, 8))
+ &new_eptp, index * 8, 8))
return 1;
- accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
+ accessed_dirty = !!(new_eptp & VMX_EPTP_AD_ENABLE_BIT);
/*
* If the (L2) guest does a vmfunc to the currently
* active ept pointer, we don't have to do anything else
*/
- if (vmcs12->ept_pointer != address) {
- if (!valid_ept_address(vcpu, address))
+ if (vmcs12->ept_pointer != new_eptp) {
+ if (!nested_vmx_check_eptp(vcpu, new_eptp))
return 1;
kvm_mmu_unload(vcpu);
mmu->ept_ad = accessed_dirty;
mmu->mmu_role.base.ad_disabled = !accessed_dirty;
- vmcs12->ept_pointer = address;
+ vmcs12->ept_pointer = new_eptp;
/*
* TODO: Check what's the correct approach in case
* mmu reload fails. Currently, we just let the next
@@ -5525,8 +5543,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- if (vmx->nested.nested_run_pending)
- return false;
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
if (unlikely(vmx->fail)) {
trace_kvm_nested_vmenter_failed(
@@ -5535,19 +5552,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
return true;
}
- /*
- * The host physical addresses of some pages of guest memory
- * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
- * Page). The CPU may write to these pages via their host
- * physical address while L2 is running, bypassing any
- * address-translation-based dirty tracking (e.g. EPT write
- * protection).
- *
- * Mark them dirty on every exit from L2 to prevent them from
- * getting out of sync with dirty tracking.
- */
- nested_mark_vmcs12_pages_dirty(vcpu);
-
trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
vmcs_readl(EXIT_QUALIFICATION),
vmx->idt_vectoring_info,
@@ -5628,7 +5632,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
case EXIT_REASON_MWAIT_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
case EXIT_REASON_MONITOR_TRAP_FLAG:
- return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
+ return nested_cpu_has_mtf(vmcs12);
case EXIT_REASON_MONITOR_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
case EXIT_REASON_PAUSE_INSTRUCTION:
@@ -5905,10 +5909,12 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
/*
- * Sync eVMCS upon entry as we may not have
- * HV_X64_MSR_VP_ASSIST_PAGE set up yet.
+ * nested_vmx_handle_enlightened_vmptrld() cannot be called
+ * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
+ * restored yet. EVMCS will be mapped from
+ * nested_get_vmcs12_pages().
*/
- vmx->nested.need_vmcs12_to_shadow_sync = true;
+ kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
} else {
return -EINVAL;
}
@@ -6130,11 +6136,13 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
/* nested EPT: emulate EPT also to L1 */
msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
- msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
- VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
- if (cpu_has_vmx_ept_execute_only())
- msrs->ept_caps |=
- VMX_EPT_EXECUTE_ONLY_BIT;
+ msrs->ept_caps =
+ VMX_EPT_PAGE_WALK_4_BIT |
+ VMX_EPT_PAGE_WALK_5_BIT |
+ VMX_EPTP_WB_BIT |
+ VMX_EPT_INVEPT_BIT |
+ VMX_EPT_EXECUTE_ONLY_BIT;
+
msrs->ept_caps &= ept_caps;
msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
@@ -6233,7 +6241,8 @@ void nested_vmx_hardware_unsetup(void)
}
}
-__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
+__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
+ int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
@@ -6269,12 +6278,12 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
- kvm_x86_ops->check_nested_events = vmx_check_nested_events;
- kvm_x86_ops->get_nested_state = vmx_get_nested_state;
- kvm_x86_ops->set_nested_state = vmx_set_nested_state;
- kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages;
- kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs;
- kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version;
+ ops->check_nested_events = vmx_check_nested_events;
+ ops->get_nested_state = vmx_get_nested_state;
+ ops->set_nested_state = vmx_set_nested_state;
+ ops->get_vmcs12_pages = nested_get_vmcs12_pages;
+ ops->nested_enable_evmcs = nested_enable_evmcs;
+ ops->nested_get_evmcs_version = nested_get_evmcs_version;
return 0;
}
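nested_vmx_check_eptp() above now validates the EPTP page-walk level against both the 4-level and 5-level capability bits instead of hard-coding 4. A standalone sketch of that check; the constants are illustrative stand-ins for the VMX_EPTP_*/VMX_EPT_* definitions in asm/vmx.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative encodings only. */
#define EPTP_PWL_MASK	0x38ULL
#define EPTP_PWL_4	0x18ULL		/* (4 - 1) << 3 */
#define EPTP_PWL_5	0x20ULL		/* (5 - 1) << 3 */
#define CAP_PWL_4	(1ULL << 6)
#define CAP_PWL_5	(1ULL << 7)

static bool eptp_pwl_ok(uint64_t eptp, uint64_t ept_caps)
{
	switch (eptp & EPTP_PWL_MASK) {
	case EPTP_PWL_5:
		return ept_caps & CAP_PWL_5;	/* 5-level walks advertised? */
	case EPTP_PWL_4:
		return ept_caps & CAP_PWL_4;	/* 4-level walks advertised? */
	default:
		return false;			/* anything else is invalid  */
	}
}

int main(void)
{
	printf("%d\n", eptp_pwl_ok(EPTP_PWL_5, CAP_PWL_4));		/* 0 */
	printf("%d\n", eptp_pwl_ok(EPTP_PWL_5, CAP_PWL_4 | CAP_PWL_5));	/* 1 */
	return 0;
}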
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 9aeda46f473e..ac56aefa49e3 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -19,7 +19,8 @@ enum nvmx_vmentry_status {
void vmx_leave_nested(struct kvm_vcpu *vcpu);
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
-__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
+__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
+ int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
@@ -33,6 +34,7 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
+void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu);
bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
int size);
@@ -60,7 +62,7 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
vmx->nested.hv_evmcs;
}
-static inline unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
+static inline unsigned long nested_ept_get_eptp(struct kvm_vcpu *vcpu)
{
/* return the page table to be shadowed - in our case, EPT12 */
return get_vmcs12(vcpu)->ept_pointer;
@@ -68,7 +70,7 @@ static inline unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
{
- return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
+ return nested_ept_get_eptp(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
}
/*
diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h
index 45eaedee2ac0..19717d0a1100 100644
--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/ops.h
@@ -13,6 +13,8 @@
#define __ex(x) __kvm_handle_fault_on_reboot(x)
asmlinkage void vmread_error(unsigned long field, bool fault);
+__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
+ bool fault);
void vmwrite_error(unsigned long field, unsigned long value);
void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
@@ -70,15 +72,28 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
asm volatile("1: vmread %2, %1\n\t"
".byte 0x3e\n\t" /* branch taken hint */
"ja 3f\n\t"
- "mov %2, %%" _ASM_ARG1 "\n\t"
- "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t"
- "2: call vmread_error\n\t"
- "xor %k1, %k1\n\t"
+
+ /*
+ * VMREAD failed. Push '0' for @fault, push the failing
+ * @field, and bounce through the trampoline to preserve
+ * volatile registers.
+ */
+ "push $0\n\t"
+ "push %2\n\t"
+ "2:call vmread_error_trampoline\n\t"
+
+ /*
+ * Unwind the stack. Note, the trampoline zeros out the
+ * memory for @fault so that the result is '0' on error.
+ */
+ "pop %2\n\t"
+ "pop %1\n\t"
"3:\n\t"
+ /* VMREAD faulted. As above, except push '1' for @fault. */
".pushsection .fixup, \"ax\"\n\t"
- "4: mov %2, %%" _ASM_ARG1 "\n\t"
- "mov $1, %%" _ASM_ARG2 "\n\t"
+ "4: push $1\n\t"
+ "push %2\n\t"
"jmp 2b\n\t"
".popsection\n\t"
_ASM_EXTABLE(1b, 4b)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index fd21cdb10b79..7c857737b438 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -263,9 +263,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated)
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
+ if (pmc->perf_event)
+ perf_event_period(pmc->perf_event,
+ get_sample_period(pmc, data));
return 0;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
+ if (pmc->perf_event)
+ perf_event_period(pmc->perf_event,
+ get_sample_period(pmc, data));
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
@@ -329,7 +335,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
& ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
- if (kvm_x86_ops->pt_supported())
+ if (vmx_pt_mode_is_host_guest())
pmu->global_ovf_ctrl_mask &=
~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 81ada2ce99e7..9651ba388ba9 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -135,12 +135,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
cmpb $0, %bl
/* Load guest registers. Don't clobber flags. */
- mov VCPU_RBX(%_ASM_AX), %_ASM_BX
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
mov VCPU_RDX(%_ASM_AX), %_ASM_DX
+ mov VCPU_RBX(%_ASM_AX), %_ASM_BX
+ mov VCPU_RBP(%_ASM_AX), %_ASM_BP
mov VCPU_RSI(%_ASM_AX), %_ASM_SI
mov VCPU_RDI(%_ASM_AX), %_ASM_DI
- mov VCPU_RBP(%_ASM_AX), %_ASM_BP
#ifdef CONFIG_X86_64
mov VCPU_R8 (%_ASM_AX), %r8
mov VCPU_R9 (%_ASM_AX), %r9
@@ -168,12 +168,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Save all guest registers, including RAX from the stack */
__ASM_SIZE(pop) VCPU_RAX(%_ASM_AX)
- mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
+ mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
+ mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
- mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
#ifdef CONFIG_X86_64
mov %r8, VCPU_R8 (%_ASM_AX)
mov %r9, VCPU_R9 (%_ASM_AX)
@@ -197,12 +197,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
* free. RSP and RAX are exempt as RSP is restored by hardware during
* VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
*/
-1: xor %ebx, %ebx
- xor %ecx, %ecx
+1: xor %ecx, %ecx
xor %edx, %edx
+ xor %ebx, %ebx
+ xor %ebp, %ebp
xor %esi, %esi
xor %edi, %edi
- xor %ebp, %ebp
#ifdef CONFIG_X86_64
xor %r8d, %r8d
xor %r9d, %r9d
@@ -234,3 +234,61 @@ SYM_FUNC_START(__vmx_vcpu_run)
2: mov $1, %eax
jmp 1b
SYM_FUNC_END(__vmx_vcpu_run)
+
+/**
+ * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
+ * @field: VMCS field encoding that failed
+ * @fault: %true if the VMREAD faulted, %false if it failed
+ *
+ * Save and restore volatile registers across a call to vmread_error(). Note,
+ * all parameters are passed on the stack.
+ */
+SYM_FUNC_START(vmread_error_trampoline)
+ push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
+
+ push %_ASM_AX
+ push %_ASM_CX
+ push %_ASM_DX
+#ifdef CONFIG_X86_64
+ push %rdi
+ push %rsi
+ push %r8
+ push %r9
+ push %r10
+ push %r11
+#endif
+#ifdef CONFIG_X86_64
+ /* Load @field and @fault to arg1 and arg2 respectively. */
+ mov 3*WORD_SIZE(%rbp), %_ASM_ARG2
+ mov 2*WORD_SIZE(%rbp), %_ASM_ARG1
+#else
+ /* Parameters are passed on the stack for 32-bit (see asmlinkage). */
+ push 3*WORD_SIZE(%ebp)
+ push 2*WORD_SIZE(%ebp)
+#endif
+
+ call vmread_error
+
+#ifndef CONFIG_X86_64
+ add $8, %esp
+#endif
+
+ /* Zero out @fault, which will be popped into the result register. */
+ _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
+
+#ifdef CONFIG_X86_64
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rsi
+ pop %rdi
+#endif
+ pop %_ASM_DX
+ pop %_ASM_CX
+ pop %_ASM_AX
+ pop %_ASM_BP
+
+ ret
+SYM_FUNC_END(vmread_error_trampoline)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 458e684dfbdc..91749f1254e8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -437,7 +437,6 @@ static const struct kvm_vmx_segment_field {
VMX_SEGMENT_FIELD(LDTR),
};
-u64 host_efer;
static unsigned long host_idt_base;
/*
@@ -658,53 +657,16 @@ static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr,
return ret;
}
-void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
-{
- vmcs_clear(loaded_vmcs->vmcs);
- if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
- vmcs_clear(loaded_vmcs->shadow_vmcs);
- loaded_vmcs->cpu = -1;
- loaded_vmcs->launched = 0;
-}
-
#ifdef CONFIG_KEXEC_CORE
-/*
- * This bitmap is used to indicate whether the vmclear
- * operation is enabled on all cpus. All disabled by
- * default.
- */
-static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
-
-static inline void crash_enable_local_vmclear(int cpu)
-{
- cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
-static inline void crash_disable_local_vmclear(int cpu)
-{
- cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
-static inline int crash_local_vmclear_enabled(int cpu)
-{
- return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
-}
-
static void crash_vmclear_local_loaded_vmcss(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
- if (!crash_local_vmclear_enabled(cpu))
- return;
-
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
loaded_vmcss_on_cpu_link)
vmcs_clear(v->vmcs);
}
-#else
-static inline void crash_enable_local_vmclear(int cpu) { }
-static inline void crash_disable_local_vmclear(int cpu) { }
#endif /* CONFIG_KEXEC_CORE */
static void __loaded_vmcs_clear(void *arg)
@@ -716,19 +678,24 @@ static void __loaded_vmcs_clear(void *arg)
return; /* vcpu migration can race with cpu offline */
if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
- crash_disable_local_vmclear(cpu);
+
+ vmcs_clear(loaded_vmcs->vmcs);
+ if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
+ vmcs_clear(loaded_vmcs->shadow_vmcs);
+
list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
/*
- * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
- * is before setting loaded_vmcs->vcpu to -1 which is done in
- * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
- * then adds the vmcs into percpu list before it is deleted.
+ * Ensure all writes to loaded_vmcs, including deleting it from its
+ * current percpu list, complete before setting loaded_vmcs->vcpu to
+ * -1, otherwise a different cpu can see vcpu == -1 first and add
+ * loaded_vmcs to its percpu list before it's deleted from this cpu's
+ * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
*/
smp_wmb();
- loaded_vmcs_init(loaded_vmcs);
- crash_enable_local_vmclear(cpu);
+ loaded_vmcs->cpu = -1;
+ loaded_vmcs->launched = 0;
}
void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
@@ -812,7 +779,7 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->rmode.vm86_active)
eb = ~0;
if (enable_ept)
- eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
+ eb &= ~(1u << PF_VECTOR);
/* When we are running a nested L2 guest and L1 specified for it a
* certain exception bitmap, we must trap the same exceptions and pass
@@ -1063,7 +1030,7 @@ static unsigned long segment_base(u16 selector)
static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
{
- return (pt_mode == PT_MODE_HOST_GUEST) &&
+ return vmx_pt_mode_is_host_guest() &&
!(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
}
@@ -1097,7 +1064,7 @@ static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
static void pt_guest_enter(struct vcpu_vmx *vmx)
{
- if (pt_mode == PT_MODE_SYSTEM)
+ if (vmx_pt_mode_is_system())
return;
/*
@@ -1114,7 +1081,7 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
static void pt_guest_exit(struct vcpu_vmx *vmx)
{
- if (pt_mode == PT_MODE_SYSTEM)
+ if (vmx_pt_mode_is_system())
return;
if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
@@ -1347,18 +1314,17 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
if (!already_loaded) {
loaded_vmcs_clear(vmx->loaded_vmcs);
local_irq_disable();
- crash_disable_local_vmclear(cpu);
/*
- * Read loaded_vmcs->cpu should be before fetching
- * loaded_vmcs->loaded_vmcss_on_cpu_link.
- * See the comments in __loaded_vmcs_clear().
+ * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to
+ * this cpu's percpu list, otherwise it may not yet be deleted
+ * from its previous cpu's percpu list. Pairs with the
+	 * smp_wmb() in __loaded_vmcs_clear().
*/
smp_rmb();
list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
&per_cpu(loaded_vmcss_on_cpu, cpu));
- crash_enable_local_vmclear(cpu);
local_irq_enable();
}
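The two comments in this hunk and in __loaded_vmcs_clear() describe a store/load ordering pair: the clearing side must finish deleting the VMCS from its old per-cpu list before publishing cpu == -1, and the loading side must observe cpu == -1 before re-adding it to the new list. For reference, a minimal user-space sketch of that pairing, using C11 fences rather than the kernel's smp_wmb()/smp_rmb(); all names and types here are illustrative, not KVM's:

#include <stdatomic.h>
#include <stdio.h>

struct fake_vmcs {
        int on_list;            /* stands in for the per-cpu list linkage */
        _Atomic int cpu;        /* -1 means "not loaded on any cpu" */
};

static void clear_side(struct fake_vmcs *v)
{
        v->on_list = 0;                                 /* delete from old list */
        atomic_thread_fence(memory_order_release);      /* roughly smp_wmb() */
        atomic_store_explicit(&v->cpu, -1, memory_order_relaxed);
}

static void load_side(struct fake_vmcs *v)
{
        if (atomic_load_explicit(&v->cpu, memory_order_relaxed) == -1) {
                atomic_thread_fence(memory_order_acquire);  /* roughly smp_rmb() */
                v->on_list = 1;                 /* safe: old deletion is visible */
        }
}

int main(void)
{
        struct fake_vmcs v = { .on_list = 1, .cpu = 0 };
        clear_side(&v);
        load_side(&v);
        printf("cpu=%d on_list=%d\n", atomic_load(&v.cpu), v.on_list);
        return 0;
}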
@@ -1691,16 +1657,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
vmx_clear_hlt(vcpu);
}
-static bool vmx_rdtscp_supported(void)
-{
- return cpu_has_vmx_rdtscp();
-}
-
-static bool vmx_invpcid_supported(void)
-{
- return cpu_has_vmx_invpcid();
-}
-
/*
* Swap MSR entry in host/guest MSR entry array.
*/
@@ -1908,24 +1864,24 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
&msr_info->data);
break;
case MSR_IA32_RTIT_CTL:
- if (pt_mode != PT_MODE_HOST_GUEST)
+ if (!vmx_pt_mode_is_host_guest())
return 1;
msr_info->data = vmx->pt_desc.guest.ctl;
break;
case MSR_IA32_RTIT_STATUS:
- if (pt_mode != PT_MODE_HOST_GUEST)
+ if (!vmx_pt_mode_is_host_guest())
return 1;
msr_info->data = vmx->pt_desc.guest.status;
break;
case MSR_IA32_RTIT_CR3_MATCH:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
+ if (!vmx_pt_mode_is_host_guest() ||
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_cr3_filtering))
return 1;
msr_info->data = vmx->pt_desc.guest.cr3_match;
break;
case MSR_IA32_RTIT_OUTPUT_BASE:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
+ if (!vmx_pt_mode_is_host_guest() ||
(!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_topa_output) &&
!intel_pt_validate_cap(vmx->pt_desc.caps,
@@ -1934,7 +1890,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmx->pt_desc.guest.output_base;
break;
case MSR_IA32_RTIT_OUTPUT_MASK:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
+ if (!vmx_pt_mode_is_host_guest() ||
(!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_topa_output) &&
!intel_pt_validate_cap(vmx->pt_desc.caps,
@@ -1944,7 +1900,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
+ if (!vmx_pt_mode_is_host_guest() ||
(index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_num_address_ranges)))
return 1;
@@ -2150,7 +2106,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_RTIT_CTL:
- if ((pt_mode != PT_MODE_HOST_GUEST) ||
+ if (!vmx_pt_mode_is_host_guest() ||
vmx_rtit_ctl_check(vcpu, data) ||
vmx->nested.vmxon)
return 1;
@@ -2266,18 +2222,33 @@ static __init int vmx_disabled_by_bios(void)
!boot_cpu_has(X86_FEATURE_VMX);
}
-static void kvm_cpu_vmxon(u64 addr)
+static int kvm_cpu_vmxon(u64 vmxon_pointer)
{
+ u64 msr;
+
cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
- asm volatile ("vmxon %0" : : "m"(addr));
+ asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ : : [vmxon_pointer] "m"(vmxon_pointer)
+ : : fault);
+ return 0;
+
+fault:
+ WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
+ rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
+ intel_pt_handle_vmx(0);
+ cr4_clear_bits(X86_CR4_VMXE);
+
+ return -EFAULT;
}
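The reworked kvm_cpu_vmxon() pairs asm_volatile_goto() with an exception-table entry so a faulting VMXON lands on the C "fault" label instead of killing the CPU. The extable fixup itself is kernel machinery, but the underlying "asm goto" feature is a plain GCC/Clang extension; a stand-alone sketch of jumping from inline assembly to a C label (an unconditional x86 jmp stands in for the fault/fixup path, which is an assumption of this example only):

#include <stdio.h>

/* Returns -1 via the asm-goto side exit, 0 otherwise. */
static int try_privileged_op(int simulate_fault)
{
        if (simulate_fault)
                asm goto("jmp %l[fault]" : /* no outputs */ : : : fault);
        return 0;
fault:
        return -1;
}

int main(void)
{
        printf("ok path:    %d\n", try_privileged_op(0));
        printf("fault path: %d\n", try_privileged_op(1));
        return 0;
}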
static int hardware_enable(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+ int r;
if (cr4_read_shadow() & X86_CR4_VMXE)
return -EBUSY;
@@ -2294,18 +2265,10 @@ static int hardware_enable(void)
INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
- /*
- * Now we can enable the vmclear operation in kdump
- * since the loaded_vmcss_on_cpu list on this cpu
- * has been initialized.
- *
- * Though the cpu is not in VMX operation now, there
- * is no problem to enable the vmclear operation
- * for the loaded_vmcss_on_cpu list is empty!
- */
- crash_enable_local_vmclear(cpu);
+ r = kvm_cpu_vmxon(phys_addr);
+ if (r)
+ return r;
- kvm_cpu_vmxon(phys_addr);
if (enable_ept)
ept_sync_global();
@@ -2617,9 +2580,12 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
if (!loaded_vmcs->vmcs)
return -ENOMEM;
+ vmcs_clear(loaded_vmcs->vmcs);
+
loaded_vmcs->shadow_vmcs = NULL;
loaded_vmcs->hv_timer_soft_disabled = false;
- loaded_vmcs_init(loaded_vmcs);
+ loaded_vmcs->cpu = -1;
+ loaded_vmcs->launched = 0;
if (cpu_has_vmx_msr_bitmap()) {
loaded_vmcs->msr_bitmap = (unsigned long *)
@@ -3001,9 +2967,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static int get_ept_level(struct kvm_vcpu *vcpu)
{
- /* Nested EPT currently only supports 4-level walks. */
if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
- return 4;
+ return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu));
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5;
return 4;
@@ -3023,7 +2988,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
return eptp;
}
-void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3)
{
struct kvm *kvm = vcpu->kvm;
bool update_guest_cr3 = true;
@@ -3035,7 +3000,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
eptp = construct_eptp(vcpu, cr3);
vmcs_write64(EPT_POINTER, eptp);
- if (kvm_x86_ops->tlb_remote_flush) {
+ if (kvm_x86_ops.tlb_remote_flush) {
spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
to_vmx(vcpu)->ept_pointer = eptp;
to_kvm_vmx(kvm)->ept_pointers_match
@@ -4040,7 +4005,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
- if (pt_mode == PT_MODE_SYSTEM)
+ if (vmx_pt_mode_is_system())
exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
if (!cpu_need_virtualize_apic_accesses(vcpu))
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
@@ -4095,7 +4060,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
}
}
- if (vmx_rdtscp_supported()) {
+ if (cpu_has_vmx_rdtscp()) {
bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
if (!rdtscp_enabled)
exec_control &= ~SECONDARY_EXEC_RDTSCP;
@@ -4110,7 +4075,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
}
}
- if (vmx_invpcid_supported()) {
+ if (cpu_has_vmx_invpcid()) {
/* Exposing INVPCID only when PCID is exposed */
bool invpcid_enabled =
guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
@@ -4281,7 +4246,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
if (cpu_has_vmx_encls_vmexit())
vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
- if (pt_mode == PT_MODE_HOST_GUEST) {
+ if (vmx_pt_mode_is_host_guest()) {
memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
/* Bit[6~0] are forced to 1, writes are ignored. */
vmx->pt_desc.guest.output_mask = 0x7F;
@@ -4509,8 +4474,13 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- return (!to_vmx(vcpu)->nested.nested_run_pending &&
- vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ if (to_vmx(vcpu)->nested.nested_run_pending)
+ return false;
+
+ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+ return true;
+
+ return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}
@@ -4566,7 +4536,6 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
case GP_VECTOR:
case MF_VECTOR:
return true;
- break;
}
return false;
}
@@ -5343,7 +5312,6 @@ static void vmx_enable_tdp(void)
VMX_EPT_RWX_MASK, 0ull);
ept_set_mmio_spte_mask();
- kvm_enable_tdp();
}
/*
@@ -5876,8 +5844,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu,
if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
- if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
- return nested_vmx_reflect_vmexit(vcpu, exit_reason);
+ if (is_guest_mode(vcpu)) {
+ /*
+ * The host physical addresses of some pages of guest memory
+ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+ * Page). The CPU may write to these pages via their host
+ * physical address while L2 is running, bypassing any
+ * address-translation-based dirty tracking (e.g. EPT write
+ * protection).
+ *
+ * Mark them dirty on every exit from L2 to prevent them from
+ * getting out of sync with dirty tracking.
+ */
+ nested_mark_vmcs12_pages_dirty(vcpu);
+
+ if (nested_vmx_exit_reflected(vcpu, exit_reason))
+ return nested_vmx_reflect_vmexit(vcpu, exit_reason);
+ }
if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
dump_vmcs();
@@ -6237,15 +6220,13 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* if exit due to PF check for async PF */
- if (is_page_fault(vmx->exit_intr_info))
+ if (is_page_fault(vmx->exit_intr_info)) {
vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
-
/* Handle machine checks before interrupts are enabled */
- if (is_machine_check(vmx->exit_intr_info))
+ } else if (is_machine_check(vmx->exit_intr_info)) {
kvm_machine_check();
-
/* We need to handle NMIs before interrupts are enabled */
- if (is_nmi(vmx->exit_intr_info)) {
+ } else if (is_nmi(vmx->exit_intr_info)) {
kvm_before_interrupt(&vmx->vcpu);
asm("int $2");
kvm_after_interrupt(&vmx->vcpu);
@@ -6331,11 +6312,6 @@ static bool vmx_has_emulated_msr(int index)
}
}
-static bool vmx_pt_supported(void)
-{
- return pt_mode == PT_MODE_HOST_GUEST;
-}
-
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -6581,7 +6557,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
pt_guest_enter(vmx);
- atomic_switch_perf_msrs(vmx);
+ if (vcpu_to_pmu(vcpu)->version)
+ atomic_switch_perf_msrs(vmx);
atomic_switch_umwait_control_msr(vmx);
if (enable_preemption_timer)
@@ -6698,20 +6675,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_complete_interrupts(vmx);
}
-static struct kvm *vmx_vm_alloc(void)
-{
- struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
- GFP_KERNEL_ACCOUNT | __GFP_ZERO,
- PAGE_KERNEL);
- return &kvm_vmx->kvm;
-}
-
-static void vmx_vm_free(struct kvm *kvm)
-{
- kfree(kvm->arch.hyperv.hv_pa_pg);
- vfree(to_kvm_vmx(kvm));
-}
-
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6914,17 +6877,24 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
u8 cache;
u64 ipat = 0;
- /* For VT-d and EPT combination
- * 1. MMIO: always map as UC
- * 2. EPT with VT-d:
- * a. VT-d without snooping control feature: can't guarantee the
- * result, try to trust guest.
- * b. VT-d with snooping control feature: snooping control feature of
- * VT-d engine can guarantee the cache correctness. Just set it
- * to WB to keep consistent with host. So the same as item 3.
- * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
- * consistent with host MTRR
+ /* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
+ * memory aliases with conflicting memory types and sometimes MCEs.
+ * We have to be careful about which are honored and when.
+ *
+ * For MMIO, guest CD/MTRR are ignored. The EPT memory type is set to
+ * UC. The effective memory type is UC or WC depending on guest PAT.
+ * This was historically the source of MCEs and we want to be
+ * conservative.
+ *
+ * When there is no need to deal with noncoherent DMA (e.g., no VT-d
+ * or VT-d has snoop control), guest CD/MTRR/PAT are all ignored. The
+ * EPT memory type is set to WB. The effective memory type is forced
+ * WB.
+ *
+ * Otherwise, we trust guest. Guest CD/MTRR/PAT are all honored. The
+ * EPT memory type is used to emulate guest CD/MTRR.
*/
+
if (is_mmio) {
cache = MTRR_TYPE_UNCACHABLE;
goto exit;
@@ -6951,15 +6921,6 @@ exit:
return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
}
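The rewritten comment boils down to a three-way policy: MMIO is always UC (guest PAT still applies), coherent configurations force WB with IPAT set, and only the noncoherent-DMA case honors guest CD/MTRR/PAT. A compressed restatement of that decision tree; the constant values and flag names below are illustrative, not the kernel's:

enum ept_mt { EPT_MT_UC = 0, EPT_MT_WB = 6 };   /* illustrative encodings */

/* Returns the EPT memory type; *ipat says whether guest PAT is ignored. */
enum ept_mt pick_ept_memtype(int is_mmio, int noncoherent_dma,
                             enum ept_mt guest_mt, int *ipat)
{
        *ipat = 0;
        if (is_mmio)
                return EPT_MT_UC;       /* MMIO: never cache */
        if (!noncoherent_dma) {
                *ipat = 1;              /* ignore guest PAT entirely */
                return EPT_MT_WB;       /* coherent: force write-back */
        }
        return guest_mt;                /* honor guest CD/MTRR/PAT */
}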
-static int vmx_get_lpage_level(void)
-{
- if (enable_ept && !cpu_has_vmx_ept_1g_page())
- return PT_DIRECTORY_LEVEL;
- else
- /* For shadow and EPT supported 1GB page */
- return PT_PDPE_LEVEL;
-}
-
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
{
/*
@@ -7150,10 +7111,37 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
}
}
-static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
+static __init void vmx_set_cpu_caps(void)
{
- if (func == 1 && nested)
- entry->ecx |= feature_bit(VMX);
+ kvm_set_cpu_caps();
+
+ /* CPUID 0x1 */
+ if (nested)
+ kvm_cpu_cap_set(X86_FEATURE_VMX);
+
+ /* CPUID 0x7 */
+ if (kvm_mpx_supported())
+ kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
+ if (cpu_has_vmx_invpcid())
+ kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
+ if (vmx_pt_mode_is_host_guest())
+ kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
+
+ /* PKU is not yet implemented for shadow paging. */
+ if (enable_ept && boot_cpu_has(X86_FEATURE_OSPKE))
+ kvm_cpu_cap_check_and_set(X86_FEATURE_PKU);
+
+ if (vmx_umip_emulated())
+ kvm_cpu_cap_set(X86_FEATURE_UMIP);
+
+ /* CPUID 0xD.1 */
+ supported_xss = 0;
+ if (!vmx_xsaves_supported())
+ kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
+
+ /* CPUID 0x80000001 */
+ if (!cpu_has_vmx_rdtscp())
+ kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
}
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -7197,10 +7185,10 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
- enum x86_intercept_stage stage)
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
switch (info->intercept) {
/*
@@ -7209,8 +7197,8 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
*/
case x86_intercept_rdtscp:
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
- ctxt->exception.vector = UD_VECTOR;
- ctxt->exception.error_code_valid = false;
+ exception->vector = UD_VECTOR;
+ exception->error_code_valid = false;
return X86EMUL_PROPAGATE_FAULT;
}
break;
@@ -7321,7 +7309,8 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
static void vmx_slot_enable_log_dirty(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
- kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
+ if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
+ kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
}
@@ -7504,7 +7493,7 @@ static void pi_post_block(struct kvm_vcpu *vcpu)
static void vmx_post_block(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->set_hv_timer)
+ if (kvm_x86_ops.set_hv_timer)
kvm_lapic_switch_to_hv_timer(vcpu);
pi_post_block(vcpu);
@@ -7671,13 +7660,164 @@ static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->nested.vmxon;
}
+static void hardware_unsetup(void)
+{
+ if (nested)
+ nested_vmx_hardware_unsetup();
+
+ free_kvm_area();
+}
+
+static bool vmx_check_apicv_inhibit_reasons(ulong bit)
+{
+ ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
+ BIT(APICV_INHIBIT_REASON_HYPERV);
+
+ return supported & BIT(bit);
+}
+
+static struct kvm_x86_ops vmx_x86_ops __initdata = {
+ .hardware_unsetup = hardware_unsetup,
+
+ .hardware_enable = hardware_enable,
+ .hardware_disable = hardware_disable,
+ .cpu_has_accelerated_tpr = report_flexpriority,
+ .has_emulated_msr = vmx_has_emulated_msr,
+
+ .vm_size = sizeof(struct kvm_vmx),
+ .vm_init = vmx_vm_init,
+
+ .vcpu_create = vmx_create_vcpu,
+ .vcpu_free = vmx_free_vcpu,
+ .vcpu_reset = vmx_vcpu_reset,
+
+ .prepare_guest_switch = vmx_prepare_switch_to_guest,
+ .vcpu_load = vmx_vcpu_load,
+ .vcpu_put = vmx_vcpu_put,
+
+ .update_bp_intercept = update_exception_bitmap,
+ .get_msr_feature = vmx_get_msr_feature,
+ .get_msr = vmx_get_msr,
+ .set_msr = vmx_set_msr,
+ .get_segment_base = vmx_get_segment_base,
+ .get_segment = vmx_get_segment,
+ .set_segment = vmx_set_segment,
+ .get_cpl = vmx_get_cpl,
+ .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+ .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
+ .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
+ .set_cr0 = vmx_set_cr0,
+ .set_cr4 = vmx_set_cr4,
+ .set_efer = vmx_set_efer,
+ .get_idt = vmx_get_idt,
+ .set_idt = vmx_set_idt,
+ .get_gdt = vmx_get_gdt,
+ .set_gdt = vmx_set_gdt,
+ .get_dr6 = vmx_get_dr6,
+ .set_dr6 = vmx_set_dr6,
+ .set_dr7 = vmx_set_dr7,
+ .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
+ .cache_reg = vmx_cache_reg,
+ .get_rflags = vmx_get_rflags,
+ .set_rflags = vmx_set_rflags,
+
+ .tlb_flush = vmx_flush_tlb,
+ .tlb_flush_gva = vmx_flush_tlb_gva,
+
+ .run = vmx_vcpu_run,
+ .handle_exit = vmx_handle_exit,
+ .skip_emulated_instruction = vmx_skip_emulated_instruction,
+ .update_emulated_instruction = vmx_update_emulated_instruction,
+ .set_interrupt_shadow = vmx_set_interrupt_shadow,
+ .get_interrupt_shadow = vmx_get_interrupt_shadow,
+ .patch_hypercall = vmx_patch_hypercall,
+ .set_irq = vmx_inject_irq,
+ .set_nmi = vmx_inject_nmi,
+ .queue_exception = vmx_queue_exception,
+ .cancel_injection = vmx_cancel_injection,
+ .interrupt_allowed = vmx_interrupt_allowed,
+ .nmi_allowed = vmx_nmi_allowed,
+ .get_nmi_mask = vmx_get_nmi_mask,
+ .set_nmi_mask = vmx_set_nmi_mask,
+ .enable_nmi_window = enable_nmi_window,
+ .enable_irq_window = enable_irq_window,
+ .update_cr8_intercept = update_cr8_intercept,
+ .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
+ .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
+ .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
+ .load_eoi_exitmap = vmx_load_eoi_exitmap,
+ .apicv_post_state_restore = vmx_apicv_post_state_restore,
+ .check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
+ .hwapic_irr_update = vmx_hwapic_irr_update,
+ .hwapic_isr_update = vmx_hwapic_isr_update,
+ .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
+ .sync_pir_to_irr = vmx_sync_pir_to_irr,
+ .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+ .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
+
+ .set_tss_addr = vmx_set_tss_addr,
+ .set_identity_map_addr = vmx_set_identity_map_addr,
+ .get_tdp_level = get_ept_level,
+ .get_mt_mask = vmx_get_mt_mask,
+
+ .get_exit_info = vmx_get_exit_info,
+
+ .cpuid_update = vmx_cpuid_update,
+
+ .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
+
+ .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
+ .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
+
+ .load_mmu_pgd = vmx_load_mmu_pgd,
+
+ .check_intercept = vmx_check_intercept,
+ .handle_exit_irqoff = vmx_handle_exit_irqoff,
+
+ .request_immediate_exit = vmx_request_immediate_exit,
+
+ .sched_in = vmx_sched_in,
+
+ .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
+ .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
+ .flush_log_dirty = vmx_flush_log_dirty,
+ .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+ .write_log_dirty = vmx_write_pml_buffer,
+
+ .pre_block = vmx_pre_block,
+ .post_block = vmx_post_block,
+
+ .pmu_ops = &intel_pmu_ops,
+
+ .update_pi_irte = vmx_update_pi_irte,
+
+#ifdef CONFIG_X86_64
+ .set_hv_timer = vmx_set_hv_timer,
+ .cancel_hv_timer = vmx_cancel_hv_timer,
+#endif
+
+ .setup_mce = vmx_setup_mce,
+
+ .smi_allowed = vmx_smi_allowed,
+ .pre_enter_smm = vmx_pre_enter_smm,
+ .pre_leave_smm = vmx_pre_leave_smm,
+ .enable_smi_window = enable_smi_window,
+
+ .check_nested_events = NULL,
+ .get_nested_state = NULL,
+ .set_nested_state = NULL,
+ .get_vmcs12_pages = NULL,
+ .nested_enable_evmcs = NULL,
+ .nested_get_evmcs_version = NULL,
+ .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
+ .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
+};
+
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
struct desc_ptr dt;
- int r, i;
-
- rdmsrl_safe(MSR_EFER, &host_efer);
+ int r, i, ept_lpage_level;
store_idt(&dt);
host_idt_base = dt.address;
@@ -7696,6 +7836,10 @@ static __init int hardware_setup(void)
WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
}
+ if (!cpu_has_vmx_mpx())
+ supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
+ XFEATURE_MASK_BNDCSR);
+
if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
@@ -7724,19 +7868,16 @@ static __init int hardware_setup(void)
* using the APIC_ACCESS_ADDR VMCS field.
*/
if (!flexpriority_enabled)
- kvm_x86_ops->set_apic_access_page_addr = NULL;
+ vmx_x86_ops.set_apic_access_page_addr = NULL;
if (!cpu_has_vmx_tpr_shadow())
- kvm_x86_ops->update_cr8_intercept = NULL;
-
- if (enable_ept && !cpu_has_vmx_ept_2m_page())
- kvm_disable_largepages();
+ vmx_x86_ops.update_cr8_intercept = NULL;
#if IS_ENABLED(CONFIG_HYPERV)
if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
&& enable_ept) {
- kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb;
- kvm_x86_ops->tlb_remote_flush_with_range =
+ vmx_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
+ vmx_x86_ops.tlb_remote_flush_with_range =
hv_remote_flush_tlb_with_range;
}
#endif
@@ -7751,7 +7892,7 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_apicv()) {
enable_apicv = 0;
- kvm_x86_ops->sync_pir_to_irr = NULL;
+ vmx_x86_ops.sync_pir_to_irr = NULL;
}
if (cpu_has_vmx_tsc_scaling()) {
@@ -7764,8 +7905,16 @@ static __init int hardware_setup(void)
if (enable_ept)
vmx_enable_tdp();
+
+ if (!enable_ept)
+ ept_lpage_level = 0;
+ else if (cpu_has_vmx_ept_1g_page())
+ ept_lpage_level = PT_PDPE_LEVEL;
+ else if (cpu_has_vmx_ept_2m_page())
+ ept_lpage_level = PT_DIRECTORY_LEVEL;
else
- kvm_disable_tdp();
+ ept_lpage_level = PT_PAGE_TABLE_LEVEL;
+ kvm_configure_mmu(enable_ept, ept_lpage_level);
/*
* Only enable PML when hardware supports PML feature, and both EPT
@@ -7775,10 +7924,10 @@ static __init int hardware_setup(void)
enable_pml = 0;
if (!enable_pml) {
- kvm_x86_ops->slot_enable_log_dirty = NULL;
- kvm_x86_ops->slot_disable_log_dirty = NULL;
- kvm_x86_ops->flush_log_dirty = NULL;
- kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+ vmx_x86_ops.slot_enable_log_dirty = NULL;
+ vmx_x86_ops.slot_disable_log_dirty = NULL;
+ vmx_x86_ops.flush_log_dirty = NULL;
+ vmx_x86_ops.enable_log_dirty_pt_masked = NULL;
}
if (!cpu_has_vmx_preemption_timer())
@@ -7806,9 +7955,9 @@ static __init int hardware_setup(void)
}
if (!enable_preemption_timer) {
- kvm_x86_ops->set_hv_timer = NULL;
- kvm_x86_ops->cancel_hv_timer = NULL;
- kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
+ vmx_x86_ops.set_hv_timer = NULL;
+ vmx_x86_ops.cancel_hv_timer = NULL;
+ vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
@@ -7824,185 +7973,27 @@ static __init int hardware_setup(void)
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
vmx_capability.ept);
- r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
+ r = nested_vmx_hardware_setup(&vmx_x86_ops,
+ kvm_vmx_exit_handlers);
if (r)
return r;
}
+ vmx_set_cpu_caps();
+
r = alloc_kvm_area();
if (r)
nested_vmx_hardware_unsetup();
return r;
}
-static __exit void hardware_unsetup(void)
-{
- if (nested)
- nested_vmx_hardware_unsetup();
-
- free_kvm_area();
-}
-
-static bool vmx_check_apicv_inhibit_reasons(ulong bit)
-{
- ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
- BIT(APICV_INHIBIT_REASON_HYPERV);
-
- return supported & BIT(bit);
-}
-
-static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
+static struct kvm_x86_init_ops vmx_init_ops __initdata = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
- .hardware_setup = hardware_setup,
- .hardware_unsetup = hardware_unsetup,
.check_processor_compatibility = vmx_check_processor_compat,
- .hardware_enable = hardware_enable,
- .hardware_disable = hardware_disable,
- .cpu_has_accelerated_tpr = report_flexpriority,
- .has_emulated_msr = vmx_has_emulated_msr,
-
- .vm_init = vmx_vm_init,
- .vm_alloc = vmx_vm_alloc,
- .vm_free = vmx_vm_free,
-
- .vcpu_create = vmx_create_vcpu,
- .vcpu_free = vmx_free_vcpu,
- .vcpu_reset = vmx_vcpu_reset,
-
- .prepare_guest_switch = vmx_prepare_switch_to_guest,
- .vcpu_load = vmx_vcpu_load,
- .vcpu_put = vmx_vcpu_put,
-
- .update_bp_intercept = update_exception_bitmap,
- .get_msr_feature = vmx_get_msr_feature,
- .get_msr = vmx_get_msr,
- .set_msr = vmx_set_msr,
- .get_segment_base = vmx_get_segment_base,
- .get_segment = vmx_get_segment,
- .set_segment = vmx_set_segment,
- .get_cpl = vmx_get_cpl,
- .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
- .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
- .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
- .set_cr0 = vmx_set_cr0,
- .set_cr3 = vmx_set_cr3,
- .set_cr4 = vmx_set_cr4,
- .set_efer = vmx_set_efer,
- .get_idt = vmx_get_idt,
- .set_idt = vmx_set_idt,
- .get_gdt = vmx_get_gdt,
- .set_gdt = vmx_set_gdt,
- .get_dr6 = vmx_get_dr6,
- .set_dr6 = vmx_set_dr6,
- .set_dr7 = vmx_set_dr7,
- .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
- .cache_reg = vmx_cache_reg,
- .get_rflags = vmx_get_rflags,
- .set_rflags = vmx_set_rflags,
-
- .tlb_flush = vmx_flush_tlb,
- .tlb_flush_gva = vmx_flush_tlb_gva,
-
- .run = vmx_vcpu_run,
- .handle_exit = vmx_handle_exit,
- .skip_emulated_instruction = vmx_skip_emulated_instruction,
- .update_emulated_instruction = vmx_update_emulated_instruction,
- .set_interrupt_shadow = vmx_set_interrupt_shadow,
- .get_interrupt_shadow = vmx_get_interrupt_shadow,
- .patch_hypercall = vmx_patch_hypercall,
- .set_irq = vmx_inject_irq,
- .set_nmi = vmx_inject_nmi,
- .queue_exception = vmx_queue_exception,
- .cancel_injection = vmx_cancel_injection,
- .interrupt_allowed = vmx_interrupt_allowed,
- .nmi_allowed = vmx_nmi_allowed,
- .get_nmi_mask = vmx_get_nmi_mask,
- .set_nmi_mask = vmx_set_nmi_mask,
- .enable_nmi_window = enable_nmi_window,
- .enable_irq_window = enable_irq_window,
- .update_cr8_intercept = update_cr8_intercept,
- .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
- .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
- .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
- .load_eoi_exitmap = vmx_load_eoi_exitmap,
- .apicv_post_state_restore = vmx_apicv_post_state_restore,
- .check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
- .hwapic_irr_update = vmx_hwapic_irr_update,
- .hwapic_isr_update = vmx_hwapic_isr_update,
- .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
- .sync_pir_to_irr = vmx_sync_pir_to_irr,
- .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
- .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
-
- .set_tss_addr = vmx_set_tss_addr,
- .set_identity_map_addr = vmx_set_identity_map_addr,
- .get_tdp_level = get_ept_level,
- .get_mt_mask = vmx_get_mt_mask,
-
- .get_exit_info = vmx_get_exit_info,
-
- .get_lpage_level = vmx_get_lpage_level,
-
- .cpuid_update = vmx_cpuid_update,
-
- .rdtscp_supported = vmx_rdtscp_supported,
- .invpcid_supported = vmx_invpcid_supported,
-
- .set_supported_cpuid = vmx_set_supported_cpuid,
-
- .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
-
- .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
- .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
-
- .set_tdp_cr3 = vmx_set_cr3,
-
- .check_intercept = vmx_check_intercept,
- .handle_exit_irqoff = vmx_handle_exit_irqoff,
- .mpx_supported = vmx_mpx_supported,
- .xsaves_supported = vmx_xsaves_supported,
- .umip_emulated = vmx_umip_emulated,
- .pt_supported = vmx_pt_supported,
- .pku_supported = vmx_pku_supported,
-
- .request_immediate_exit = vmx_request_immediate_exit,
-
- .sched_in = vmx_sched_in,
-
- .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
- .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
- .flush_log_dirty = vmx_flush_log_dirty,
- .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
- .write_log_dirty = vmx_write_pml_buffer,
-
- .pre_block = vmx_pre_block,
- .post_block = vmx_post_block,
-
- .pmu_ops = &intel_pmu_ops,
-
- .update_pi_irte = vmx_update_pi_irte,
-
-#ifdef CONFIG_X86_64
- .set_hv_timer = vmx_set_hv_timer,
- .cancel_hv_timer = vmx_cancel_hv_timer,
-#endif
-
- .setup_mce = vmx_setup_mce,
-
- .smi_allowed = vmx_smi_allowed,
- .pre_enter_smm = vmx_pre_enter_smm,
- .pre_leave_smm = vmx_pre_leave_smm,
- .enable_smi_window = enable_smi_window,
+ .hardware_setup = hardware_setup,
- .check_nested_events = NULL,
- .get_nested_state = NULL,
- .set_nested_state = NULL,
- .get_vmcs12_pages = NULL,
- .nested_enable_evmcs = NULL,
- .nested_get_evmcs_version = NULL,
- .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
- .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
+ .runtime_ops = &vmx_x86_ops,
};
static void vmx_cleanup_l1d_flush(void)
@@ -8089,7 +8080,7 @@ static int __init vmx_init(void)
}
#endif
- r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+ r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
return r;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 0695ea177e22..aab9df55336e 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -12,7 +12,6 @@
#include "vmcs.h"
extern const u32 vmx_msr_index[];
-extern u64 host_efer;
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
@@ -333,9 +332,9 @@ u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
+void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3);
void ept_save_pdptrs(struct kvm_vcpu *vcpu);
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -450,7 +449,7 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
static inline u32 vmx_vmentry_ctrl(void)
{
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
- if (pt_mode == PT_MODE_SYSTEM)
+ if (vmx_pt_mode_is_system())
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
VM_ENTRY_LOAD_IA32_RTIT_CTL);
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
@@ -461,7 +460,7 @@ static inline u32 vmx_vmentry_ctrl(void)
static inline u32 vmx_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
- if (pt_mode == PT_MODE_SYSTEM)
+ if (vmx_pt_mode_is_system())
vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
VM_EXIT_CLEAR_IA32_RTIT_CTL);
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
@@ -491,7 +490,6 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
void free_vmcs(struct vmcs *vmcs);
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
-void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs);
void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);
static inline struct vmcs *alloc_vmcs(bool shadow)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bf8564d73fc3..b8124b562dea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -22,6 +22,7 @@
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
+#include "kvm_emulate.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
@@ -81,7 +82,7 @@ u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
#define emul_to_vcpu(ctxt) \
- container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
+ ((struct kvm_vcpu *)(ctxt)->vcpu)
/* EFER defaults:
* - enable syscall per default because its emulated by KVM
@@ -109,7 +110,7 @@ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);
-struct kvm_x86_ops *kvm_x86_ops __read_mostly;
+struct kvm_x86_ops kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool __read_mostly ignore_msrs = 0;
@@ -180,7 +181,17 @@ struct kvm_shared_msrs {
static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;
+#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
+ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
+ | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
+ | XFEATURE_MASK_PKRU)
+
+u64 __read_mostly host_efer;
+EXPORT_SYMBOL_GPL(host_efer);
+
static u64 __read_mostly host_xss;
+u64 __read_mostly supported_xss;
+EXPORT_SYMBOL_GPL(supported_xss);
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -226,10 +237,25 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
};
u64 __read_mostly host_xcr0;
+u64 __read_mostly supported_xcr0;
+EXPORT_SYMBOL_GPL(supported_xcr0);
struct kmem_cache *x86_fpu_cache;
EXPORT_SYMBOL_GPL(x86_fpu_cache);
+static struct kmem_cache *x86_emulator_cache;
+
+static struct kmem_cache *kvm_alloc_emulator_cache(void)
+{
+ unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
+ unsigned int size = sizeof(struct x86_emulate_ctxt);
+
+ return kmem_cache_create_usercopy("x86_emulator", size,
+ __alignof__(struct x86_emulate_ctxt),
+ SLAB_ACCOUNT, useroffset,
+ size - useroffset, NULL);
+}
+
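kvm_alloc_emulator_cache() whitelists only the tail of struct x86_emulate_ctxt (everything from the 'src' member onward) for hardened usercopy, so copy_to/from_user() touching any other part of the object would trip CONFIG_HARDENED_USERCOPY. A kernel-module-style sketch of the same pattern with a made-up structure; demo_obj and its cache are hypothetical, only kmem_cache_create_usercopy() and its argument order match the real API:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/stddef.h>

struct demo_obj {
        unsigned long kernel_only;      /* never exposed to user space */
        char user_window[64];           /* the only usercopy-legal region */
};

static struct kmem_cache *demo_cache;

static int __init demo_cache_init(void)
{
        /* Whitelist only user_window for copy_{to,from}_user(). */
        demo_cache = kmem_cache_create_usercopy("demo_obj",
                        sizeof(struct demo_obj),
                        __alignof__(struct demo_obj),
                        SLAB_ACCOUNT,
                        offsetof(struct demo_obj, user_window),
                        sizeof_field(struct demo_obj, user_window),
                        NULL);
        return demo_cache ? 0 : -ENOMEM;
}

static void __exit demo_cache_exit(void)
{
        kmem_cache_destroy(demo_cache);
}

module_init(demo_cache_init);
module_exit(demo_cache_exit);
MODULE_LICENSE("GPL");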
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
@@ -350,6 +376,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
kvm_lapic_set_base(vcpu, msr_info->data);
+ kvm_recalculate_apic_map(vcpu->kvm);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
@@ -619,7 +646,7 @@ EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
*/
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
- if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
+ if (kvm_x86_ops.get_cpl(vcpu) <= required_cpl)
return true;
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
return false;
@@ -760,7 +787,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!is_pae(vcpu))
return 1;
- kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
if (cs_l)
return 1;
} else
@@ -773,7 +800,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
return 1;
- kvm_x86_ops->set_cr0(vcpu, cr0);
+ kvm_x86_ops.set_cr0(vcpu, cr0);
if ((cr0 ^ old_cr0) & X86_CR0_PG) {
kvm_clear_async_pf_completion_queue(vcpu);
@@ -869,7 +896,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
- if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
+ if (kvm_x86_ops.get_cpl(vcpu) != 0 ||
__kvm_set_xcr(vcpu, index, xcr)) {
kvm_inject_gp(vcpu, 0);
return 1;
@@ -903,10 +930,10 @@ static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
{
u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
- if (cpuid_ecx(0x7) & feature_bit(LA57))
+ if (kvm_cpu_cap_has(X86_FEATURE_LA57))
reserved_bits &= ~X86_CR4_LA57;
- if (kvm_x86_ops->umip_emulated())
+ if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
reserved_bits &= ~X86_CR4_UMIP;
return reserved_bits;
@@ -950,7 +977,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
- if (kvm_x86_ops->set_cr4(vcpu, cr4))
+ if (kvm_x86_ops.set_cr4(vcpu, cr4))
return 1;
if (((cr4 ^ old_cr4) & pdptr_bits) ||
@@ -1034,7 +1061,7 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
+ kvm_x86_ops.set_dr6(vcpu, vcpu->arch.dr6);
}
static void kvm_update_dr7(struct kvm_vcpu *vcpu)
@@ -1045,7 +1072,7 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
dr7 = vcpu->arch.guest_debug_dr7;
else
dr7 = vcpu->arch.dr7;
- kvm_x86_ops->set_dr7(vcpu, dr7);
+ kvm_x86_ops.set_dr7(vcpu, dr7);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
if (dr7 & DR7_BP_EN_MASK)
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
@@ -1115,7 +1142,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
*val = vcpu->arch.dr6;
else
- *val = kvm_x86_ops->get_dr6(vcpu);
+ *val = kvm_x86_ops.get_dr6(vcpu);
break;
case 5:
/* fall through */
@@ -1350,7 +1377,7 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
rdmsrl_safe(msr->index, &msr->data);
break;
default:
- if (kvm_x86_ops->get_msr_feature(msr))
+ if (kvm_x86_ops.get_msr_feature(msr))
return 1;
}
return 0;
@@ -1418,7 +1445,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
efer &= ~EFER_LMA;
efer |= vcpu->arch.efer & EFER_LMA;
- kvm_x86_ops->set_efer(vcpu, efer);
+ kvm_x86_ops.set_efer(vcpu, efer);
/* Update reserved bits */
if ((efer ^ old_efer) & EFER_NX)
@@ -1474,7 +1501,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
msr.index = index;
msr.host_initiated = host_initiated;
- return kvm_x86_ops->set_msr(vcpu, &msr);
+ return kvm_x86_ops.set_msr(vcpu, &msr);
}
/*
@@ -1492,7 +1519,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
msr.index = index;
msr.host_initiated = host_initiated;
- ret = kvm_x86_ops->get_msr(vcpu, &msr);
+ ret = kvm_x86_ops.get_msr(vcpu, &msr);
if (!ret)
*data = msr.data;
return ret;
@@ -1561,8 +1588,12 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data
((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
+ data &= ~(1 << 12);
+ kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32));
kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
- return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
+ kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data);
+ trace_kvm_apic_write(APIC_ICR, (u32)data);
+ return 0;
}
return 1;
@@ -1571,11 +1602,12 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data
enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
{
u32 msr = kvm_rcx_read(vcpu);
- u64 data = kvm_read_edx_eax(vcpu);
+ u64 data;
int ret = 0;
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
+ data = kvm_read_edx_eax(vcpu);
ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
break;
default:
@@ -1876,7 +1908,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
- u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+ u64 curr_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
@@ -1918,7 +1950,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+ u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
@@ -1926,7 +1958,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
- vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
+ vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset);
}
static inline bool kvm_check_tsc_unstable(void)
@@ -2050,7 +2082,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
- u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+ u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
}
@@ -2525,7 +2557,7 @@ static void kvmclock_sync_fn(struct work_struct *work)
static bool can_set_mci_status(struct kvm_vcpu *vcpu)
{
/* McStatusWrEn enabled? */
- if (guest_cpuid_is_amd(vcpu))
+ if (guest_cpuid_is_amd_or_hygon(vcpu))
return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
return false;
@@ -2647,7 +2679,7 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
++vcpu->stat.tlb_flush;
- kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
+ kvm_x86_ops.tlb_flush(vcpu, invalidate_gpa);
}
static void record_steal_time(struct kvm_vcpu *vcpu)
@@ -2800,12 +2832,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
return 1;
/*
- * We do support PT if kvm_x86_ops->pt_supported(), but we do
- * not support IA32_XSS[bit 8]. Guests will have to use
- * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
- * MSRs.
+ * KVM supports exposing PT to the guest, but does not support
+ * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
+ * XSAVES/XRSTORS to save/restore PT MSRs.
*/
- if (data != 0)
+ if (data & ~supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
break;
@@ -3079,7 +3110,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
- break;
case MSR_IA32_TSCDEADLINE:
msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
break;
@@ -3162,7 +3192,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data,
msr_info->host_initiated);
- break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
* silicon. It is however accessed by winxp in very narrow
@@ -3367,10 +3396,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* fringe case that is not enabled except via specific settings
* of the module parameters.
*/
- r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
+ r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE);
break;
case KVM_CAP_VAPIC:
- r = !kvm_x86_ops->cpu_has_accelerated_tpr();
+ r = !kvm_x86_ops.cpu_has_accelerated_tpr();
break;
case KVM_CAP_NR_VCPUS:
r = KVM_SOFT_MAX_VCPUS;
@@ -3397,14 +3426,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_X2APIC_API_VALID_FLAGS;
break;
case KVM_CAP_NESTED_STATE:
- r = kvm_x86_ops->get_nested_state ?
- kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
+ r = kvm_x86_ops.get_nested_state ?
+ kvm_x86_ops.get_nested_state(NULL, NULL, 0) : 0;
break;
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
- r = kvm_x86_ops->enable_direct_tlbflush != NULL;
+ r = kvm_x86_ops.enable_direct_tlbflush != NULL;
break;
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
- r = kvm_x86_ops->nested_enable_evmcs != NULL;
+ r = kvm_x86_ops.nested_enable_evmcs != NULL;
break;
default:
break;
@@ -3466,7 +3495,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_X86_GET_MCE_CAP_SUPPORTED: {
+ case KVM_X86_GET_MCE_CAP_SUPPORTED:
r = -EFAULT;
if (copy_to_user(argp, &kvm_mce_cap_supported,
sizeof(kvm_mce_cap_supported)))
@@ -3498,9 +3527,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
case KVM_GET_MSRS:
r = msr_io(NULL, argp, do_get_msr_feature, 1);
break;
- }
default:
r = -EINVAL;
+ break;
}
out:
return r;
@@ -3520,14 +3549,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Address WBINVD may be executed by guest */
if (need_emulate_wbinvd(vcpu)) {
- if (kvm_x86_ops->has_wbinvd_exit())
+ if (kvm_x86_ops.has_wbinvd_exit())
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
smp_call_function_single(vcpu->cpu,
wbinvd_ipi, NULL, 1);
}
- kvm_x86_ops->vcpu_load(vcpu, cpu);
+ kvm_x86_ops.vcpu_load(vcpu, cpu);
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
@@ -3594,7 +3623,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
int idx;
if (vcpu->preempted)
- vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
+ vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
/*
* Disable page faults because we're in atomic context here.
@@ -3613,7 +3642,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_steal_time_set_preempted(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
pagefault_enable();
- kvm_x86_ops->vcpu_put(vcpu);
+ kvm_x86_ops.vcpu_put(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
/*
* If userspace has set any breakpoints or watchpoints, dr6 is restored
@@ -3627,7 +3656,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
if (vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
+ kvm_x86_ops.sync_pir_to_irr(vcpu);
return kvm_apic_get_state(vcpu, s);
}
@@ -3735,7 +3764,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
for (bank = 0; bank < bank_num; bank++)
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
- kvm_x86_ops->setup_mce(vcpu);
+ kvm_x86_ops.setup_mce(vcpu);
out:
return r;
}
@@ -3839,11 +3868,11 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = 0;
- events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
+ events->interrupt.shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending != 0;
- events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+ events->nmi.masked = kvm_x86_ops.get_nmi_mask(vcpu);
events->nmi.pad = 0;
events->sipi_vector = 0; /* never valid when reporting to user space */
@@ -3910,13 +3939,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
- kvm_x86_ops->set_interrupt_shadow(vcpu,
+ kvm_x86_ops.set_interrupt_shadow(vcpu,
events->interrupt.shadow);
vcpu->arch.nmi_injected = events->nmi.injected;
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
vcpu->arch.nmi_pending = events->nmi.pending;
- kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
+ kvm_x86_ops.set_nmi_mask(vcpu, events->nmi.masked);
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
lapic_in_kernel(vcpu))
@@ -4103,8 +4132,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
* with old userspace.
*/
- if (xstate_bv & ~kvm_supported_xcr0() ||
- mxcsr & ~mxcsr_feature_mask)
+ if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
load_xsave(vcpu, (u8 *)guest_xsave->region);
} else {
@@ -4191,9 +4219,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
- if (!kvm_x86_ops->nested_enable_evmcs)
+ if (!kvm_x86_ops.nested_enable_evmcs)
return -ENOTTY;
- r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
+ r = kvm_x86_ops.nested_enable_evmcs(vcpu, &vmcs_version);
if (!r) {
user_ptr = (void __user *)(uintptr_t)cap->args[0];
if (copy_to_user(user_ptr, &vmcs_version,
@@ -4202,10 +4230,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
}
return r;
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
- if (!kvm_x86_ops->enable_direct_tlbflush)
+ if (!kvm_x86_ops.enable_direct_tlbflush)
return -ENOTTY;
- return kvm_x86_ops->enable_direct_tlbflush(vcpu);
+ return kvm_x86_ops.enable_direct_tlbflush(vcpu);
default:
return -EINVAL;
@@ -4508,7 +4536,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
u32 user_data_size;
r = -EINVAL;
- if (!kvm_x86_ops->get_nested_state)
+ if (!kvm_x86_ops.get_nested_state)
break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
@@ -4516,7 +4544,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (get_user(user_data_size, &user_kvm_nested_state->size))
break;
- r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
+ r = kvm_x86_ops.get_nested_state(vcpu, user_kvm_nested_state,
user_data_size);
if (r < 0)
break;
@@ -4538,7 +4566,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
int idx;
r = -EINVAL;
- if (!kvm_x86_ops->set_nested_state)
+ if (!kvm_x86_ops.set_nested_state)
break;
r = -EFAULT;
@@ -4560,7 +4588,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+ r = kvm_x86_ops.set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
@@ -4604,14 +4632,14 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
if (addr > (unsigned int)(-3 * PAGE_SIZE))
return -EINVAL;
- ret = kvm_x86_ops->set_tss_addr(kvm, addr);
+ ret = kvm_x86_ops.set_tss_addr(kvm, addr);
return ret;
}
static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
u64 ident_addr)
{
- return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
+ return kvm_x86_ops.set_identity_map_addr(kvm, ident_addr);
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
@@ -4763,77 +4791,13 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
return 0;
}
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * Steps 1-4 below provide general overview of dirty page logging. See
- * kvm_get_dirty_log_protect() function description for additional details.
- *
- * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
- * always flush the TLB (step 4) even if previous step failed and the dirty
- * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
- * does not preclude user space subsequent dirty log read. Flushing TLB ensures
- * writes will be marked dirty for next log read.
- *
- * 1. Take a snapshot of the bit and clear it if needed.
- * 2. Write protect the corresponding page.
- * 3. Copy the snapshot to the userspace.
- * 4. Flush TLB's if needed.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
- bool flush = false;
- int r;
-
- mutex_lock(&kvm->slots_lock);
-
/*
* Flush potentially hardware-cached dirty pages to dirty_bitmap.
*/
- if (kvm_x86_ops->flush_log_dirty)
- kvm_x86_ops->flush_log_dirty(kvm);
-
- r = kvm_get_dirty_log_protect(kvm, log, &flush);
-
- /*
- * All the TLBs can be flushed out of mmu lock, see the comments in
- * kvm_mmu_slot_remove_write_access().
- */
- lockdep_assert_held(&kvm->slots_lock);
- if (flush)
- kvm_flush_remote_tlbs(kvm);
-
- mutex_unlock(&kvm->slots_lock);
- return r;
-}
-
-int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
-{
- bool flush = false;
- int r;
-
- mutex_lock(&kvm->slots_lock);
-
- /*
- * Flush potentially hardware-cached dirty pages to dirty_bitmap.
- */
- if (kvm_x86_ops->flush_log_dirty)
- kvm_x86_ops->flush_log_dirty(kvm);
-
- r = kvm_clear_dirty_log_protect(kvm, log, &flush);
-
- /*
- * All the TLBs can be flushed out of mmu lock, see the comments in
- * kvm_mmu_slot_remove_write_access().
- */
- lockdep_assert_held(&kvm->slots_lock);
- if (flush)
- kvm_flush_remote_tlbs(kvm);
-
- mutex_unlock(&kvm->slots_lock);
- return r;
+ if (kvm_x86_ops.flush_log_dirty)
+ kvm_x86_ops.flush_log_dirty(kvm);
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
@@ -5186,8 +5150,8 @@ set_identity_unlock:
}
case KVM_MEMORY_ENCRYPT_OP: {
r = -ENOTTY;
- if (kvm_x86_ops->mem_enc_op)
- r = kvm_x86_ops->mem_enc_op(kvm, argp);
+ if (kvm_x86_ops.mem_enc_op)
+ r = kvm_x86_ops.mem_enc_op(kvm, argp);
break;
}
case KVM_MEMORY_ENCRYPT_REG_REGION: {
@@ -5198,8 +5162,8 @@ set_identity_unlock:
goto out;
r = -ENOTTY;
- if (kvm_x86_ops->mem_enc_reg_region)
- r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
+ if (kvm_x86_ops.mem_enc_reg_region)
+ r = kvm_x86_ops.mem_enc_reg_region(kvm, &region);
break;
}
case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
@@ -5210,8 +5174,8 @@ set_identity_unlock:
goto out;
r = -ENOTTY;
- if (kvm_x86_ops->mem_enc_unreg_region)
- r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
+ if (kvm_x86_ops.mem_enc_unreg_region)
+ r = kvm_x86_ops.mem_enc_unreg_region(kvm, &region);
break;
}
case KVM_HYPERV_EVENTFD: {
@@ -5262,28 +5226,28 @@ static void kvm_init_msr_list(void)
continue;
break;
case MSR_TSC_AUX:
- if (!kvm_x86_ops->rdtscp_supported())
+ if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
continue;
break;
case MSR_IA32_RTIT_CTL:
case MSR_IA32_RTIT_STATUS:
- if (!kvm_x86_ops->pt_supported())
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
continue;
break;
case MSR_IA32_RTIT_CR3_MATCH:
- if (!kvm_x86_ops->pt_supported() ||
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
!intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
continue;
break;
case MSR_IA32_RTIT_OUTPUT_BASE:
case MSR_IA32_RTIT_OUTPUT_MASK:
- if (!kvm_x86_ops->pt_supported() ||
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
(!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
!intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
continue;
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
- if (!kvm_x86_ops->pt_supported() ||
+ if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
continue;
@@ -5306,7 +5270,7 @@ static void kvm_init_msr_list(void)
}
for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
- if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
+ if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i]))
continue;
emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
@@ -5369,13 +5333,13 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
static void kvm_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
- kvm_x86_ops->set_segment(vcpu, var, seg);
+ kvm_x86_ops.set_segment(vcpu, var, seg);
}
void kvm_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
- kvm_x86_ops->get_segment(vcpu, var, seg);
+ kvm_x86_ops.get_segment(vcpu, var, seg);
}
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
@@ -5395,14 +5359,14 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_FETCH_MASK;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
@@ -5410,7 +5374,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_WRITE_MASK;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
@@ -5459,7 +5423,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
struct x86_exception *exception)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
unsigned offset;
int ret;
@@ -5484,7 +5448,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
/*
* FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
@@ -5505,7 +5469,7 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = 0;
- if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+ if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
access |= PFERR_USER_MASK;
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
@@ -5558,7 +5522,7 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = PFERR_WRITE_MASK;
- if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+ if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
access |= PFERR_USER_MASK;
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
@@ -5621,7 +5585,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t *gpa, struct x86_exception *exception,
bool write)
{
- u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+ u32 access = ((kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
| (write ? PFERR_WRITE_MASK : 0);
/*
@@ -5740,7 +5704,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
int handled, ret;
bool write = ops->write;
struct kvm_mmio_fragment *frag;
- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
/*
* If the exit was due to a NPF we may already have a GPA.
@@ -5749,10 +5713,9 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
* operation using rep will only have the initial GPA from when the NPF
* occurred.
*/
- if (vcpu->arch.gpa_available &&
- emulator_can_use_gpa(ctxt) &&
- (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
- gpa = vcpu->arch.gpa_val;
+ if (ctxt->gpa_available && emulator_can_use_gpa(ctxt) &&
+ (addr & ~PAGE_MASK) == (ctxt->gpa_val & ~PAGE_MASK)) {
+ gpa = ctxt->gpa_val;
ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
} else {
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
@@ -5972,11 +5935,9 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
return 0;
}
-static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
- int size, unsigned short port, void *val,
- unsigned int count)
+static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *val, unsigned int count)
{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int ret;
if (vcpu->arch.pio.count)
@@ -5996,20 +5957,33 @@ data_avail:
return 0;
}
-static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
- int size, unsigned short port,
- const void *val, unsigned int count)
+static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
+ int size, unsigned short port, void *val,
+ unsigned int count)
{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ return emulator_pio_in(emul_to_vcpu(ctxt), size, port, val, count);
+
+}
+static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, const void *val,
+ unsigned int count)
+{
memcpy(vcpu->arch.pio_data, val, size * count);
trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
}
+static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
+ int size, unsigned short port,
+ const void *val, unsigned int count)
+{
+ return emulator_pio_out(emul_to_vcpu(ctxt), size, port, val, count);
+}
+
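Note on the hunk above: the PIO helpers are split so the core logic takes a struct kvm_vcpu directly and the x86_emulate_ctxt variants become one-line wrappers, which lets the fast-PIO paths further down call them without an emulator context. A minimal userspace sketch of that wrapper pattern follows; the struct names and trivial bodies are stand-ins, not the kernel's types.

#include <stdio.h>

/* Hypothetical stand-ins for kvm_vcpu and x86_emulate_ctxt. */
struct vcpu { int id; };
struct emu_ctxt { struct vcpu *vcpu; };

static struct vcpu *emul_to_vcpu(struct emu_ctxt *ctxt)
{
        return ctxt->vcpu;
}

/* Core helper takes the vcpu, so non-emulator callers can use it directly. */
static int pio_out(struct vcpu *vcpu, int size, unsigned short port)
{
        printf("vcpu%d: OUT port 0x%x, size %d\n", vcpu->id, (unsigned)port, size);
        return 1;
}

/* Emulator-facing wrapper only unwraps the context. */
static int pio_out_emulated(struct emu_ctxt *ctxt, int size, unsigned short port)
{
        return pio_out(emul_to_vcpu(ctxt), size, port);
}

int main(void)
{
        struct vcpu v = { .id = 0 };
        struct emu_ctxt c = { .vcpu = &v };

        pio_out(&v, 1, 0x3f8);                  /* fast path, no emulator context */
        return !pio_out_emulated(&c, 1, 0x3f8); /* emulated path */
}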
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
- return kvm_x86_ops->get_segment_base(vcpu, seg);
+ return kvm_x86_ops.get_segment_base(vcpu, seg);
}
static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
@@ -6022,7 +5996,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
- if (kvm_x86_ops->has_wbinvd_exit()) {
+ if (kvm_x86_ops.has_wbinvd_exit()) {
int cpu = get_cpu();
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
@@ -6127,27 +6101,27 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
{
- return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
+ return kvm_x86_ops.get_cpl(emul_to_vcpu(ctxt));
}
static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
+ kvm_x86_ops.get_gdt(emul_to_vcpu(ctxt), dt);
}
static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
+ kvm_x86_ops.get_idt(emul_to_vcpu(ctxt), dt);
}
static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
+ kvm_x86_ops.set_gdt(emul_to_vcpu(ctxt), dt);
}
static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
+ kvm_x86_ops.set_idt(emul_to_vcpu(ctxt), dt);
}
static unsigned long emulator_get_cached_segment_base(
@@ -6269,13 +6243,15 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
- return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
+ return kvm_x86_ops.check_intercept(emul_to_vcpu(ctxt), info, stage,
+ &ctxt->exception);
}
static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
+ bool exact_only)
{
- return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
+ return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
}
static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
@@ -6305,7 +6281,7 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon
static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
{
- kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
+ kvm_x86_ops.set_nmi_mask(emul_to_vcpu(ctxt), masked);
}
static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
@@ -6321,7 +6297,7 @@ static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_fla
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
- return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
+ return kvm_x86_ops.pre_leave_smm(emul_to_vcpu(ctxt), smstate);
}
static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
@@ -6383,7 +6359,7 @@ static const struct x86_emulate_ops emulate_ops = {
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
{
- u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
+ u32 int_shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
/*
* an sti; sti; sequence only disables interrupts for the first
* instruction. So, if the last instruction, be it emulated or
@@ -6394,7 +6370,7 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
if (int_shadow & mask)
mask = 0;
if (unlikely(int_shadow || mask)) {
- kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
+ kvm_x86_ops.set_interrupt_shadow(vcpu, mask);
if (!mask)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
@@ -6402,7 +6378,7 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
{
- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
if (ctxt->exception.vector == PF_VECTOR)
return kvm_propagate_fault(vcpu, &ctxt->exception);
@@ -6414,13 +6390,31 @@ static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
return false;
}
+static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
+{
+ struct x86_emulate_ctxt *ctxt;
+
+ ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
+ if (!ctxt) {
+ pr_err("kvm: failed to allocate vcpu's emulator\n");
+ return NULL;
+ }
+
+ ctxt->vcpu = vcpu;
+ ctxt->ops = &emulate_ops;
+ vcpu->arch.emulate_ctxt = ctxt;
+
+ return ctxt;
+}
+
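The emulator context is no longer embedded in struct kvm_vcpu_arch: alloc_emulate_ctxt() above zero-allocates it from the new x86_emulator_cache at vcpu creation, links it to the vcpu, and kvm_arch_vcpu_destroy() frees it again. A rough userspace model of that lifetime, with calloc()/free() standing in for kmem_cache_zalloc()/kmem_cache_free() and the structures reduced to the fields used here:

#include <stdio.h>
#include <stdlib.h>

struct vcpu;

struct emu_ctxt {
        struct vcpu *vcpu;      /* back-pointer, as set by alloc_emulate_ctxt() */
        int gpa_available;
};

struct vcpu {
        struct emu_ctxt *emulate_ctxt;
};

static struct emu_ctxt *alloc_ctxt(struct vcpu *vcpu)
{
        struct emu_ctxt *ctxt = calloc(1, sizeof(*ctxt));   /* zeroed, like *_zalloc */

        if (!ctxt)
                return NULL;
        ctxt->vcpu = vcpu;
        vcpu->emulate_ctxt = ctxt;
        return ctxt;
}

int main(void)
{
        struct vcpu v = { 0 };

        if (!alloc_ctxt(&v))
                return 1;
        printf("ctxt allocated, gpa_available=%d\n", v.emulate_ctxt->gpa_available);
        free(v.emulate_ctxt);   /* kmem_cache_free() in the real destroy path */
        return 0;
}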
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
{
- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
int cs_db, cs_l;
- kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ ctxt->gpa_available = false;
ctxt->eflags = kvm_get_rflags(vcpu);
ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
@@ -6440,7 +6434,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
{
- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
int ret;
init_emulate_ctxt(vcpu);
@@ -6479,7 +6473,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
kvm_queue_exception(vcpu, UD_VECTOR);
- if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+ if (!is_guest_mode(vcpu) && kvm_x86_ops.get_cpl(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
@@ -6496,10 +6490,11 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gpa_t gpa = cr2_or_gpa;
kvm_pfn_t pfn;
- if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
return false;
- if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
+ WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
return false;
if (!vcpu->arch.mmu->direct_map) {
@@ -6587,10 +6582,11 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
*/
vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
- if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
return false;
- if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
+ WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
return false;
if (x86_page_table_writing_insn(ctxt))
@@ -6658,10 +6654,10 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
int r;
- r = kvm_x86_ops->skip_emulated_instruction(vcpu);
+ r = kvm_x86_ops.skip_emulated_instruction(vcpu);
if (unlikely(!r))
return 0;
@@ -6753,7 +6749,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len)
{
int r;
- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
bool writeback = true;
bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
@@ -6843,8 +6839,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
}
restart:
- /* Save the faulting GPA (cr2) in the address field */
- ctxt->exception.address = cr2_or_gpa;
+ if (emulation_type & EMULTYPE_PF) {
+ /* Save the faulting GPA (cr2) in the address field */
+ ctxt->exception.address = cr2_or_gpa;
+
+ /* With shadow page tables, cr2 contains a GVA or nGPA. */
+ if (vcpu->arch.mmu->direct_map) {
+ ctxt->gpa_available = true;
+ ctxt->gpa_val = cr2_or_gpa;
+ }
+ } else {
+ /* Sanitize the address out of an abundance of paranoia. */
+ ctxt->exception.address = 0;
+ }
r = x86_emulate_insn(ctxt);
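With the hunk above, the pre-known-GPA shortcut is tied to the emulation having been caused by a page fault (EMULTYPE_PF) and to the MMU running in direct-map (TDP) mode, since with shadow paging CR2 carries a guest virtual address or nested GPA rather than a usable GPA. A compact sketch of that gating; the flag value and field names are simplified placeholders, not the kernel's definitions:

#include <stdbool.h>
#include <stdio.h>

#define EMULTYPE_PF (1 << 0)    /* illustrative flag value only */

struct ctxt {
        bool gpa_available;
        unsigned long gpa_val;
        unsigned long exception_address;
};

static void seed_gpa(struct ctxt *ctxt, int emulation_type, bool direct_map,
                     unsigned long cr2_or_gpa)
{
        if (emulation_type & EMULTYPE_PF) {
                ctxt->exception_address = cr2_or_gpa;
                /* Only TDP guarantees cr2_or_gpa really is a GPA. */
                if (direct_map) {
                        ctxt->gpa_available = true;
                        ctxt->gpa_val = cr2_or_gpa;
                }
        } else {
                ctxt->exception_address = 0;    /* sanitize, as in the patch */
        }
}

int main(void)
{
        struct ctxt tdp = { 0 }, shadow = { 0 };

        seed_gpa(&tdp, EMULTYPE_PF, true, 0x1000);
        seed_gpa(&shadow, EMULTYPE_PF, false, 0x1000);
        printf("tdp: %d  shadow: %d\n", tdp.gpa_available, shadow.gpa_available);
        return 0;
}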
@@ -6885,7 +6892,7 @@ restart:
r = 1;
if (writeback) {
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
if (!ctxt->have_exception ||
@@ -6893,8 +6900,8 @@ restart:
kvm_rip_write(vcpu, ctxt->eip);
if (r && ctxt->tf)
r = kvm_vcpu_do_singlestep(vcpu);
- if (kvm_x86_ops->update_emulated_instruction)
- kvm_x86_ops->update_emulated_instruction(vcpu);
+ if (kvm_x86_ops.update_emulated_instruction)
+ kvm_x86_ops.update_emulated_instruction(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}
@@ -6945,8 +6952,8 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port)
{
unsigned long val = kvm_rax_read(vcpu);
- int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
- size, port, &val, 1);
+ int ret = emulator_pio_out(vcpu, size, port, &val, 1);
+
if (ret)
return ret;
@@ -6982,11 +6989,10 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
/*
- * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
+ * Since vcpu->arch.pio.count == 1 let emulator_pio_in perform
* the copy and tracing
*/
- emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
- vcpu->arch.pio.port, &val, 1);
+ emulator_pio_in(vcpu, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1);
kvm_rax_write(vcpu, val);
return kvm_skip_emulated_instruction(vcpu);
@@ -7001,8 +7007,7 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
/* For size less than 4 we merge, else we zero extend */
val = (size < 4) ? kvm_rax_read(vcpu) : 0;
- ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
- &val, 1);
+ ret = emulator_pio_in(vcpu, size, port, &val, 1);
if (ret) {
kvm_rax_write(vcpu, val);
return ret;
@@ -7225,7 +7230,7 @@ static int kvm_is_user_mode(void)
int user_mode = 3;
if (__this_cpu_read(current_vcpu))
- user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
+ user_mode = kvm_x86_ops.get_cpl(__this_cpu_read(current_vcpu));
return user_mode != 0;
}
@@ -7302,10 +7307,10 @@ static struct notifier_block pvclock_gtod_notifier = {
int kvm_arch_init(void *opaque)
{
+ struct kvm_x86_init_ops *ops = opaque;
int r;
- struct kvm_x86_ops *ops = opaque;
- if (kvm_x86_ops) {
+ if (kvm_x86_ops.hardware_enable) {
printk(KERN_ERR "kvm: already loaded the other module\n");
r = -EEXIST;
goto out;
@@ -7342,18 +7347,22 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ x86_emulator_cache = kvm_alloc_emulator_cache();
+ if (!x86_emulator_cache) {
+ pr_err("kvm: failed to allocate cache for x86 emulator\n");
+ goto out_free_x86_fpu_cache;
+ }
+
shared_msrs = alloc_percpu(struct kvm_shared_msrs);
if (!shared_msrs) {
printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
- goto out_free_x86_fpu_cache;
+ goto out_free_x86_emulator_cache;
}
r = kvm_mmu_module_init();
if (r)
goto out_free_percpu;
- kvm_x86_ops = ops;
-
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0,
PT_PRESENT_MASK, 0, sme_me_mask);
@@ -7361,8 +7370,10 @@ int kvm_arch_init(void *opaque)
perf_register_guest_info_callbacks(&kvm_guest_cbs);
- if (boot_cpu_has(X86_FEATURE_XSAVE))
+ if (boot_cpu_has(X86_FEATURE_XSAVE)) {
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
+ }
kvm_lapic_init();
if (pi_inject_timer == -1)
@@ -7378,6 +7389,8 @@ int kvm_arch_init(void *opaque)
out_free_percpu:
free_percpu(shared_msrs);
+out_free_x86_emulator_cache:
+ kmem_cache_destroy(x86_emulator_cache);
out_free_x86_fpu_cache:
kmem_cache_destroy(x86_fpu_cache);
out:
@@ -7400,7 +7413,7 @@ void kvm_arch_exit(void)
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
#endif
- kvm_x86_ops = NULL;
+ kvm_x86_ops.hardware_enable = NULL;
kvm_mmu_module_exit();
free_percpu(shared_msrs);
kmem_cache_destroy(x86_fpu_cache);
@@ -7538,7 +7551,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
a3 &= 0xFFFFFFFF;
}
- if (kvm_x86_ops->get_cpl(vcpu) != 0) {
+ if (kvm_x86_ops.get_cpl(vcpu) != 0) {
ret = -KVM_EPERM;
goto out;
}
@@ -7584,7 +7597,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
char instruction[3];
unsigned long rip = kvm_rip_read(vcpu);
- kvm_x86_ops->patch_hypercall(vcpu, instruction);
+ kvm_x86_ops.patch_hypercall(vcpu, instruction);
return emulator_write_emulated(ctxt, rip, instruction, 3,
&ctxt->exception);
@@ -7613,7 +7626,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
{
int max_irr, tpr;
- if (!kvm_x86_ops->update_cr8_intercept)
+ if (!kvm_x86_ops.update_cr8_intercept)
return;
if (!lapic_in_kernel(vcpu))
@@ -7632,17 +7645,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
tpr = kvm_lapic_get_cr8(vcpu);
- kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
+ kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
}
-static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
+static int inject_pending_event(struct kvm_vcpu *vcpu)
{
int r;
/* try to reinject previous events if any */
if (vcpu->arch.exception.injected)
- kvm_x86_ops->queue_exception(vcpu);
+ kvm_x86_ops.queue_exception(vcpu);
/*
* Do not inject an NMI or interrupt if there is a pending
* exception. Exceptions and interrupts are recognized at
@@ -7659,9 +7672,9 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
*/
else if (!vcpu->arch.exception.pending) {
if (vcpu->arch.nmi_injected)
- kvm_x86_ops->set_nmi(vcpu);
+ kvm_x86_ops.set_nmi(vcpu);
else if (vcpu->arch.interrupt.injected)
- kvm_x86_ops->set_irq(vcpu);
+ kvm_x86_ops.set_irq(vcpu);
}
/*
@@ -7670,8 +7683,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
* from L2 to L1 due to pending L1 events which require exit
* from L2 to L1.
*/
- if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
- r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+ if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
+ r = kvm_x86_ops.check_nested_events(vcpu);
if (r != 0)
return r;
}
@@ -7708,7 +7721,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
}
}
- kvm_x86_ops->queue_exception(vcpu);
+ kvm_x86_ops.queue_exception(vcpu);
}
/* Don't consider new event if we re-injected an event */
@@ -7716,14 +7729,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
return 0;
if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
- kvm_x86_ops->smi_allowed(vcpu)) {
+ kvm_x86_ops.smi_allowed(vcpu)) {
vcpu->arch.smi_pending = false;
++vcpu->arch.smi_count;
enter_smm(vcpu);
- } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+ } else if (vcpu->arch.nmi_pending && kvm_x86_ops.nmi_allowed(vcpu)) {
--vcpu->arch.nmi_pending;
vcpu->arch.nmi_injected = true;
- kvm_x86_ops->set_nmi(vcpu);
+ kvm_x86_ops.set_nmi(vcpu);
} else if (kvm_cpu_has_injectable_intr(vcpu)) {
/*
* Because interrupts can be injected asynchronously, we are
@@ -7732,15 +7745,15 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
* proposal and current concerns. Perhaps we should be setting
* KVM_REQ_EVENT only on certain events and not unconditionally?
*/
- if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
- r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+ if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
+ r = kvm_x86_ops.check_nested_events(vcpu);
if (r != 0)
return r;
}
- if (kvm_x86_ops->interrupt_allowed(vcpu)) {
+ if (kvm_x86_ops.interrupt_allowed(vcpu)) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
false);
- kvm_x86_ops->set_irq(vcpu);
+ kvm_x86_ops.set_irq(vcpu);
}
}
@@ -7756,7 +7769,7 @@ static void process_nmi(struct kvm_vcpu *vcpu)
* If an NMI is already in progress, limit further NMIs to just one.
* Otherwise, allow two (and we'll inject the first one immediately).
*/
- if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
+ if (kvm_x86_ops.get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
limit = 1;
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
@@ -7846,11 +7859,11 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7f7c, seg.limit);
put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
- kvm_x86_ops->get_gdt(vcpu, &dt);
+ kvm_x86_ops.get_gdt(vcpu, &dt);
put_smstate(u32, buf, 0x7f74, dt.address);
put_smstate(u32, buf, 0x7f70, dt.size);
- kvm_x86_ops->get_idt(vcpu, &dt);
+ kvm_x86_ops.get_idt(vcpu, &dt);
put_smstate(u32, buf, 0x7f58, dt.address);
put_smstate(u32, buf, 0x7f54, dt.size);
@@ -7900,7 +7913,7 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7e94, seg.limit);
put_smstate(u64, buf, 0x7e98, seg.base);
- kvm_x86_ops->get_idt(vcpu, &dt);
+ kvm_x86_ops.get_idt(vcpu, &dt);
put_smstate(u32, buf, 0x7e84, dt.size);
put_smstate(u64, buf, 0x7e88, dt.address);
@@ -7910,7 +7923,7 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7e74, seg.limit);
put_smstate(u64, buf, 0x7e78, seg.base);
- kvm_x86_ops->get_gdt(vcpu, &dt);
+ kvm_x86_ops.get_gdt(vcpu, &dt);
put_smstate(u32, buf, 0x7e64, dt.size);
put_smstate(u64, buf, 0x7e68, dt.address);
@@ -7940,28 +7953,28 @@ static void enter_smm(struct kvm_vcpu *vcpu)
* vCPU state (e.g. leave guest mode) after we've saved the state into
* the SMM state-save area.
*/
- kvm_x86_ops->pre_enter_smm(vcpu, buf);
+ kvm_x86_ops.pre_enter_smm(vcpu, buf);
vcpu->arch.hflags |= HF_SMM_MASK;
kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
- if (kvm_x86_ops->get_nmi_mask(vcpu))
+ if (kvm_x86_ops.get_nmi_mask(vcpu))
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
else
- kvm_x86_ops->set_nmi_mask(vcpu, true);
+ kvm_x86_ops.set_nmi_mask(vcpu, true);
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0x8000);
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
- kvm_x86_ops->set_cr0(vcpu, cr0);
+ kvm_x86_ops.set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0;
- kvm_x86_ops->set_cr4(vcpu, 0);
+ kvm_x86_ops.set_cr4(vcpu, 0);
/* Undocumented: IDT limit is set to zero on entry to SMM. */
dt.address = dt.size = 0;
- kvm_x86_ops->set_idt(vcpu, &dt);
+ kvm_x86_ops.set_idt(vcpu, &dt);
__kvm_set_dr(vcpu, 7, DR7_FIXED_1);
@@ -7992,7 +8005,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- kvm_x86_ops->set_efer(vcpu, 0);
+ kvm_x86_ops.set_efer(vcpu, 0);
#endif
kvm_update_cpuid(vcpu);
@@ -8030,7 +8043,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
kvm_apic_update_apicv(vcpu);
- kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
+ kvm_x86_ops.refresh_apicv_exec_ctrl(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
@@ -8043,23 +8056,30 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
*/
void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
{
- if (!kvm_x86_ops->check_apicv_inhibit_reasons ||
- !kvm_x86_ops->check_apicv_inhibit_reasons(bit))
+ unsigned long old, new, expected;
+
+ if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
+ !kvm_x86_ops.check_apicv_inhibit_reasons(bit))
return;
- if (activate) {
- if (!test_and_clear_bit(bit, &kvm->arch.apicv_inhibit_reasons) ||
- !kvm_apicv_activated(kvm))
- return;
- } else {
- if (test_and_set_bit(bit, &kvm->arch.apicv_inhibit_reasons) ||
- kvm_apicv_activated(kvm))
- return;
- }
+ old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
+ do {
+ expected = new = old;
+ if (activate)
+ __clear_bit(bit, &new);
+ else
+ __set_bit(bit, &new);
+ if (new == old)
+ break;
+ old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new);
+ } while (old != expected);
+
+ if (!!old == !!new)
+ return;
trace_kvm_apicv_update_request(activate, bit);
- if (kvm_x86_ops->pre_update_apicv_exec_ctrl)
- kvm_x86_ops->pre_update_apicv_exec_ctrl(kvm, activate);
+ if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
+ kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
}
EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
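The rewritten kvm_request_apicv_update() above replaces the test_and_set/clear_bit checks with a lock-free read-modify-write loop over the inhibit-reason mask and only proceeds to the APICv update when the overall inhibited state actually flips (!!old != !!new). A userspace sketch of the same pattern, assuming the GCC/Clang __sync_val_compare_and_swap() builtin in place of the kernel's cmpxchg(); the variable names are illustrative:

#include <stdio.h>

static unsigned long inhibit_reasons;

/* Returns 1 when the overall inhibited <-> uninhibited state changed. */
static int update_inhibit(int activate, unsigned int bit)
{
        unsigned long old, new, expected;

        old = inhibit_reasons;                  /* plain read, like READ_ONCE() */
        do {
                expected = new = old;
                if (activate)
                        new &= ~(1UL << bit);
                else
                        new |= 1UL << bit;
                if (new == old)
                        break;
                old = __sync_val_compare_and_swap(&inhibit_reasons,
                                                  expected, new);
        } while (old != expected);

        /* Only a zero <-> non-zero transition needs a global toggle. */
        return !!old != !!new;
}

int main(void)
{
        printf("%d\n", update_inhibit(0, 3));   /* 1: first inhibit bit set */
        printf("%d\n", update_inhibit(0, 5));   /* 0: already inhibited */
        printf("%d\n", update_inhibit(1, 3));   /* 0: bit 5 still set */
        printf("%d\n", update_inhibit(1, 5));   /* 1: last inhibit cleared */
        return 0;
}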
@@ -8075,7 +8095,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
if (vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
+ kvm_x86_ops.sync_pir_to_irr(vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@@ -8095,7 +8115,7 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
vcpu_to_synic(vcpu)->vec_bitmap, 256);
- kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+ kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
@@ -8122,13 +8142,13 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
- if (!kvm_x86_ops->set_apic_access_page_addr)
+ if (!kvm_x86_ops.set_apic_access_page_addr)
return;
page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (is_error_page(page))
return;
- kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
+ kvm_x86_ops.set_apic_access_page_addr(vcpu, page_to_phys(page));
/*
* Do not pin apic access page in memory, the MMU notifier
@@ -8160,7 +8180,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
- if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
+ if (unlikely(!kvm_x86_ops.get_vmcs12_pages(vcpu))) {
r = 0;
goto out;
}
@@ -8180,8 +8200,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
- if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
- kvm_mmu_load_cr3(vcpu);
+ if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
+ kvm_mmu_load_pgd(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
@@ -8266,7 +8286,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- if (inject_pending_event(vcpu, req_int_win) != 0)
+ if (inject_pending_event(vcpu) != 0)
req_immediate_exit = true;
else {
/* Enable SMI/NMI/IRQ window open exits if needed.
@@ -8284,12 +8304,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* SMI.
*/
if (vcpu->arch.smi_pending && !is_smm(vcpu))
- if (!kvm_x86_ops->enable_smi_window(vcpu))
+ if (!kvm_x86_ops.enable_smi_window(vcpu))
req_immediate_exit = true;
if (vcpu->arch.nmi_pending)
- kvm_x86_ops->enable_nmi_window(vcpu);
+ kvm_x86_ops.enable_nmi_window(vcpu);
if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- kvm_x86_ops->enable_irq_window(vcpu);
+ kvm_x86_ops.enable_irq_window(vcpu);
WARN_ON(vcpu->arch.exception.pending);
}
@@ -8306,7 +8326,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
- kvm_x86_ops->prepare_guest_switch(vcpu);
+ kvm_x86_ops.prepare_guest_switch(vcpu);
/*
* Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
@@ -8337,7 +8357,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* notified with kvm_vcpu_kick.
*/
if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
+ kvm_x86_ops.sync_pir_to_irr(vcpu);
if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
|| need_resched() || signal_pending(current)) {
@@ -8352,7 +8372,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_x86_ops->request_immediate_exit(vcpu);
+ kvm_x86_ops.request_immediate_exit(vcpu);
}
trace_kvm_entry(vcpu->vcpu_id);
@@ -8372,7 +8392,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
- kvm_x86_ops->run(vcpu);
+ kvm_x86_ops.run(vcpu);
/*
* Do this here before restoring debug registers on the host. And
@@ -8382,7 +8402,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
*/
if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
- kvm_x86_ops->sync_dirty_debug_regs(vcpu);
+ kvm_x86_ops.sync_dirty_debug_regs(vcpu);
kvm_update_dr0123(vcpu);
kvm_update_dr6(vcpu);
kvm_update_dr7(vcpu);
@@ -8404,7 +8424,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_x86_ops->handle_exit_irqoff(vcpu, &exit_fastpath);
+ kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
/*
* Consume any pending interrupts, including the possible source of
@@ -8447,12 +8467,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);
- vcpu->arch.gpa_available = false;
- r = kvm_x86_ops->handle_exit(vcpu, exit_fastpath);
+ r = kvm_x86_ops.handle_exit(vcpu, exit_fastpath);
return r;
cancel_injection:
- kvm_x86_ops->cancel_injection(vcpu);
+ kvm_x86_ops.cancel_injection(vcpu);
if (unlikely(vcpu->arch.apic_attention))
kvm_lapic_sync_from_vapic(vcpu);
out:
@@ -8462,13 +8481,13 @@ out:
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
if (!kvm_arch_vcpu_runnable(vcpu) &&
- (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
+ (!kvm_x86_ops.pre_block || kvm_x86_ops.pre_block(vcpu) == 0)) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (kvm_x86_ops->post_block)
- kvm_x86_ops->post_block(vcpu);
+ if (kvm_x86_ops.post_block)
+ kvm_x86_ops.post_block(vcpu);
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
return 1;
@@ -8488,15 +8507,14 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
break;
default:
return -EINTR;
- break;
}
return 1;
}
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
- kvm_x86_ops->check_nested_events(vcpu, false);
+ if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events)
+ kvm_x86_ops.check_nested_events(vcpu);
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted);
@@ -8652,7 +8670,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
kvm_save_current_fpu(vcpu->arch.user_fpu);
- /* PKRU is separately restored in kvm_x86_ops->run. */
+ /* PKRU is separately restored in kvm_x86_ops.run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
@@ -8757,7 +8775,7 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
* that usually, but some badly designed PV devices (vmware
* backdoor interface) need this to work
*/
- emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
+ emulator_writeback_register_cache(vcpu->arch.emulate_ctxt);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
}
regs->rax = kvm_rax_read(vcpu);
@@ -8855,10 +8873,10 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
- kvm_x86_ops->get_idt(vcpu, &dt);
+ kvm_x86_ops.get_idt(vcpu, &dt);
sregs->idt.limit = dt.size;
sregs->idt.base = dt.address;
- kvm_x86_ops->get_gdt(vcpu, &dt);
+ kvm_x86_ops.get_gdt(vcpu, &dt);
sregs->gdt.limit = dt.size;
sregs->gdt.base = dt.address;
@@ -8943,7 +8961,7 @@ out:
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code)
{
- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
int ret;
init_emulate_ctxt(vcpu);
@@ -9005,10 +9023,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
- kvm_x86_ops->set_idt(vcpu, &dt);
+ kvm_x86_ops.set_idt(vcpu, &dt);
dt.size = sregs->gdt.limit;
dt.address = sregs->gdt.base;
- kvm_x86_ops->set_gdt(vcpu, &dt);
+ kvm_x86_ops.set_gdt(vcpu, &dt);
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
@@ -9018,16 +9036,16 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
- kvm_x86_ops->set_efer(vcpu, sregs->efer);
+ kvm_x86_ops.set_efer(vcpu, sregs->efer);
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
- kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
+ kvm_x86_ops.set_cr0(vcpu, sregs->cr0);
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
(X86_CR4_OSXSAVE | X86_CR4_PKE));
- kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
+ kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
if (cpuid_update_needed)
kvm_update_cpuid(vcpu);
@@ -9133,7 +9151,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
*/
kvm_set_rflags(vcpu, rflags);
- kvm_x86_ops->update_bp_intercept(vcpu);
+ kvm_x86_ops.update_bp_intercept(vcpu);
r = 0;
@@ -9275,7 +9293,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
struct page *page;
int r;
- vcpu->arch.emulate_ctxt.ops = &emulate_ops;
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
@@ -9313,11 +9330,14 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
GFP_KERNEL_ACCOUNT))
goto fail_free_mce_banks;
+ if (!alloc_emulate_ctxt(vcpu))
+ goto free_wbinvd_dirty_mask;
+
vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.user_fpu) {
pr_err("kvm: failed to allocate userspace's fpu\n");
- goto free_wbinvd_dirty_mask;
+ goto free_emulate_ctxt;
}
vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
@@ -9342,7 +9362,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_hv_vcpu_init(vcpu);
- r = kvm_x86_ops->vcpu_create(vcpu);
+ r = kvm_x86_ops.vcpu_create(vcpu);
if (r)
goto free_guest_fpu;
@@ -9359,6 +9379,8 @@ free_guest_fpu:
kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
free_user_fpu:
kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
+free_emulate_ctxt:
+ kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_wbinvd_dirty_mask:
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fail_free_mce_banks:
@@ -9393,11 +9415,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
mutex_unlock(&vcpu->mutex);
- if (!kvmclock_periodic_sync)
- return;
-
- schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
- KVMCLOCK_SYNC_PERIOD);
+ if (kvmclock_periodic_sync && vcpu->vcpu_idx == 0)
+ schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
+ KVMCLOCK_SYNC_PERIOD);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -9409,8 +9429,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvmclock_reset(vcpu);
- kvm_x86_ops->vcpu_free(vcpu);
+ kvm_x86_ops.vcpu_free(vcpu);
+ kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
@@ -9496,7 +9517,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.ia32_xss = 0;
- kvm_x86_ops->vcpu_reset(vcpu, init_event);
+ kvm_x86_ops.vcpu_reset(vcpu, init_event);
}
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
@@ -9521,7 +9542,7 @@ int kvm_arch_hardware_enable(void)
bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- ret = kvm_x86_ops->hardware_enable();
+ ret = kvm_x86_ops.hardware_enable();
if (ret != 0)
return ret;
@@ -9603,18 +9624,29 @@ int kvm_arch_hardware_enable(void)
void kvm_arch_hardware_disable(void)
{
- kvm_x86_ops->hardware_disable();
+ kvm_x86_ops.hardware_disable();
drop_user_return_notifiers();
}
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
+ struct kvm_x86_init_ops *ops = opaque;
int r;
- r = kvm_x86_ops->hardware_setup();
+ rdmsrl_safe(MSR_EFER, &host_efer);
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ rdmsrl(MSR_IA32_XSS, host_xss);
+
+ r = ops->hardware_setup();
if (r != 0)
return r;
+ memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
+ supported_xss = 0;
+
cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
if (kvm_has_tsc_control) {
@@ -9631,28 +9663,26 @@ int kvm_arch_hardware_setup(void)
kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
}
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrl(MSR_IA32_XSS, host_xss);
-
kvm_init_msr_list();
return 0;
}
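This hunk is the pivot for the kvm_x86_ops change seen throughout the file: kvm_arch_hardware_setup() copies the vendor module's runtime ops by value into a static kvm_x86_ops instance, so call sites use '.' instead of '->' and the "is a vendor module loaded" test becomes a check of a member such as .hardware_enable. A compact userspace model of the scheme; only hardware_setup/runtime_ops mirror the patch, the other names are made up for the sketch:

#include <stdio.h>
#include <string.h>

struct x86_ops {
        int (*hardware_enable)(void);
        int (*get_cpl)(void);
};

struct x86_init_ops {
        int (*hardware_setup)(void);
        struct x86_ops *runtime_ops;
};

/* Static instance filled at setup time, like the new kvm_x86_ops. */
static struct x86_ops kvm_x86_ops;

static int vmx_hardware_enable(void) { return 0; }
static int vmx_get_cpl(void)         { return 3; }
static int vmx_hardware_setup(void)  { return 0; }

static struct x86_ops vmx_runtime_ops = {
        .hardware_enable = vmx_hardware_enable,
        .get_cpl         = vmx_get_cpl,
};

static struct x86_init_ops vmx_init_ops = {
        .hardware_setup = vmx_hardware_setup,
        .runtime_ops    = &vmx_runtime_ops,
};

static int hardware_setup(struct x86_init_ops *ops)
{
        int r = ops->hardware_setup();

        if (r)
                return r;
        /* Copy by value: later call sites dereference '.' rather than '->'. */
        memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
        return 0;
}

int main(void)
{
        if (hardware_setup(&vmx_init_ops))
                return 1;
        if (kvm_x86_ops.hardware_enable)        /* "module loaded" check */
                printf("cpl = %d\n", kvm_x86_ops.get_cpl());
        return 0;
}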
void kvm_arch_hardware_unsetup(void)
{
- kvm_x86_ops->hardware_unsetup();
+ kvm_x86_ops.hardware_unsetup();
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ struct kvm_x86_init_ops *ops = opaque;
WARN_ON(!irqs_disabled());
if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
return -EIO;
- return kvm_x86_ops->check_processor_compatibility();
+ return ops->check_processor_compatibility();
}
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
@@ -9678,9 +9708,16 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
pmu->need_cleanup = true;
kvm_make_request(KVM_REQ_PMU, vcpu);
}
- kvm_x86_ops->sched_in(vcpu, cpu);
+ kvm_x86_ops.sched_in(vcpu, cpu);
+}
+
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+ kfree(kvm->arch.hyperv.hv_pa_pg);
+ vfree(kvm);
}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (type)
@@ -9715,7 +9752,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
- return kvm_x86_ops->vm_init(kvm);
+ return kvm_x86_ops.vm_init(kvm);
}
int kvm_arch_post_init_vm(struct kvm *kvm)
@@ -9763,9 +9800,9 @@ void kvm_arch_sync_events(struct kvm *kvm)
int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int i, r;
- unsigned long hva;
+ unsigned long hva, uninitialized_var(old_npages);
struct kvm_memslots *slots = kvm_memslots(kvm);
- struct kvm_memory_slot *slot, old;
+ struct kvm_memory_slot *slot;
/* Called with kvm->slots_lock held. */
if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
@@ -9773,7 +9810,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
slot = id_to_memslot(slots, id);
if (size) {
- if (slot->npages)
+ if (slot && slot->npages)
return -EEXIST;
/*
@@ -9785,13 +9822,18 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
if (IS_ERR((void *)hva))
return PTR_ERR((void *)hva);
} else {
- if (!slot->npages)
+ if (!slot || !slot->npages)
return 0;
- hva = 0;
+ /*
+ * Stuff a non-canonical value to catch use-after-delete. This
+ * ends up being 0 on 32-bit KVM, but there's no better
+ * alternative.
+ */
+ hva = (unsigned long)(0xdeadull << 48);
+ old_npages = slot->npages;
}
- old = *slot;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_userspace_memory_region m;
@@ -9806,7 +9848,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
}
if (!size)
- vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+ vm_munmap(hva, old_npages * PAGE_SIZE);
return 0;
}
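One detail worth calling out in the hunk above: the delete path now poisons hva with 0xdead << 48 instead of 0, so a stale use of the removed internal slot's address shows up as a non-canonical value rather than silently aliasing address zero (on 32-bit the constant truncates to 0, as the comment notes). A quick standalone check that the constant really is non-canonical for 48-bit virtual addresses, using the same sign-extension rule that get_canonical() in x86.h applies:

#include <stdio.h>

/* Mirrors get_canonical(): sign-extend from bit (vaddr_bits - 1). */
static unsigned long long canonicalize(unsigned long long la, int vaddr_bits)
{
        return (unsigned long long)((long long)(la << (64 - vaddr_bits)) >>
                                    (64 - vaddr_bits));
}

int main(void)
{
        unsigned long long hva = 0xdeadULL << 48;

        printf("poison        = %#llx\n", hva);
        printf("canonicalized = %#llx (%s)\n", canonicalize(hva, 48),
               canonicalize(hva, 48) == hva ? "canonical" : "non-canonical");
        return 0;
}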
@@ -9833,8 +9875,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
mutex_unlock(&kvm->slots_lock);
}
- if (kvm_x86_ops->vm_destroy)
- kvm_x86_ops->vm_destroy(kvm);
+ if (kvm_x86_ops.vm_destroy)
+ kvm_x86_ops.vm_destroy(kvm);
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
@@ -9845,34 +9887,36 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_hv_destroy_vm(kvm);
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
- if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
- kvfree(free->arch.rmap[i]);
- free->arch.rmap[i] = NULL;
- }
+ kvfree(slot->arch.rmap[i]);
+ slot->arch.rmap[i] = NULL;
+
if (i == 0)
continue;
- if (!dont || free->arch.lpage_info[i - 1] !=
- dont->arch.lpage_info[i - 1]) {
- kvfree(free->arch.lpage_info[i - 1]);
- free->arch.lpage_info[i - 1] = NULL;
- }
+ kvfree(slot->arch.lpage_info[i - 1]);
+ slot->arch.lpage_info[i - 1] = NULL;
}
- kvm_page_track_free_memslot(free, dont);
+ kvm_page_track_free_memslot(slot);
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
+static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
+ unsigned long npages)
{
int i;
+ /*
+ * Clear out the previous array pointers for the KVM_MR_MOVE case. The
+ * old arrays will be freed by __kvm_set_memory_region() if installing
+ * the new memslot is successful.
+ */
+ memset(&slot->arch, 0, sizeof(slot->arch));
+
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
struct kvm_lpage_info *linfo;
unsigned long ugfn;
@@ -9903,11 +9947,9 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
ugfn = slot->userspace_addr >> PAGE_SHIFT;
/*
* If the gfn and userspace address are not aligned wrt each
- * other, or if explicitly asked to, disable large page
- * support for this slot
+ * other, disable large page support for this slot.
*/
- if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
- !kvm_largepages_enabled()) {
+ if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
unsigned long j;
for (j = 0; j < lpages; ++j)
@@ -9954,6 +9996,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
+ if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
+ return kvm_alloc_memslot_metadata(memslot,
+ mem->memory_size >> PAGE_SHIFT);
return 0;
}
@@ -9962,14 +10007,14 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
{
/* Still write protect RO slot */
if (new->flags & KVM_MEM_READONLY) {
- kvm_mmu_slot_remove_write_access(kvm, new);
+ kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
return;
}
/*
* Call kvm_x86_ops dirty logging hooks when they are valid.
*
- * kvm_x86_ops->slot_disable_log_dirty is called when:
+ * kvm_x86_ops.slot_disable_log_dirty is called when:
*
* - KVM_MR_CREATE with dirty logging is disabled
* - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag
@@ -9981,7 +10026,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* any additional overhead from PML when guest is running with dirty
* logging disabled for memory slots.
*
- * kvm_x86_ops->slot_enable_log_dirty is called when switching new slot
+ * kvm_x86_ops.slot_enable_log_dirty is called when switching new slot
* to dirty logging mode.
*
* If kvm_x86_ops dirty logging hooks are invalid, use write protect.
@@ -9997,19 +10042,32 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* See the comments in fast_page_fault().
*/
if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
- if (kvm_x86_ops->slot_enable_log_dirty)
- kvm_x86_ops->slot_enable_log_dirty(kvm, new);
- else
- kvm_mmu_slot_remove_write_access(kvm, new);
+ if (kvm_x86_ops.slot_enable_log_dirty) {
+ kvm_x86_ops.slot_enable_log_dirty(kvm, new);
+ } else {
+ int level =
+ kvm_dirty_log_manual_protect_and_init_set(kvm) ?
+ PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
+
+ /*
+ * If we're using initial-all-set, we don't need
+ * to write protect any small page because
+ * they're reported as dirty already. However
+ * we still need to write-protect huge pages
+ * so that the page split can happen lazily on
+ * the first write to the huge page.
+ */
+ kvm_mmu_slot_remove_write_access(kvm, new, level);
+ }
} else {
- if (kvm_x86_ops->slot_disable_log_dirty)
- kvm_x86_ops->slot_disable_log_dirty(kvm, new);
+ if (kvm_x86_ops.slot_disable_log_dirty)
+ kvm_x86_ops.slot_disable_log_dirty(kvm, new);
}
}
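The reworked dirty-logging branch above picks the write-protection granularity based on whether the manual-protect "initial-all-set" mode is in use: small pages are already reported dirty there, so only huge pages need write protection to force the lazy split on first write, while the legacy path still protects at 4K. A tiny sketch of that level selection (the numeric level values are placeholders for the sketch):

#include <stdbool.h>
#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL 1   /* 4K mappings */
#define PT_DIRECTORY_LEVEL  2   /* 2M and larger mappings */

static int write_protect_level(bool manual_protect_and_init_set)
{
        /*
         * With initial-all-set, 4K pages are already reported dirty, so only
         * huge pages must be write-protected to trigger the lazy split.
         */
        return manual_protect_and_init_set ? PT_DIRECTORY_LEVEL
                                           : PT_PAGE_TABLE_LEVEL;
}

int main(void)
{
        printf("legacy mode:     protect from level %d\n",
               write_protect_level(false));
        printf("initial-all-set: protect from level %d\n",
               write_protect_level(true));
        return 0;
}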
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -10051,6 +10109,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
*/
if (change != KVM_MR_DELETE)
kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
+
+ /* Free the arrays associated with the old memslot. */
+ if (change == KVM_MR_MOVE)
+ kvm_arch_free_memslot(kvm, old);
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
@@ -10067,8 +10129,8 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
return (is_guest_mode(vcpu) &&
- kvm_x86_ops->guest_apic_has_interrupt &&
- kvm_x86_ops->guest_apic_has_interrupt(vcpu));
+ kvm_x86_ops.guest_apic_has_interrupt &&
+ kvm_x86_ops.guest_apic_has_interrupt(vcpu));
}
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
@@ -10087,7 +10149,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
(vcpu->arch.nmi_pending &&
- kvm_x86_ops->nmi_allowed(vcpu)))
+ kvm_x86_ops.nmi_allowed(vcpu)))
return true;
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
@@ -10120,7 +10182,7 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
kvm_test_request(KVM_REQ_EVENT, vcpu))
return true;
- if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
+ if (vcpu->arch.apicv_active && kvm_x86_ops.dy_apicv_has_pending_interrupt(vcpu))
return true;
return false;
@@ -10138,7 +10200,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops->interrupt_allowed(vcpu);
+ return kvm_x86_ops.interrupt_allowed(vcpu);
}
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
@@ -10160,7 +10222,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags;
- rflags = kvm_x86_ops->get_rflags(vcpu);
+ rflags = kvm_x86_ops.get_rflags(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
rflags &= ~X86_EFLAGS_TF;
return rflags;
@@ -10172,7 +10234,7 @@ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
rflags |= X86_EFLAGS_TF;
- kvm_x86_ops->set_rflags(vcpu, rflags);
+ kvm_x86_ops.set_rflags(vcpu, rflags);
}
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
@@ -10195,7 +10257,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
return;
if (!vcpu->arch.mmu->direct_map &&
- work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
+ work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
return;
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
@@ -10283,7 +10345,7 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
(vcpu->arch.apf.send_user_only &&
- kvm_x86_ops->get_cpl(vcpu) == 0))
+ kvm_x86_ops.get_cpl(vcpu) == 0))
return false;
return true;
@@ -10303,7 +10365,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
* If interrupts are off we cannot even use an artificial
* halt state.
*/
- return kvm_x86_ops->interrupt_allowed(vcpu);
+ return kvm_x86_ops.interrupt_allowed(vcpu);
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
@@ -10432,7 +10494,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
irqfd->producer = prod;
- return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+ return kvm_x86_ops.update_pi_irte(irqfd->kvm,
prod->irq, irqfd->gsi, 1);
}
@@ -10452,7 +10514,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
* when the irq is masked/disabled or the consumer side (KVM
* in this case) doesn't want to receive the interrupts.
*/
- ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+ ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
if (ret)
printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
" fails: %d\n", irqfd->consumer.token, ret);
@@ -10461,7 +10523,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set)
{
- return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+ return kvm_x86_ops.update_pi_irte(kvm, host_irq, guest_irq, set);
}
bool kvm_vector_hashing_enabled(void)
@@ -10518,4 +10580,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 3624665acee4..b968acc0516f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -5,6 +5,7 @@
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
+#include "kvm_emulate.h"
#define KVM_DEFAULT_PLE_GAP 128
#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
@@ -96,7 +97,7 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
if (!is_long_mode(vcpu))
return false;
- kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
return cs_l;
}
@@ -149,11 +150,6 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
}
-static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
-{
- return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
-}
-
static inline u64 get_canonical(u64 la, u8 vaddr_bits)
{
return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
@@ -164,12 +160,6 @@ static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la;
}
-static inline bool emul_is_noncanonical_address(u64 la,
- struct x86_emulate_ctxt *ctxt)
-{
- return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
-}
-
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
gva_t gva, gfn_t gfn, unsigned access)
{
@@ -247,7 +237,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
{
- return is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu);
+ return is_smm(vcpu) || kvm_x86_ops.apic_init_signal_blocked(vcpu);
}
void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
@@ -280,13 +270,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
-#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
- | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
- | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
- | XFEATURE_MASK_PKRU)
extern u64 host_xcr0;
+extern u64 supported_xcr0;
+extern u64 supported_xss;
-extern u64 kvm_supported_xcr0(void);
+static inline bool kvm_mpx_supported(void)
+{
+ return (supported_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
+ == (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+}
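kvm_mpx_supported() above deliberately compares against the full mask rather than testing for any overlap: MPX is only usable when both the BNDREGS and BNDCSR xsave components are exposed. A small demonstration of the difference (the bit positions follow the x86 xsave feature layout; treat them as an assumption of this sketch):

#include <stdbool.h>
#include <stdio.h>

#define XFEATURE_MASK_BNDREGS (1ULL << 3)
#define XFEATURE_MASK_BNDCSR  (1ULL << 4)
#define MPX_MASK (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)

static bool mpx_supported(unsigned long long supported_xcr0)
{
        /* Both components must be present, not merely one of them. */
        return (supported_xcr0 & MPX_MASK) == MPX_MASK;
}

int main(void)
{
        printf("%d\n", mpx_supported(XFEATURE_MASK_BNDREGS)); /* 0 */
        printf("%d\n", mpx_supported(MPX_MASK));              /* 1 */
        return 0;
}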
extern unsigned int min_timer_period_us;