-rw-r--r--  Documentation/virtual/kvm/api.txt | 31
-rw-r--r--  Documentation/virtual/kvm/cpuid.txt | 7
-rw-r--r--  Documentation/virtual/kvm/locking.txt | 19
-rw-r--r--  arch/arm/include/asm/kvm_arm.h | 4
-rw-r--r--  arch/arm/include/asm/kvm_asm.h | 2
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 6
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 3
-rw-r--r--  arch/arm/kvm/Makefile | 2
-rw-r--r--  arch/arm/kvm/arm.c | 13
-rw-r--r--  arch/arm/kvm/coproc.c | 114
-rw-r--r--  arch/arm/kvm/coproc_a15.c | 117
-rw-r--r--  arch/arm/kvm/coproc_a7.c | 54
-rw-r--r--  arch/arm/kvm/emulate.c | 2
-rw-r--r--  arch/arm/kvm/guest.c | 24
-rw-r--r--  arch/arm/kvm/reset.c | 15
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 6
-rw-r--r--  arch/arm64/kvm/guest.c | 20
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 4
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 7
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 5
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 8
-rw-r--r--  arch/s390/kvm/diag.c | 4
-rw-r--r--  arch/s390/kvm/gaccess.h | 21
-rw-r--r--  arch/s390/kvm/intercept.c | 6
-rw-r--r--  arch/s390/kvm/interrupt.c | 3
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 91
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 9
-rw-r--r--  arch/s390/kvm/priv.c | 61
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 16
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 1
-rw-r--r--  arch/x86/kvm/cpuid.c | 36
-rw-r--r--  arch/x86/kvm/mmu.c | 115
-rw-r--r--  arch/x86/kvm/mmu.h | 4
-rw-r--r--  arch/x86/kvm/svm.c | 8
-rw-r--r--  arch/x86/kvm/vmx.c | 142
-rw-r--r--  arch/x86/kvm/x86.c | 45
-rw-r--r--  arch/x86/kvm/x86.h | 1
-rw-r--r--  include/linux/kvm_host.h | 12
-rw-r--r--  include/trace/events/kvm.h | 10
-rw-r--r--  include/uapi/linux/kvm.h | 1
-rw-r--r--  virt/kvm/async_pf.c | 22
-rw-r--r--  virt/kvm/iommu.c | 12
-rw-r--r--  virt/kvm/kvm_main.c | 54
43 files changed, 699 insertions, 438 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 858aecf21db2..a89a5ee0b940 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2304,7 +2304,31 @@ Possible features:
Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
-4.83 KVM_GET_REG_LIST
+4.83 KVM_ARM_PREFERRED_TARGET
+
+Capability: basic
+Architectures: arm, arm64
+Type: vm ioctl
+Parameters: struct kvm_vcpu_init (out)
+Returns: 0 on success; -1 on error
+Errors:
+ ENODEV: no preferred target available for the host
+
+This queries KVM for the preferred CPU target type which can be emulated
+by KVM on the underlying host.
+
+The ioctl returns a struct kvm_vcpu_init instance containing information
+about the preferred CPU target type and the recommended features for it.
+The returned kvm_vcpu_init->features bitmap will have feature bits set if
+the preferred target recommends setting these features, but this is not
+mandatory.
+
+The information returned by this ioctl can be used to prepare an instance
+of struct kvm_vcpu_init for the KVM_ARM_VCPU_INIT ioctl, which will result
+in a VCPU matching the underlying host.
+
+
+4.84 KVM_GET_REG_LIST
Capability: basic
Architectures: arm, arm64
@@ -2323,8 +2347,7 @@ struct kvm_reg_list {
This ioctl returns the guest registers that are supported for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
-
-4.84 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR
Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
Architectures: arm, arm64
@@ -2362,7 +2385,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
base addresses will return -EEXIST.
-4.85 KVM_PPC_RTAS_DEFINE_TOKEN
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
Capability: KVM_CAP_PPC_RTAS
Architectures: ppc
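
A minimal user-space sketch of the KVM_ARM_PREFERRED_TARGET flow described
above (assuming a VM fd already obtained via KVM_CREATE_VM; error handling
trimmed):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Query the preferred target, then create and init a VCPU with it. */
    static int init_vcpu_with_preferred_target(int vm_fd)
    {
            struct kvm_vcpu_init init;
            int vcpu_fd;

            memset(&init, 0, sizeof(init));
            if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
                    return -1;      /* e.g. ENODEV: no preferred target */

            vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
            if (vcpu_fd < 0)
                    return -1;

            /* The returned feature bits are recommendations only. */
            if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init) < 0)
                    return -1;

            return vcpu_fd;
    }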
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 22ff659bc0fb..3c65feb83010 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -43,6 +43,13 @@ KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by
|| || writing to msr 0x4b564d02
------------------------------------------------------------------------------
+KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by
+ || || writing to msr 0x4b564d03.
+------------------------------------------------------------------------------
+KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt
+ || || handler can be enabled by writing
+ || || to msr 0x4b564d04.
+------------------------------------------------------------------------------
KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
|| || before enabling paravirtualized
|| || spinlock support.
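
These feature bits are reported to the guest in EAX of the KVM_CPUID_FEATURES
leaf (function 0x40000001). A guest-side sketch of testing one bit before
touching the corresponding MSR:

    #include <stdint.h>

    #define KVM_CPUID_FEATURES 0x40000001u

    /* Sketch: test a KVM feature bit, e.g. bit 6 (KVM_FEATURE_PV_EOI),
     * before writing msr 0x4b564d04. */
    static int kvm_feature_present(unsigned int bit)
    {
            uint32_t eax, ebx, ecx, edx;

            asm volatile("cpuid"
                         : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                         : "a" (KVM_CPUID_FEATURES));
            return (eax >> bit) & 1;
    }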
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 41b7ac9884b5..f8869410d40c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
------------
Name: kvm_lock
-Type: raw_spinlock
+Type: spinlock_t
Arch: any
Protects: - vm_list
- - hardware virtualization enable/disable
+
+Name: kvm_count_lock
+Type: raw_spinlock_t
+Arch: any
+Protects: - hardware virtualization enable/disable
Comment: 'raw' because hardware enabling/disabling must be atomic with
respect to migration.
@@ -151,3 +155,14 @@ Type: spinlock_t
Arch: any
Protects: -shadow page/shadow tlb entry
Comment: it is a spinlock since it is used in mmu notifier.
+
+Name: kvm->srcu
+Type: srcu lock
+Arch: any
+Protects: - kvm->memslots
+ - kvm->buses
+Comment: The srcu read lock must be held while accessing memslots (e.g.
+ when using gfn_to_* functions) and while accessing in-kernel
+ MMIO/PIO address->device structure mapping (kvm->buses).
+ The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
+ if it is needed by multiple functions.
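
A short sketch of the read-side pattern that the new kvm->srcu entry
describes, using the existing srcu_read_lock()/srcu_read_unlock() and
gfn_to_hva() APIs:

    int idx;
    unsigned long hva;

    /* Hold the SRCU read lock across any memslot lookup. */
    idx = srcu_read_lock(&kvm->srcu);
    hva = gfn_to_hva(kvm, gfn);
    if (!kvm_is_error_hva(hva)) {
            /* ... access guest memory through hva ... */
    }
    srcu_read_unlock(&kvm->srcu, idx);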
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 64e96960de29..d556f03bca17 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -95,12 +95,12 @@
#define TTBCR_IRGN1 (3 << 24)
#define TTBCR_EPD1 (1 << 23)
#define TTBCR_A1 (1 << 22)
-#define TTBCR_T1SZ (3 << 16)
+#define TTBCR_T1SZ (7 << 16)
#define TTBCR_SH0 (3 << 12)
#define TTBCR_ORGN0 (3 << 10)
#define TTBCR_IRGN0 (3 << 8)
#define TTBCR_EPD0 (1 << 7)
-#define TTBCR_T0SZ 3
+#define TTBCR_T0SZ (7 << 0)
#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
/* Hyp System Trap Register */
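
The two mask fixes above reflect that T0SZ and T1SZ are 3-bit fields in the
LPAE TTBCR (bits [2:0] and [18:16] respectively); an illustrative extraction
with the corrected masks:

    u32 ttbcr = vcpu->arch.cp15[c2_TTBCR];  /* illustrative source */
    u32 t0sz = ttbcr & TTBCR_T0SZ;          /* bits [2:0]   */
    u32 t1sz = (ttbcr & TTBCR_T1SZ) >> 16;  /* bits [18:16] */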
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index a2f43ddcc300..661da11f76f4 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -39,7 +39,7 @@
#define c6_IFAR 17 /* Instruction Fault Address Register */
#define c7_PAR 18 /* Physical Address Register */
#define c7_PAR_high 19 /* PAR top 32 bits */
-#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */
+#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
#define c10_PRRR 21 /* Primary Region Remap Register */
#define c10_NMRR 22 /* Normal Memory Remap Register */
#define c12_VBAR 23 /* Vector Base Address Register */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7d22517d8071..8a6f6db14ee4 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -38,11 +38,6 @@
#define KVM_VCPU_MAX_FEATURES 1
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
#include <kvm/arm_vgic.h>
struct kvm_vcpu;
@@ -154,6 +149,7 @@ struct kvm_vcpu_stat {
struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
struct kvm_one_reg;
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007523d7..c498b60c0505 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -63,7 +63,8 @@ struct kvm_regs {
/* Supported Processor Types */
#define KVM_ARM_TARGET_CORTEX_A15 0
-#define KVM_ARM_NUM_TARGETS 1
+#define KVM_ARM_TARGET_CORTEX_A7 1
+#define KVM_ARM_NUM_TARGETS 2
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index d99bee4950e5..789bca9e64a7 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9c697db2787e..cc5adb9349ef 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -797,6 +797,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EFAULT;
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
+ case KVM_ARM_PREFERRED_TARGET: {
+ int err;
+ struct kvm_vcpu_init init;
+
+ err = kvm_vcpu_preferred_target(&init);
+ if (err)
+ return err;
+
+ if (copy_to_user(argp, &init, sizeof(init)))
+ return -EFAULT;
+
+ return 0;
+ }
default:
return -EINVAL;
}
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index db9cf692d4dd..a629f2c1d0f9 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -71,6 +71,92 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ /*
+ * Compute guest MPIDR. No need to mess around with different clusters
+ * but we read the 'U' bit from the underlying hardware directly.
+ */
+ vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
+ | vcpu->vcpu_id;
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+ return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p);
+ return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+ return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 l2ctlr, ncores;
+
+ asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+ l2ctlr &= ~(3 << 24);
+ ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+ l2ctlr |= (ncores & 3) << 24;
+
+ vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 actlr;
+
+ /* ACTLR contains SMP bit: make sure you create all cpus first! */
+ asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+ /* Make the SMP bit consistent with the guest configuration */
+ if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+ actlr |= 1U << 6;
+ else
+ actlr &= ~(1U << 6);
+
+ vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = 0;
+ return true;
+}
+
/* See note at ARM ARM B1.14.4 */
static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
@@ -153,10 +239,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg cp15_regs[] = {
+ /* MPIDR: we use VMPIDR for guest access. */
+ { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+ NULL, reset_mpidr, c0_MPIDR },
+
/* CSSELR: swapped by interrupt.S. */
{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
NULL, reset_unknown, c0_CSSELR },
+ /* ACTLR: trapped by HCR.TAC bit. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+ access_actlr, reset_actlr, c1_ACTLR },
+
+ /* CPACR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+ NULL, reset_val, c1_CPACR, 0x00000000 },
+
/* TTBR0/TTBR1: swapped by interrupt.S. */
{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
@@ -195,6 +293,13 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
/*
+ * L2CTLR access (guest wants to know #CPUs).
+ */
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+ access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
+ /*
* Dummy performance monitor implementation.
*/
{ CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
@@ -234,6 +339,9 @@ static const struct coproc_reg cp15_regs[] = {
/* CNTKCTL: swapped by interrupt.S. */
{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+ /* The Configuration Base Address Register. */
+ { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
/* Target specific emulation tables */
@@ -241,6 +349,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
{
+ unsigned int i;
+
+ for (i = 1; i < table->num; i++)
+ BUG_ON(cmp_reg(&table->table[i-1],
+ &table->table[i]) >= 0);
+
target_tables[table->target] = table;
}
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index cf93472b9dd6..bb0cac1410cc 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,101 +17,12 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
#include <linux/init.h>
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- /*
- * Compute guest MPIDR:
- * (Even if we present only one VCPU to the guest on an SMP
- * host we don't set the U bit in the MPIDR, or vice versa, as
- * revealing the underlying hardware properties is likely to
- * be the best choice).
- */
- vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
- | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
-}
-
#include "coproc.h"
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
- return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return write_to_read_only(vcpu, p);
- return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
- return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 l2ctlr, ncores;
-
- asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
- l2ctlr &= ~(3 << 24);
- ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
- l2ctlr |= (ncores & 3) << 24;
-
- vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 actlr;
-
- /* ACTLR contains SMP bit: make sure you create all cpus first! */
- asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
- /* Make the SMP bit consistent with the guest configuration */
- if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
- actlr |= 1U << 6;
- else
- actlr &= ~(1U << 6);
-
- vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = 0;
- return true;
-}
-
/*
* A15-specific CP15 registers.
* CRn denotes the primary register number, but is copied to the CRm in the
@@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg a15_regs[] = {
- /* MPIDR: we use VMPIDR for guest access. */
- { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
- NULL, reset_mpidr, c0_MPIDR },
-
/* SCTLR: swapped by interrupt.S. */
{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
NULL, reset_val, c1_SCTLR, 0x00C50078 },
- /* ACTLR: trapped by HCR.TAC bit. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
- access_actlr, reset_actlr, c1_ACTLR },
- /* CPACR: swapped by interrupt.S. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_val, c1_CPACR, 0x00000000 },
-
- /*
- * L2CTLR access (guest wants to know #CPUs).
- */
- { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
- access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
- { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
- /* The Configuration Base Address Register. */
- { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
static struct kvm_coproc_target_table a15_target_table = {
@@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
static int __init coproc_a15_init(void)
{
- unsigned int i;
-
- for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
- BUG_ON(cmp_reg(&a15_regs[i-1],
- &a15_regs[i]) >= 0);
-
kvm_register_target_coproc_table(&a15_target_table);
return 0;
}
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 000000000000..1df767331588
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ * Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+ /* SCTLR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+ NULL, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+ .target = KVM_ARM_TARGET_CORTEX_A7,
+ .table = a7_regs,
+ .num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+ kvm_register_target_coproc_table(&a7_target_table);
+ return 0;
+}
+late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da51..d6c005283678 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
if (is_pabt) {
- /* Set DFAR and DFSR */
+ /* Set IFAR and IFSR */
vcpu->arch.cp15[c6_IFAR] = addr;
is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
/* Always give debug fault for now - should give guest a clue */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d03612181..20f8d97904af 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void)
return -EINVAL;
switch (part_number) {
+ case ARM_CPU_PART_CORTEX_A7:
+ return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:
return KVM_ARM_TARGET_CORTEX_A15;
default:
@@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
{
unsigned int i;
- /* We can only do a cortex A15 for now. */
+ /* We can only cope with guest==host and only on A15/A7 (for now). */
if (init->target != kvm_target_cpu())
return -EINVAL;
@@ -222,6 +224,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
return kvm_reset_vcpu(vcpu);
}
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In future, we might use features to return target
+ * specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -EINVAL;
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index c02ba4af599f..d153e64d1255 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -30,16 +30,16 @@
#include <kvm/arm_arch_timer.h>
/******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
*/
-static const int a15_max_cpu_idx = 3;
+static const int cortexa_max_cpu_idx = 3;
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
};
-static const struct kvm_irq_level a15_vtimer_irq = {
+static const struct kvm_irq_level cortexa_vtimer_irq = {
{ .irq = 27 },
.level = 1,
};
@@ -62,12 +62,13 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
const struct kvm_irq_level *cpu_vtimer_irq;
switch (vcpu->arch.target) {
+ case KVM_ARM_TARGET_CORTEX_A7:
case KVM_ARM_TARGET_CORTEX_A15:
- if (vcpu->vcpu_id > a15_max_cpu_idx)
+ if (vcpu->vcpu_id > cortexa_max_cpu_idx)
return -EINVAL;
- reset_regs = &a15_regs_reset;
+ reset_regs = &cortexa_regs_reset;
vcpu->arch.midr = read_cpuid_id();
- cpu_vtimer_irq = &a15_vtimer_irq;
+ cpu_vtimer_irq = &cortexa_vtimer_irq;
break;
default:
return -ENODEV;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0859a4ddd1e7..5d85a02d1231 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -36,11 +36,6 @@
#define KVM_VCPU_MAX_FEATURES 2
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
struct kvm_vcpu;
int kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -151,6 +146,7 @@ struct kvm_vcpu_stat {
struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
struct kvm_one_reg;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2c3ff67a8ecb..3f0731e53274 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
return kvm_reset_vcpu(vcpu);
}
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In future, we might use features to return target
+ * specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -EINVAL;
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 989dd3fe8de1..95a3ff93777c 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -234,10 +234,6 @@ struct kvm_vm_data {
#define KVM_REQ_PTC_G 32
#define KVM_REQ_RESUME 33
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) 1
-
struct kvm;
struct kvm_vcpu;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4d6fa0bf1305..32966969f2f9 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -27,13 +27,6 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-/* Don't support huge pages */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-
-/* We don't currently support large pages. */
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) 1
-
/* Special address that contains the comm page, used for reducing # of traps */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 33283532e9d8..0866230b7c2d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -63,11 +63,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
#endif
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
#define HPTEG_HASH_BITS_PTE_LONG 12
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index e87ecaa2c569..d5bc3750616e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -38,13 +38,6 @@ struct sca_block {
struct sca_entry cpu[64];
} __attribute__((packed));
-#define KVM_NR_PAGE_SIZES 2
-#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8)
-#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
-#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
-#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
-#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -220,7 +213,6 @@ struct kvm_s390_interrupt_info {
/* for local_interrupt.action_flags */
#define ACTION_STORE_ON_STOP (1<<0)
#define ACTION_STOP_ON_STOP (1<<1)
-#define ACTION_RELOADVCPU_ON_STOP (1<<2)
struct kvm_s390_local_interrupt {
spinlock_t lock;
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 3a74d8af0d69..78d967f180f4 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -107,14 +107,13 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
{
- int ret, idx;
+ int ret;
/* No virtio-ccw notification? Get out quickly. */
if (!vcpu->kvm->arch.css_support ||
(vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
return -EOPNOTSUPP;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
/*
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
@@ -125,7 +124,6 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
vcpu->run->s.regs.gprs[2],
8, &vcpu->run->s.regs.gprs[3],
vcpu->run->s.regs.gprs[4]);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
/*
* Return cookie in gpr 2, but don't overwrite the register if the
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 99d789e8a018..374a439ccc60 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -18,20 +18,27 @@
#include <asm/uaccess.h>
#include "kvm-s390.h"
+/* Convert real to absolute address by applying the prefix of the CPU */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+ unsigned long gaddr)
+{
+ unsigned long prefix = vcpu->arch.sie_block->prefix;
+ if (gaddr < 2 * PAGE_SIZE)
+ gaddr += prefix;
+ else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
+ gaddr -= prefix;
+ return gaddr;
+}
+
static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
void __user *gptr,
int prefixing)
{
- unsigned long prefix = vcpu->arch.sie_block->prefix;
unsigned long gaddr = (unsigned long) gptr;
unsigned long uaddr;
- if (prefixing) {
- if (gaddr < 2 * PAGE_SIZE)
- gaddr += prefix;
- else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE))
- gaddr -= prefix;
- }
+ if (prefixing)
+ gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
if (IS_ERR_VALUE(uaddr))
uaddr = -EFAULT;
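
kvm_s390_real_to_abs() swaps the low 8KB of the real address space with the
prefix area, in both directions; everything else passes through unchanged.
A worked example with an illustrative prefix of 0x20000 (assuming
PAGE_SIZE == 4096):

    /* real 0x00000 -> absolute 0x20000  (low 8KB goes to the prefix area) */
    /* real 0x01000 -> absolute 0x21000                                    */
    /* real 0x20000 -> absolute 0x00000  (prefix area maps back to zero)   */
    /* real 0x21fff -> absolute 0x01fff                                    */
    /* real 0x30000 -> absolute 0x30000  (all other addresses unchanged)   */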
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 5ee56e5acc23..5ddbbde6f65c 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -62,12 +62,6 @@ static int handle_stop(struct kvm_vcpu *vcpu)
trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
- if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
- vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
- rc = SIE_INTERCEPT_RERUNVCPU;
- vcpu->run->exit_reason = KVM_EXIT_INTR;
- }
-
if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
atomic_set_mask(CPUSTAT_STOPPED,
&vcpu->arch.sie_block->cpuflags);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 7f35cb33e510..e7323cd9f109 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -436,6 +436,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
no_timer:
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
add_wait_queue(&vcpu->wq, &wait);
@@ -455,6 +456,8 @@ no_timer:
remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 776dafe918db..1e4e7b97337a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -689,9 +689,9 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0;
}
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
- int rc;
+ int rc, cpuflags;
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
@@ -709,28 +709,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
vcpu->arch.sie_block->icptcode = 0;
- VCPU_EVENT(vcpu, 6, "entering sie flags %x",
- atomic_read(&vcpu->arch.sie_block->cpuflags));
- trace_kvm_s390_sie_enter(vcpu,
- atomic_read(&vcpu->arch.sie_block->cpuflags));
+ cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+ VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+ trace_kvm_s390_sie_enter(vcpu, cpuflags);
- /*
- * As PF_VCPU will be used in fault handler, between guest_enter
- * and guest_exit should be no uaccess.
- */
- preempt_disable();
- kvm_guest_enter();
- preempt_enable();
- rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
- kvm_guest_exit();
+ return 0;
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+ int rc;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
- if (rc > 0)
+ if (exit_reason >= 0) {
rc = 0;
- if (rc < 0) {
+ } else {
if (kvm_is_ucontrol(vcpu->kvm)) {
rc = SIE_INTERCEPT_UCONTROL;
} else {
@@ -741,6 +737,49 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
+
+ if (rc == 0) {
+ if (kvm_is_ucontrol(vcpu->kvm))
+ rc = -EOPNOTSUPP;
+ else
+ rc = kvm_handle_sie_intercept(vcpu);
+ }
+
+ return rc;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int rc, exit_reason;
+
+ /*
+ * We try to hold kvm->srcu during most of vcpu_run (except when run-
+ * ning the guest), so that memslots (and other stuff) are protected
+ */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ do {
+ rc = vcpu_pre_run(vcpu);
+ if (rc)
+ break;
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ /*
+ * As PF_VCPU will be used in fault handler, between
+ * guest_enter and guest_exit should be no uaccess.
+ */
+ preempt_disable();
+ kvm_guest_enter();
+ preempt_enable();
+ exit_reason = sie64a(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs);
+ kvm_guest_exit();
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ rc = vcpu_post_run(vcpu, exit_reason);
+ } while (!signal_pending(current) && !rc);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
return rc;
}
@@ -749,7 +788,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int rc;
sigset_t sigsaved;
-rerun_vcpu:
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@ -782,19 +820,7 @@ rerun_vcpu:
}
might_fault();
-
- do {
- rc = __vcpu_run(vcpu);
- if (rc)
- break;
- if (kvm_is_ucontrol(vcpu->kvm))
- rc = -EOPNOTSUPP;
- else
- rc = kvm_handle_sie_intercept(vcpu);
- } while (!signal_pending(current) && !rc);
-
- if (rc == SIE_INTERCEPT_RERUNVCPU)
- goto rerun_vcpu;
+ rc = __vcpu_run(vcpu);
if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;
@@ -951,6 +977,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
+ int idx;
long r;
switch (ioctl) {
@@ -964,7 +991,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_S390_STORE_STATUS:
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_vcpu_store_status(vcpu, arg);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
case KVM_S390_SET_INITIAL_PSW: {
psw_t psw;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index dc99f1ca4267..b44912a32949 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -28,8 +28,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
extern unsigned long *vfacilities;
/* negative values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_RERUNVCPU (1<<0)
-#define SIE_INTERCEPT_UCONTROL (1<<1)
+#define SIE_INTERCEPT_UCONTROL (1<<0)
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -91,8 +90,10 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
{
- *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
- *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+ if (r1)
+ *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+ if (r2)
+ *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
}
static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 59200ee275e5..2440602e6df1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -30,6 +30,38 @@
#include "kvm-s390.h"
#include "trace.h"
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *cpup;
+ s64 hostclk, val;
+ u64 op2;
+ int i;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ op2 = kvm_s390_get_base_disp_s(vcpu);
+ if (op2 & 7) /* Operand must be on a doubleword boundary */
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ if (get_guest(vcpu, val, (u64 __user *) op2))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ if (store_tod_clock(&hostclk)) {
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+ val = (val - hostclk) & ~0x3fUL;
+
+ mutex_lock(&vcpu->kvm->lock);
+ kvm_for_each_vcpu(i, cpup, vcpu->kvm)
+ cpup->arch.sie_block->epoch = val;
+ mutex_unlock(&vcpu->kvm->lock);
+
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+}
+
static int handle_set_prefix(struct kvm_vcpu *vcpu)
{
u64 operand2;
@@ -128,6 +160,33 @@ static int handle_skey(struct kvm_vcpu *vcpu)
return 0;
}
+static int handle_test_block(struct kvm_vcpu *vcpu)
+{
+ unsigned long hva;
+ gpa_t addr;
+ int reg2;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+
+ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(hva))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ /*
+ * We don't expect errors on modern systems, and do not care
+ * about storage keys (yet), so let's just clear the page.
+ */
+ if (clear_user((void __user *)hva, PAGE_SIZE) != 0)
+ return -EFAULT;
+ kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->run->s.regs.gprs[0] = 0;
+ return 0;
+}
+
static int handle_tpi(struct kvm_vcpu *vcpu)
{
struct kvm_s390_interrupt_info *inti;
@@ -438,12 +497,14 @@ out_exception:
static const intercept_handler_t b2_handlers[256] = {
[0x02] = handle_stidp,
+ [0x04] = handle_set_clock,
[0x10] = handle_set_prefix,
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
[0x29] = handle_skey,
[0x2a] = handle_skey,
[0x2b] = handle_skey,
+ [0x2c] = handle_test_block,
[0x30] = handle_io_inst,
[0x31] = handle_io_inst,
[0x32] = handle_io_inst,
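
handle_set_clock() works because the TOD value a guest observes under SIE is
host TOD + epoch, so storing the difference as the epoch on every VCPU makes
the whole VM see the requested clock. A sketch of the arithmetic (variable
names are illustrative; the ~0x3fUL mask matches the patch and presumably
aligns the delta to the epoch field's granularity):

    s64 epoch = (requested_tod - host_tod) & ~0x3fUL;
    /* guest-visible TOD == host_tod + epoch == requested_tod (masked) */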
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c76ff74a98f2..5cbf3166257c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+ /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+ (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
#define SELECTOR_TI_MASK (1 << 2)
#define SELECTOR_RPL_MASK 0x03
@@ -253,7 +260,6 @@ struct kvm_pio_request {
* mode.
*/
struct kvm_mmu {
- void (*new_cr3)(struct kvm_vcpu *vcpu);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
bool prefault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
struct x86_exception *exception);
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
struct fpu guest_fpu;
u64 xcr0;
+ u64 guest_supported_xcr0;
+ u32 guest_xstate_size;
struct kvm_pio_request pio;
void *pio_data;
@@ -780,11 +787,11 @@ void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
@@ -922,6 +929,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
void kvm_enable_tdp(void);
void kvm_disable_tdp(void);
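
A worked example of the new gfn_to_index() helper: on x86,
KVM_HPAGE_GFN_SHIFT() is 0, 9 and 18 for the 4KB, 2MB and 1GB levels, so the
helper yields a slot-relative index into the per-level large-page arrays:

    /* base_gfn = 0x400, gfn = 0x713, level 2 (2MB pages, shift 9):  */
    /*   (0x713 >> 9) - (0x400 >> 9) = 3 - 2 = 1,                    */
    /* i.e. the gfn lies in the memslot's second 2MB-aligned region. */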
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb0465090ae5..b93e09a0fa21 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b110fe6c03d4..0a1e3b8b964d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,6 +23,26 @@
#include "mmu.h"
#include "trace.h"
+static u32 xstate_required_size(u64 xstate_bv)
+{
+ int feature_bit = 0;
+ u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
+ xstate_bv &= ~XSTATE_FPSSE;
+ while (xstate_bv) {
+ if (xstate_bv & 0x1) {
+ u32 eax, ebx, ecx, edx;
+ cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
+ ret = max(ret, eax + ebx);
+ }
+
+ xstate_bv >>= 1;
+ feature_bit++;
+ }
+
+ return ret;
+}
+
void kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
apic->lapic_timer.timer_mode_mask = 1 << 17;
}
+ best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+ if (!best) {
+ vcpu->arch.guest_supported_xcr0 = 0;
+ vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+ } else {
+ vcpu->arch.guest_supported_xcr0 =
+ (best->eax | ((u64)best->edx << 32)) &
+ host_xcr0 & KVM_SUPPORTED_XCR0;
+ vcpu->arch.guest_xstate_size =
+ xstate_required_size(vcpu->arch.guest_supported_xcr0);
+ }
+
kvm_pmu_cpuid_update(vcpu);
}
@@ -182,7 +214,7 @@ static bool supported_xcr0_bit(unsigned bit)
{
u64 mask = ((u64)1 << bit);
- return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+ return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
}
#define F(x) bit(X86_FEATURE_##x)
@@ -383,6 +415,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 0xd: {
int idx, i;
+ entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
+ entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (idx = 1, i = 1; idx < 64; ++idx) {
if (*nent >= maxnent)
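
xstate_required_size() mirrors what CPUID leaf 0xD advertises: for each
enabled extended-state component i, sub-leaf i reports the component's size
in EAX and its offset in EBX, so the buffer size is the maximum of
offset + size over all enabled components (FP/SSE live in the fixed 512-byte
legacy area, followed by the 64-byte XSAVE header). A hedged user-space
analog using GCC's <cpuid.h>:

    #include <cpuid.h>
    #include <stdint.h>

    static uint32_t xstate_size(uint64_t xstate_bv)
    {
            uint32_t eax, ebx, ecx, edx;
            uint32_t ret = 512 + 64;   /* legacy area + XSAVE header */
            int bit = 0;

            xstate_bv &= ~3ULL;        /* skip FP/SSE (legacy area)  */
            for (; xstate_bv; xstate_bv >>= 1, bit++) {
                    if (!(xstate_bv & 1))
                            continue;
                    __cpuid_count(0xD, bit, eax, ebx, ecx, edx);
                    if (eax + ebx > ret)   /* size + offset */
                            ret = eax + ebx;
            }
            return ret;
    }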
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dce0df8150df..40772ef0f2b1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
kvm_release_pfn_clean(pfn);
}
-static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
-{
- mmu_free_roots(vcpu);
-}
-
static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
{
@@ -3424,18 +3419,11 @@ out_unlock:
return 0;
}
-static void nonpaging_free(struct kvm_vcpu *vcpu)
-{
- mmu_free_roots(vcpu);
-}
-
-static int nonpaging_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- context->new_cr3 = nonpaging_new_cr3;
context->page_fault = nonpaging_page_fault;
context->gva_to_gpa = nonpaging_gva_to_gpa;
- context->free = nonpaging_free;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
context->root_hpa = INVALID_PAGE;
context->direct_map = true;
context->nx = false;
- return 0;
}
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
-static void paging_new_cr3(struct kvm_vcpu *vcpu)
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
{
- pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
mmu_free_roots(vcpu);
}
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}
-static void paging_free(struct kvm_vcpu *vcpu)
-{
- nonpaging_free(vcpu);
-}
-
static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
unsigned access, int *nr_present)
{
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
mmu->last_pte_bitmap = map;
}
-static int paging64_init_context_common(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context,
- int level)
+static void paging64_init_context_common(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context,
+ int level)
{
context->nx = is_nx(vcpu);
context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
update_last_pte_bitmap(vcpu, context);
ASSERT(is_pae(vcpu));
- context->new_cr3 = paging_new_cr3;
context->page_fault = paging64_page_fault;
context->gva_to_gpa = paging64_gva_to_gpa;
context->sync_page = paging64_sync_page;
context->invlpg = paging64_invlpg;
context->update_pte = paging64_update_pte;
- context->free = paging_free;
context->shadow_root_level = level;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
- return 0;
}
-static int paging64_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+ paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
}
-static int paging32_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void paging32_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
context->nx = false;
context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
update_permission_bitmask(vcpu, context, false);
update_last_pte_bitmap(vcpu, context);
- context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
- context->free = paging_free;
context->sync_page = paging32_sync_page;
context->invlpg = paging32_invlpg;
context->update_pte = paging32_update_pte;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
- return 0;
}
-static int paging32E_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void paging32E_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
+ paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}
-static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = vcpu->arch.walk_mmu;
context->base_role.word = 0;
- context->new_cr3 = nonpaging_new_cr3;
context->page_fault = tdp_page_fault;
- context->free = nonpaging_free;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
update_permission_bitmask(vcpu, context, false);
update_last_pte_bitmap(vcpu, context);
-
- return 0;
}
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
- int r;
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (!is_paging(vcpu))
- r = nonpaging_init_context(vcpu, context);
+ nonpaging_init_context(vcpu, context);
else if (is_long_mode(vcpu))
- r = paging64_init_context(vcpu, context);
+ paging64_init_context(vcpu, context);
else if (is_pae(vcpu))
- r = paging32E_init_context(vcpu, context);
+ paging32E_init_context(vcpu, context);
else
- r = paging32_init_context(vcpu, context);
+ paging32_init_context(vcpu, context);
vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
vcpu->arch.mmu.base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu);
-
- return r;
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly)
{
ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->nx = true;
- context->new_cr3 = paging_new_cr3;
context->page_fault = ept_page_fault;
context->gva_to_gpa = ept_gva_to_gpa;
context->sync_page = ept_sync_page;
context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte;
- context->free = paging_free;
context->root_level = context->shadow_root_level;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
update_permission_bitmask(vcpu, context, true);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
-
- return 0;
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
-static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
- int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
-
+ kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
vcpu->arch.walk_mmu->get_cr3 = get_cr3;
vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
- return r;
}
-static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
update_permission_bitmask(vcpu, g_context, false);
update_last_pte_bitmap(vcpu, g_context);
-
- return 0;
}
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_mmu(struct kvm_vcpu *vcpu)
{
if (mmu_is_nested(vcpu))
return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
return init_kvm_softmmu(vcpu);
}
-static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
- if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
- /* mmu.free() should set root_hpa = INVALID_PAGE */
- vcpu->arch.mmu.free(vcpu);
-}
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
- destroy_kvm_mmu(vcpu);
- return init_kvm_mmu(vcpu);
+ kvm_mmu_unload(vcpu);
+ init_kvm_mmu(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
+ WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
return alloc_mmu_pages(vcpu);
}
-int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+void kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
- return init_kvm_mmu(vcpu);
+ init_kvm_mmu(vcpu);
}
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@@ -4478,9 +4432,8 @@ unlock:
break;
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return freed;
-
}
static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
- destroy_kvm_mmu(vcpu);
+ kvm_mmu_unload(vcpu);
free_mmu_pages(vcpu);
mmu_free_memory_caches(vcpu);
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 77e044a0f5f7..292615274358 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -70,8 +70,8 @@ enum {
};
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c0bc80391e40..c7168a5cff1b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
nested_svm_vmexit(svm);
}
-static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
- int r;
-
- r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+ kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu.shadow_root_level = get_npt_level();
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
-
- return r;
}
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2b2fce1b2009..06fd7629068a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
/*
* KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
- * This function assumes it is called with the exit reason in vmcs02 being
- * a #PF exception (this is the only case in which KVM injects a #PF when L2
- * is running).
*/
-static int nested_pf_handled(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
- if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+ if (!(vmcs12->exception_bitmap & (1u << nr)))
return 0;
nested_vmx_vmexit(vcpu);
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
- !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
+ if (!reinject && is_guest_mode(vcpu) &&
+ nested_vmx_check_exception(vcpu, nr))
return;
if (has_error_code) {
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+ !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+ nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ }
nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
- VM_EXIT_LOAD_IA32_EFER);
+ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2252,6 +2254,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
nested_vmx_secondary_ctls_low = 0;
nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_WBINVD_EXITING;
if (enable_ept) {
@@ -3380,8 +3383,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(cr3);
vmcs_write64(EPT_POINTER, eptp);
- guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
- vcpu->kvm->arch.ept_identity_map_addr;
+ if (is_paging(vcpu) || is_guest_mode(vcpu))
+ guest_cr3 = kvm_read_cr3(vcpu);
+ else
+ guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
ept_load_pdptrs(vcpu);
}
@@ -4879,6 +4884,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xc1;
}
+static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
+{
+ unsigned long always_on = VMXON_CR0_ALWAYSON;
+
+ if (nested_vmx_secondary_ctls_high &
+ SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+ nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+ always_on &= ~(X86_CR0_PE | X86_CR0_PG);
+ return (val & always_on) == always_on;
+}
+
/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
{
@@ -4897,9 +4913,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
- /* TODO: will have to take unrestricted guest mode into
- * account */
- if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
+ if (!nested_cr0_valid(vmcs12, val))
return 1;
if (kvm_set_cr0(vcpu, val))
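A note on the CR0 check introduced above: nested_cr0_valid() relaxes the always-on mask only when both KVM's nested VMX MSRs and vmcs12 enable unrestricted guest, because an unrestricted L2 is architecturally allowed to run with CR0.PE and CR0.PG clear. A standalone sketch of the predicate follows; the mask is written out here on the assumption that VMXON_CR0_ALWAYSON is PE|NE|PG, so treat the constants as illustrative:

#include <stdbool.h>

/* Illustrative constants; the kernel uses its own VMXON_CR0_ALWAYSON
 * definition (assumed here to be PE | NE | PG). */
#define CR0_PE (1UL << 0)
#define CR0_NE (1UL << 5)
#define CR0_PG (1UL << 31)
#define CR0_ALWAYSON (CR0_PE | CR0_NE | CR0_PG)

static bool cr0_valid_for_l2(unsigned long val, bool unrestricted_guest)
{
	unsigned long always_on = CR0_ALWAYSON;

	if (unrestricted_guest)	/* L2 may clear protection and paging */
		always_on &= ~(CR0_PE | CR0_PG);
	return (val & always_on) == always_on;
}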
@@ -6722,6 +6736,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
+static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ u64 delta_tsc_l1;
+ u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+
+ if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
+ PIN_BASED_VMX_PREEMPTION_TIMER))
+ return;
+ preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+ MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+ preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
+ - vcpu->arch.last_guest_tsc;
+ preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+ if (preempt_val_l2 <= preempt_val_l1)
+ preempt_val_l2 = 0;
+ else
+ preempt_val_l2 -= preempt_val_l1;
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+}
+
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -6736,20 +6771,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
- /*
- * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
- * we did not inject a still-pending event to L1 now because of
- * nested_run_pending, we need to re-enable this bit.
- */
- if (vmx->nested.nested_run_pending)
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
- if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
- exit_reason == EXIT_REASON_VMRESUME))
- vmx->nested.nested_run_pending = 1;
- else
- vmx->nested.nested_run_pending = 0;
-
if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
nested_vmx_vmexit(vcpu);
return 1;
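The arithmetic in nested_adjust_preemption_timer() above is worth spelling out: the low five bits of MSR_IA32_VMX_MISC report the preemption-timer rate as a shift X, meaning the timer decrements once every 2^X TSC cycles. The function therefore converts the TSC cycles consumed outside L2 since the last exit into timer ticks and deducts them from the value saved for L2, clamping at zero so an overdue timer fires immediately. The same computation as a standalone sketch (names illustrative):

#include <stdint.h>

/* preempt_scale is MSR_IA32_VMX_MISC[4:0]: one timer tick per
 * 2^preempt_scale TSC cycles. */
static uint32_t adjusted_preemption_timer(uint32_t saved_l2_value,
					  uint64_t l1_tsc_cycles,
					  unsigned int preempt_scale)
{
	uint64_t ticks_consumed = l1_tsc_cycles >> preempt_scale;

	if (ticks_consumed >= saved_l2_value)
		return 0;	/* timer already expired; exit at once */
	return saved_l2_value - (uint32_t)ticks_consumed;
}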
@@ -7061,9 +7082,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
u32 err = vmcs_read32(error_code_field);
- kvm_queue_exception_e(vcpu, vector, err);
+ kvm_requeue_exception_e(vcpu, vector, err);
} else
- kvm_queue_exception(vcpu, vector);
+ kvm_requeue_exception(vcpu, vector);
break;
case INTR_TYPE_SOFT_INTR:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
@@ -7146,6 +7167,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
+ if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
+ nested_adjust_preemption_timer(vcpu);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -7284,6 +7307,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
+ /*
+ * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
+ * we did not inject a still-pending event to L1 now because of
+ * nested_run_pending, we need to re-enable this bit.
+ */
+ if (vmx->nested.nested_run_pending)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ vmx->nested.nested_run_pending = 0;
+
vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
@@ -7501,9 +7534,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
return get_vmcs12(vcpu)->ept_pointer;
}
-static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
- int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
+ kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
@@ -7511,8 +7544,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
-
- return r;
}
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -7520,6 +7551,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
}
+static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ WARN_ON(!is_guest_mode(vcpu));
+
+ /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
+ if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+ nested_vmx_vmexit(vcpu);
+ else
+ kvm_inject_page_fault(vcpu, fault);
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7533,6 +7578,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
+ u32 exit_control;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7706,7 +7752,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+ exit_control = vmcs_config.vmexit_ctrl;
+ if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+ exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ vmcs_write32(VM_EXIT_CONTROLS, exit_control);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -7773,6 +7822,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
kvm_set_cr3(vcpu, vmcs12->guest_cr3);
kvm_mmu_reset_context(vcpu);
+ if (!enable_ept)
+ vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+
/*
* L1 may access L2's PDPTRs, so save them to construct vmcs12
*/
@@ -7876,7 +7928,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+ if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -7938,6 +7990,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
enter_guest_mode(vcpu);
+ vmx->nested.nested_run_pending = 1;
+
vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
cpu = get_cpu();
@@ -8005,7 +8059,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
u32 idt_vectoring;
unsigned int nr;
- if (vcpu->arch.exception.pending) {
+ if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
nr = vcpu->arch.exception.nr;
idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
@@ -8105,6 +8159,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_pending_dbg_exceptions =
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+ if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
+ (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+ vmcs12->vmx_preemption_timer_value =
+ vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
* own CR3 without exiting. If it has changed it, we must keep it.
@@ -8130,6 +8189,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
+ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
+ vmcs12->guest_ia32_efer = vcpu->arch.efer;
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@ -8201,7 +8262,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* fpu_active (which may have changed).
* Note that vmx_set_cr0 refers to efer set above.
*/
- kvm_set_cr0(vcpu, vmcs12->host_cr0);
+ vmx_set_cr0(vcpu, vmcs12->host_cr0);
/*
* If we did fpu_activate()/fpu_deactivate() during L2's run, we need
* to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -8224,6 +8285,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
kvm_set_cr3(vcpu, vmcs12->host_cr3);
kvm_mmu_reset_context(vcpu);
+ if (!enable_ept)
+ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
if (enable_vpid) {
/*
* Trivially support vpid by letting L2s share their parent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6..edf2a07df3a3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -586,7 +586,7 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
return 1;
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
return 1;
- if (xcr0 & ~host_xcr0)
+ if (xcr0 & ~vcpu->arch.guest_supported_xcr0)
return 1;
kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0;
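The XSETBV validation above combines two architectural rules with KVM's new per-guest cap: XCR0 bit 0 (x87) must remain set, enabling YMM requires SSE, and the guest may enable nothing beyond what its CPUID (folded into guest_supported_xcr0) advertises. A compact, self-contained restatement, with the XSTATE_* bit positions written out:

#include <stdbool.h>
#include <stdint.h>

#define XSTATE_FP  (1ULL << 0)
#define XSTATE_SSE (1ULL << 1)
#define XSTATE_YMM (1ULL << 2)

static bool xcr0_valid(uint64_t xcr0, uint64_t guest_supported_xcr0)
{
	if (!(xcr0 & XSTATE_FP))		/* x87 state is mandatory */
		return false;
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
		return false;			/* AVX requires SSE */
	return (xcr0 & ~guest_supported_xcr0) == 0;
}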
@@ -684,7 +684,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
vcpu->arch.cr3 = cr3;
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
- vcpu->arch.mmu.new_cr3(vcpu);
+ kvm_mmu_new_cr3(vcpu);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2984,11 +2984,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- if (cpu_has_xsave)
+ if (cpu_has_xsave) {
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->xsave,
- xstate_size);
- else {
+ vcpu->arch.guest_xstate_size);
+ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
+ vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+ } else {
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->fxsave,
sizeof(struct i387_fxsave_struct));
@@ -3003,10 +3005,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
u64 xstate_bv =
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
- if (cpu_has_xsave)
+ if (cpu_has_xsave) {
+ /*
+ * Here we allow restoring states that the guest's CPUID leaf 0xD,
+ * index 0, EDX:EAX does not advertise, for compatibility with old
+ * userspace; states unknown to KVM or the host are still rejected
+ * below.
+ */
+ if (xstate_bv & ~KVM_SUPPORTED_XCR0)
+ return -EINVAL;
+ if (xstate_bv & ~host_xcr0)
+ return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->xsave,
- guest_xsave->region, xstate_size);
- else {
+ guest_xsave->region, vcpu->arch.guest_xstate_size);
+ } else {
if (xstate_bv & ~XSTATE_FPSSE)
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->fxsave,
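The pointer arithmetic shared by the get/set paths above indexes the XSAVE header inside the flat u32 region of struct kvm_xsave: the legacy area occupies the first 512 bytes (XSAVE_HDR_OFFSET), and the first u64 of the header is XSTATE_BV, the bit vector being masked and validated here. A small sketch of that layout:

#include <stdint.h>
#include <string.h>

#define XSAVE_HDR_OFFSET 512	/* header follows the 512-byte legacy area */

/* Read XSTATE_BV, the first u64 of the XSAVE header, out of the flat
 * u32 region used by struct kvm_xsave. */
static uint64_t region_xstate_bv(const uint32_t *region)
{
	uint64_t xstate_bv;

	memcpy(&xstate_bv, (const uint8_t *)region + XSAVE_HDR_OFFSET,
	       sizeof(xstate_bv));
	return xstate_bv;
}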
@@ -5263,7 +5274,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
send_ipi = 1;
}
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -5426,12 +5437,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
atomic_set(&kvm_guest_has_master_clock, 0);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -6688,7 +6699,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (r)
return r;
kvm_vcpu_reset(vcpu);
- r = kvm_mmu_setup(vcpu);
+ kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
return r;
@@ -6940,6 +6951,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
vcpu->arch.pv_time_enabled = false;
+
+ vcpu->arch.guest_supported_xcr0 = 0;
+ vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
@@ -7283,7 +7298,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
int r;
if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
- is_error_page(work->page))
+ work->wakeup_all)
return;
r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7408,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct x86_exception fault;
trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if (is_error_page(work->page))
+ if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e224f7a671b6..587fb9ede436 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
+#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
extern u64 host_xcr0;
extern struct static_key kvm_no_apic_vcpu;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0fbbc7aa02cb..c9d4236ab442 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -142,7 +142,7 @@ struct kvm;
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
extern struct list_head vm_list;
struct kvm_io_range {
@@ -189,8 +189,7 @@ struct kvm_async_pf {
gva_t gva;
unsigned long addr;
struct kvm_arch_async_pf arch;
- struct page *page;
- bool done;
+ bool wakeup_all;
};
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
@@ -842,13 +841,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
return gfn_to_memslot(kvm, gfn)->id;
}
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
- /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
- return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
static inline gfn_t
hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
{
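The gfn_to_index() helper deleted from the generic header above computes, for a given large-page level, the index of a gfn's large page relative to a memslot's base; only x86 defines KVM_HPAGE_GFN_SHIFT, so the helper presumably moves to an arch-private header elsewhere in this series. The computation itself, restated with a worked example at a hypothetical level whose shift is 9 (512 base pages per large page):

#include <stdint.h>

typedef uint64_t gfn_t;

/* Hypothetical shift for a 2 MiB level with 4 KiB base pages. */
#define HPAGE_GFN_SHIFT 9

static gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn)
{
	/* Index of gfn's large page relative to the slot's first one. */
	return (gfn >> HPAGE_GFN_SHIFT) - (base_gfn >> HPAGE_GFN_SHIFT);
}

/* e.g. base_gfn = 0x400, gfn = 0x7ff:
 * (0x7ff >> 9) - (0x400 >> 9) = 3 - 2 = 1 */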
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7005d1109ec9..131a0bda7aec 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
TRACE_EVENT(
kvm_async_pf_completed,
- TP_PROTO(unsigned long address, struct page *page, u64 gva),
- TP_ARGS(address, page, gva),
+ TP_PROTO(unsigned long address, u64 gva),
+ TP_ARGS(address, gva),
TP_STRUCT__entry(
__field(unsigned long, address)
- __field(pfn_t, pfn)
__field(u64, gva)
),
TP_fast_assign(
__entry->address = address;
- __entry->pfn = page ? page_to_pfn(page) : 0;
__entry->gva = gva;
),
- TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva,
- __entry->address, __entry->pfn)
+ TP_printk("gva %#llx address %#lx", __entry->gva,
+ __entry->address)
);
#endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 99c25338ede8..e32e776f20c0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1012,6 +1012,7 @@ struct kvm_s390_ucas_mapping {
/* VM is being stopped by host */
#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad)
#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init)
#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
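The ioctl number added above is a VM ioctl that fills in a struct kvm_vcpu_init, which a VMM can then pass straight to the per-vcpu KVM_ARM_VCPU_INIT. A minimal userspace sketch for an arm/arm64 host (error handling on the open/create calls trimmed for brevity):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int init_vcpu_with_preferred_target(void)
{
	struct kvm_vcpu_init init;
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

	/* Ask KVM which CPU target it prefers to emulate on this host. */
	if (ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init) < 0)
		return -1;	/* ENODEV: no preferred target */
	/* Initialize the VCPU to exactly that target. */
	return ioctl(vcpu, KVM_ARM_VCPU_INIT, &init);
}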
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8a39dda7a325..8631d9c14320 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
static void async_pf_execute(struct work_struct *work)
{
- struct page *page = NULL;
struct kvm_async_pf *apf =
container_of(work, struct kvm_async_pf, work);
struct mm_struct *mm = apf->mm;
@@ -68,14 +67,12 @@ static void async_pf_execute(struct work_struct *work)
use_mm(mm);
down_read(&mm->mmap_sem);
- get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+ get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
unuse_mm(mm);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
- apf->page = page;
- apf->done = true;
spin_unlock(&vcpu->async_pf.lock);
/*
@@ -83,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
* this point
*/
- trace_kvm_async_pf_completed(addr, page, gva);
+ trace_kvm_async_pf_completed(addr, gva);
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
@@ -99,9 +96,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
struct kvm_async_pf *work =
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
- cancel_work_sync(&work->work);
list_del(&work->queue);
- if (!work->done) { /* work was canceled */
+ if (cancel_work_sync(&work->work)) {
mmdrop(work->mm);
kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work);
@@ -114,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.done.next,
typeof(*work), link);
list_del(&work->link);
- if (!is_error_page(work->page))
- kvm_release_page_clean(work->page);
kmem_cache_free(async_pf_cache, work);
}
spin_unlock(&vcpu->async_pf.lock);
@@ -135,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
list_del(&work->link);
spin_unlock(&vcpu->async_pf.lock);
- if (work->page)
- kvm_arch_async_page_ready(vcpu, work);
+ kvm_arch_async_page_ready(vcpu, work);
kvm_arch_async_page_present(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
- if (!is_error_page(work->page))
- kvm_release_page_clean(work->page);
kmem_cache_free(async_pf_cache, work);
}
}
@@ -165,8 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
if (!work)
return 0;
- work->page = NULL;
- work->done = false;
+ work->wakeup_all = false;
work->vcpu = vcpu;
work->gva = gva;
work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -206,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
if (!work)
return -ENOMEM;
- work->page = KVM_ERR_PTR_BAD_PAGE;
+ work->wakeup_all = true;
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
spin_lock(&vcpu->async_pf.lock);
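Two details of the async_pf rework above are easy to miss: cancel_work_sync() returns true exactly when the work item had not yet run, which is the only case where the flush path still owns the mm and kvm references it must drop; and the broadcast entry queued by kvm_async_pf_wakeup_all() is now identified by the explicit wakeup_all flag rather than a KVM_ERR_PTR_BAD_PAGE marker in work->page, with the arch code turning it into the ~0 broadcast token. A toy model of the flag's effect (names illustrative):

#include <stdbool.h>
#include <stdio.h>

struct async_pf_model {
	bool wakeup_all;
	unsigned long token;
};

/* Model of the presentation step: a wakeup_all marker carries no gfn
 * and is broadcast to every waiter via token ~0. */
static void present(struct async_pf_model *work)
{
	if (work->wakeup_all)
		work->token = ~0UL;	/* broadcast wakeup */
	printf("present token %#lx\n", work->token);
}

int main(void)
{
	struct async_pf_model one = { false, 42 }, all = { true, 0 };

	present(&one);	/* wakes the vcpu waiting on token 42 */
	present(&all);	/* wakes everything */
	return 0;
}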
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 72a130bc448a..a3b14109049b 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -190,11 +190,7 @@ int kvm_assign_device(struct kvm *kvm,
pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
- printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
- assigned_dev->host_segnr,
- assigned_dev->host_busnr,
- PCI_SLOT(assigned_dev->host_devfn),
- PCI_FUNC(assigned_dev->host_devfn));
+ dev_info(&pdev->dev, "kvm assign device\n");
return 0;
out_unmap:
@@ -220,11 +216,7 @@ int kvm_deassign_device(struct kvm *kvm,
pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
- printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
- assigned_dev->host_segnr,
- assigned_dev->host_busnr,
- PCI_SLOT(assigned_dev->host_devfn),
- PCI_FUNC(assigned_dev->host_devfn));
+ dev_info(&pdev->dev, "kvm deassign device\n");
return 0;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a9dd682cf5e3..0d20c320a33d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -490,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return kvm;
@@ -581,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
@@ -2683,11 +2684,12 @@ static void hardware_enable_nolock(void *junk)
}
}
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_enable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_enable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_nolock(void *junk)
@@ -2700,11 +2702,12 @@ static void hardware_disable_nolock(void *junk)
kvm_arch_hardware_disable(NULL);
}
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_disable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_all_nolock(void)
@@ -2718,16 +2721,16 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
}
static int hardware_enable_all(void)
{
int r = 0;
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -2740,7 +2743,7 @@ static int hardware_enable_all(void)
}
}
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
return r;
}
@@ -2750,20 +2753,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
{
int cpu = (long)v;
- if (!kvm_usage_count)
- return NOTIFY_OK;
-
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
cpu);
- hardware_disable(NULL);
+ hardware_disable();
break;
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
- hardware_enable(NULL);
+ hardware_enable();
break;
}
return NOTIFY_OK;
@@ -3056,10 +3056,10 @@ static int vm_stat_get(void *_offset, u64 *val)
struct kvm *kvm;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
*val += *(u32 *)((void *)kvm + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3073,12 +3073,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
int i;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
*val += *(u32 *)((void *)vcpu + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3133,7 +3133,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
if (kvm_usage_count) {
- WARN_ON(raw_spin_is_locked(&kvm_lock));
+ WARN_ON(raw_spin_is_locked(&kvm_count_lock));
hardware_enable_nolock(NULL);
}
}
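Finally, the locking rework in kvm_main.c splits one lock into two: kvm_lock, which protects vm_list and is taken only from ordinary process context, becomes a normal spinlock, while the new raw kvm_count_lock guards just kvm_usage_count and the hardware enable/disable paths reached from CPU-hotplug notifiers with interrupts off. The refcount pattern those paths implement, modeled as standalone C with a pthread mutex standing in for the raw spinlock and hypothetical enable/disable hooks:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
static int usage_count;

static void enable_cpus(void)  { puts("virtualization on");  }
static void disable_cpus(void) { puts("virtualization off"); }

/* First user enables virtualization everywhere... */
static void enable_all(void)
{
	pthread_mutex_lock(&count_lock);
	if (++usage_count == 1)
		enable_cpus();
	pthread_mutex_unlock(&count_lock);
}

/* ...and the last one disables it again. */
static void disable_all(void)
{
	pthread_mutex_lock(&count_lock);
	if (--usage_count == 0)
		disable_cpus();
	pthread_mutex_unlock(&count_lock);
}

int main(void)
{
	enable_all();	/* prints "virtualization on" */
	enable_all();	/* count == 2, no output */
	disable_all();
	disable_all();	/* prints "virtualization off" */
	return 0;
}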