summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/vmx.h1
-rw-r--r--arch/x86/include/uapi/asm/vmx.h4
-rw-r--r--arch/x86/kvm/lapic.c201
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c10
-rw-r--r--arch/x86/kvm/vmx.c283
-rw-r--r--arch/x86/kvm/x86.c2
7 files changed, 238 insertions, 265 deletions
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a002b07a7099..5ef9848f743f 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -60,6 +60,7 @@
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 37fee272618f..f9dea4fd4107 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,8 @@
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_GDTR_IDTR 46
+#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
@@ -113,6 +115,8 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 23b99f305382..890f218ddd7a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -342,9 +342,11 @@ void __kvm_apic_update_irr(u32 *pir, void *regs)
u32 i, pir_val;
for (i = 0; i <= 7; i++) {
- pir_val = xchg(&pir[i], 0);
- if (pir_val)
+ pir_val = READ_ONCE(pir[i]);
+ if (pir_val) {
+ pir_val = xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
+ }
}
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
@@ -1090,7 +1092,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
- ktime_t remaining;
+ ktime_t remaining, now;
s64 ns;
u32 tmcct;
@@ -1101,7 +1103,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
apic->lapic_timer.period == 0)
return 0;
- remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
+ now = ktime_get();
+ remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0);
@@ -1332,7 +1335,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_save(flags);
- now = apic->lapic_timer.timer.base->get_time();
+ now = ktime_get();
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
@@ -1347,6 +1350,79 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (apic_lvtt_oneshot(apic) &&
+ ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+ return;
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
+}
+
+static bool set_target_expiration(struct kvm_lapic *apic)
+{
+ ktime_t now;
+ u64 tscl = rdtsc();
+
+ now = ktime_get();
+ apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
+ * APIC_BUS_CYCLE_NS * apic->divide_count;
+
+ if (!apic->lapic_timer.period)
+ return false;
+
+ /*
+ * Do not allow the guest to program periodic timers with small
+ * interval, since the hrtimers are not throttled by the host
+ * scheduler.
+ */
+ if (apic_lvtt_period(apic)) {
+ s64 min_period = min_timer_period_us * 1000LL;
+
+ if (apic->lapic_timer.period < min_period) {
+ pr_info_ratelimited(
+ "kvm: vcpu %i: requested %lld ns "
+ "lapic timer period limited to %lld ns\n",
+ apic->vcpu->vcpu_id,
+ apic->lapic_timer.period, min_period);
+ apic->lapic_timer.period = min_period;
+ }
+ }
+
+ apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+ PRIx64 ", "
+ "timer initial count 0x%x, period %lldns, "
+ "expire @ 0x%016" PRIx64 ".\n", __func__,
+ APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+ kvm_lapic_get_reg(apic, APIC_TMICT),
+ apic->lapic_timer.period,
+ ktime_to_ns(ktime_add_ns(now,
+ apic->lapic_timer.period)));
+
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+
+ return true;
+}
+
+static void advance_periodic_target_expiration(struct kvm_lapic *apic)
+{
+ apic->lapic_timer.tscdeadline +=
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration =
+ ktime_add_ns(apic->lapic_timer.target_expiration,
+ apic->lapic_timer.period);
+}
+
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
@@ -1356,52 +1432,59 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
-static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+static void cancel_hv_timer(struct kvm_lapic *apic)
{
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
}
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(!apic->lapic_timer.hv_timer_in_use);
- WARN_ON(swait_active(&vcpu->wq));
- cancel_hv_tscdeadline(apic);
- apic_timer_expired(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
-
-static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+static bool start_hv_timer(struct kvm_lapic *apic)
{
u64 tscdeadline = apic->lapic_timer.tscdeadline;
- if (atomic_read(&apic->lapic_timer.pending) ||
+ if ((atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic)) ||
kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
} else {
apic->lapic_timer.hv_timer_in_use = true;
hrtimer_cancel(&apic->lapic_timer.timer);
/* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending))
- cancel_hv_tscdeadline(apic);
+ if (atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic))
+ cancel_hv_timer(apic);
}
trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
apic->lapic_timer.hv_timer_in_use);
return apic->lapic_timer.hv_timer_in_use;
}
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ WARN_ON(swait_active(&vcpu->wq));
+ cancel_hv_timer(apic);
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
+ advance_periodic_target_expiration(apic);
+ if (!start_hv_timer(apic))
+ start_sw_period(apic);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
WARN_ON(apic->lapic_timer.hv_timer_in_use);
- if (apic_lvtt_tscdeadline(apic))
- start_hv_tscdeadline(apic);
+ start_hv_timer(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
@@ -1413,62 +1496,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
if (!apic->lapic_timer.hv_timer_in_use)
return;
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
if (atomic_read(&apic->lapic_timer.pending))
return;
- start_sw_tscdeadline(apic);
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ start_sw_period(apic);
+ else if (apic_lvtt_tscdeadline(apic))
+ start_sw_tscdeadline(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
static void start_apic_timer(struct kvm_lapic *apic)
{
- ktime_t now;
-
atomic_set(&apic->lapic_timer.pending, 0);
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
- /* lapic timer in oneshot or periodic mode */
- now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
-
- if (!apic->lapic_timer.period)
- return;
- /*
- * Do not allow the guest to program periodic timers with small
- * interval, since the hrtimers are not throttled by the host
- * scheduler.
- */
- if (apic_lvtt_period(apic)) {
- s64 min_period = min_timer_period_us * 1000LL;
-
- if (apic->lapic_timer.period < min_period) {
- pr_info_ratelimited(
- "kvm: vcpu %i: requested %lld ns "
- "lapic timer period limited to %lld ns\n",
- apic->vcpu->vcpu_id,
- apic->lapic_timer.period, min_period);
- apic->lapic_timer.period = min_period;
- }
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- ktime_add_ns(now, apic->lapic_timer.period),
- HRTIMER_MODE_ABS_PINNED);
-
- apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
- PRIx64 ", "
- "timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __func__,
- APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_lapic_get_reg(apic, APIC_TMICT),
- apic->lapic_timer.period,
- ktime_to_ns(ktime_add_ns(now,
- apic->lapic_timer.period)));
+ if (set_target_expiration(apic) &&
+ !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
+ start_sw_period(apic);
} else if (apic_lvtt_tscdeadline(apic)) {
- if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
+ if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
start_sw_tscdeadline(apic);
}
}
@@ -1701,13 +1750,22 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
* LAPIC interface
*----------------------------------------------------------------------
*/
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (!lapic_in_kernel(vcpu))
+ return 0;
+
+ return apic->lapic_timer.tscdeadline;
+}
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
+ if (!lapic_in_kernel(vcpu) ||
+ !apic_lvtt_tscdeadline(apic))
return 0;
return apic->lapic_timer.tscdeadline;
@@ -1909,6 +1967,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
apic_timer_expired(apic);
if (lapic_is_periodic(apic)) {
+ advance_periodic_target_expiration(apic);
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
return HRTIMER_RESTART;
} else
@@ -1993,6 +2052,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
kvm_apic_local_deliver(apic, APIC_LVTT);
if (apic_lvtt_tscdeadline(apic))
apic->lapic_timer.tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ apic->lapic_timer.tscdeadline = 0;
+ apic->lapic_timer.target_expiration = ktime_set(0, 0);
+ }
atomic_set(&apic->lapic_timer.pending, 0);
}
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f60d01c29d51..e0c80233b3e1 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -15,6 +15,7 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
+ ktime_t target_expiration;
u32 timer_mode;
u32 timer_mode_mask;
u64 tscdeadline;
@@ -85,6 +86,7 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 87c5880ba3b7..1d2f350dcf6c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1660,17 +1660,9 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
* This has some overhead, but not as much as the cost of swapping
* out actively used pages or breaking up actively used hugepages.
*/
- if (!shadow_accessed_mask) {
- /*
- * We are holding the kvm->mmu_lock, and we are blowing up
- * shadow PTEs. MMU notifier consumers need to be kept at bay.
- * This is correct as long as we don't decouple the mmu_lock
- * protected regions (like invalidate_range_start|end does).
- */
- kvm->mmu_notifier_seq++;
+ if (!shadow_accessed_mask)
return kvm_handle_hva_range(kvm, start, end, 0,
kvm_unmap_rmapp);
- }
return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5382b82462fc..a91a5b01f38e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -520,6 +520,12 @@ static inline void pi_set_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
static inline int pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
@@ -920,16 +926,32 @@ static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
-static unsigned long *vmx_io_bitmap_a;
-static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
-static unsigned long *vmx_vmread_bitmap;
-static unsigned long *vmx_vmwrite_bitmap;
+enum {
+ VMX_IO_BITMAP_A,
+ VMX_IO_BITMAP_B,
+ VMX_MSR_BITMAP_LEGACY,
+ VMX_MSR_BITMAP_LONGMODE,
+ VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
+ VMX_MSR_BITMAP_LEGACY_X2APIC,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC,
+ VMX_VMREAD_BITMAP,
+ VMX_VMWRITE_BITMAP,
+ VMX_BITMAP_NR
+};
+
+static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
+
+#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
+#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
+#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
+#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
+#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
+#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
+#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
+#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
+#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
+#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
@@ -2529,14 +2551,14 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
} else {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
}
} else {
if (is_long_mode(vcpu))
@@ -2780,6 +2802,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -4575,41 +4598,6 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
- return;
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __set_bit(msr, msr_bitmap + 0x000 / f);
-
- if (type & MSR_TYPE_W)
- /* write-low */
- __set_bit(msr, msr_bitmap + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __set_bit(msr, msr_bitmap + 0x400 / f);
-
- if (type & MSR_TYPE_W)
- /* write-high */
- __set_bit(msr, msr_bitmap + 0xc00 / f);
-
- }
-}
-
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -4665,48 +4653,18 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
msr, MSR_TYPE_R | MSR_TYPE_W);
}
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
{
if (apicv_active) {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
+ msr, type);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
+ msr, type);
} else {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
+ msr, type);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_W);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
+ msr, type);
}
}
@@ -4828,9 +4786,15 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!pi_test_and_clear_on(&vmx->pi_desc))
+ if (!pi_test_on(&vmx->pi_desc))
return;
+ pi_clear_on(&vmx->pi_desc);
+ /*
+ * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+ * But on x86 this is just a compiler barrier anyway.
+ */
+ smp_mb__after_atomic();
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
}
@@ -6352,50 +6316,13 @@ static __init int hardware_setup(void)
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);
- vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_a)
- return r;
+ for (i = 0; i < VMX_BITMAP_NR; i++) {
+ vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_bitmap[i])
+ goto out;
+ }
vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_b)
- goto out;
-
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy)
- goto out1;
-
- vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic)
- goto out2;
-
- vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
- goto out3;
-
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode)
- goto out4;
-
- vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic)
- goto out5;
-
- vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
- goto out6;
-
- vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmread_bitmap)
- goto out7;
-
- vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmwrite_bitmap)
- goto out8;
-
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -6413,7 +6340,7 @@ static __init int hardware_setup(void)
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
- goto out9;
+ goto out;
}
if (boot_cpu_has(X86_FEATURE_NX))
@@ -6476,39 +6403,34 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
- memcpy(vmx_msr_bitmap_legacy_x2apic,
+ memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
+ memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
vmx_msr_bitmap_longmode, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+ for (msr = 0x800; msr <= 0x8ff; msr++) {
+ if (msr == 0x839 /* TMCCT */)
+ continue;
+ vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
+ }
+
/*
- * enable_apicv && kvm_vcpu_apicv_active()
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
*/
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839, true);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808, true);
/* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b, true);
+ vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
/* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f, true);
-
- /*
- * (enable_apicv && !kvm_vcpu_apicv_active()) ||
- * !enable_apicv
- */
- /* TPR */
- vmx_disable_intercept_msr_read_x2apic(0x808, false);
- vmx_disable_intercept_msr_write_x2apic(0x808, false);
+ vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
if (enable_ept) {
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
@@ -6555,42 +6477,19 @@ static __init int hardware_setup(void)
return alloc_kvm_area();
-out9:
- free_page((unsigned long)vmx_vmwrite_bitmap);
-out8:
- free_page((unsigned long)vmx_vmread_bitmap);
-out7:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-out6:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
-out4:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
-out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
- free_page((unsigned long)vmx_io_bitmap_b);
out:
- free_page((unsigned long)vmx_io_bitmap_a);
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
return r;
}
static __exit void hardware_unsetup(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
- free_page((unsigned long)vmx_io_bitmap_b);
- free_page((unsigned long)vmx_io_bitmap_a);
- free_page((unsigned long)vmx_vmwrite_bitmap);
- free_page((unsigned long)vmx_vmread_bitmap);
+ int i;
+
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
free_kvm_area();
}
@@ -8075,6 +7974,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
case EXIT_REASON_IO_INSTRUCTION:
return nested_vmx_exit_handled_io(vcpu, vmcs12);
+ case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -8624,11 +8525,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
register void *__sp asm(_ASM_SP);
- /*
- * If external interrupt exists, IF bit is set in rflags/eflags on the
- * interrupt stack frame, and interrupt will be enabled on a return
- * from interrupt handler.
- */
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
unsigned int vector;
@@ -9968,6 +9864,15 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx_set_constant_host_state(vmx);
/*
+ * Set the MSR load/store lists to match L0's settings.
+ */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
+ /*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
* entry, but only if the current (host) sp changed from the value
* we wrote last (vmx->host_rsp). This cache is no longer relevant
@@ -10755,6 +10660,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ u32 vm_inst_error = 0;
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
@@ -10767,6 +10673,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs12->vm_exit_msr_store_count))
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ if (unlikely(vmx->fail))
+ vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+
vmx_load_vmcs01(vcpu);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
@@ -10795,6 +10704,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
/* Update any VMCS fields that might have changed while L2 ran */
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (vmx->hv_deadline_tsc == -1)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
@@ -10843,7 +10754,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
if (unlikely(vmx->fail)) {
vmx->fail = 0;
- nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
+ nested_vmx_failValid(vcpu, vm_inst_error);
} else
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7e30c720d0c5..83990ad3710e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2794,7 +2794,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
if (kvm_lapic_hv_timer_in_use(vcpu) &&
kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_tscdeadline_msr(vcpu)))
+ kvm_get_lapic_target_expiration_tsc(vcpu)))
kvm_lapic_switch_to_sw_timer(vcpu);
/*
* On a host with synchronized TSC, there is no need to update