Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM update from Paolo Bonzini: "Fairly small update, but there are some interesting new features. Common: Optional support for adding a small amount of polling on each HLT instruction executed in the guest (or equivalent for other architectures). This can improve latency up to 50% on some scenarios (e.g. O_DSYNC writes or TCP_RR netperf tests). This also has to be enabled manually for now, but the plan is to auto-tune this in the future. ARM/ARM64: The highlights are support for GICv3 emulation and dirty page tracking s390: Several optimizations and bugfixes. Also a first: a feature exposed by KVM (UUID and long guest name in /proc/sysinfo) before it is available in IBM's hypervisor! :) MIPS: Bugfixes. x86: Support for PML (page modification logging, a new feature in Broadwell Xeons that speeds up dirty page tracking), nested virtualization improvements (nested APICv---a nice optimization), usual round of emulation fixes. There is also a new option to reduce latency of the TSC deadline timer in the guest; this needs to be tuned manually. Some commits are common between this pull and Catalin's; I see you have already included his tree. Powerpc: Nothing yet. The KVM/PPC changes will come in through the PPC maintainers, because I haven't received them yet and I might end up being offline for some part of next week" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits) KVM: ia64: drop kvm.h from installed user headers KVM: x86: fix build with !CONFIG_SMP KVM: x86: emulate: correct page fault error code for NoWrite instructions KVM: Disable compat ioctl for s390 KVM: s390: add cpu model support KVM: s390: use facilities and cpu_id per KVM KVM: s390/CPACF: Choose crypto control block format s390/kernel: Update /proc/sysinfo file with Extended Name and UUID KVM: s390: reenable LPP facility KVM: s390: floating irqs: fix user triggerable endless loop kvm: add halt_poll_ns module parameter kvm: remove KVM_MMIO_SIZE KVM: MIPS: Don't leak FPU/DSP to guest KVM: MIPS: Disable HTW while in guest KVM: nVMX: Enable nested posted interrupt processing KVM: nVMX: Enable nested virtual interrupt delivery KVM: nVMX: Enable nested apic register virtualization KVM: nVMX: Make nested control MSRs per-cpu KVM: nVMX: Enable nested virtualize x2apic mode KVM: nVMX: Prepare for using hardware MSR bitmap ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-13 18:55:09 +0100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-13 18:55:09 +0100
commit: b9085bcbf5f43adf60533f9b635b2e7faeed0fe9 (patch)
tree: e397abf5682a45c096e75b3d0fa99c8e228425fc /arch/x86/kvm/lapic.c
parent: Merge tag 'for-f2fs-3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/ja... (diff)
parent: KVM: ia64: drop kvm.h from installed user headers (diff)
download: linux-b9085bcbf5f43adf60533f9b635b2e7faeed0fe9.tar.xz
linux-b9085bcbf5f43adf60533f9b635b2e7faeed0fe9.zip
1 files changed, 96 insertions, 51 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d52dcf0776ea..e55b5fc344eb 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -33,6 +33,7 @@
 #include <asm/page.h>
 #include <asm/current.h>
 #include <asm/apicdef.h>
+#include <asm/delay.h>
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
 #include "kvm_cache_regs.h"
@@ -327,17 +328,24 @@ static u8 count_vectors(void *bitmap)
 	return count;
 }
 
-void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+void __kvm_apic_update_irr(u32 *pir, void *regs)
 {
 	u32 i, pir_val;
-	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	for (i = 0; i <= 7; i++) {
 		pir_val = xchg(&pir[i], 0);
 		if (pir_val)
-			*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
+			*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
 	}
 }
+EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
+
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	__kvm_apic_update_irr(pir, apic->regs);
+}
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
@@ -405,7 +413,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 	 * because the processor can modify ISR under the hood.  Instead
 	 * just set SVI.
 	 */
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+	if (unlikely(kvm_x86_ops->hwapic_isr_update))
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
 	else {
 		++apic->isr_count;
@@ -453,7 +461,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 	 * on the other hand isr_count and highest_isr_cache are unused
 	 * and must be left alone.
 	 */
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+	if (unlikely(kvm_x86_ops->hwapic_isr_update))
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 					       apic_find_highest_isr(apic));
 	else {
@@ -580,55 +588,48 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 	apic_update_ppr(apic);
 }
 
-static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
 {
 	return dest == (apic_x2apic_mode(apic) ?
 			X2APIC_BROADCAST : APIC_BROADCAST);
 }
 
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
 {
 	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
 }
 
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
+static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 {
-	int result = 0;
 	u32 logical_id;
 
 	if (kvm_apic_broadcast(apic, mda))
-		return 1;
+		return true;
 
-	if (apic_x2apic_mode(apic)) {
-		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
-		return logical_id & mda;
-	}
+	logical_id = kvm_apic_get_reg(apic, APIC_LDR);
 
-	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
+	if (apic_x2apic_mode(apic))
+		return ((logical_id >> 16) == (mda >> 16))
+		       && (logical_id & mda & 0xffff) != 0;
+
+	logical_id = GET_APIC_LOGICAL_ID(logical_id);
 
 	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
 	case APIC_DFR_FLAT:
-		if (logical_id & mda)
-			result = 1;
-		break;
+		return (logical_id & mda) != 0;
 	case APIC_DFR_CLUSTER:
-		if (((logical_id >> 4) == (mda >> 0x4))
-		    && (logical_id & mda & 0xf))
-			result = 1;
-		break;
+		return ((logical_id >> 4) == (mda >> 4))
+		       && (logical_id & mda & 0xf) != 0;
 	default:
 		apic_debug("Bad DFR vcpu %d: %08x\n",
 			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
-		break;
+		return false;
 	}
-
-	return result;
 }
 
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, unsigned int dest, int dest_mode)
 {
-	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
 
 	apic_debug("target %p, source %p, dest 0x%x, "
@@ -638,29 +639,21 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	ASSERT(target);
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
-		if (dest_mode == 0)
-			/* Physical mode. */
-			result = kvm_apic_match_physical_addr(target, dest);
+		if (dest_mode == APIC_DEST_PHYSICAL)
+			return kvm_apic_match_physical_addr(target, dest);
 		else
-			/* Logical mode. */
-			result = kvm_apic_match_logical_addr(target, dest);
-		break;
+			return kvm_apic_match_logical_addr(target, dest);
 	case APIC_DEST_SELF:
-		result = (target == source);
-		break;
+		return target == source;
 	case APIC_DEST_ALLINC:
-		result = 1;
-		break;
+		return true;
 	case APIC_DEST_ALLBUT:
-		result = (target != source);
-		break;
+		return target != source;
 	default:
 		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
 			   short_hand);
-		break;
+		return false;
 	}
-
-	return result;
 }
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
@@ -693,7 +686,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 	ret = true;
 
-	if (irq->dest_mode == 0) { /* physical mode */
+	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
 			goto out;
 
@@ -1076,25 +1069,72 @@ static void apic_timer_expired(struct kvm_lapic *apic)
 {
 	struct kvm_vcpu *vcpu = apic->vcpu;
 	wait_queue_head_t *q = &vcpu->wq;
+	struct kvm_timer *ktimer = &apic->lapic_timer;
 
-	/*
-	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
-	 * vcpu_enter_guest.
-	 */
 	if (atomic_read(&apic->lapic_timer.pending))
 		return;
 
 	atomic_inc(&apic->lapic_timer.pending);
-	/* FIXME: this code should not know anything about vcpus */
-	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	kvm_set_pending_timer(vcpu);
 
 	if (waitqueue_active(q))
 		wake_up_interruptible(q);
+
+	if (apic_lvtt_tscdeadline(apic))
+		ktimer->expired_tscdeadline = ktimer->tscdeadline;
+}
+
+/*
+ * On APICv, this test will cause a busy wait
+ * during a higher-priority task.
+ */
+
+static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = kvm_apic_get_reg(apic, APIC_LVTT);
+
+	if (kvm_apic_hw_enabled(apic)) {
+		int vec = reg & APIC_VECTOR_MASK;
+		void *bitmap = apic->regs + APIC_ISR;
+
+		if (kvm_x86_ops->deliver_posted_interrupt)
+			bitmap = apic->regs + APIC_IRR;
+
+		if (apic_test_vector(vec, bitmap))
+			return true;
+	}
+	return false;
+}
+
+void wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 guest_tsc, tsc_deadline;
+
+	if (!kvm_vcpu_has_lapic(vcpu))
+		return;
+
+	if (apic->lapic_timer.expired_tscdeadline == 0)
+		return;
+
+	if (!lapic_timer_int_injected(vcpu))
+		return;
+
+	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
+	apic->lapic_timer.expired_tscdeadline = 0;
+	guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
+	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+
+	/* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
+	if (guest_tsc < tsc_deadline)
+		__delay(tsc_deadline - guest_tsc);
 }
 
 static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now;
+
 	atomic_set(&apic->lapic_timer.pending, 0);
 
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
@@ -1140,6 +1180,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 		/* lapic timer in tsc deadline mode */
 		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
 		u64 ns = 0;
+		ktime_t expire;
 		struct kvm_vcpu *vcpu = apic->vcpu;
 		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
 		unsigned long flags;
@@ -1154,8 +1195,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
 		if (likely(tscdeadline > guest_tsc)) {
 			ns = (tscdeadline - guest_tsc) * 1000000ULL;
 			do_div(ns, this_tsc_khz);
+			expire = ktime_add_ns(now, ns);
+			expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
 			hrtimer_start(&apic->lapic_timer.timer,
-				ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+				      expire, HRTIMER_MODE_ABS);
 		} else
 			apic_timer_expired(apic);
 
@@ -1745,7 +1788,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	if (kvm_x86_ops->hwapic_irr_update)
 		kvm_x86_ops->hwapic_irr_update(vcpu,
 				apic_find_highest_irr(apic));
-	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
+	if (unlikely(kvm_x86_ops->hwapic_isr_update))
+		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+				apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_rtc_eoi_tracking_restore_one(vcpu);
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-13 18:55:09 +0100
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-13 18:55:09 +0100
commit	b9085bcbf5f43adf60533f9b635b2e7faeed0fe9 (patch)
tree	e397abf5682a45c096e75b3d0fa99c8e228425fc /arch/x86/kvm/lapic.c
parent	Merge tag 'for-f2fs-3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/ja... (diff)
parent	KVM: ia64: drop kvm.h from installed user headers (diff)
download	linux-b9085bcbf5f43adf60533f9b635b2e7faeed0fe9.tar.xz linux-b9085bcbf5f43adf60533f9b635b2e7faeed0fe9.zip