summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
authorAvi Kivity <avi@redhat.com>2012-07-26 10:54:21 +0200
committerAvi Kivity <avi@redhat.com>2012-07-26 10:54:21 +0200
commite9bda6f6f902e6b55d9baceb5523468a048cbe56 (patch)
treebf09cc165da1197cd34967da0593d08b9a37c0f3 /virt/kvm
parentMerge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi (diff)
parentKVM: Choose better candidate for directed yield (diff)
downloadlinux-e9bda6f6f902e6b55d9baceb5523468a048cbe56.tar.xz
linux-e9bda6f6f902e6b55d9baceb5523468a048cbe56.zip
Merge branch 'queue' into next
Merge patches queued during the run-up to the merge window. * queue: (25 commits) KVM: Choose better candidate for directed yield KVM: Note down when cpu relax intercepted or pause loop exited KVM: Add config to support ple or cpu relax optimzation KVM: switch to symbolic name for irq_states size KVM: x86: Fix typos in pmu.c KVM: x86: Fix typos in lapic.c KVM: x86: Fix typos in cpuid.c KVM: x86: Fix typos in emulate.c KVM: x86: Fix typos in x86.c KVM: SVM: Fix typos KVM: VMX: Fix typos KVM: remove the unused parameter of gfn_to_pfn_memslot KVM: remove is_error_hpa KVM: make bad_pfn static to kvm_main.c KVM: using get_fault_pfn to get the fault pfn KVM: MMU: track the refcount when unmap the page KVM: x86: remove unnecessary mark_page_dirty KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range() KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp() KVM: MMU: Add memslot parameter to hva handlers ... Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/iommu.c10
-rw-r--r--virt/kvm/irq_comm.c4
-rw-r--r--virt/kvm/kvm_main.c84
4 files changed, 72 insertions, 29 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 28694f4a9139..d01b24b72c61 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF
config HAVE_KVM_MSI
bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+ bool
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9fff9830bf0..c03f1fb26701 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages);
-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
- gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+ unsigned long size)
{
gfn_t end_gfn;
pfn_t pfn;
- pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
+ pfn = gfn_to_pfn_memslot(slot, gfn);
end_gfn = gfn + (size >> PAGE_SHIFT);
gfn += 1;
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
return pfn;
while (gfn < end_gfn)
- gfn_to_pfn_memslot(kvm, slot, gfn++);
+ gfn_to_pfn_memslot(slot, gfn++);
return pfn;
}
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
* Pin all pages we are about to map in memory. This is
* important because we unmap and unpin in 4kb steps later.
*/
- pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+ pfn = kvm_pin_pages(slot, gfn, page_size);
if (is_error_pfn(pfn)) {
gfn += 1;
continue;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 83402d74a767..7118be0f2f2c 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -321,11 +321,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_MASTER:
e->set = kvm_set_pic_irq;
- max_pin = 16;
+ max_pin = PIC_NUM_PINS;
break;
case KVM_IRQCHIP_PIC_SLAVE:
e->set = kvm_set_pic_irq;
- max_pin = 16;
+ max_pin = PIC_NUM_PINS;
delta = 8;
break;
case KVM_IRQCHIP_IOAPIC:
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 246852397e30..0014ee99dc7f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,11 +100,14 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
+struct page *bad_page;
+static pfn_t bad_pfn;
+
static struct page *hwpoison_page;
static pfn_t hwpoison_pfn;
-struct page *fault_page;
-pfn_t fault_pfn;
+static struct page *fault_page;
+static pfn_t fault_pfn;
inline int kvm_is_mmio_pfn(pfn_t pfn)
{
@@ -236,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
}
vcpu->run = page_address(page);
+ kvm_vcpu_set_in_spin_loop(vcpu, false);
+ kvm_vcpu_set_dy_eligible(vcpu, false);
+
r = kvm_arch_vcpu_init(vcpu);
if (r < 0)
goto fail_free_run;
@@ -332,8 +338,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
- for (; start < end; start += PAGE_SIZE)
- need_tlb_flush |= kvm_unmap_hva(kvm, start);
+ need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -950,12 +955,6 @@ int is_hwpoison_pfn(pfn_t pfn)
}
EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
-int is_fault_pfn(pfn_t pfn)
-{
- return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
int is_noslot_pfn(pfn_t pfn)
{
return pfn == bad_pfn;
@@ -1039,11 +1038,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
-static pfn_t get_fault_pfn(void)
+pfn_t get_fault_pfn(void)
{
get_page(fault_page);
return fault_pfn;
}
+EXPORT_SYMBOL_GPL(get_fault_pfn);
int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int write, struct page **page)
@@ -1065,8 +1065,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
return rc == -EHWPOISON;
}
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
- bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+ bool write_fault, bool *writable)
{
struct page *page[1];
int npages = 0;
@@ -1146,9 +1146,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
return pfn;
}
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
{
- return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+ return hva_to_pfn(addr, true, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
@@ -1166,7 +1166,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
return page_to_pfn(bad_page);
}
- return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+ return hva_to_pfn(addr, atomic, async, write_fault, writable);
}
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1195,11 +1195,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
- return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+ return hva_to_pfn(addr, false, NULL, true, NULL);
}
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1580,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+/*
+ * Helper that checks whether a VCPU is eligible for directed yield.
+ * Most eligible candidate to yield is decided by following heuristics:
+ *
+ * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
+ * (preempted lock holder), indicated by @in_spin_loop.
+ * Set at the beiginning and cleared at the end of interception/PLE handler.
+ *
+ * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
+ * chance last time (mostly it has become eligible now since we have probably
+ * yielded to lockholder in last iteration. This is done by toggling
+ * @dy_eligible each time a VCPU checked for eligibility.)
+ *
+ * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
+ * to preempted lock-holder could result in wrong VCPU selection and CPU
+ * burning. Giving priority for a potential lock-holder increases lock
+ * progress.
+ *
+ * Since algorithm is based on heuristics, accessing another VCPU data without
+ * locking does not harm. It may result in trying to yield to same VCPU, fail
+ * and continue with next VCPU and so on.
+ */
+bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+ bool eligible;
+
+ eligible = !vcpu->spin_loop.in_spin_loop ||
+ (vcpu->spin_loop.in_spin_loop &&
+ vcpu->spin_loop.dy_eligible);
+
+ if (vcpu->spin_loop.in_spin_loop)
+ kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+
+ return eligible;
+}
+#endif
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
struct kvm *kvm = me->kvm;
@@ -1589,6 +1625,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
int pass;
int i;
+ kvm_vcpu_set_in_spin_loop(me, true);
/*
* We boost the priority of a VCPU that is runnable but not
* currently running, because it got preempted by something
@@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (waitqueue_active(&vcpu->wq))
continue;
+ if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
+ continue;
if (kvm_vcpu_yield_to(vcpu)) {
kvm->last_boosted_vcpu = i;
yielded = 1;
@@ -1614,6 +1653,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
}
}
}
+ kvm_vcpu_set_in_spin_loop(me, false);
+
+ /* Ensure vcpu is not eligible during next spinloop */
+ kvm_vcpu_set_dy_eligible(me, false);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
@@ -2697,9 +2740,6 @@ static struct syscore_ops kvm_syscore_ops = {
.resume = kvm_resume,
};
-struct page *bad_page;
-pfn_t bad_pfn;
-
static inline
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
{