From 184564efae4d775225c8fe3b762a56956fb1f827 Mon Sep 17 00:00:00 2001 From: Zhang Haoyu Date: Thu, 11 Sep 2014 16:47:04 +0800 Subject: kvm: ioapic: conditionally delay irq delivery duringeoi broadcast Currently, we call ioapic_service() immediately when we find the irq is still active during eoi broadcast. But for real hardware, there's some delay between the EOI writing and irq delivery. If we do not emulate this behavior, and re-inject the interrupt immediately after the guest sends an EOI and re-enables interrupts, a guest might spend all its time in the ISR if it has a broken handler for a level-triggered interrupt. Such livelock actually happens with Windows guests when resuming from hibernation. As there's no way to recognize the broken handle from new raised ones, this patch delays an interrupt if 10.000 consecutive EOIs found that the interrupt was still high. The guest can then make a little forward progress, until a proper IRQ handler is set or until some detection routine in the guest (such as Linux's note_interrupt()) recognizes the situation. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: Zhang Haoyu Signed-off-by: Paolo Bonzini --- include/trace/events/kvm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 908925ace776..ab679c395042 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq, __entry->coalesced ? " (coalesced)" : "") ); +TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, + TP_PROTO(__u64 e), + TP_ARGS(e), + + TP_STRUCT__entry( + __field( __u64, e ) + ), + + TP_fast_assign( + __entry->e = e; + ), + + TP_printk("dst %x vec=%u (%s|%s|%s%s)", + (u8)(__entry->e >> 56), (u8)__entry->e, + __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), + (__entry->e & (1<<11)) ? "logical" : "physical", + (__entry->e & (1<<15)) ? "level" : "edge", + (__entry->e & (1<<16)) ? "|masked" : "") +); + TRACE_EVENT(kvm_msi_set_irq, TP_PROTO(__u64 address, __u64 data), TP_ARGS(address, data), -- cgit v1.2.3 From 8a9522d2fe6a1b643d3aef5ab7f097f73c601e7a Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Tue, 23 Sep 2014 12:34:54 -0700 Subject: kvm/x86/mmu: Pass gfn and level to rmapp callback. Callbacks don't have to do extra computation to learn what the caller (lvm_handle_hva_range()) knows very well. Useful for debugging/tracing/printk/future. Signed-off-by: Andres Lagar-Cavilla Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 37 ++++++++++++++++++++++++------------- include/trace/events/kvm.h | 16 +++++++++------- 2 files changed, 33 insertions(+), 20 deletions(-) (limited to 'include/trace') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bdd1acbab84d..47d534066325 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1262,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1270,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, while ((sptep = rmap_get_first(*rmapp, &iter))) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); - rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); drop_spte(kvm, sptep); need_tlb_flush = 1; @@ -1280,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1294,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { BUG_ON(!is_shadow_present_pte(*sptep)); - rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); need_flush = 1; @@ -1328,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, + int level, unsigned long data)) { int j; @@ -1357,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { unsigned long idx, idx_end; unsigned long *rmapp; + gfn_t gfn = gfn_start; /* * {idx(page_j) | page_j intersects with @@ -1367,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm, rmapp = __gfn_to_rmap(gfn_start, j, memslot); - for (; idx <= idx_end; ++idx) - ret |= handler(kvm, rmapp++, memslot, data); + for (; idx <= idx_end; + ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j))) + ret |= handler(kvm, rmapp++, memslot, + gfn, j, data); } } @@ -1379,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, int level, unsigned long data)) { return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); @@ -1400,7 +1410,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) } static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator uninitialized_var(iter); @@ -1415,7 +1426,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, * out actively used pages or breaking up actively used hugepages. */ if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, data); + young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data); goto out; } @@ -1430,13 +1441,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } } out: - /* @data has hva passed to kvm_age_hva(). */ - trace_kvm_age_page(data, slot, young); + trace_kvm_age_page(gfn, level, slot, young); return young; } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, + int level, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1474,13 +1485,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index ab679c395042..6edf1f2028cd 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -225,24 +225,26 @@ TRACE_EVENT(kvm_fpu, ); TRACE_EVENT(kvm_age_page, - TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), - TP_ARGS(hva, slot, ref), + TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref), + TP_ARGS(gfn, level, slot, ref), TP_STRUCT__entry( __field( u64, hva ) __field( u64, gfn ) + __field( u8, level ) __field( u8, referenced ) ), TP_fast_assign( - __entry->hva = hva; - __entry->gfn = - slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); + __entry->gfn = gfn; + __entry->level = level; + __entry->hva = ((gfn - slot->base_gfn) << + PAGE_SHIFT) + slot->userspace_addr; __entry->referenced = ref; ), - TP_printk("hva %llx gfn %llx %s", - __entry->hva, __entry->gfn, + TP_printk("hva %llx gfn %llx level %u %s", + __entry->hva, __entry->gfn, __entry->level, __entry->referenced ? "YOUNG" : "OLD") ); -- cgit v1.2.3