From 34a75b0f63356097ae9f706d64a793934891002f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 10 Aug 2016 11:27:27 +1000 Subject: KVM: PPC: Implement kvm_arch_intc_initialized() for PPC It doesn't make sense to create irqfds for a VM that doesn't have in-kernel interrupt controller emulation. There is an existing interface for architecture code to tell the irqfd code whether or not any interrupt controller has been initialized, called kvm_arch_intc_initialized(), so let's implement that for powerpc. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ec35af34a3fb..e36ce0cff766 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -43,6 +43,8 @@ #include #define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES) +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -- cgit v1.2.3 From e64fb7e272885c1ea3cd2f68f267ae12fa04c8b1 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 2 Aug 2016 14:03:19 +1000 Subject: KVM: PPC: Book3S HV: Move struct kvmppc_vcore from kvm_host.h to kvm_book3s.h The next commit will introduce a member to the kvmppc_vcore struct which references MAX_SMT_THREADS which is defined in kvm_book3s_asm.h, however this file isn't included in kvm_host.h directly. Thus compiling for certain platforms such as pmac32_defconfig and ppc64e_defconfig with KVM fails due to MAX_SMT_THREADS not being defined. Move the struct kvmppc_vcore definition to kvm_book3s.h which explicitly includes kvm_book3s_asm.h. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 35 +++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_host.h | 35 ----------------------------------- 2 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8f39796c9da8..a50c5fec9790 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -69,6 +69,41 @@ struct hpte_cache { int pagesize; }; +/* + * Struct for a virtual core. + * Note: entry_exit_map combines a bitmap of threads that have entered + * in the bottom 8 bits and a bitmap of threads that have exited in the + * next 8 bits. This is so that we can atomically set the entry bit + * iff the exit map is 0 without taking a lock. + */ +struct kvmppc_vcore { + int n_runnable; + int num_threads; + int entry_exit_map; + int napping_threads; + int first_vcpuid; + u16 pcpu; + u16 last_cpu; + u8 vcore_state; + u8 in_guest; + struct kvmppc_vcore *master_vcore; + struct list_head runnable_threads; + struct list_head preempt_list; + spinlock_t lock; + struct swait_queue_head wq; + spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ + u64 stolen_tb; + u64 preempt_tb; + struct kvm_vcpu *runner; + struct kvm *kvm; + u64 tb_offset; /* guest timebase - host timebase */ + ulong lpcr; + u32 arch_compat; + ulong pcr; + ulong dpdes; /* doorbell state (POWER8) */ + ulong conferring_threads; +}; + struct kvmppc_vcpu_book3s { struct kvmppc_sid_map sid_map[SID_MAP_NUM]; struct { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e36ce0cff766..7ff9919916c3 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -277,41 +277,6 @@ struct kvm_arch { #endif }; -/* - * Struct for a virtual core. - * Note: entry_exit_map combines a bitmap of threads that have entered - * in the bottom 8 bits and a bitmap of threads that have exited in the - * next 8 bits. This is so that we can atomically set the entry bit - * iff the exit map is 0 without taking a lock. - */ -struct kvmppc_vcore { - int n_runnable; - int num_threads; - int entry_exit_map; - int napping_threads; - int first_vcpuid; - u16 pcpu; - u16 last_cpu; - u8 vcore_state; - u8 in_guest; - struct kvmppc_vcore *master_vcore; - struct list_head runnable_threads; - struct list_head preempt_list; - spinlock_t lock; - struct swait_queue_head wq; - spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ - u64 stolen_tb; - u64 preempt_tb; - struct kvm_vcpu *runner; - struct kvm *kvm; - u64 tb_offset; /* guest timebase - host timebase */ - ulong lpcr; - u32 arch_compat; - ulong pcr; - ulong dpdes; /* doorbell state (POWER8) */ - ulong conferring_threads; -}; - #define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff) #define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) #define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) -- cgit v1.2.3 From 7b5f8272c792d49da73d98e9ca32f4cbb6d53808 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 2 Aug 2016 14:03:20 +1000 Subject: KVM: PPC: Book3S HV: Change vcore element runnable_threads from linked-list to array The struct kvmppc_vcore is a structure used to store various information about a virtual core for a kvm guest. The runnable_threads element of the struct provides a list of all of the currently runnable vcpus on the core (those in the KVMPPC_VCPU_RUNNABLE state). The previous implementation of this list was a linked_list. The next patch requires that the list be able to be iterated over without holding the vcore lock. Reimplement the runnable_threads list in the kvmppc_vcore struct as an array. Implement function to iterate over valid entries in the array and update access sites accordingly. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/kvm/book3s_hv.c | 68 +++++++++++++++++++++-------------- 3 files changed, 43 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a50c5fec9790..151f8173e596 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -87,7 +87,7 @@ struct kvmppc_vcore { u8 vcore_state; u8 in_guest; struct kvmppc_vcore *master_vcore; - struct list_head runnable_threads; + struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; struct list_head preempt_list; spinlock_t lock; struct swait_queue_head wq; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7ff9919916c3..851575a0c328 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -635,7 +635,6 @@ struct kvm_vcpu_arch { long pgfault_index; unsigned long pgfault_hpte[2]; - struct list_head run_list; struct task_struct *run_task; struct kvm_run *kvm_run; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2fd5580c8f6e..ebbab1b2206c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "book3s.h" @@ -97,6 +98,26 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, + int *ip) +{ + int i = *ip; + struct kvm_vcpu *vcpu; + + while (++i < MAX_SMT_THREADS) { + vcpu = READ_ONCE(vc->runnable_threads[i]); + if (vcpu) { + *ip = i; + return vcpu; + } + } + return NULL; +} + +/* Used to traverse the list of runnable threads for a given vcore */ +#define for_each_runnable_thread(i, vcpu, vc) \ + for (i = -1; (vcpu = next_runnable_thread(vc, &i)); ) + static bool kvmppc_ipi_thread(int cpu) { /* On POWER8 for IPIs to threads in the same core, use msgsnd */ @@ -1493,7 +1514,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) if (vcore == NULL) return NULL; - INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); init_swait_queue_head(&vcore->wq); @@ -1802,7 +1822,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; spin_unlock_irq(&vcpu->arch.tbacct_lock); --vc->n_runnable; - list_del(&vcpu->arch.run_list); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL); } static int kvmppc_grab_hwthread(int cpu) @@ -2209,10 +2229,10 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip, static void prepare_threads(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + int i; + struct kvm_vcpu *vcpu; - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { if (signal_pending(vcpu->arch.run_task)) vcpu->arch.ret = -EINTR; else if (vcpu->arch.vpa.update_pending || @@ -2259,15 +2279,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) { - int still_running = 0; + int still_running = 0, i; u64 now; long ret; - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; spin_lock(&vc->lock); now = get_tb(); - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { /* cancel pending dec exception if dec is positive */ if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) @@ -2307,8 +2326,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) } if (vc->n_runnable > 0 && vc->runner == NULL) { /* make sure there's a candidate runner awake */ - vcpu = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + vcpu = next_runnable_thread(vc, &i); wake_up(&vcpu->arch.cpu_run); } } @@ -2361,7 +2380,7 @@ static inline void kvmppc_set_host_core(int cpu) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; int i; int srcu_idx; struct core_info core_info; @@ -2397,8 +2416,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); @@ -2477,8 +2495,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) active |= 1 << thr; list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { pvc->pcpu = pcpu + thr; - list_for_each_entry(vcpu, &pvc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, pvc) { kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -2611,7 +2628,7 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { struct kvm_vcpu *vcpu; - int do_sleep = 1; + int do_sleep = 1, i; DECLARE_SWAITQUEUE(wait); prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); @@ -2620,7 +2637,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) * Check one last time for pending exceptions and ceded state after * we put ourselves on the wait queue */ - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { do_sleep = 0; break; @@ -2644,9 +2661,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int n_ceded; + int n_ceded, i; struct kvmppc_vcore *vc; - struct kvm_vcpu *v, *vn; + struct kvm_vcpu *v; trace_kvmppc_run_vcpu_enter(vcpu); @@ -2666,7 +2683,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; - list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu); ++vc->n_runnable; /* @@ -2706,8 +2723,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE); continue; } - list_for_each_entry_safe(v, vn, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, v, vc) { kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); @@ -2720,7 +2736,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) break; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { + for_each_runnable_thread(i, v, vc) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; else @@ -2759,8 +2775,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) { /* Wake up some vcpu to run the core */ - v = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + v = next_runnable_thread(vc, &i); wake_up(&v->arch.cpu_run); } -- cgit v1.2.3 From 0cda69dd7cd64fdd54bdf584b5d6ba53767ba422 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 2 Aug 2016 14:03:21 +1000 Subject: KVM: PPC: Book3S HV: Implement halt polling This patch introduces new halt polling functionality into the kvm_hv kernel module. When a vcore is idle it will poll for some period of time before scheduling itself out. When all of the runnable vcpus on a vcore have ceded (and thus the vcore is idle) we schedule ourselves out to allow something else to run. In the event that we need to wake up very quickly (for example an interrupt arrives), we are required to wait until we get scheduled again. Implement halt polling so that when a vcore is idle, and before scheduling ourselves, we poll for vcpus in the runnable_threads list which have pending exceptions or which leave the ceded state. If we poll successfully then we can get back into the guest very quickly without ever scheduling ourselves, otherwise we schedule ourselves out as before. There exists generic halt_polling code in virt/kvm_main.c, however on powerpc the polling conditions are different to the generic case. It would be nice if we could just implement an arch specific kvm_check_block() function, but there is still other arch specific things which need to be done for kvm_hv (for example manipulating vcore states) which means that a separate implementation is the best option. Testing of this patch with a TCP round robin test between two guests with virtio network interfaces has found a decrease in round trip time of ~15us on average. A performance gain is only seen when going out of and back into the guest often and quickly, otherwise there is no net benefit from the polling. The polling interval is adjusted such that when we are often scheduled out for long periods of time it is reduced, and when we often poll successfully it is increased. The rate at which the polling interval increases or decreases, and the maximum polling interval, can be set through module parameters. Based on the implementation in the generic kvm module by Wanpeng Li and Paolo Bonzini, and on direction from Paul Mackerras. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/book3s_hv.c | 116 ++++++++++++++++++++++++++++++---- arch/powerpc/kvm/trace_hv.h | 22 +++++++ 4 files changed, 126 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 151f8173e596..c261f52f6a55 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -102,6 +102,7 @@ struct kvmppc_vcore { ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ ulong conferring_threads; + unsigned int halt_poll_ns; }; struct kvmppc_vcpu_book3s { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 851575a0c328..6ece4a854a59 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -296,6 +296,7 @@ struct kvm_arch { #define VCORE_SLEEPING 3 #define VCORE_RUNNING 4 #define VCORE_EXITING 5 +#define VCORE_POLLING 6 /* * Struct used to manage memory for a virtual processor area diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ebbab1b2206c..3c85c3b28fc5 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -95,6 +95,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */ +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT; +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns"); + +/* Factor by which the vcore halt poll interval is grown, default is to double + */ +static unsigned int halt_poll_ns_grow = 2; +module_param(halt_poll_ns_grow, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by"); + +/* Factor by which the vcore halt poll interval is shrunk, default is to reset + */ +static unsigned int halt_poll_ns_shrink; +module_param(halt_poll_ns_shrink, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by"); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -2621,32 +2638,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, finish_wait(&vcpu->arch.cpu_run, &wait); } +static void grow_halt_poll_ns(struct kvmppc_vcore *vc) +{ + /* 10us base */ + if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) + vc->halt_poll_ns = 10000; + else + vc->halt_poll_ns *= halt_poll_ns_grow; + + if (vc->halt_poll_ns > halt_poll_max_ns) + vc->halt_poll_ns = halt_poll_max_ns; +} + +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) +{ + if (halt_poll_ns_shrink == 0) + vc->halt_poll_ns = 0; + else + vc->halt_poll_ns /= halt_poll_ns_shrink; +} + +/* Check to see if any of the runnable vcpus on the vcore have pending + * exceptions or are no longer ceded + */ +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) +{ + struct kvm_vcpu *vcpu; + int i; + + for_each_runnable_thread(i, vcpu, vc) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) + return 1; + } + + return 0; +} + /* * All the vcpus in this vcore are idle, so wait for a decrementer * or external interrupt to one of the vcpus. vc->lock is held. */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; - int do_sleep = 1, i; + int do_sleep = 1; + ktime_t cur, start; + u64 block_ns; DECLARE_SWAITQUEUE(wait); - prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + /* Poll for pending exceptions and ceded state */ + cur = start = ktime_get(); + if (vc->halt_poll_ns) { + ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns); - /* - * Check one last time for pending exceptions and ceded state after - * we put ourselves on the wait queue - */ - for_each_runnable_thread(i, vcpu, vc) { - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { - do_sleep = 0; - break; - } + vc->vcore_state = VCORE_POLLING; + spin_unlock(&vc->lock); + + do { + if (kvmppc_vcore_check_block(vc)) { + do_sleep = 0; + break; + } + cur = ktime_get(); + } while (single_task_running() && ktime_before(cur, stop)); + + spin_lock(&vc->lock); + vc->vcore_state = VCORE_INACTIVE; + + if (!do_sleep) + goto out; } - if (!do_sleep) { + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + if (kvmppc_vcore_check_block(vc)) { finish_swait(&vc->wq, &wait); - return; + do_sleep = 0; + goto out; } vc->vcore_state = VCORE_SLEEPING; @@ -2657,6 +2724,27 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); + + cur = ktime_get(); + +out: + block_ns = ktime_to_ns(cur) - ktime_to_ns(start); + + /* Adjust poll time */ + if (halt_poll_max_ns) { + if (block_ns <= vc->halt_poll_ns) + ; + /* We slept and blocked for longer than the max halt time */ + else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) + shrink_halt_poll_ns(vc); + /* We slept and our poll time is too small */ + else if (vc->halt_poll_ns < halt_poll_max_ns && + block_ns < halt_poll_max_ns) + grow_halt_poll_ns(vc); + } else + vc->halt_poll_ns = 0; + + trace_kvmppc_vcore_wakeup(do_sleep, block_ns); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 33d9daff5783..fb21990c0fb4 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked, __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) ); +TRACE_EVENT(kvmppc_vcore_wakeup, + TP_PROTO(int do_sleep, __u64 ns), + + TP_ARGS(do_sleep, ns), + + TP_STRUCT__entry( + __field(__u64, ns) + __field(int, waited) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->ns = ns; + __entry->waited = do_sleep; + __entry->tgid = current->tgid; + ), + + TP_printk("%s time %lld ns, tgid=%d", + __entry->waited ? "wait" : "poll", + __entry->ns, __entry->tgid) +); + TRACE_EVENT(kvmppc_run_vcpu_enter, TP_PROTO(struct kvm_vcpu *vcpu), -- cgit v1.2.3 From 8a7e75d47b68193339f8727cf4503271d0a0b1d0 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 2 Aug 2016 14:03:22 +1000 Subject: KVM: Add provisioning for ulong vm stats and u64 vcpu stats vms and vcpus have statistics associated with them which can be viewed within the debugfs. Currently it is assumed within the vcpu_stat_get() and vm_stat_get() functions that all of these statistics are represented as u32s, however the next patch adds some u64 vcpu statistics. Change all vcpu statistics to u64 and modify vcpu_stat_get() accordingly. Since vcpu statistics are per vcpu, they will only be updated by a single vcpu at a time so this shouldn't present a problem on 32-bit machines which can't atomically increment 64-bit numbers. However vm statistics could potentially be updated by multiple vcpus from that vm at a time. To avoid the overhead of atomics make all vm statistics ulong such that they are 64-bit on 64-bit systems where they can be atomically incremented and are 32-bit on 32-bit systems which may not be able to atomically increment 64-bit numbers. Modify vm_stat_get() to expect ulongs. Signed-off-by: Suraj Jitindar Singh Reviewed-by: David Matlack Acked-by: Christian Borntraeger Signed-off-by: Paul Mackerras --- arch/arm/include/asm/kvm_host.h | 12 ++-- arch/arm64/include/asm/kvm_host.h | 12 ++-- arch/mips/include/asm/kvm_host.h | 46 ++++++------- arch/powerpc/include/asm/kvm_host.h | 60 ++++++++-------- arch/s390/include/asm/kvm_host.h | 134 ++++++++++++++++++------------------ arch/x86/include/asm/kvm_host.h | 72 +++++++++---------- virt/kvm/kvm_main.c | 4 +- 7 files changed, 170 insertions(+), 170 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index de338d93d11b..6ad21f04a922 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -183,15 +183,15 @@ struct kvm_vcpu_arch { }; struct kvm_vm_stat { - u32 remote_tlb_flush; + ulong remote_tlb_flush; }; struct kvm_vcpu_stat { - u32 halt_successful_poll; - u32 halt_attempted_poll; - u32 halt_poll_invalid; - u32 halt_wakeup; - u32 hvc_exit_stat; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_poll_invalid; + u64 halt_wakeup; + u64 hvc_exit_stat; u64 wfe_exit_stat; u64 wfi_exit_stat; u64 mmio_exit_user; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3eda975837d0..bd94e6766759 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -290,15 +290,15 @@ struct kvm_vcpu_arch { #endif struct kvm_vm_stat { - u32 remote_tlb_flush; + ulong remote_tlb_flush; }; struct kvm_vcpu_stat { - u32 halt_successful_poll; - u32 halt_attempted_poll; - u32 halt_poll_invalid; - u32 halt_wakeup; - u32 hvc_exit_stat; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_poll_invalid; + u64 halt_wakeup; + u64 hvc_exit_stat; u64 wfe_exit_stat; u64 wfi_exit_stat; u64 mmio_exit_user; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index b54bcadd8aec..5f488dc8a7d5 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -110,32 +110,32 @@ extern atomic_t kvm_mips_instance; struct kvm_vm_stat { - u32 remote_tlb_flush; + ulong remote_tlb_flush; }; struct kvm_vcpu_stat { - u32 wait_exits; - u32 cache_exits; - u32 signal_exits; - u32 int_exits; - u32 cop_unusable_exits; - u32 tlbmod_exits; - u32 tlbmiss_ld_exits; - u32 tlbmiss_st_exits; - u32 addrerr_st_exits; - u32 addrerr_ld_exits; - u32 syscall_exits; - u32 resvd_inst_exits; - u32 break_inst_exits; - u32 trap_inst_exits; - u32 msa_fpe_exits; - u32 fpe_exits; - u32 msa_disabled_exits; - u32 flush_dcache_exits; - u32 halt_successful_poll; - u32 halt_attempted_poll; - u32 halt_poll_invalid; - u32 halt_wakeup; + u64 wait_exits; + u64 cache_exits; + u64 signal_exits; + u64 int_exits; + u64 cop_unusable_exits; + u64 tlbmod_exits; + u64 tlbmiss_ld_exits; + u64 tlbmiss_st_exits; + u64 addrerr_st_exits; + u64 addrerr_ld_exits; + u64 syscall_exits; + u64 resvd_inst_exits; + u64 break_inst_exits; + u64 trap_inst_exits; + u64 msa_fpe_exits; + u64 fpe_exits; + u64 msa_disabled_exits; + u64 flush_dcache_exits; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_poll_invalid; + u64 halt_wakeup; }; struct kvm_arch_memory_slot { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6ece4a854a59..5ec1dcf300f6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -97,41 +97,41 @@ struct kvmppc_vcpu_book3s; struct kvmppc_book3s_shadow_vcpu; struct kvm_vm_stat { - u32 remote_tlb_flush; + ulong remote_tlb_flush; }; struct kvm_vcpu_stat { - u32 sum_exits; - u32 mmio_exits; - u32 signal_exits; - u32 light_exits; + u64 sum_exits; + u64 mmio_exits; + u64 signal_exits; + u64 light_exits; /* Account for special types of light exits: */ - u32 itlb_real_miss_exits; - u32 itlb_virt_miss_exits; - u32 dtlb_real_miss_exits; - u32 dtlb_virt_miss_exits; - u32 syscall_exits; - u32 isi_exits; - u32 dsi_exits; - u32 emulated_inst_exits; - u32 dec_exits; - u32 ext_intr_exits; - u32 halt_successful_poll; - u32 halt_attempted_poll; - u32 halt_poll_invalid; - u32 halt_wakeup; - u32 dbell_exits; - u32 gdbell_exits; - u32 ld; - u32 st; + u64 itlb_real_miss_exits; + u64 itlb_virt_miss_exits; + u64 dtlb_real_miss_exits; + u64 dtlb_virt_miss_exits; + u64 syscall_exits; + u64 isi_exits; + u64 dsi_exits; + u64 emulated_inst_exits; + u64 dec_exits; + u64 ext_intr_exits; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_poll_invalid; + u64 halt_wakeup; + u64 dbell_exits; + u64 gdbell_exits; + u64 ld; + u64 st; #ifdef CONFIG_PPC_BOOK3S - u32 pf_storage; - u32 pf_instruc; - u32 sp_storage; - u32 sp_instruc; - u32 queue_intr; - u32 ld_slow; - u32 st_slow; + u64 pf_storage; + u64 pf_instruc; + u64 sp_storage; + u64 sp_instruc; + u64 queue_intr; + u64 ld_slow; + u64 st_slow; #endif }; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8e5daf7a76ce..2bbe675c96b9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -245,72 +245,72 @@ struct sie_page { } __packed; struct kvm_vcpu_stat { - u32 exit_userspace; - u32 exit_null; - u32 exit_external_request; - u32 exit_external_interrupt; - u32 exit_stop_request; - u32 exit_validity; - u32 exit_instruction; - u32 exit_pei; - u32 halt_successful_poll; - u32 halt_attempted_poll; - u32 halt_poll_invalid; - u32 halt_wakeup; - u32 instruction_lctl; - u32 instruction_lctlg; - u32 instruction_stctl; - u32 instruction_stctg; - u32 exit_program_interruption; - u32 exit_instr_and_program; - u32 exit_operation_exception; - u32 deliver_external_call; - u32 deliver_emergency_signal; - u32 deliver_service_signal; - u32 deliver_virtio_interrupt; - u32 deliver_stop_signal; - u32 deliver_prefix_signal; - u32 deliver_restart_signal; - u32 deliver_program_int; - u32 deliver_io_int; - u32 exit_wait_state; - u32 instruction_pfmf; - u32 instruction_stidp; - u32 instruction_spx; - u32 instruction_stpx; - u32 instruction_stap; - u32 instruction_storage_key; - u32 instruction_ipte_interlock; - u32 instruction_stsch; - u32 instruction_chsc; - u32 instruction_stsi; - u32 instruction_stfl; - u32 instruction_tprot; - u32 instruction_sie; - u32 instruction_essa; - u32 instruction_sthyi; - u32 instruction_sigp_sense; - u32 instruction_sigp_sense_running; - u32 instruction_sigp_external_call; - u32 instruction_sigp_emergency; - u32 instruction_sigp_cond_emergency; - u32 instruction_sigp_start; - u32 instruction_sigp_stop; - u32 instruction_sigp_stop_store_status; - u32 instruction_sigp_store_status; - u32 instruction_sigp_store_adtl_status; - u32 instruction_sigp_arch; - u32 instruction_sigp_prefix; - u32 instruction_sigp_restart; - u32 instruction_sigp_init_cpu_reset; - u32 instruction_sigp_cpu_reset; - u32 instruction_sigp_unknown; - u32 diagnose_10; - u32 diagnose_44; - u32 diagnose_9c; - u32 diagnose_258; - u32 diagnose_308; - u32 diagnose_500; + u64 exit_userspace; + u64 exit_null; + u64 exit_external_request; + u64 exit_external_interrupt; + u64 exit_stop_request; + u64 exit_validity; + u64 exit_instruction; + u64 exit_pei; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_poll_invalid; + u64 halt_wakeup; + u64 instruction_lctl; + u64 instruction_lctlg; + u64 instruction_stctl; + u64 instruction_stctg; + u64 exit_program_interruption; + u64 exit_instr_and_program; + u64 exit_operation_exception; + u64 deliver_external_call; + u64 deliver_emergency_signal; + u64 deliver_service_signal; + u64 deliver_virtio_interrupt; + u64 deliver_stop_signal; + u64 deliver_prefix_signal; + u64 deliver_restart_signal; + u64 deliver_program_int; + u64 deliver_io_int; + u64 exit_wait_state; + u64 instruction_pfmf; + u64 instruction_stidp; + u64 instruction_spx; + u64 instruction_stpx; + u64 instruction_stap; + u64 instruction_storage_key; + u64 instruction_ipte_interlock; + u64 instruction_stsch; + u64 instruction_chsc; + u64 instruction_stsi; + u64 instruction_stfl; + u64 instruction_tprot; + u64 instruction_sie; + u64 instruction_essa; + u64 instruction_sthyi; + u64 instruction_sigp_sense; + u64 instruction_sigp_sense_running; + u64 instruction_sigp_external_call; + u64 instruction_sigp_emergency; + u64 instruction_sigp_cond_emergency; + u64 instruction_sigp_start; + u64 instruction_sigp_stop; + u64 instruction_sigp_stop_store_status; + u64 instruction_sigp_store_status; + u64 instruction_sigp_store_adtl_status; + u64 instruction_sigp_arch; + u64 instruction_sigp_prefix; + u64 instruction_sigp_restart; + u64 instruction_sigp_init_cpu_reset; + u64 instruction_sigp_cpu_reset; + u64 instruction_sigp_unknown; + u64 diagnose_10; + u64 diagnose_44; + u64 diagnose_9c; + u64 diagnose_258; + u64 diagnose_308; + u64 diagnose_500; }; #define PGM_OPERATION 0x01 @@ -577,7 +577,7 @@ struct kvm_vcpu_arch { }; struct kvm_vm_stat { - u32 remote_tlb_flush; + ulong remote_tlb_flush; }; struct kvm_arch_memory_slot { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 33ae3a4d0159..67c8f5268af5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -790,45 +790,45 @@ struct kvm_arch { }; struct kvm_vm_stat { - u32 mmu_shadow_zapped; - u32 mmu_pte_write; - u32 mmu_pte_updated; - u32 mmu_pde_zapped; - u32 mmu_flooded; - u32 mmu_recycled; - u32 mmu_cache_miss; - u32 mmu_unsync; - u32 remote_tlb_flush; - u32 lpages; + ulong mmu_shadow_zapped; + ulong mmu_pte_write; + ulong mmu_pte_updated; + ulong mmu_pde_zapped; + ulong mmu_flooded; + ulong mmu_recycled; + ulong mmu_cache_miss; + ulong mmu_unsync; + ulong remote_tlb_flush; + ulong lpages; }; struct kvm_vcpu_stat { - u32 pf_fixed; - u32 pf_guest; - u32 tlb_flush; - u32 invlpg; - - u32 exits; - u32 io_exits; - u32 mmio_exits; - u32 signal_exits; - u32 irq_window_exits; - u32 nmi_window_exits; - u32 halt_exits; - u32 halt_successful_poll; - u32 halt_attempted_poll; - u32 halt_poll_invalid; - u32 halt_wakeup; - u32 request_irq_exits; - u32 irq_exits; - u32 host_state_reload; - u32 efer_reload; - u32 fpu_reload; - u32 insn_emulation; - u32 insn_emulation_fail; - u32 hypercalls; - u32 irq_injections; - u32 nmi_injections; + u64 pf_fixed; + u64 pf_guest; + u64 tlb_flush; + u64 invlpg; + + u64 exits; + u64 io_exits; + u64 mmio_exits; + u64 signal_exits; + u64 irq_window_exits; + u64 nmi_window_exits; + u64 halt_exits; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_poll_invalid; + u64 halt_wakeup; + u64 request_irq_exits; + u64 irq_exits; + u64 host_state_reload; + u64 efer_reload; + u64 fpu_reload; + u64 insn_emulation; + u64 insn_emulation_fail; + u64 hypercalls; + u64 irq_injections; + u64 nmi_injections; }; struct x86_instruction_info; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 195078225aa5..4171ef326543 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3619,7 +3619,7 @@ static int vm_stat_get_per_vm(void *data, u64 *val) { struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; - *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset); + *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset); return 0; } @@ -3649,7 +3649,7 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val) *val = 0; kvm_for_each_vcpu(i, vcpu, stat_data->kvm) - *val += *(u32 *)((void *)vcpu + stat_data->offset); + *val += *(u64 *)((void *)vcpu + stat_data->offset); return 0; } -- cgit v1.2.3 From 2a27f514a47d39c50aaa5c07831ab35178955d47 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 2 Aug 2016 14:03:23 +1000 Subject: KVM: PPC: Implement existing and add new halt polling vcpu stats vcpu stats are used to collect information about a vcpu which can be viewed in the debugfs. For example halt_attempted_poll and halt_successful_poll are used to keep track of the number of times the vcpu attempts to and successfully polls. These stats are currently not used on powerpc. Implement incrementation of the halt_attempted_poll and halt_successful_poll vcpu stats for powerpc. Since these stats are summed over all the vcpus for all running guests it doesn't matter which vcpu they are attributed to, thus we choose the current runner vcpu of the vcore. Also add new vcpu stats: halt_poll_success_ns, halt_poll_fail_ns and halt_wait_ns to be used to accumulate the total time spend polling successfully, polling unsuccessfully and waiting respectively, and halt_successful_wait to accumulate the number of times the vcpu waits. Given that halt_poll_success_ns, halt_poll_fail_ns and halt_wait_ns are expressed in nanoseconds it is necessary to represent these as 64-bit quantities, otherwise they would overflow after only about 4 seconds. Given that the total time spend either polling or waiting will be known and the number of times that each was done, it will be possible to determine the average poll and wait times. This will give the ability to tune the kvm module parameters based on the calculated average wait and poll times. Signed-off-by: Suraj Jitindar Singh Reviewed-by: David Matlack Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 4 ++++ arch/powerpc/kvm/book3s.c | 4 ++++ arch/powerpc/kvm/book3s_hv.c | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5ec1dcf300f6..373003f4551d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -116,8 +116,12 @@ struct kvm_vcpu_stat { u64 emulated_inst_exits; u64 dec_exits; u64 ext_intr_exits; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; + u64 halt_wait_ns; u64 halt_successful_poll; u64 halt_attempted_poll; + u64 halt_successful_wait; u64 halt_poll_invalid; u64 halt_wakeup; u64 dbell_exits; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 47018fcbf7d6..71eb8f3d3b54 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "queue_intr", VCPU_STAT(queue_intr) }, + { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) }, + { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) }, + { "halt_wait_ns", VCPU_STAT(halt_wait_ns) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, + { "halt_successful_wait", VCPU_STAT(halt_successful_wait) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "pf_storage", VCPU_STAT(pf_storage) }, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3c85c3b28fc5..30ff8ab5aba1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2680,15 +2680,16 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { + ktime_t cur, start_poll, start_wait; int do_sleep = 1; - ktime_t cur, start; u64 block_ns; DECLARE_SWAITQUEUE(wait); /* Poll for pending exceptions and ceded state */ - cur = start = ktime_get(); + cur = start_poll = ktime_get(); if (vc->halt_poll_ns) { - ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns); + ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns); + ++vc->runner->stat.halt_attempted_poll; vc->vcore_state = VCORE_POLLING; spin_unlock(&vc->lock); @@ -2704,8 +2705,10 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; - if (!do_sleep) + if (!do_sleep) { + ++vc->runner->stat.halt_successful_poll; goto out; + } } prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); @@ -2713,9 +2716,14 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) if (kvmppc_vcore_check_block(vc)) { finish_swait(&vc->wq, &wait); do_sleep = 0; + /* If we polled, count this as a successful poll */ + if (vc->halt_poll_ns) + ++vc->runner->stat.halt_successful_poll; goto out; } + start_wait = ktime_get(); + vc->vcore_state = VCORE_SLEEPING; trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); @@ -2724,11 +2732,29 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); + ++vc->runner->stat.halt_successful_wait; cur = ktime_get(); out: - block_ns = ktime_to_ns(cur) - ktime_to_ns(start); + block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll); + + /* Attribute wait time */ + if (do_sleep) { + vc->runner->stat.halt_wait_ns += + ktime_to_ns(cur) - ktime_to_ns(start_wait); + /* Attribute failed poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_fail_ns += + ktime_to_ns(start_wait) - + ktime_to_ns(start_poll); + } else { + /* Attribute successful poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_success_ns += + ktime_to_ns(cur) - + ktime_to_ns(start_poll); + } /* Adjust poll time */ if (halt_poll_max_ns) { -- cgit v1.2.3 From 0eeede0c63305a33de31bd90b53b023c1d452c17 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 2 Sep 2016 17:20:43 +1000 Subject: powerpc/mm: Speed up computation of base and actual page size for a HPTE This replaces a 2-D search through an array with a simple 8-bit table lookup for determining the actual and/or base page size for a HPT entry. The encoding in the second doubleword of the HPTE is designed to encode the actual and base page sizes without using any more bits than would be needed for a 4k page number, by using between 1 and 8 low-order bits of the RPN (real page number) field to encode the page sizes. A single "large page" bit in the first doubleword indicates that these low-order bits are to be interpreted like this. We can determine the page sizes by using the low-order 8 bits of the RPN to look up a 256-entry table. For actual page sizes less than 1MB, some of the upper bits of these 8 bits are going to be real address bits, but we can cope with that by replicating the entries for those smaller page sizes. While we're at it, let's move the hpte_page_size() and hpte_base_page_size() functions from a KVM-specific header to a header for 64-bit HPT systems, since this computation doesn't have anything specifically to do with KVM. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 37 ++++++++++++ arch/powerpc/include/asm/kvm_book3s_64.h | 87 +++------------------------ arch/powerpc/include/asm/mmu.h | 1 + arch/powerpc/mm/hash_native_64.c | 42 +------------ arch/powerpc/mm/hash_utils_64.c | 55 +++++++++++++++++ 5 files changed, 102 insertions(+), 120 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 287a656ceb57..e407af2b7333 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -244,6 +244,43 @@ static inline int segment_shift(int ssize) return SID_SHIFT_1T; } +/* + * This array is indexed by the LP field of the HPTE second dword. + * Since this field may contain some RPN bits, some entries are + * replicated so that we get the same value irrespective of RPN. + * The top 4 bits are the page size index (MMU_PAGE_*) for the + * actual page size, the bottom 4 bits are the base page size. + */ +extern u8 hpte_page_sizes[1 << LP_BITS]; + +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) +{ + unsigned int i, lp; + + if (!(h & HPTE_V_LARGE)) + return 1ul << 12; + + /* Look at the 8 bit LP value */ + lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); + i = hpte_page_sizes[lp]; + if (!i) + return 0; + if (!is_base_size) + i >>= 4; + return 1ul << mmu_psize_defs[i & 0xf].shift; +} + +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + /* * The current system page and segment sizes */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 88d17b4ea9c8..4ffd5a1e788d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -20,6 +20,8 @@ #ifndef __ASM_KVM_BOOK3S_64_H__ #define __ASM_KVM_BOOK3S_64_H__ +#include + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) { @@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v) hpte[0] = cpu_to_be64(hpte_v); } -static inline int __hpte_actual_psize(unsigned int lp, int psize) -{ - int i, shift; - unsigned int mask; - - /* start from 1 ignoring MMU_PAGE_4K */ - for (i = 1; i < MMU_PAGE_COUNT; i++) { - - /* invalid penc */ - if (mmu_psize_defs[psize].penc[i] == -1) - continue; - /* - * encoding bits per actual page size - * PTE LP actual page size - * rrrr rrrz >=8KB - * rrrr rrzz >=16KB - * rrrr rzzz >=32KB - * rrrr zzzz >=64KB - * ....... - */ - shift = mmu_psize_defs[i].shift - LP_SHIFT; - if (shift > LP_BITS) - shift = LP_BITS; - mask = (1 << shift) - 1; - if ((lp & mask) == mmu_psize_defs[psize].penc[i]) - return i; - } - return -1; -} - static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; + int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); if (v & HPTE_V_LARGE) { - for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { - - /* valid entries have a shift value */ - if (!mmu_psize_defs[b_psize].shift) - continue; - - a_psize = __hpte_actual_psize(lp, b_psize); - if (a_psize != -1) - break; - } + i = hpte_page_sizes[lp]; + b_psize = i & 0xf; + a_psize = i >> 4; } + /* * Ignore the top 14 bits of va * v have top two bits covering segment size, hence move @@ -215,45 +181,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, - bool is_base_size) -{ - - int size, a_psize; - /* Look at the 8 bit LP value */ - unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); - - /* only handle 4k, 64k and 16M pages for now */ - if (!(h & HPTE_V_LARGE)) - return 1ul << 12; - else { - for (size = 0; size < MMU_PAGE_COUNT; size++) { - /* valid entries have a shift value */ - if (!mmu_psize_defs[size].shift) - continue; - - a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) { - if (is_base_size) - return 1ul << mmu_psize_defs[size].shift; - return 1ul << mmu_psize_defs[a_psize].shift; - } - } - - } - return 0; -} - -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) -{ - return __hpte_page_size(h, l, 0); -} - -static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) -{ - return __hpte_page_size(h, l, 1); -} - static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e2fb408f8398..b78e8d3377f6 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -271,6 +271,7 @@ static inline bool early_radix_enabled(void) #define MMU_PAGE_16G 13 #define MMU_PAGE_64G 14 +/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */ #define MMU_PAGE_COUNT 15 #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 0e4e9654bd2c..83ddc0e171b0 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid, } #endif -static inline int __hpte_actual_psize(unsigned int lp, int psize) -{ - int i, shift; - unsigned int mask; - - /* start from 1 ignoring MMU_PAGE_4K */ - for (i = 1; i < MMU_PAGE_COUNT; i++) { - - /* invalid penc */ - if (mmu_psize_defs[psize].penc[i] == -1) - continue; - /* - * encoding bits per actual page size - * PTE LP actual page size - * rrrr rrrz >=8KB - * rrrr rrzz >=16KB - * rrrr rzzz >=32KB - * rrrr zzzz >=64KB - * ....... - */ - shift = mmu_psize_defs[i].shift - LP_SHIFT; - if (shift > LP_BITS) - shift = LP_BITS; - mask = (1 << shift) - 1; - if ((lp & mask) == mmu_psize_defs[psize].penc[i]) - return i; - } - return -1; -} - static void hpte_decode(struct hash_pte *hpte, unsigned long slot, int *psize, int *apsize, int *ssize, unsigned long *vpn) { @@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; } else { - for (size = 0; size < MMU_PAGE_COUNT; size++) { - - /* valid entries have a shift value */ - if (!mmu_psize_defs[size].shift) - continue; - - a_size = __hpte_actual_psize(lp, size); - if (a_size != -1) - break; - } + size = hpte_page_sizes[lp] & 0xf; + a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ if (cpu_has_feature(CPU_FTR_ARCH_300)) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0821556e16f4..ef3ae891a3db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -93,6 +93,9 @@ static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; EXPORT_SYMBOL_GPL(mmu_psize_defs); +u8 hpte_page_sizes[1 << LP_BITS]; +EXPORT_SYMBOL_GPL(hpte_page_sizes); + struct hash_pte *htab_address; unsigned long htab_size_bytes; unsigned long htab_hash_mask; @@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void) #endif /* CONFIG_HUGETLB_PAGE */ } +/* + * Fill in the hpte_page_sizes[] array. + * We go through the mmu_psize_defs[] array looking for all the + * supported base/actual page size combinations. Each combination + * has a unique pagesize encoding (penc) value in the low bits of + * the LP field of the HPTE. For actual page sizes less than 1MB, + * some of the upper LP bits are used for RPN bits, meaning that + * we need to fill in several entries in hpte_page_sizes[]. + * + * In diagrammatic form, with r = RPN bits and z = page size bits: + * PTE LP actual page size + * rrrr rrrz >=8KB + * rrrr rrzz >=16KB + * rrrr rzzz >=32KB + * rrrr zzzz >=64KB + * ... + * + * The zzzz bits are implementation-specific but are chosen so that + * no encoding for a larger page size uses the same value in its + * low-order N bits as the encoding for the 2^(12+N) byte page size + * (if it exists). + */ +static void init_hpte_page_sizes(void) +{ + long int ap, bp; + long int shift, penc; + + for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) { + if (!mmu_psize_defs[bp].shift) + continue; /* not a supported page size */ + for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) { + penc = mmu_psize_defs[bp].penc[ap]; + if (penc == -1) + continue; + shift = mmu_psize_defs[ap].shift - LP_SHIFT; + if (shift <= 0) + continue; /* should never happen */ + /* + * For page sizes less than 1MB, this loop + * replicates the entry for all possible values + * of the rrrr bits. + */ + while (penc < (1 << LP_BITS)) { + hpte_page_sizes[penc] = (ap << 4) | bp; + penc += 1 << shift; + } + } + } +} + static void __init htab_init_page_sizes(void) { + init_hpte_page_sizes(); + if (!debug_pagealloc_enabled()) { /* * Pick a size for the linear mapping. Currently, we only -- cgit v1.2.3 From 07b1fdf5bd135d94eff2b3a6849b90c358963066 Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:45 +1000 Subject: powerpc: Add simple cache inhibited MMIO accessors Add simple cache inhibited accessors for memory mapped I/O. Unlike the accessors built from the DEF_MMIO_* macros, these don't include any hardware memory barriers, callers need to manage memory barriers on their own. These can only be called in hypervisor real mode. Signed-off-by: Suresh Warrier [paulus@ozlabs.org - added line to comment] Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/io.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 2fd1690b79d2..f6fda8482f60 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val) #endif #endif /* __powerpc64__ */ + +/* + * Simple Cache inhibited accessors + * Unlike the DEF_MMIO_* macros, these don't include any h/w memory + * barriers, callers need to manage memory barriers on their own. + * These can only be used in hypervisor real mode. + */ + +static inline u32 _lwzcix(unsigned long addr) +{ + u32 ret; + + __asm__ __volatile__("lwzcix %0,0, %1" + : "=r" (ret) : "r" (addr) : "memory"); + return ret; +} + +static inline void _stbcix(u64 addr, u8 val) +{ + __asm__ __volatile__("stbcix %0,0,%1" + : : "r" (val), "r" (addr) : "memory"); +} + +static inline void _stwcix(u64 addr, u32 val) +{ + __asm__ __volatile__("stwcix %0,0,%1" + : : "r" (val), "r" (addr) : "memory"); +} + /* * Low level IO stream instructions are defined out of line for now */ -- cgit v1.2.3 From 4ee11c1a9f7cc20026bb66ac624533310a605312 Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:49 +1000 Subject: powerpc/powernv: Provide facilities for EOI, usable from real mode This adds a new function pnv_opal_pci_msi_eoi() which does the part of end-of-interrupt (EOI) handling of an MSI which involves doing an OPAL call. This function can be called in real mode. This doesn't just export pnv_ioda2_msi_eoi() because that does a call to icp_native_eoi(), which does not work in real mode. This also adds a function, is_pnv_opal_msi(), which KVM can call to check whether an interrupt is one for which we should be calling pnv_opal_pci_msi_eoi() when we need to do an EOI. [paulus@ozlabs.org - split out the addition of pnv_opal_pci_msi_eoi() from Suresh's patch "KVM: PPC: Book3S HV: Handle passthrough interrupts in guest"; added is_pnv_opal_msi(); wrote description.] Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/pnv-pci.h | 3 +++ arch/powerpc/platforms/powernv/pci-ioda.c | 24 ++++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 0cbd8134ce81..1b46b52d3212 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); int pnv_cxl_get_irq_count(struct pci_dev *dev); struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); +int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq); +bool is_pnv_opal_msi(struct irq_chip *chip); #ifdef CONFIG_CXL_BASE int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fd9444f9fb0c..9ce48ae55062 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2710,15 +2710,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } #ifdef CONFIG_PCI_MSI -static void pnv_ioda2_msi_eoi(struct irq_data *d) +int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct irq_chip *chip = irq_data_get_irq_chip(d); struct pnv_phb *phb = container_of(chip, struct pnv_phb, ioda.irq_chip); + + return opal_pci_msi_eoi(phb->opal_id, hw_irq); +} + +static void pnv_ioda2_msi_eoi(struct irq_data *d) +{ int64_t rc; + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct irq_chip *chip = irq_data_get_irq_chip(d); - rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); + rc = pnv_opal_pci_msi_eoi(chip, hw_irq); WARN_ON_ONCE(rc); icp_native_eoi(d); @@ -2748,6 +2754,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } +/* + * Returns true iff chip is something that we could call + * pnv_opal_pci_msi_eoi for. + */ +bool is_pnv_opal_msi(struct irq_chip *chip) +{ + return chip->irq_eoi == pnv_ioda2_msi_eoi; +} +EXPORT_SYMBOL_GPL(is_pnv_opal_msi); + static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) -- cgit v1.2.3 From 3f2577749948803491874c6895fec559f3473eab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Aug 2016 15:07:43 +0200 Subject: powerpc: move hmi.c to arch/powerpc/kvm/ hmi.c functions are unused unless sibling_subcore_state is nonzero, and that in turn happens only if KVM is in use. So move the code to arch/powerpc/kvm/, putting it under CONFIG_KVM_BOOK3S_HV_POSSIBLE rather than CONFIG_PPC_BOOK3S_64. The sibling_subcore_state is also included in struct paca_struct only if KVM is supported by the kernel. Cc: Daniel Axtens Cc: Michael Ellerman Cc: Mahesh Salgaonkar Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Cc: kvm-ppc@vger.kernel.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hmi.h | 2 +- arch/powerpc/include/asm/paca.h | 12 +++++---- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/hmi.c | 56 ---------------------------------------- arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/book3s_hv_hmi.c | 56 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 63 deletions(-) delete mode 100644 arch/powerpc/kernel/hmi.c create mode 100644 arch/powerpc/kvm/book3s_hv_hmi.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 88b4901ac4ee..85b7a1a21e22 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -21,7 +21,7 @@ #ifndef __ASM_PPC64_HMI_H__ #define __ASM_PPC64_HMI_H__ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define CORE_TB_RESYNC_REQ_BIT 63 #define MAX_SUBCORE_PER_CORE 4 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 148303e7771f..6a6792bb39fb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -183,11 +183,6 @@ struct paca_struct { */ u16 in_mce; u8 hmi_event_available; /* HMI event is available */ - /* - * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for - * more details - */ - struct sibling_subcore_state *sibling_subcore_state; #endif /* Stuff for accurate time accounting */ @@ -202,6 +197,13 @@ struct paca_struct { struct kvmppc_book3s_shadow_vcpu shadow_vcpu; #endif struct kvmppc_host_state kvm_hstate; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for + * more details + */ + struct sibling_subcore_state *sibling_subcore_state; +#endif #endif }; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b2027a5cf508..fe4c075bcf50 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o -obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c deleted file mode 100644 index e3f738eb1cac..000000000000 --- a/arch/powerpc/kernel/hmi.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Hypervisor Maintenance Interrupt (HMI) handling. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. - * - * Copyright 2015 IBM Corporation - * Author: Mahesh Salgaonkar - */ - -#undef DEBUG - -#include -#include -#include -#include - -void wait_for_subcore_guest_exit(void) -{ - int i; - - /* - * NULL bitmap pointer indicates that KVM module hasn't - * been loaded yet and hence no guests are running. - * If no KVM is in use, no need to co-ordinate among threads - * as all of them will always be in host and no one is going - * to modify TB other than the opal hmi handler. - * Hence, just return from here. - */ - if (!local_paca->sibling_subcore_state) - return; - - for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) - while (local_paca->sibling_subcore_state->in_guest[i]) - cpu_relax(); -} - -void wait_for_tb_resync(void) -{ - if (!local_paca->sibling_subcore_state) - return; - - while (test_bit(CORE_TB_RESYNC_REQ_BIT, - &local_paca->sibling_subcore_state->flags)) - cpu_relax(); -} diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1f9e5529e692..855d4b95d752 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -78,6 +78,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + book3s_hv_hmi.o \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c new file mode 100644 index 000000000000..e3f738eb1cac --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -0,0 +1,56 @@ +/* + * Hypervisor Maintenance Interrupt (HMI) handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + * + * Copyright 2015 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG + +#include +#include +#include +#include + +void wait_for_subcore_guest_exit(void) +{ + int i; + + /* + * NULL bitmap pointer indicates that KVM module hasn't + * been loaded yet and hence no guests are running. + * If no KVM is in use, no need to co-ordinate among threads + * as all of them will always be in host and no one is going + * to modify TB other than the opal hmi handler. + * Hence, just return from here. + */ + if (!local_paca->sibling_subcore_state) + return; + + for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) + while (local_paca->sibling_subcore_state->in_guest[i]) + cpu_relax(); +} + +void wait_for_tb_resync(void) +{ + if (!local_paca->sibling_subcore_state) + return; + + while (test_bit(CORE_TB_RESYNC_REQ_BIT, + &local_paca->sibling_subcore_state->flags)) + cpu_relax(); +} -- cgit v1.2.3 From 9576730d0e6e301343c5aead5418ad53fcecfd14 Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:47 +1000 Subject: KVM: PPC: select IRQ_BYPASS_MANAGER Select IRQ_BYPASS_MANAGER for PPC when CONFIG_KVM is set. Add the PPC producer functions for add and del producer. [paulus@ozlabs.org - Moved new functions from book3s.c to powerpc.c so booke compiles; added kvm_arch_has_irq_bypass implementation.] Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 4 ++++ arch/powerpc/kvm/Kconfig | 2 ++ arch/powerpc/kvm/powerpc.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2544edabe7f3..94715e22d6a2 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -287,6 +287,10 @@ struct kvmppc_ops { long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, unsigned long arg); int (*hcall_implemented)(unsigned long hcall); + int (*irq_bypass_add_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*irq_bypass_del_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index c0a2478a8a2c..029be26b5a17 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -23,6 +23,8 @@ config KVM select HAVE_KVM_EVENTFD select SRCU select KVM_VFIO + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS config KVM_BOOK3S_HANDLER bool diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ab2b3d544937..0b7d66422650 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -739,6 +741,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } +/* + * irq_bypass_add_producer and irq_bypass_del_producer are only + * useful if the architecture supports PCI passthrough. + * irq_bypass_stop and irq_bypass_start are not needed and so + * kvm_ops are not defined for them. + */ +bool kvm_arch_has_irq_bypass(void) +{ + return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) || + (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer)); +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; + + if (kvm->arch.kvm_ops->irq_bypass_add_producer) + return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod); + + return 0; +} + +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; + + if (kvm->arch.kvm_ops->irq_bypass_del_producer) + kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod); +} + static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { -- cgit v1.2.3 From 8daaafc88b46fb3af952e92d7c2816a8950e1363 Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:48 +1000 Subject: KVM: PPC: Book3S HV: Introduce kvmppc_passthru_irqmap This patch introduces an IRQ mapping structure, the kvmppc_passthru_irqmap structure that is to be used to map the real hardware IRQ in the host with the virtual hardware IRQ (gsi) that is injected into a guest by KVM for passthrough adapters. Currently, we assume a separate IRQ mapping structure for each guest. Each kvmppc_passthru_irqmap has a mapping arrays, containing all defined real<->virtual IRQs. [paulus@ozlabs.org - removed irq_chip field from struct kvmppc_passthru_irqmap; changed parameter for kvmppc_get_passthru_irqmap from struct kvm_vcpu * to struct kvm *, removed small cached array.] Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 17 +++++++++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 14 ++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 13 +++++++++++++ 3 files changed, 44 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 373003f4551d..89ac1f6c2cb2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -203,6 +203,8 @@ struct kvmppc_spapr_tce_table { struct kvmppc_xics; struct kvmppc_icp; +struct kvmppc_passthru_irqmap; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -273,6 +275,7 @@ struct kvm_arch { #endif #ifdef CONFIG_KVM_XICS struct kvmppc_xics *xics; + struct kvmppc_passthru_irqmap *pimap; #endif struct kvmppc_ops *kvm_ops; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -369,6 +372,20 @@ struct kvmhv_tb_accumulator { u64 tb_max; /* max time */ }; +#ifdef CONFIG_PPC_BOOK3S_64 +struct kvmppc_irq_map { + u32 r_hwirq; + u32 v_hwirq; + struct irq_desc *desc; +}; + +#define KVMPPC_PIRQ_MAPPED 1024 +struct kvmppc_passthru_irqmap { + int n_mapped; + struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED]; +}; +#endif + # ifdef CONFIG_PPC_FSL_BOOK3E #define KVMPPC_BOOKE_IAC_NUM 2 #define KVMPPC_BOOKE_DAC_NUM 2 diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 94715e22d6a2..4ca2ba36a860 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -457,8 +457,18 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; } + +static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( + struct kvm *kvm) +{ + if (kvm) + return kvm->arch.pimap; + return NULL; +} + extern void kvmppc_alloc_host_rm_ops(void); extern void kvmppc_free_host_rm_ops(void); +extern void kvmppc_free_pimap(struct kvm *kvm); extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); @@ -470,8 +480,12 @@ extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, extern void kvmppc_xics_ipi_action(void); extern int h_ipi_redirect; #else +static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( + struct kvm *kvm) + { return NULL; } static inline void kvmppc_alloc_host_rm_ops(void) {}; static inline void kvmppc_free_host_rm_ops(void) {}; +static inline void kvmppc_free_pimap(struct kvm *kvm) {}; static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return 0; } static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 30ff8ab5aba1..d5c7061b9045 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3412,6 +3412,19 @@ static int kvmppc_core_check_processor_compat_hv(void) return 0; } +#ifdef CONFIG_KVM_XICS + +void kvmppc_free_pimap(struct kvm *kvm) +{ + kfree(kvm->arch.pimap); +} + +struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void) +{ + return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL); +} +#endif + static long kvm_arch_vm_ioctl_hv(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v1.2.3 From e3c13e56a4717ee334837a20c596e527eb6355e1 Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:51 +1000 Subject: KVM: PPC: Book3S HV: Handle passthrough interrupts in guest Currently, KVM switches back to the host to handle any external interrupt (when the interrupt is received while running in the guest). This patch updates real-mode KVM to check if an interrupt is generated by a passthrough adapter that is owned by this guest. If so, the real mode KVM will directly inject the corresponding virtual interrupt to the guest VCPU's ICS and also EOI the interrupt in hardware. In short, the interrupt is handled entirely in real mode in the guest context without switching back to the host. In some rare cases, the interrupt cannot be completely handled in real mode, for instance, a VCPU that is sleeping needs to be woken up. In this case, KVM simply switches back to the host with trap reason set to 0x500. This works, but it is clearly not very efficient. A following patch will distinguish this case and handle it correctly in the host. Note that we can use the existing check_too_hard() routine even though we are not in a hypercall to determine if there is unfinished business that needs to be completed in host virtual mode. The patch assumes that the mapping between hardware interrupt IRQ and virtual IRQ to be injected to the guest already exists for the PCI passthrough interrupts that need to be handled in real mode. If the mapping does not exist, KVM falls back to the default existing behavior. The KVM real mode code reads mappings from the mapped array in the passthrough IRQ map without taking any lock. We carefully order the loads and stores of the fields in the kvmppc_irq_map data structure using memory barriers to avoid an inconsistent mapping being seen by the reader. Thus, although it is possible to miss a map entry, it is not possible to read a stale value. [paulus@ozlabs.org - get irq_chip from irq_map rather than pimap, pulled out powernv eoi change into a separate patch, made kvmppc_read_intr get the vcpu from the paca rather than being passed in, rewrote the logic at the end of kvmppc_read_intr to avoid deep indentation, simplified logic in book3s_hv_rmhandlers.S since we were always restoring SRR0/1 anyway, get rid of the cached array (just use the mapped array), removed the kick_all_cpus_sync() call, clear saved_xirr PACA field when we handle the interrupt in real mode, fix compilation with CONFIG_KVM_XICS=n.] Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 3 ++ arch/powerpc/kvm/book3s_hv.c | 8 +++- arch/powerpc/kvm/book3s_hv_builtin.c | 73 ++++++++++++++++++++++++++++++++- arch/powerpc/kvm/book3s_hv_rm_xics.c | 44 ++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++ 5 files changed, 132 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 4ca2ba36a860..4299a1f79a91 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -478,6 +478,9 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); extern void kvmppc_xics_ipi_action(void); +extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap); extern int h_ipi_redirect; #else static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a7fe1783d245..a393c2b6b1b6 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3490,9 +3490,15 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) irq_map = &pimap->mapped[i]; irq_map->v_hwirq = guest_gsi; - irq_map->r_hwirq = desc->irq_data.hwirq; irq_map->desc = desc; + /* + * Order the above two stores before the next to serialize with + * the KVM real mode handler. + */ + smp_wmb(); + irq_map->r_hwirq = desc->irq_data.hwirq; + if (i == pimap->n_mapped) pimap->n_mapped++; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b476a6af893f..7531e29f90bd 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -288,12 +288,83 @@ void kvmhv_commence_exit(int trap) struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv); +#ifdef CONFIG_KVM_XICS +static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap, + u32 xisr) +{ + int i; + + /* + * We access the mapped array here without a lock. That + * is safe because we never reduce the number of entries + * in the array and we never change the v_hwirq field of + * an entry once it is set. + * + * We have also carefully ordered the stores in the writer + * and the loads here in the reader, so that if we find a matching + * hwirq here, the associated GSI and irq_desc fields are valid. + */ + for (i = 0; i < pimap->n_mapped; i++) { + if (xisr == pimap->mapped[i].r_hwirq) { + /* + * Order subsequent reads in the caller to serialize + * with the writer. + */ + smp_rmb(); + return &pimap->mapped[i]; + } + } + return NULL; +} + +/* + * If we have an interrupt that's not an IPI, check if we have a + * passthrough adapter and if so, check if this external interrupt + * is for the adapter. + * We will attempt to deliver the IRQ directly to the target VCPU's + * ICP, the virtual ICP (based on affinity - the xive value in ICS). + * + * If the delivery fails or if this is not for a passthrough adapter, + * return to the host to handle this interrupt. We earlier + * saved a copy of the XIRR in the PACA, it will be picked up by + * the host ICP driver. + */ +static int kvmppc_check_passthru(u32 xisr, __be32 xirr) +{ + struct kvmppc_passthru_irqmap *pimap; + struct kvmppc_irq_map *irq_map; + struct kvm_vcpu *vcpu; + + vcpu = local_paca->kvm_hstate.kvm_vcpu; + if (!vcpu) + return 1; + pimap = kvmppc_get_passthru_irqmap(vcpu->kvm); + if (!pimap) + return 1; + irq_map = get_irqmap(pimap, xisr); + if (!irq_map) + return 1; + + /* We're handling this interrupt, generic code doesn't need to */ + local_paca->kvm_hstate.saved_xirr = 0; + + return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); +} + +#else +static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) +{ + return 1; +} +#endif + /* * Determine what sort of external interrupt is pending (if any). * Returns: * 0 if no interrupt is pending * 1 if an interrupt is pending that needs to be handled by the host * -1 if there was a guest wakeup IPI (which has now been cleared) + * -2 if there is PCI passthrough external interrupt that was handled */ long kvmppc_read_intr(void) @@ -368,5 +439,5 @@ long kvmppc_read_intr(void) return -1; } - return 1; + return kvmppc_check_passthru(xisr, xirr); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 980d8a6f7284..17f5b851db8c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "book3s_xics.h" @@ -712,6 +713,49 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) return check_too_hard(xics, icp); } +unsigned long eoi_rc; + +static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) +{ + unsigned long xics_phys; + int64_t rc; + + rc = pnv_opal_pci_msi_eoi(c, hwirq); + + if (rc) + eoi_rc = rc; + + iosync(); + + /* EOI it */ + xics_phys = local_paca->kvm_hstate.xics_phys; + _stwcix(xics_phys + XICS_XIRR, xirr); +} + +long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, + u32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap) +{ + struct kvmppc_xics *xics; + struct kvmppc_icp *icp; + u32 irq; + + irq = irq_map->v_hwirq; + xics = vcpu->kvm->arch.xics; + icp = vcpu->arch.icp; + + icp_rm_deliver_irq(xics, icp, irq); + + /* EOI the interrupt */ + icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); + + if (check_too_hard(xics, icp) == H_TOO_HARD) + return 1; + else + return -2; +} + /* --- Non-real mode XICS-related built-in routines --- */ /** diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index dccfa85d35df..12fb2afb23fc 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1198,6 +1198,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * -1 A guest wakeup IPI (which has now been cleared) * In either case, we return to guest to deliver any pending * guest interrupts. + * + * -2 A PCI passthrough external interrupt was handled + * (interrupt was delivered directly to guest) + * Return to guest to deliver any pending guest interrupts. */ cmpdi r3, 0 @@ -2352,6 +2356,8 @@ machine_check_realmode: * 0 if nothing needs to be done * 1 if something happened that needs to be handled by the host * -1 if there was a guest wakeup (IPI or msgsnd) + * -2 if we handled a PCI passthrough interrupt (returned by + * kvmppc_read_intr only) * * Also sets r12 to the interrupt vector for any interrupt that needs * to be handled now by the host (0x500 for external interrupt), or zero. -- cgit v1.2.3 From f7af5209b87c592aad81da65bd104241aa43d36a Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:52 +1000 Subject: KVM: PPC: Book3S HV: Complete passthrough interrupt in host In existing real mode ICP code, when updating the virtual ICP state, if there is a required action that cannot be completely handled in real mode, as for instance, a VCPU needs to be woken up, flags are set in the ICP to indicate the required action. This is checked when returning from hypercalls to decide whether the call needs switch back to the host where the action can be performed in virtual mode. Note that if h_ipi_redirect is enabled, real mode code will first try to message a free host CPU to complete this job instead of returning the host to do it ourselves. Currently, the real mode PCI passthrough interrupt handling code checks if any of these flags are set and simply returns to the host. This is not good enough as the trap value (0x500) is treated as an external interrupt by the host code. It is only when the trap value is a hypercall that the host code searches for and acts on unfinished work by calling kvmppc_xics_rm_complete. This patch introduces a special trap BOOK3S_INTERRUPT_HV_RM_HARD which is returned by KVM if there is unfinished business to be completed in host virtual mode after handling a PCI passthrough interrupt. The host checks for this special interrupt condition and calls into the kvmppc_xics_rm_complete, which is made an exported function for this reason. [paulus@ozlabs.org - moved logic to set r12 to BOOK3S_INTERRUPT_HV_RM_HARD in book3s_hv_rmhandlers.S into the end of kvmppc_check_wake_reason.] Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_asm.h | 10 ++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 3 +++ arch/powerpc/kvm/book3s_hv.c | 8 +++++++- arch/powerpc/kvm/book3s_hv_builtin.c | 1 + arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 25 +++++++++++++++++++++++++ arch/powerpc/kvm/book3s_xics.c | 3 ++- 7 files changed, 49 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 5bca220bbb60..05cabed3d1bd 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -105,6 +105,15 @@ #define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60 #define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80 +/* book3s_hv */ + +/* + * Special trap used to indicate to host that this is a + * passthrough interrupt that could not be handled + * completely in the guest. + */ +#define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555 + #define BOOK3S_IRQPRIO_SYSTEM_RESET 0 #define BOOK3S_IRQPRIO_DATA_SEGMENT 1 #define BOOK3S_IRQPRIO_INST_SEGMENT 2 @@ -136,6 +145,7 @@ #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ #define RESUME_FLAG_ARCH1 (1<<2) +#define RESUME_FLAG_ARCH2 (1<<3) #define RESUME_GUEST 0 #define RESUME_GUEST_NV RESUME_FLAG_NV diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 4299a1f79a91..e0ada3138649 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -469,6 +469,7 @@ static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( extern void kvmppc_alloc_host_rm_ops(void); extern void kvmppc_free_host_rm_ops(void); extern void kvmppc_free_pimap(struct kvm *kvm); +extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall); extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); @@ -489,6 +490,8 @@ static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( static inline void kvmppc_alloc_host_rm_ops(void) {}; static inline void kvmppc_free_host_rm_ops(void) {}; static inline void kvmppc_free_pimap(struct kvm *kvm) {}; +static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) + { return 0; } static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return 0; } static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a393c2b6b1b6..90beb96a0df6 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -74,6 +74,8 @@ /* Used to indicate that a guest page fault needs to be handled */ #define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) +/* Used to indicate that a guest passthrough interrupt needs to be handled */ +#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2) /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) @@ -1032,6 +1034,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_HV_RM_HARD: + r = RESUME_PASSTHROUGH; + break; default: kvmppc_dump_regs(vcpu); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", @@ -2951,7 +2956,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); - } + } else if (r == RESUME_PASSTHROUGH) + r = kvmppc_xics_rm_complete(vcpu, 0); } while (is_kvmppc_resume_guest(r)); out: diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7531e29f90bd..0c84d6bc8356 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -363,6 +363,7 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) * Returns: * 0 if no interrupt is pending * 1 if an interrupt is pending that needs to be handled by the host + * 2 Passthrough that needs completion in the host * -1 if there was a guest wakeup IPI (which has now been cleared) * -2 if there is PCI passthrough external interrupt that was handled */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 17f5b851db8c..3b8d7ac0be7c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -751,7 +751,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); if (check_too_hard(xics, icp) == H_TOO_HARD) - return 1; + return 2; else return -2; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 12fb2afb23fc..7cc924b5eea2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1189,6 +1189,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * 1 An interrupt is pending that needs to be handled by the host * Exit guest and return to host by branching to guest_exit_cont * + * 2 Passthrough that needs completion in the host + * Exit guest and return to host by branching to guest_exit_cont + * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD + * to indicate to the host to complete handling the interrupt + * * Before returning to guest, we check if any CPU is heading out * to the host and if so, we head out also. If no CPUs are heading * check return values <= 0. @@ -1204,6 +1209,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * Return to guest to deliver any pending guest interrupts. */ + cmpdi r3, 1 + ble 1f + + /* Return code = 2 */ + li r12, BOOK3S_INTERRUPT_HV_RM_HARD + stw r12, VCPU_TRAP(r9) + b guest_exit_cont + +1: /* Return code <= 1 */ cmpdi r3, 0 bgt guest_exit_cont @@ -2419,6 +2433,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) bl kvmppc_read_intr nop li r12, BOOK3S_INTERRUPT_EXTERNAL + cmpdi r3, 1 + ble 1f + + /* + * Return code of 2 means PCI passthrough interrupt, but + * we need to return back to host to complete handling the + * interrupt. Trap reason is expected in r12 by guest + * exit code. + */ + li r12, BOOK3S_INTERRUPT_HV_RM_HARD +1: ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1) addi r1, r1, PPC_MIN_STKFRM mtlr r0 diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 686147efbac5..4edbe7b93eb5 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -812,7 +812,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) return H_SUCCESS; } -static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) +int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) { struct kvmppc_xics *xics = vcpu->kvm->arch.xics; struct kvmppc_icp *icp = vcpu->arch.icp; @@ -841,6 +841,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) return H_SUCCESS; } +EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete); int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) { -- cgit v1.2.3 From 644abbb254b1ab171f777431b23e6fb5879599d0 Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:54 +1000 Subject: KVM: PPC: Book3S HV: Tunable to disable KVM IRQ bypass Add a module parameter kvm_irq_bypass for kvm_hv.ko to disable IRQ bypass for passthrough interrupts. The default value of this tunable is 1 - that is enable the feature. Since the tunable is used by built-in kernel code, we use the module_param_cb macro to achieve this. Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_hv.c | 10 ++++++++++ arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 ++ 4 files changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index c261f52f6a55..cef2b892245c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -227,6 +227,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, struct kvmppc_book3s_shadow_vcpu *svcpu); +extern int kvm_irq_bypass; static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e0ada3138649..97b9bad9ec49 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -461,7 +461,7 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( struct kvm *kvm) { - if (kvm) + if (kvm && kvm_irq_bypass) return kvm->arch.pimap; return NULL; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 90beb96a0df6..03721ed63131 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -95,6 +95,10 @@ static struct kernel_param_ops module_param_ops = { .get = param_get_int, }; +module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization"); + module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); @@ -3443,6 +3447,9 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) struct irq_chip *chip; int i; + if (!kvm_irq_bypass) + return 1; + desc = irq_to_desc(host_irq); if (!desc) return -EIO; @@ -3519,6 +3526,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) struct kvmppc_passthru_irqmap *pimap; int i; + if (!kvm_irq_bypass) + return 0; + desc = irq_to_desc(host_irq); if (!desc) return -EIO; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 3b8d7ac0be7c..00b9dfde9ac4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -27,6 +27,8 @@ int h_ipi_redirect = 1; EXPORT_SYMBOL(h_ipi_redirect); +int kvm_irq_bypass = 1; +EXPORT_SYMBOL(kvm_irq_bypass); static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); -- cgit v1.2.3 From 5d375199ea963fa2a972eae9c7d83db36ed37082 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 19 Aug 2016 15:35:56 +1000 Subject: KVM: PPC: Book3S HV: Set server for passed-through interrupts When a guest has a PCI pass-through device with an interrupt, it will direct the interrupt to a particular guest VCPU. In fact the physical interrupt might arrive on any CPU, and then get delivered to the target VCPU in the emulated XICS (guest interrupt controller), and eventually delivered to the target VCPU. Now that we have code to handle device interrupts in real mode without exiting to the host kernel, there is an advantage to having the device interrupt arrive on the same sub(core) as the target VCPU is running on. In this situation, the interrupt can be delivered to the target VCPU without any exit to the host kernel (using a hypervisor doorbell interrupt between threads if necessary). This patch aims to get passed-through device interrupts arriving on the correct core by setting the interrupt server in the real hardware XICS for the interrupt to the first thread in the (sub)core where its target VCPU is running. We do this in the real-mode H_EOI code because the H_EOI handler already needs to look at the emulated ICS state for the interrupt (whereas the H_XIRR handler doesn't), and we know we are running in the target VCPU context at that point. We set the server CPU in hardware using an OPAL call, regardless of what the IRQ affinity mask for the interrupt says, and without updating the affinity mask. This amounts to saying that when an interrupt is passed through to a guest, as a matter of policy we allow the guest's affinity for the interrupt to override the host's. This is inspired by an earlier patch from Suresh Warrier, although none of this code came from that earlier patch. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 4 +++ arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/kvm/book3s_hv.c | 4 +++ arch/powerpc/kvm/book3s_hv_rm_xics.c | 16 ++++++++++++ arch/powerpc/kvm/book3s_xics.c | 35 ++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_xics.h | 2 ++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 7 files changed, 63 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 97b9bad9ec49..f6e49640dbe1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -479,6 +479,10 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); extern void kvmppc_xics_ipi_action(void); +extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq, + unsigned long host_irq); +extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq, + unsigned long host_irq); extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, struct kvmppc_irq_map *irq_map, struct kvmppc_passthru_irqmap *pimap); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index ee05bd203630..e958b7096f19 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -67,6 +67,7 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func, int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func, uint64_t offset, uint32_t data); int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority); +int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority); int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority); int64_t opal_register_exception_handler(uint64_t opal_exception, uint64_t handler_address, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 03721ed63131..9b3bba643b43 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3515,6 +3515,8 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) if (i == pimap->n_mapped) pimap->n_mapped++; + kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); + mutex_unlock(&kvm->lock); return 0; @@ -3551,6 +3553,8 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) return -ENODEV; } + kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); + /* invalidate the entry */ pimap->mapped[i].r_hwirq = 0; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 554cdfa9aeb3..5f7527ec4ad5 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "book3s_xics.h" @@ -34,6 +35,7 @@ EXPORT_SYMBOL(kvm_irq_bypass); static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); +static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu); /* -- ICS routines -- */ static void ics_rm_check_resend(struct kvmppc_xics *xics, @@ -713,6 +715,13 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) icp->rm_action |= XICS_RM_NOTIFY_EOI; icp->rm_eoied_irq = irq; } + + if (state->host_irq && state->intr_cpu != -1) { + int pcpu = cpu_first_thread_sibling(raw_smp_processor_id()); + if (state->intr_cpu != pcpu) + xics_opal_rm_set_server(state->host_irq, pcpu); + state->intr_cpu = -1; + } bail: return check_too_hard(xics, icp); } @@ -736,6 +745,13 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) _stwcix(xics_phys + XICS_XIRR, xirr); } +static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) +{ + unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2; + + return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); +} + /* * Increment a per-CPU 32-bit unsigned integer variable. * Safe to call in real-mode. Handles vmalloc'ed addresses diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index be5179c97a8f..3bdc639157c1 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) return 0; } + /* Record which CPU this arrived on for passed-through interrupts */ + if (state->host_irq) + state->intr_cpu = raw_smp_processor_id(); + /* Attempt delivery */ icp_deliver_irq(xics, NULL, irq); @@ -1438,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { return pin; } + +void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq, + unsigned long host_irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + u16 idx; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return; + + ics->irq_state[idx].host_irq = host_irq; + ics->irq_state[idx].intr_cpu = -1; +} +EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped); + +void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq, + unsigned long host_irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + u16 idx; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return; + + ics->irq_state[idx].host_irq = 0; +} +EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped); diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index a46b954055c4..2a50320b55ca 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -42,6 +42,8 @@ struct ics_irq_state { u8 lsi; /* level-sensitive interrupt */ u8 asserted; /* Only for LSI */ u8 exists; + int intr_cpu; + u32 host_irq; }; /* Atomic ICP state, updated with a single compare & swap */ diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3d29d40eb0e9..44d2d842cee7 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -208,6 +208,7 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE); OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); +OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE); OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); -- cgit v1.2.3 From 65e7026a6c90484fbaa076d2c51e61baf7241960 Mon Sep 17 00:00:00 2001 From: Suresh Warrier Date: Fri, 19 Aug 2016 15:35:57 +1000 Subject: KVM: PPC: Book3S HV: Counters for passthrough IRQ stats Add VCPU stat counters to track affinity for passthrough interrupts. pthru_all: Counts all passthrough interrupts whose IRQ mappings are in the kvmppc_passthru_irq_map structure. pthru_host: Counts all cached passthrough interrupts that were injected from the host through kvm_set_irq (i.e. not handled in real mode). pthru_bad_aff: Counts how many cached passthrough interrupts have bad affinity (receiving CPU is not running VCPU that is the target of the virtual interrupt in the guest). Signed-off-by: Suresh Warrier Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 3 +++ arch/powerpc/kvm/book3s.c | 3 +++ arch/powerpc/kvm/book3s_hv_rm_xics.c | 18 +++++++++++++----- 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 89ac1f6c2cb2..ed30d2ea21b7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -137,6 +137,9 @@ struct kvm_vcpu_stat { u64 ld_slow; u64 st_slow; #endif + u64 pthru_all; + u64 pthru_host; + u64 pthru_bad_aff; }; enum kvm_exit_types { diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 71eb8f3d3b54..ba231a1d43d4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -68,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "ld_slow", VCPU_STAT(ld_slow) }, { "st", VCPU_STAT(st) }, { "st_slow", VCPU_STAT(st_slow) }, + { "pthru_all", VCPU_STAT(pthru_all) }, + { "pthru_host", VCPU_STAT(pthru_host) }, + { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, { NULL } }; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 5f7527ec4ad5..82ff5de8b1e7 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -716,11 +716,19 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) icp->rm_eoied_irq = irq; } - if (state->host_irq && state->intr_cpu != -1) { - int pcpu = cpu_first_thread_sibling(raw_smp_processor_id()); - if (state->intr_cpu != pcpu) - xics_opal_rm_set_server(state->host_irq, pcpu); - state->intr_cpu = -1; + if (state->host_irq) { + ++vcpu->stat.pthru_all; + if (state->intr_cpu != -1) { + int pcpu = raw_smp_processor_id(); + + pcpu = cpu_first_thread_sibling(pcpu); + ++vcpu->stat.pthru_host; + if (state->intr_cpu != pcpu) { + ++vcpu->stat.pthru_bad_aff; + xics_opal_rm_set_server(state->host_irq, pcpu); + } + state->intr_cpu = -1; + } } bail: return check_too_hard(xics, icp); -- cgit v1.2.3 From 88b02cf97bb7e742db3e31671d54177e3e19fd89 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 15 Sep 2016 13:42:52 +1000 Subject: KVM: PPC: Book3S: Treat VTB as a per-subcore register, not per-thread POWER8 has one virtual timebase (VTB) register per subcore, not one per CPU thread. The HV KVM code currently treats VTB as a per-thread register, which can lead to spurious soft lockup messages from guests which use the VTB as the time source for the soft lockup detector. (CPUs before POWER8 did not have the VTB register.) For HV KVM, this fixes the problem by making only the primary thread in each virtual core save and restore the VTB value. With this, the VTB state becomes part of the kvmppc_vcore structure. This also means that "piggybacking" of multiple virtual cores onto one subcore is not possible on POWER8, because then the virtual cores would share a single VTB register. PR KVM emulates a VTB register, which is per-vcpu because PR KVM has no notion of CPU threads or SMT. For PR KVM we move the VTB state into the kvmppc_vcpu_book3s struct. Cc: stable@vger.kernel.org # v3.14+ Reported-by: Thomas Huth Tested-by: Thomas Huth Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/book3s.c | 6 ------ arch/powerpc/kvm/book3s_emulate.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 14 +++++++++++--- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 14 +++++++------- arch/powerpc/kvm/book3s_pr.c | 8 +++++++- 8 files changed, 29 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index cef2b892245c..5cf306ae0ac3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -101,6 +101,7 @@ struct kvmppc_vcore { u32 arch_compat; ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ + ulong vtb; /* virtual timebase */ ulong conferring_threads; unsigned int halt_poll_ns; }; @@ -119,6 +120,7 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; + u64 vtb; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ed30d2ea21b7..28350a294b1e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -475,7 +475,6 @@ struct kvm_vcpu_arch { ulong purr; ulong spurr; ulong ic; - ulong vtb; ulong dscr; ulong amr; ulong uamor; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b89d14c0352c..a51ae9b165e0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -506,7 +506,6 @@ int main(void) DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); - DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb)); DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); @@ -557,6 +556,7 @@ int main(void) DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); + DEFINE(VCORE_VTB, offsetof(struct kvmppc_vcore, vtb)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ba231a1d43d4..b6952dd23152 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -599,9 +599,6 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_BESCR: *val = get_reg_val(id, vcpu->arch.bescr); break; - case KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_IC: *val = get_reg_val(id, vcpu->arch.ic); break; @@ -673,9 +670,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_BESCR: vcpu->arch.bescr = set_reg_val(id, *val); break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_IC: vcpu->arch.ic = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 2afdb9c0937d..7d9e4ed2e415 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -579,7 +579,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = vcpu->arch.spurr; break; case SPRN_VTB: - *spr_val = vcpu->arch.vtb; + *spr_val = to_book3s(vcpu)->vtb; break; case SPRN_IC: *spr_val = vcpu->arch.ic; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9b3bba643b43..c4f8971c954d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1199,6 +1199,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vcore->vtb); + break; case KVM_REG_PPC_DAWR: *val = get_reg_val(id, vcpu->arch.dawr); break; @@ -1391,6 +1394,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vcore->vtb = set_reg_val(id, *val); + break; case KVM_REG_PPC_DAWR: vcpu->arch.dawr = set_reg_val(id, *val); break; @@ -2213,9 +2219,11 @@ static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, pvc->lpcr != vc->lpcr) return false; - /* P8 guest with > 1 thread per core would see wrong TIR value */ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - (vc->num_threads > 1 || pvc->num_threads > 1)) + /* + * P8 guests can't do piggybacking, because then the + * VTB would be shared between the vcpus. + */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) return false; n_thr = cip->subcore_threads[sub] + pvc->num_threads; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 7cc924b5eea2..c3c1d1bcfc67 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -644,9 +644,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) 38: BEGIN_FTR_SECTION - /* DPDES is shared between threads */ + /* DPDES and VTB are shared between threads */ ld r8, VCORE_DPDES(r5) + ld r7, VCORE_VTB(r5) mtspr SPRN_DPDES, r8 + mtspr SPRN_VTB, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Mark the subcore state as inside guest */ @@ -806,10 +808,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) - ld r6, VCPU_VTB(r4) - mtspr SPRN_IC, r5 - mtspr SPRN_VTB, r6 ld r8, VCPU_EBBHR(r4) + mtspr SPRN_IC, r5 mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) @@ -1334,10 +1334,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) stw r6, VCPU_PSPB(r9) std r7, VCPU_FSCR(r9) mfspr r5, SPRN_IC - mfspr r6, SPRN_VTB mfspr r7, SPRN_TAR std r5, VCPU_IC(r9) - std r6, VCPU_VTB(r9) std r7, VCPU_TAR(r9) mfspr r8, SPRN_EBBHR std r8, VCPU_EBBHR(r9) @@ -1564,9 +1562,11 @@ kvmhv_switch_to_host: isync BEGIN_FTR_SECTION - /* DPDES is shared between threads */ + /* DPDES and VTB are shared between threads */ mfspr r7, SPRN_DPDES + mfspr r8, SPRN_VTB std r7, VCORE_DPDES(r5) + std r8, VCORE_VTB(r5) /* clear DPDES so we don't get guest doorbells in the host */ li r8, 0 mtspr SPRN_DPDES, r8 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e76f79a45988..69ebd70160f2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -226,7 +226,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, */ vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; - vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb; if (cpu_has_feature(CPU_FTR_ARCH_207S)) vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; @@ -1361,6 +1361,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, to_book3s(vcpu)->vtb); + break; case KVM_REG_PPC_LPCR: case KVM_REG_PPC_LPCR_64: /* @@ -1397,6 +1400,9 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; + case KVM_REG_PPC_VTB: + to_book3s(vcpu)->vtb = set_reg_val(id, *val); + break; case KVM_REG_PPC_LPCR: case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); -- cgit v1.2.3 From 4f053d06dce6cbc82806d9baff24fb3add877205 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 16 Sep 2016 17:25:50 +1000 Subject: KVM: PPC: Book3S: Remove duplicate setting of the B field in tlbie Remove duplicate setting of the the "B" field when doing a tlbie(l). In compute_tlbie_rb(), the "B" field is set again just before returning the rb value to be used for tlbie(l). Signed-off-by: Balbir Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s_64.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 4ffd5a1e788d..848292176908 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -125,7 +125,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ - rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ /* * AVA in v had cleared lower 23 bits. We need to derive * that from pteg index @@ -177,7 +176,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, break; } } - rb |= (v >> 54) & 0x300; /* B field */ + rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ return rb; } -- cgit v1.2.3 From fa73c3b25bd8d0d393dc6109a1dba3c2aef0451e Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 21 Sep 2016 15:06:45 +0200 Subject: KVM: PPC: Book3s PR: Allow access to unprivileged MMCR2 register The MMCR2 register is available twice, one time with number 785 (privileged access), and one time with number 769 (unprivileged, but it can be disabled completely). In former times, the Linux kernel was using the unprivileged register 769 only, but since commit 8dd75ccb571f3c92c ("powerpc: Use privileged SPR number for MMCR2"), it uses the privileged register 785 instead. The KVM-PR code then of course also switched to use the SPR 785, but this is causing older guest kernels to crash, since these kernels still access 769 instead. So to support older kernels with KVM-PR again, we have to support register 769 in KVM-PR, too. Fixes: 8dd75ccb571f3c92c48014b3dabd3d51a115ab41 Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Thomas Huth Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kvm/book3s_emulate.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index f69f40f1519a..978dada662ae 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -737,6 +737,7 @@ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 #define SPRN_MMCR2 785 +#define SPRN_UMMCR2 769 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 7d9e4ed2e415..8359752b3efc 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -498,6 +498,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_MMCR0: case SPRN_MMCR1: case SPRN_MMCR2: + case SPRN_UMMCR2: #endif break; unprivileged: @@ -640,6 +641,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_MMCR0: case SPRN_MMCR1: case SPRN_MMCR2: + case SPRN_UMMCR2: case SPRN_TIR: #endif *spr_val = 0; -- cgit v1.2.3