diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 124 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v2-sr.c | 78 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v3-sr.c | 87 | ||||
-rw-r--r-- | virt/kvm/arm/pmu.c | 39 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c | 127 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 109 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v2.c | 20 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 32 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c | 101 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 92 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 64 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 18 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 7 | ||||
-rw-r--r-- | virt/kvm/irqchip.c | 16 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 164 | ||||
-rw-r--r-- | virt/kvm/vfio.c | 105 |
16 files changed, 731 insertions, 452 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 35d7100e0815..5976609ef27c 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -184,28 +184,47 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) return cval <= now; } +/* + * Reflect the timer output level into the kvm_run structure + */ +void kvm_timer_update_run(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the device bitmap with the timer states */ + regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | + KVM_ARM_DEV_EL1_PTIMER); + if (vtimer->irq.level) + regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; + if (ptimer->irq.level) + regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; +} + static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx) { int ret; - BUG_ON(!vgic_initialized(vcpu->kvm)); - timer_ctx->active_cleared_last = false; timer_ctx->irq.level = new_level; trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, - timer_ctx->irq.level); - WARN_ON(ret); + if (likely(irqchip_in_kernel(vcpu->kvm))) { + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer_ctx->irq.irq, + timer_ctx->irq.level); + WARN_ON(ret); + } } /* * Check if there was a change in the timer state (should we raise or lower * the line level to the GIC). */ -static int kvm_timer_update_state(struct kvm_vcpu *vcpu) +static void kvm_timer_update_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); @@ -217,16 +236,14 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) * because the guest would never see the interrupt. Instead wait * until we call this function from kvm_timer_flush_hwstate. */ - if (!vgic_initialized(vcpu->kvm) || !timer->enabled) - return -ENODEV; + if (unlikely(!timer->enabled)) + return; if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); - - return 0; } /* Schedule the background timer for the emulated timer. */ @@ -286,25 +303,12 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) timer_disarm(timer); } -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer has expired while we were running in the host, - * and inject an interrupt if that was the case. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; int ret; - if (kvm_timer_update_state(vcpu)) - return; - - /* Set the background timer for the physical timer emulation. */ - kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); - /* * If we enter the guest with the virtual input level to the VGIC * asserted, then we have already told the VGIC what we need to, and @@ -356,11 +360,72 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) vtimer->active_cleared_last = !phys_active; } +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool vlevel, plevel; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; + plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; + + return vtimer->irq.level != vlevel || + ptimer->irq.level != plevel; +} + +static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + /* + * To prevent continuously exiting from the guest, we mask the + * physical interrupt such that the guest can make forward progress. + * Once we detect the output level being deasserted, we unmask the + * interrupt again so that we exit from the guest when the timer + * fires. + */ + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, 0); +} + +/** + * kvm_timer_flush_hwstate - prepare timers before running the vcpu + * @vcpu: The vcpu pointer + * + * Check if the virtual timer has expired while we were running in the host, + * and inject an interrupt if that was the case, making sure the timer is + * masked or disabled on the host so that we keep executing. Also schedule a + * software timer for the physical timer if it is enabled. + */ +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + if (unlikely(!timer->enabled)) + return; + + kvm_timer_update_state(vcpu); + + /* Set the background timer for the physical timer emulation. */ + kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_timer_flush_hwstate_user(vcpu); + else + kvm_timer_flush_hwstate_vgic(vcpu); +} + /** * kvm_timer_sync_hwstate - sync timer state from cpu * @vcpu: The vcpu pointer * - * Check if the virtual timer has expired while we were running in the guest, + * Check if any of the timers have expired while we were running in the guest, * and inject an interrupt if that was the case. */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) @@ -560,6 +625,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (timer->enabled) return 0; + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ + if (!irqchip_in_kernel(vcpu->kvm)) + goto no_vgic; + + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + /* * Find the physical IRQ number corresponding to the host_vtimer_irq */ @@ -583,8 +655,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (ret) return ret; +no_vgic: timer->enabled = 1; - return 0; } diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index c8aeb7b91ec8..a3f18d362366 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -22,49 +22,6 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> -static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, - void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - u32 eisr0, eisr1; - int i; - bool expect_mi; - - expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE); - - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) && - (cpu_if->vgic_lr[i] & GICH_LR_EOI)); - } - - if (expect_mi) { - cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); - - if (cpu_if->vgic_misr & GICH_MISR_EOI) { - eisr0 = readl_relaxed(base + GICH_EISR0); - if (unlikely(nr_lr > 32)) - eisr1 = readl_relaxed(base + GICH_EISR1); - else - eisr1 = 0; - } else { - eisr0 = eisr1 = 0; - } - } else { - cpu_if->vgic_misr = 0; - eisr0 = eisr1 = 0; - } - -#ifdef CONFIG_CPU_BIG_ENDIAN - cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; -#else - cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; -#endif -} - static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; @@ -87,13 +44,10 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - + for (i = 0; i < used_lrs; i++) { if (cpu_if->vgic_elrsr & (1UL << i)) cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; else @@ -110,26 +64,20 @@ void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_dist *vgic = &kvm->arch.vgic; void __iomem *base = kern_hyp_va(vgic->vctrl_base); + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; if (!base) return; - cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); - - if (vcpu->arch.vgic_cpu.live_lrs) { + if (used_lrs) { cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - save_maint_int_state(vcpu, base); save_elrsr(vcpu, base); save_lrs(vcpu, base); writel_relaxed(0, base + GICH_HCR); - - vcpu->arch.vgic_cpu.live_lrs = 0; } else { - cpu_if->vgic_eisr = 0; cpu_if->vgic_elrsr = ~0UL; - cpu_if->vgic_misr = 0; cpu_if->vgic_apr = 0; } } @@ -141,32 +89,20 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_dist *vgic = &kvm->arch.vgic; void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; - u64 live_lrs = 0; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; if (!base) return; - - for (i = 0; i < nr_lr; i++) - if (cpu_if->vgic_lr[i] & GICH_LR_STATE) - live_lrs |= 1UL << i; - - if (live_lrs) { + if (used_lrs) { writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); - for (i = 0; i < nr_lr; i++) { - if (!(live_lrs & (1UL << i))) - continue; - + for (i = 0; i < used_lrs; i++) { writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); } } - - writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); - vcpu->arch.vgic_cpu.live_lrs = live_lrs; } #ifdef CONFIG_ARM64 diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 3947095cc0a1..bce6037cf01d 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -118,66 +118,32 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } -static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - int i; - bool expect_mi; - - expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE); - - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) && - (cpu_if->vgic_lr[i] & ICH_LR_EOI)); - } - - if (expect_mi) { - cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); - - if (cpu_if->vgic_misr & ICH_MISR_EOI) - cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); - else - cpu_if->vgic_eisr = 0; - } else { - cpu_if->vgic_misr = 0; - cpu_if->vgic_eisr = 0; - } -} - void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 val; /* * Make sure stores to the GIC via the memory mapped interface * are now visible to the system register interface. */ - if (!cpu_if->vgic_sre) + if (!cpu_if->vgic_sre) { dsb(st); + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + } - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); - - if (vcpu->arch.vgic_cpu.live_lrs) { + if (used_lrs) { int i; - u32 max_lr_idx, nr_pri_bits; + u32 nr_pri_bits; cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); write_gicreg(0, ICH_HCR_EL2); val = read_gicreg(ICH_VTR_EL2); - max_lr_idx = vtr_to_max_lr_idx(val); nr_pri_bits = vtr_to_nr_pri_bits(val); - save_maint_int_state(vcpu, max_lr_idx + 1); - - for (i = 0; i <= max_lr_idx; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - + for (i = 0; i < used_lrs; i++) { if (cpu_if->vgic_elrsr & (1 << i)) cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; else @@ -205,11 +171,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) default: cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); } - - vcpu->arch.vgic_cpu.live_lrs = 0; } else { - cpu_if->vgic_misr = 0; - cpu_if->vgic_eisr = 0; cpu_if->vgic_elrsr = 0xffff; cpu_if->vgic_ap0r[0] = 0; cpu_if->vgic_ap0r[1] = 0; @@ -234,9 +196,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 val; - u32 max_lr_idx, nr_pri_bits; - u16 live_lrs = 0; + u32 nr_pri_bits; int i; /* @@ -245,25 +207,19 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * delivered as a FIQ to the guest, with potentially fatal * consequences. So we must make sure that ICC_SRE_EL1 has * been actually programmed with the value we want before - * starting to mess with the rest of the GIC. + * starting to mess with the rest of the GIC, and VMCR_EL2 in + * particular. */ if (!cpu_if->vgic_sre) { write_gicreg(0, ICC_SRE_EL1); isb(); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); } val = read_gicreg(ICH_VTR_EL2); - max_lr_idx = vtr_to_max_lr_idx(val); nr_pri_bits = vtr_to_nr_pri_bits(val); - for (i = 0; i <= max_lr_idx; i++) { - if (cpu_if->vgic_lr[i] & ICH_LR_STATE) - live_lrs |= (1 << i); - } - - write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); - - if (live_lrs) { + if (used_lrs) { write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); switch (nr_pri_bits) { @@ -286,12 +242,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); } - for (i = 0; i <= max_lr_idx; i++) { - if (!(live_lrs & (1 << i))) - continue; - + for (i = 0; i < used_lrs; i++) __gic_v3_set_lr(cpu_if->vgic_lr[i], i); - } } /* @@ -303,7 +255,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) isb(); dsb(sy); } - vcpu->arch.vgic_cpu.live_lrs = live_lrs; /* * Prevent the guest from touching the GIC system registers if @@ -326,3 +277,13 @@ u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } + +u64 __hyp_text __vgic_v3_read_vmcr(void) +{ + return read_gicreg(ICH_VMCR_EL2); +} + +void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +{ + write_gicreg(vmcr, ICH_VMCR_EL2); +} diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 69ccce308458..4b43e7f3b158 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -230,13 +230,44 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) return; overflow = !!kvm_pmu_overflow_status(vcpu); - if (pmu->irq_level != overflow) { - pmu->irq_level = overflow; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); + if (pmu->irq_level == overflow) + return; + + pmu->irq_level = overflow; + + if (likely(irqchip_in_kernel(vcpu->kvm))) { + int ret; + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + pmu->irq_num, overflow); + WARN_ON(ret); } } +bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + return pmu->irq_level != run_level; +} + +/* + * Reflect the PMU overflow interrupt output level into the kvm_run structure + */ +void kvm_pmu_update_run(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the timer bitmap for user space */ + regs->device_irq_level &= ~KVM_ARM_DEV_PMU; + if (vcpu->arch.pmu.irq_level) + regs->device_irq_level |= KVM_ARM_DEV_PMU; +} + /** * kvm_pmu_flush_hwstate - flush pmu state to cpu * @vcpu: The vcpu pointer diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 276139a24e6f..25fd1b942c11 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -24,7 +24,12 @@ /* * Initialization rules: there are multiple stages to the vgic - * initialization, both for the distributor and the CPU interfaces. + * initialization, both for the distributor and the CPU interfaces. The basic + * idea is that even though the VGIC is not functional or not requested from + * user space, the critical path of the run loop can still call VGIC functions + * that just won't do anything, without them having to check additional + * initialization flags to ensure they don't look at uninitialized data + * structures. * * Distributor: * @@ -39,23 +44,67 @@ * * CPU Interface: * - * - kvm_vgic_cpu_early_init(): initialization of static data that + * - kvm_vgic_vcpu_early_init(): initialization of static data that * doesn't depend on any sizing information or emulation type. No * allocation is allowed there. */ /* EARLY INIT */ -/* - * Those 2 functions should not be needed anymore but they - * still are called from arm.c +/** + * kvm_vgic_early_init() - Initialize static VGIC VCPU data structures + * @kvm: The VM whose VGIC districutor should be initialized + * + * Only do initialization of static structures that don't require any + * allocation or sizing information from userspace. vgic_init() called + * kvm_vgic_dist_init() which takes care of the rest. */ void kvm_vgic_early_init(struct kvm *kvm) { + struct vgic_dist *dist = &kvm->arch.vgic; + + INIT_LIST_HEAD(&dist->lpi_list_head); + spin_lock_init(&dist->lpi_list_lock); } +/** + * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures + * @vcpu: The VCPU whose VGIC data structures whould be initialized + * + * Only do initialization, but do not actually enable the VGIC CPU interface + * yet. + */ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int i; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + spin_lock_init(&vgic_cpu->ap_list_lock); + + /* + * Enable and configure all SGIs to be edge-triggered and + * configure all PPIs as level-triggered. + */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + spin_lock_init(&irq->irq_lock); + irq->intid = i; + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + irq->targets = 1U << vcpu->vcpu_id; + kref_init(&irq->refcount); + if (vgic_irq_is_sgi(i)) { + /* SGIs */ + irq->enabled = 1; + irq->config = VGIC_CONFIG_EDGE; + } else { + /* PPIs */ + irq->config = VGIC_CONFIG_LEVEL; + } + } } /* CREATION */ @@ -148,9 +197,6 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); int i; - INIT_LIST_HEAD(&dist->lpi_list_head); - spin_lock_init(&dist->lpi_list_lock); - dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); if (!dist->spis) return -ENOMEM; @@ -181,41 +227,11 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) } /** - * kvm_vgic_vcpu_init: initialize the vcpu data structures and - * enable the VCPU interface - * @vcpu: the VCPU which's VGIC should be initialized + * kvm_vgic_vcpu_init() - Enable the VCPU interface + * @vcpu: the VCPU which's VGIC should be enabled */ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int i; - - INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - spin_lock_init(&vgic_cpu->ap_list_lock); - - /* - * Enable and configure all SGIs to be edge-triggered and - * configure all PPIs as level-triggered. - */ - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - - INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); - irq->intid = i; - irq->vcpu = NULL; - irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; - kref_init(&irq->refcount); - if (vgic_irq_is_sgi(i)) { - /* SGIs */ - irq->enabled = 1; - irq->config = VGIC_CONFIG_EDGE; - } else { - /* PPIs */ - irq->config = VGIC_CONFIG_LEVEL; - } - } if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_enable(vcpu); else @@ -262,6 +278,18 @@ int vgic_init(struct kvm *kvm) vgic_debug_init(kvm); dist->initialized = true; + + /* + * If we're initializing GICv2 on-demand when first running the VCPU + * then we need to load the VGIC state onto the CPU. We can detect + * this easily by checking if we are in between vcpu_load and vcpu_put + * when we just initialized the VGIC. + */ + preempt_disable(); + vcpu = kvm_arm_get_running_vcpu(); + if (vcpu) + kvm_vgic_load(vcpu); + preempt_enable(); out: return ret; } @@ -392,6 +420,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) } /** + * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware + * + * For a specific CPU, initialize the GIC VE hardware. + */ +void kvm_vgic_init_cpu_hardware(void) +{ + BUG_ON(preemptible()); + + /* + * We want to make sure the list registers start out clear so that we + * only have the program the used registers. + */ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_init_lrs(); + else + kvm_call_hyp(__vgic_v3_init_lrs); +} + +/** * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable * according to the host GIC model. Accordingly calls either * vgic_v2/v3_probe which registers the KVM_DEVICE that can be diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 571b64a01c50..8d1da1af4b09 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) return ret; } -static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u32 reg = 0; - - mutex_lock(&its->cmd_lock); - if (its->creadr == its->cwriter) - reg |= GITS_CTLR_QUIESCENT; - if (its->enabled) - reg |= GITS_CTLR_ENABLE; - mutex_unlock(&its->cmd_lock); - - return reg; -} - -static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - its->enabled = !!(val & GITS_CTLR_ENABLE); -} - static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) @@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, #define ITS_CMD_SIZE 32 #define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) -/* - * By writing to CWRITER the guest announces new commands to be processed. - * To avoid any races in the first place, we take the its_cmd lock, which - * protects our ring buffer variables, so that there is only one user - * per ITS handling commands at a given time. - */ -static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) { gpa_t cbaser; u64 cmd_buf[4]; - u32 reg; - if (!its) - return; - - mutex_lock(&its->cmd_lock); - - reg = update_64bit_reg(its->cwriter, addr & 7, len, val); - reg = ITS_CMD_OFFSET(reg); - if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { - mutex_unlock(&its->cmd_lock); + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) return; - } - its->cwriter = reg; cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { @@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) its->creadr = 0; } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); mutex_unlock(&its->cmd_lock); } @@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, *regptr = reg; } +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + its->enabled = !!(val & GITS_CTLR_ENABLE); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + #define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ { \ .reg_offset = off, \ diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index a3ad7ff95c9b..0a4283ed9aa7 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, val = vmcr.ctlr; break; case GIC_CPU_PRIMASK: - val = vmcr.pmr; + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >> + GICV_PMR_PRIORITY_SHIFT; break; case GIC_CPU_BINPOINT: val = vmcr.bpr; @@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, vmcr.ctlr = val; break; case GIC_CPU_PRIMASK: - vmcr.pmr = val; + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) & + GICV_PMR_PRIORITY_MASK; break; case GIC_CPU_BINPOINT: vmcr.bpr = val; diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 3654b4c835ef..2a5db1352722 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { + struct kvm_vcpu *requester_vcpu; spin_lock(&irq->irq_lock); + + /* + * The vcpu parameter here can mean multiple things depending on how + * this function is called; when handling a trap from the kernel it + * depends on the GIC version, and these functions are also called as + * part of save/restore from userspace. + * + * Therefore, we have to figure out the requester in a reliable way. + * + * When accessing VGIC state from user space, the requester_vcpu is + * NULL, which is fine, because we guarantee that no VCPUs are running + * when accessing VGIC state from user space so irq->vcpu->cpu is + * always -1. + */ + requester_vcpu = kvm_arm_get_running_vcpu(); + /* * If this virtual IRQ was written into a list register, we * have to make sure the CPU that runs the VCPU thread has - * synced back LR state to the struct vgic_irq. We can only - * know this for sure, when either this irq is not assigned to - * anyone's AP list anymore, or the VCPU thread is not - * running on any CPUs. + * synced back the LR state to the struct vgic_irq. * - * In the opposite case, we know the VCPU thread may be on its - * way back from the guest and still has to sync back this - * IRQ, so we release and re-acquire the spin_lock to let the - * other thread sync back the IRQ. + * As long as the conditions below are true, we know the VCPU thread + * may be on its way back from the guest (we kicked the VCPU thread in + * vgic_change_active_prepare) and still has to sync back this IRQ, + * so we release and re-acquire the spin_lock to let the other thread + * sync back the IRQ. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ + irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index b834ecdf3225..a65757aab6d3 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -22,52 +22,19 @@ #include "vgic.h" -/* - * Call this function to convert a u64 value to an unsigned long * bitmask - * in a way that works on both 32-bit and 64-bit LE and BE platforms. - * - * Warning: Calling this function may modify *val. - */ -static unsigned long *u64_to_bitmask(u64 *val) +static inline void vgic_v2_write_lr(int lr, u32 val) { -#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 - *val = (*val >> 32) | (*val << 32); -#endif - return (unsigned long *)val; + void __iomem *base = kvm_vgic_global_state.vctrl_base; + + writel_relaxed(val, base + GICH_LR0 + (lr * 4)); } -void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) +void vgic_v2_init_lrs(void) { - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + int i; - if (cpuif->vgic_misr & GICH_MISR_EOI) { - u64 eisr = cpuif->vgic_eisr; - unsigned long *eisr_bmap = u64_to_bitmask(&eisr); - int lr; - - for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) { - u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID; - - WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE); - - /* Only SPIs require notification */ - if (vgic_valid_spi(vcpu->kvm, intid)) - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); - } - } - - /* check and disable underflow maintenance IRQ */ - cpuif->vgic_hcr &= ~GICH_HCR_UIE; - - /* - * In the next iterations of the vcpu loop, if we sync the - * vgic state after flushing it, but before entering the guest - * (this happens for pending signals and vmid rollovers), then - * make sure we don't pick up any old maintenance interrupts - * here. - */ - cpuif->vgic_eisr = 0; + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) + vgic_v2_write_lr(i, 0); } void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) @@ -77,6 +44,12 @@ void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) cpuif->vgic_hcr |= GICH_HCR_UIE; } +static bool lr_signals_eoi_mi(u32 lr_val) +{ + return !(lr_val & GICH_LR_STATE) && (lr_val & GICH_LR_EOI) && + !(lr_val & GICH_LR_HW); +} + /* * transfer the content of the LRs back into the corresponding ap_list: * - active bit is transferred as is @@ -86,14 +59,22 @@ void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) */ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) { - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; int lr; - for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { + cpuif->vgic_hcr &= ~GICH_HCR_UIE; + + for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u32 val = cpuif->vgic_lr[lr]; u32 intid = val & GICH_LR_VIRTUALID; struct vgic_irq *irq; + /* Notify fds when the guest EOI'ed a level-triggered SPI */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); spin_lock(&irq->irq_lock); @@ -126,6 +107,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } + + vgic_cpu->used_lrs = 0; } /* @@ -184,6 +167,7 @@ void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; u32 vmcr; vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; @@ -191,15 +175,18 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) GICH_VMCR_ALIAS_BINPOINT_MASK; vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & - GICH_VMCR_PRIMASK_MASK; + vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << + GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; + cpu_if->vgic_vmcr = vmcr; } void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; @@ -207,8 +194,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) GICH_VMCR_ALIAS_BINPOINT_SHIFT; vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> - GICH_VMCR_PRIMASK_SHIFT; + vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >> + GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; } void vgic_v2_enable(struct kvm_vcpu *vcpu) @@ -375,3 +362,19 @@ out: return ret; } + +void vgic_v2_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + + writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); +} + +void vgic_v2_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + + cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); +} diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index edc6ee2dc852..df1503650300 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -21,59 +21,29 @@ #include "vgic.h" -void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - u32 model = vcpu->kvm->arch.vgic.vgic_model; - - if (cpuif->vgic_misr & ICH_MISR_EOI) { - unsigned long eisr_bmap = cpuif->vgic_eisr; - int lr; - - for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) { - u32 intid; - u64 val = cpuif->vgic_lr[lr]; - - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) - intid = val & ICH_LR_VIRTUAL_ID_MASK; - else - intid = val & GICH_LR_VIRTUALID; - - WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE); - - /* Only SPIs require notification */ - if (vgic_valid_spi(vcpu->kvm, intid)) - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); - } - - /* - * In the next iterations of the vcpu loop, if we sync - * the vgic state after flushing it, but before - * entering the guest (this happens for pending - * signals and vmid rollovers), then make sure we - * don't pick up any old maintenance interrupts here. - */ - cpuif->vgic_eisr = 0; - } - cpuif->vgic_hcr &= ~ICH_HCR_UIE; + cpuif->vgic_hcr |= ICH_HCR_UIE; } -void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) +static bool lr_signals_eoi_mi(u64 lr_val) { - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - - cpuif->vgic_hcr |= ICH_HCR_UIE; + return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) && + !(lr_val & ICH_LR_HW); } void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) { - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; u32 model = vcpu->kvm->arch.vgic.vgic_model; int lr; - for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { + cpuif->vgic_hcr &= ~ICH_HCR_UIE; + + for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; u32 intid; struct vgic_irq *irq; @@ -82,6 +52,12 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) intid = val & ICH_LR_VIRTUAL_ID_MASK; else intid = val & GICH_LR_VIRTUALID; + + /* Notify fds when the guest EOI'ed a level-triggered IRQ */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); if (!irq) /* An LPI could have been unmapped. */ continue; @@ -117,6 +93,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } + + vgic_cpu->used_lrs = 0; } /* Requires the irq to be locked already */ @@ -173,6 +151,7 @@ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u32 vmcr; /* @@ -188,12 +167,15 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; - vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; + cpu_if->vgic_vmcr = vmcr; } void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; /* * Ignore the FIQen bit, because GIC emulation always implies @@ -229,10 +211,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. * This goes with the spec allowing the value to be RAO/WI. */ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; } else { vgic_v3->vgic_sre = 0; @@ -383,3 +368,24 @@ int vgic_v3_probe(const struct gic_kvm_info *info) return 0; } + +void vgic_v3_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (likely(cpu_if->vgic_sre)) + kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + if (likely(cpu_if->vgic_sre)) + cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); +} diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 654dfd40e449..4346bc7d08dc 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -29,7 +29,9 @@ #define DEBUG_SPINLOCK_BUG_ON(p) #endif -struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,}; +struct vgic_global kvm_vgic_global_state __ro_after_init = { + .gicv3_cpuif = STATIC_KEY_FALSE_INIT, +}; /* * Locking order is always: @@ -527,14 +529,6 @@ retry: spin_unlock(&vgic_cpu->ap_list_lock); } -static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_process_maintenance(vcpu); - else - vgic_v3_process_maintenance(vcpu); -} - static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) { if (kvm_vgic_global_state.type == VGIC_V2) @@ -601,10 +595,8 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) { - vgic_set_underflow(vcpu); + if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) vgic_sort_ap_list(vcpu); - } list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); @@ -623,8 +615,12 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) next: spin_unlock(&irq->irq_lock); - if (count == kvm_vgic_global_state.nr_lr) + if (count == kvm_vgic_global_state.nr_lr) { + if (!list_is_last(&irq->ap_list, + &vgic_cpu->ap_list_head)) + vgic_set_underflow(vcpu); break; + } } vcpu->arch.vgic_cpu.used_lrs = count; @@ -637,18 +633,30 @@ next: /* Sync back the hardware VGIC state into our emulation after a guest's run. */ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - if (unlikely(!vgic_initialized(vcpu->kvm))) + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + /* An empty ap_list_head implies used_lrs == 0 */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; - vgic_process_maintenance_interrupt(vcpu); - vgic_fold_lr_state(vcpu); + if (vgic_cpu->used_lrs) + vgic_fold_lr_state(vcpu); vgic_prune_ap_list(vcpu); } /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { - if (unlikely(!vgic_initialized(vcpu->kvm))) + /* + * If there are no virtual interrupts active or pending for this + * VCPU, then there is no work to do and we can bail out without + * taking any lock. There is a potential race with someone injecting + * interrupts to the VCPU, but it is a benign race as the VCPU will + * either observe the new interrupt before or after doing this check, + * and introducing additional synchronization mechanism doesn't change + * this. + */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); @@ -656,6 +664,28 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); } +void kvm_vgic_load(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_load(vcpu); + else + vgic_v3_load(vcpu); +} + +void kvm_vgic_put(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_put(vcpu); + else + vgic_v3_put(vcpu); +} + int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index db28f7cadab2..799fd651b260 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq) return irq->pending_latch || irq->line_level; } +/* + * This struct provides an intermediate representation of the fields contained + * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC + * state to userspace can generate either GICv2 or GICv3 CPU interface + * registers regardless of the hardware backed GIC used. + */ struct vgic_vmcr { u32 ctlr; u32 abpr; u32 bpr; - u32 pmr; + u32 pmr; /* Priority mask field in the GICC_PMR and + * ICC_PMR_EL1 priority field format */ /* Below member variable are valid only for GICv3 */ u32 grpen0; u32 grpen1; @@ -112,7 +119,6 @@ void vgic_kick_vcpus(struct kvm *kvm); int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment); -void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); @@ -130,6 +136,10 @@ int vgic_v2_map_resources(struct kvm *kvm); int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type); +void vgic_v2_init_lrs(void); +void vgic_v2_load(struct kvm_vcpu *vcpu); +void vgic_v2_put(struct kvm_vcpu *vcpu); + static inline void vgic_get_irq_kref(struct vgic_irq *irq) { if (irq->intid < VGIC_MIN_LPI) @@ -138,7 +148,6 @@ static inline void vgic_get_irq_kref(struct vgic_irq *irq) kref_get(&irq->refcount); } -void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); @@ -150,6 +159,9 @@ int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); +void vgic_v3_load(struct kvm_vcpu *vcpu); +void vgic_v3_put(struct kvm_vcpu *vcpu); + int vgic_register_its_iodevs(struct kvm *kvm); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a29786dd9522..a8d540398bbd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); - kvm_vcpu_request_scan_ioapic(kvm); + kvm_arch_post_irq_ack_notifier_list_update(kvm); } void kvm_unregister_irq_ack_notifier(struct kvm *kvm, @@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); synchronize_srcu(&kvm->irq_srcu); - kvm_vcpu_request_scan_ioapic(kvm); + kvm_arch_post_irq_ack_notifier_list_update(kvm); } #endif @@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - kvm->buses[bus_idx]->ioeventfd_count--; + if (kvm->buses[bus_idx]) + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 3bcc9990adf7..31e40c9e81df 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - int r = -EINVAL; struct kvm_kernel_irq_routing_entry *ei; + int r; /* * Do not allow GSI to be mapped to the same irqchip more than once. @@ -153,26 +153,30 @@ static int setup_routing_entry(struct kvm *kvm, if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) - return r; + return -EINVAL; e->gsi = ue->gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) - goto out; + return r; if (e->type == KVM_IRQ_ROUTING_IRQCHIP) rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); - r = 0; -out: - return r; + + return 0; } void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) { } +bool __weak kvm_arch_can_set_irq_routing(struct kvm *kvm) +{ + return true; +} + int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *ue, unsigned nr, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a17d78759727..b3d151ee2a67 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -165,6 +165,24 @@ void vcpu_put(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(vcpu_put); +/* TODO: merge with kvm_arch_vcpu_should_kick */ +static bool kvm_request_needs_ipi(struct kvm_vcpu *vcpu, unsigned req) +{ + int mode = kvm_vcpu_exiting_guest_mode(vcpu); + + /* + * We need to wait for the VCPU to reenable interrupts and get out of + * READING_SHADOW_PAGE_TABLES mode. + */ + if (req & KVM_REQUEST_WAIT) + return mode != OUTSIDE_GUEST_MODE; + + /* + * Need to kick a running VCPU, but otherwise there is nothing to do. + */ + return mode == IN_GUEST_MODE; +} + static void ack_flush(void *_completed) { } @@ -174,6 +192,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) int i, cpu, me; cpumask_var_t cpus; bool called = true; + bool wait = req & KVM_REQUEST_WAIT; struct kvm_vcpu *vcpu; zalloc_cpumask_var(&cpus, GFP_ATOMIC); @@ -183,17 +202,17 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) kvm_make_request(req, vcpu); cpu = vcpu->cpu; - /* Set ->requests bit before we read ->mode. */ - smp_mb__after_atomic(); + if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) + continue; if (cpus != NULL && cpu != -1 && cpu != me && - kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE) + kvm_request_needs_ipi(vcpu, req)) cpumask_set_cpu(cpu, cpus); } if (unlikely(cpus == NULL)) - smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); + smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait); else if (!cpumask_empty(cpus)) - smp_call_function_many(cpus, ack_flush, NULL, 1); + smp_call_function_many(cpus, ack_flush, NULL, wait); else called = false; put_cpu(); @@ -504,7 +523,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) int i; struct kvm_memslots *slots; - slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) return NULL; @@ -689,18 +708,6 @@ out_err_no_disable: return ERR_PTR(r); } -/* - * Avoid using vmalloc for a small buffer. - * Should not be used when the size is statically known. - */ -void *kvm_kvzalloc(unsigned long size) -{ - if (size > PAGE_SIZE) - return vzalloc(size); - else - return kzalloc(size, GFP_KERNEL); -} - static void kvm_destroy_devices(struct kvm *kvm) { struct kvm_device *dev, *tmp; @@ -727,8 +734,11 @@ static void kvm_destroy_vm(struct kvm *kvm) list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - for (i = 0; i < KVM_NR_BUSES; i++) - kvm_io_bus_destroy(kvm->buses[i]); + for (i = 0; i < KVM_NR_BUSES; i++) { + if (kvm->buses[i]) + kvm_io_bus_destroy(kvm->buses[i]); + kvm->buses[i] = NULL; + } kvm_coalesced_mmio_free(kvm); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); @@ -779,7 +789,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); - memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes); + memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); if (!memslot->dirty_bitmap) return -ENOMEM; @@ -1005,7 +1015,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) goto out_free; memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); @@ -1016,8 +1026,6 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, as_id, slots); - /* slot was deleted or moved, clear iommu mapping */ - kvm_iommu_unmap_pages(kvm, &old); /* From this point no new shadow pages pointing to a deleted, * or moved, memslot will be created. * @@ -1052,21 +1060,6 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_free_memslot(kvm, &old, &new); kvfree(old_memslots); - - /* - * IOMMU mapping: New slots need to be mapped. Old slots need to be - * un-mapped and re-mapped if their base changes. Since base change - * unmapping is handled above with slot deletion, mapping alone is - * needed here. Anything else the iommu might care about for existing - * slots (size changes, userspace addr changes and read-only flag - * changes) is disallowed above, so any other attribute changes getting - * here can be skipped. - */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - r = kvm_iommu_map_pages(kvm, &new); - return r; - } - return 0; out_slots: @@ -1970,18 +1963,18 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, return 0; } -int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init); +EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); -int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len) +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); int r; gpa_t gpa = ghc->gpa + offset; @@ -1991,7 +1984,7 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_ __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_vcpu_write_guest(vcpu, gpa, data, len); + return kvm_write_guest(kvm, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2003,19 +1996,19 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_ return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached); +EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); -int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len); + return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached); +EXPORT_SYMBOL_GPL(kvm_write_guest_cached); -int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); int r; BUG_ON(len > ghc->len); @@ -2024,7 +2017,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len); + return kvm_read_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2035,7 +2028,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached); +EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { @@ -2209,8 +2202,7 @@ out: } EXPORT_SYMBOL_GPL(kvm_vcpu_block); -#ifndef CONFIG_S390 -void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) +bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { struct swait_queue_head *wqp; @@ -2218,11 +2210,14 @@ void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) if (swait_active(wqp)) { swake_up(wqp); ++vcpu->stat.halt_wakeup; + return true; } + return false; } EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up); +#ifndef CONFIG_S390 /* * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. */ @@ -2231,7 +2226,9 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) int me; int cpu = vcpu->cpu; - kvm_vcpu_wake_up(vcpu); + if (kvm_vcpu_wake_up(vcpu)) + return; + me = get_cpu(); if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) if (kvm_arch_vcpu_should_kick(vcpu)) @@ -2363,7 +2360,7 @@ static int kvm_vcpu_fault(struct vm_fault *vmf) else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) page = virt_to_page(vcpu->arch.pio_data); #endif -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif @@ -2932,6 +2929,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: return 1; +#ifdef CONFIG_KVM_MMIO + case KVM_CAP_COALESCED_MMIO: + return KVM_COALESCED_MMIO_PAGE_OFFSET; +#endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; @@ -2981,7 +2982,7 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_get_dirty_log(kvm, &log); break; } -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO case KVM_REGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; @@ -3064,6 +3065,8 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&routing, argp, sizeof(routing))) goto out; r = -EINVAL; + if (!kvm_arch_can_set_irq_routing(kvm)) + goto out; if (routing.nr > KVM_MAX_IRQ_ROUTES) goto out; if (routing.flags) @@ -3173,7 +3176,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) kvm = kvm_create_vm(type); if (IS_ERR(kvm)) return PTR_ERR(kvm); -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO r = kvm_coalesced_mmio_init(kvm); if (r < 0) { kvm_put_kvm(kvm); @@ -3226,7 +3229,7 @@ static long kvm_dev_ioctl(struct file *filp, #ifdef CONFIG_X86 r += PAGE_SIZE; /* pio data page */ #endif -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO r += PAGE_SIZE; /* coalesced mmio ring page */ #endif break; @@ -3474,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3491,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && @@ -3541,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3553,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + if (!bus) + return -ENOMEM; + /* exclude ioeventfd which is limited by maximum fd */ if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; @@ -3572,37 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, } /* Caller must hold slots_lock. */ -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - int i, r; + int i; struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - r = -ENOENT; + if (!bus) + return; + for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { - r = 0; break; } - if (r) - return r; + if (i == bus->dev_count) + return; new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; + if (!new_bus) { + pr_err("kvm: failed to shrink bus, removing it completely\n"); + goto broken; + } memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); new_bus->dev_count--; memcpy(new_bus->range + i, bus->range + i + 1, (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); +broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); - return r; + return; } struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, @@ -3615,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, srcu_idx = srcu_read_lock(&kvm->srcu); bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + if (!bus) + goto out_unlock; dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); if (dev_idx < 0) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d32f239eb471..37d9118fd84b 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -20,6 +20,10 @@ #include <linux/vfio.h> #include "vfio.h" +#ifdef CONFIG_SPAPR_TCE_IOMMU +#include <asm/kvm_ppc.h> +#endif + struct kvm_vfio_group { struct list_head node; struct vfio_group *vfio_group; @@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group) return ret > 0; } +#ifdef CONFIG_SPAPR_TCE_IOMMU +static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group) +{ + int (*fn)(struct vfio_group *); + int ret = -EINVAL; + + fn = symbol_get(vfio_external_user_iommu_id); + if (!fn) + return ret; + + ret = fn(vfio_group); + + symbol_put(vfio_external_user_iommu_id); + + return ret; +} + +static struct iommu_group *kvm_vfio_group_get_iommu_group( + struct vfio_group *group) +{ + int group_id = kvm_vfio_external_user_iommu_id(group); + + if (group_id < 0) + return NULL; + + return iommu_group_get_by_id(group_id); +} + +static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm, + struct vfio_group *vfio_group) +{ + struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group); + + if (WARN_ON_ONCE(!grp)) + return; + + kvm_spapr_tce_release_iommu_group(kvm, grp); + iommu_group_put(grp); +} +#endif + /* * Groups can use the same or different IOMMU domains. If the same then * adding a new group may change the coherency of groups we've previously @@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group); +#endif kvm_vfio_group_set_kvm(vfio_group, NULL); kvm_vfio_group_put_external_user(vfio_group); @@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) kvm_vfio_update_coherency(dev); return ret; + +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: { + struct kvm_vfio_spapr_tce param; + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + struct fd f; + struct iommu_group *grp; + + if (copy_from_user(¶m, (void __user *)arg, + sizeof(struct kvm_vfio_spapr_tce))) + return -EFAULT; + + f = fdget(param.groupfd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + grp = kvm_vfio_group_get_iommu_group(vfio_group); + if (WARN_ON_ONCE(!grp)) { + kvm_vfio_group_put_external_user(vfio_group); + return -EIO; + } + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, + param.tablefd, grp); + break; + } + + mutex_unlock(&kv->lock); + + iommu_group_put(grp); + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } +#endif /* CONFIG_SPAPR_TCE_IOMMU */ } return -ENXIO; @@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_VFIO_GROUP_ADD: case KVM_DEV_VFIO_GROUP_DEL: +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: +#endif return 0; } @@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev) struct kvm_vfio_group *kvg, *tmp; list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group); +#endif kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); |