summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/arch_timer.c124
-rw-r--r--virt/kvm/arm/hyp/vgic-v2-sr.c78
-rw-r--r--virt/kvm/arm/hyp/vgic-v3-sr.c87
-rw-r--r--virt/kvm/arm/pmu.c39
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c127
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c109
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c20
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c32
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c101
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c92
-rw-r--r--virt/kvm/arm/vgic/vgic.c64
-rw-r--r--virt/kvm/arm/vgic/vgic.h18
-rw-r--r--virt/kvm/eventfd.c7
-rw-r--r--virt/kvm/irqchip.c16
-rw-r--r--virt/kvm/kvm_main.c164
-rw-r--r--virt/kvm/vfio.c105
16 files changed, 731 insertions, 452 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 35d7100e0815..5976609ef27c 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -184,28 +184,47 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
return cval <= now;
}
+/*
+ * Reflect the timer output level into the kvm_run structure
+ */
+void kvm_timer_update_run(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ struct kvm_sync_regs *regs = &vcpu->run->s.regs;
+
+ /* Populate the device bitmap with the timer states */
+ regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
+ KVM_ARM_DEV_EL1_PTIMER);
+ if (vtimer->irq.level)
+ regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
+ if (ptimer->irq.level)
+ regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
+}
+
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
struct arch_timer_context *timer_ctx)
{
int ret;
- BUG_ON(!vgic_initialized(vcpu->kvm));
-
timer_ctx->active_cleared_last = false;
timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq,
- timer_ctx->irq.level);
- WARN_ON(ret);
+ if (likely(irqchip_in_kernel(vcpu->kvm))) {
+ ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ timer_ctx->irq.irq,
+ timer_ctx->irq.level);
+ WARN_ON(ret);
+ }
}
/*
* Check if there was a change in the timer state (should we raise or lower
* the line level to the GIC).
*/
-static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
+static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
@@ -217,16 +236,14 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
* because the guest would never see the interrupt. Instead wait
* until we call this function from kvm_timer_flush_hwstate.
*/
- if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
- return -ENODEV;
+ if (unlikely(!timer->enabled))
+ return;
if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
-
- return 0;
}
/* Schedule the background timer for the emulated timer. */
@@ -286,25 +303,12 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
timer_disarm(timer);
}
-/**
- * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the virtual timer has expired while we were running in the host,
- * and inject an interrupt if that was the case.
- */
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
+static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
int ret;
- if (kvm_timer_update_state(vcpu))
- return;
-
- /* Set the background timer for the physical timer emulation. */
- kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
-
/*
* If we enter the guest with the virtual input level to the VGIC
* asserted, then we have already told the VGIC what we need to, and
@@ -356,11 +360,72 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
vtimer->active_cleared_last = !phys_active;
}
+bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
+ bool vlevel, plevel;
+
+ if (likely(irqchip_in_kernel(vcpu->kvm)))
+ return false;
+
+ vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
+ plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
+
+ return vtimer->irq.level != vlevel ||
+ ptimer->irq.level != plevel;
+}
+
+static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ /*
+ * To prevent continuously exiting from the guest, we mask the
+ * physical interrupt such that the guest can make forward progress.
+ * Once we detect the output level being deasserted, we unmask the
+ * interrupt again so that we exit from the guest when the timer
+ * fires.
+ */
+ if (vtimer->irq.level)
+ disable_percpu_irq(host_vtimer_irq);
+ else
+ enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+/**
+ * kvm_timer_flush_hwstate - prepare timers before running the vcpu
+ * @vcpu: The vcpu pointer
+ *
+ * Check if the virtual timer has expired while we were running in the host,
+ * and inject an interrupt if that was the case, making sure the timer is
+ * masked or disabled on the host so that we keep executing. Also schedule a
+ * software timer for the physical timer if it is enabled.
+ */
+void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ if (unlikely(!timer->enabled))
+ return;
+
+ kvm_timer_update_state(vcpu);
+
+ /* Set the background timer for the physical timer emulation. */
+ kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ kvm_timer_flush_hwstate_user(vcpu);
+ else
+ kvm_timer_flush_hwstate_vgic(vcpu);
+}
+
/**
* kvm_timer_sync_hwstate - sync timer state from cpu
* @vcpu: The vcpu pointer
*
- * Check if the virtual timer has expired while we were running in the guest,
+ * Check if any of the timers have expired while we were running in the guest,
* and inject an interrupt if that was the case.
*/
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
@@ -560,6 +625,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (timer->enabled)
return 0;
+ /* Without a VGIC we do not map virtual IRQs to physical IRQs */
+ if (!irqchip_in_kernel(vcpu->kvm))
+ goto no_vgic;
+
+ if (!vgic_initialized(vcpu->kvm))
+ return -ENODEV;
+
/*
* Find the physical IRQ number corresponding to the host_vtimer_irq
*/
@@ -583,8 +655,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (ret)
return ret;
+no_vgic:
timer->enabled = 1;
-
return 0;
}
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index c8aeb7b91ec8..a3f18d362366 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -22,49 +22,6 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
-static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
- void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
- u32 eisr0, eisr1;
- int i;
- bool expect_mi;
-
- expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE);
-
- for (i = 0; i < nr_lr; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
- expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) &&
- (cpu_if->vgic_lr[i] & GICH_LR_EOI));
- }
-
- if (expect_mi) {
- cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
-
- if (cpu_if->vgic_misr & GICH_MISR_EOI) {
- eisr0 = readl_relaxed(base + GICH_EISR0);
- if (unlikely(nr_lr > 32))
- eisr1 = readl_relaxed(base + GICH_EISR1);
- else
- eisr1 = 0;
- } else {
- eisr0 = eisr1 = 0;
- }
- } else {
- cpu_if->vgic_misr = 0;
- eisr0 = eisr1 = 0;
- }
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
- cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
-#else
- cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
-#endif
-}
-
static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -87,13 +44,10 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
int i;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
- for (i = 0; i < nr_lr; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
+ for (i = 0; i < used_lrs; i++) {
if (cpu_if->vgic_elrsr & (1UL << i))
cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
else
@@ -110,26 +64,20 @@ void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &kvm->arch.vgic;
void __iomem *base = kern_hyp_va(vgic->vctrl_base);
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
if (!base)
return;
- cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
-
- if (vcpu->arch.vgic_cpu.live_lrs) {
+ if (used_lrs) {
cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
- save_maint_int_state(vcpu, base);
save_elrsr(vcpu, base);
save_lrs(vcpu, base);
writel_relaxed(0, base + GICH_HCR);
-
- vcpu->arch.vgic_cpu.live_lrs = 0;
} else {
- cpu_if->vgic_eisr = 0;
cpu_if->vgic_elrsr = ~0UL;
- cpu_if->vgic_misr = 0;
cpu_if->vgic_apr = 0;
}
}
@@ -141,32 +89,20 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &kvm->arch.vgic;
void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
int i;
- u64 live_lrs = 0;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
if (!base)
return;
-
- for (i = 0; i < nr_lr; i++)
- if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
- live_lrs |= 1UL << i;
-
- if (live_lrs) {
+ if (used_lrs) {
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
- for (i = 0; i < nr_lr; i++) {
- if (!(live_lrs & (1UL << i)))
- continue;
-
+ for (i = 0; i < used_lrs; i++) {
writel_relaxed(cpu_if->vgic_lr[i],
base + GICH_LR0 + (i * 4));
}
}
-
- writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
- vcpu->arch.vgic_cpu.live_lrs = live_lrs;
}
#ifdef CONFIG_ARM64
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 3947095cc0a1..bce6037cf01d 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -118,66 +118,32 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
}
}
-static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr)
-{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- int i;
- bool expect_mi;
-
- expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE);
-
- for (i = 0; i < nr_lr; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
- expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) &&
- (cpu_if->vgic_lr[i] & ICH_LR_EOI));
- }
-
- if (expect_mi) {
- cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
-
- if (cpu_if->vgic_misr & ICH_MISR_EOI)
- cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
- else
- cpu_if->vgic_eisr = 0;
- } else {
- cpu_if->vgic_misr = 0;
- cpu_if->vgic_eisr = 0;
- }
-}
-
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 val;
/*
* Make sure stores to the GIC via the memory mapped interface
* are now visible to the system register interface.
*/
- if (!cpu_if->vgic_sre)
+ if (!cpu_if->vgic_sre) {
dsb(st);
+ cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
+ }
- cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
-
- if (vcpu->arch.vgic_cpu.live_lrs) {
+ if (used_lrs) {
int i;
- u32 max_lr_idx, nr_pri_bits;
+ u32 nr_pri_bits;
cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
write_gicreg(0, ICH_HCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
- max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
- save_maint_int_state(vcpu, max_lr_idx + 1);
-
- for (i = 0; i <= max_lr_idx; i++) {
- if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
- continue;
-
+ for (i = 0; i < used_lrs; i++) {
if (cpu_if->vgic_elrsr & (1 << i))
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
else
@@ -205,11 +171,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
default:
cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
}
-
- vcpu->arch.vgic_cpu.live_lrs = 0;
} else {
- cpu_if->vgic_misr = 0;
- cpu_if->vgic_eisr = 0;
cpu_if->vgic_elrsr = 0xffff;
cpu_if->vgic_ap0r[0] = 0;
cpu_if->vgic_ap0r[1] = 0;
@@ -234,9 +196,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u64 val;
- u32 max_lr_idx, nr_pri_bits;
- u16 live_lrs = 0;
+ u32 nr_pri_bits;
int i;
/*
@@ -245,25 +207,19 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
* delivered as a FIQ to the guest, with potentially fatal
* consequences. So we must make sure that ICC_SRE_EL1 has
* been actually programmed with the value we want before
- * starting to mess with the rest of the GIC.
+ * starting to mess with the rest of the GIC, and VMCR_EL2 in
+ * particular.
*/
if (!cpu_if->vgic_sre) {
write_gicreg(0, ICC_SRE_EL1);
isb();
+ write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
}
val = read_gicreg(ICH_VTR_EL2);
- max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
- for (i = 0; i <= max_lr_idx; i++) {
- if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
- live_lrs |= (1 << i);
- }
-
- write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
-
- if (live_lrs) {
+ if (used_lrs) {
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
switch (nr_pri_bits) {
@@ -286,12 +242,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
}
- for (i = 0; i <= max_lr_idx; i++) {
- if (!(live_lrs & (1 << i)))
- continue;
-
+ for (i = 0; i < used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
- }
}
/*
@@ -303,7 +255,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
isb();
dsb(sy);
}
- vcpu->arch.vgic_cpu.live_lrs = live_lrs;
/*
* Prevent the guest from touching the GIC system registers if
@@ -326,3 +277,13 @@ u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void)
{
return read_gicreg(ICH_VTR_EL2);
}
+
+u64 __hyp_text __vgic_v3_read_vmcr(void)
+{
+ return read_gicreg(ICH_VMCR_EL2);
+}
+
+void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
+{
+ write_gicreg(vmcr, ICH_VMCR_EL2);
+}
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 69ccce308458..4b43e7f3b158 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -230,13 +230,44 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
return;
overflow = !!kvm_pmu_overflow_status(vcpu);
- if (pmu->irq_level != overflow) {
- pmu->irq_level = overflow;
- kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- pmu->irq_num, overflow);
+ if (pmu->irq_level == overflow)
+ return;
+
+ pmu->irq_level = overflow;
+
+ if (likely(irqchip_in_kernel(vcpu->kvm))) {
+ int ret;
+ ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ pmu->irq_num, overflow);
+ WARN_ON(ret);
}
}
+bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
+ bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
+
+ if (likely(irqchip_in_kernel(vcpu->kvm)))
+ return false;
+
+ return pmu->irq_level != run_level;
+}
+
+/*
+ * Reflect the PMU overflow interrupt output level into the kvm_run structure
+ */
+void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sync_regs *regs = &vcpu->run->s.regs;
+
+ /* Populate the timer bitmap for user space */
+ regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
+ if (vcpu->arch.pmu.irq_level)
+ regs->device_irq_level |= KVM_ARM_DEV_PMU;
+}
+
/**
* kvm_pmu_flush_hwstate - flush pmu state to cpu
* @vcpu: The vcpu pointer
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 276139a24e6f..25fd1b942c11 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -24,7 +24,12 @@
/*
* Initialization rules: there are multiple stages to the vgic
- * initialization, both for the distributor and the CPU interfaces.
+ * initialization, both for the distributor and the CPU interfaces. The basic
+ * idea is that even though the VGIC is not functional or not requested from
+ * user space, the critical path of the run loop can still call VGIC functions
+ * that just won't do anything, without them having to check additional
+ * initialization flags to ensure they don't look at uninitialized data
+ * structures.
*
* Distributor:
*
@@ -39,23 +44,67 @@
*
* CPU Interface:
*
- * - kvm_vgic_cpu_early_init(): initialization of static data that
+ * - kvm_vgic_vcpu_early_init(): initialization of static data that
* doesn't depend on any sizing information or emulation type. No
* allocation is allowed there.
*/
/* EARLY INIT */
-/*
- * Those 2 functions should not be needed anymore but they
- * still are called from arm.c
+/**
+ * kvm_vgic_early_init() - Initialize static VGIC VCPU data structures
+ * @kvm: The VM whose VGIC districutor should be initialized
+ *
+ * Only do initialization of static structures that don't require any
+ * allocation or sizing information from userspace. vgic_init() called
+ * kvm_vgic_dist_init() which takes care of the rest.
*/
void kvm_vgic_early_init(struct kvm *kvm)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ INIT_LIST_HEAD(&dist->lpi_list_head);
+ spin_lock_init(&dist->lpi_list_lock);
}
+/**
+ * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures
+ * @vcpu: The VCPU whose VGIC data structures whould be initialized
+ *
+ * Only do initialization, but do not actually enable the VGIC CPU interface
+ * yet.
+ */
void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int i;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ spin_lock_init(&vgic_cpu->ap_list_lock);
+
+ /*
+ * Enable and configure all SGIs to be edge-triggered and
+ * configure all PPIs as level-triggered.
+ */
+ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+ struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+
+ INIT_LIST_HEAD(&irq->ap_list);
+ spin_lock_init(&irq->irq_lock);
+ irq->intid = i;
+ irq->vcpu = NULL;
+ irq->target_vcpu = vcpu;
+ irq->targets = 1U << vcpu->vcpu_id;
+ kref_init(&irq->refcount);
+ if (vgic_irq_is_sgi(i)) {
+ /* SGIs */
+ irq->enabled = 1;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ }
}
/* CREATION */
@@ -148,9 +197,6 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
int i;
- INIT_LIST_HEAD(&dist->lpi_list_head);
- spin_lock_init(&dist->lpi_list_lock);
-
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
if (!dist->spis)
return -ENOMEM;
@@ -181,41 +227,11 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
}
/**
- * kvm_vgic_vcpu_init: initialize the vcpu data structures and
- * enable the VCPU interface
- * @vcpu: the VCPU which's VGIC should be initialized
+ * kvm_vgic_vcpu_init() - Enable the VCPU interface
+ * @vcpu: the VCPU which's VGIC should be enabled
*/
static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int i;
-
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- spin_lock_init(&vgic_cpu->ap_list_lock);
-
- /*
- * Enable and configure all SGIs to be edge-triggered and
- * configure all PPIs as level-triggered.
- */
- for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
-
- INIT_LIST_HEAD(&irq->ap_list);
- spin_lock_init(&irq->irq_lock);
- irq->intid = i;
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- irq->targets = 1U << vcpu->vcpu_id;
- kref_init(&irq->refcount);
- if (vgic_irq_is_sgi(i)) {
- /* SGIs */
- irq->enabled = 1;
- irq->config = VGIC_CONFIG_EDGE;
- } else {
- /* PPIs */
- irq->config = VGIC_CONFIG_LEVEL;
- }
- }
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_enable(vcpu);
else
@@ -262,6 +278,18 @@ int vgic_init(struct kvm *kvm)
vgic_debug_init(kvm);
dist->initialized = true;
+
+ /*
+ * If we're initializing GICv2 on-demand when first running the VCPU
+ * then we need to load the VGIC state onto the CPU. We can detect
+ * this easily by checking if we are in between vcpu_load and vcpu_put
+ * when we just initialized the VGIC.
+ */
+ preempt_disable();
+ vcpu = kvm_arm_get_running_vcpu();
+ if (vcpu)
+ kvm_vgic_load(vcpu);
+ preempt_enable();
out:
return ret;
}
@@ -392,6 +420,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
}
/**
+ * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
+ *
+ * For a specific CPU, initialize the GIC VE hardware.
+ */
+void kvm_vgic_init_cpu_hardware(void)
+{
+ BUG_ON(preemptible());
+
+ /*
+ * We want to make sure the list registers start out clear so that we
+ * only have the program the used registers.
+ */
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_init_lrs();
+ else
+ kvm_call_hyp(__vgic_v3_init_lrs);
+}
+
+/**
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
* according to the host GIC model. Accordingly calls either
* vgic_v2/v3_probe which registers the KVM_DEVICE that can be
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 571b64a01c50..8d1da1af4b09 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
return ret;
}
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u32 reg = 0;
-
- mutex_lock(&its->cmd_lock);
- if (its->creadr == its->cwriter)
- reg |= GITS_CTLR_QUIESCENT;
- if (its->enabled)
- reg |= GITS_CTLR_ENABLE;
- mutex_unlock(&its->cmd_lock);
-
- return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
struct vgic_its *its,
gpa_t addr, unsigned int len)
@@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
#define ITS_CMD_SIZE 32
#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
{
gpa_t cbaser;
u64 cmd_buf[4];
- u32 reg;
- if (!its)
- return;
-
- mutex_lock(&its->cmd_lock);
-
- reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
- reg = ITS_CMD_OFFSET(reg);
- if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
- mutex_unlock(&its->cmd_lock);
+ /* Commands are only processed when the ITS is enabled. */
+ if (!its->enabled)
return;
- }
- its->cwriter = reg;
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
@@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
its->creadr = 0;
}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u64 reg;
+
+ if (!its)
+ return;
+
+ mutex_lock(&its->cmd_lock);
+
+ reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+ reg = ITS_CMD_OFFSET(reg);
+ if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+ mutex_unlock(&its->cmd_lock);
+ return;
+ }
+ its->cwriter = reg;
+
+ vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
@@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
*regptr = reg;
}
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+ struct vgic_its *its,
+ gpa_t addr, unsigned int len)
+{
+ u32 reg = 0;
+
+ mutex_lock(&its->cmd_lock);
+ if (its->creadr == its->cwriter)
+ reg |= GITS_CTLR_QUIESCENT;
+ if (its->enabled)
+ reg |= GITS_CTLR_ENABLE;
+ mutex_unlock(&its->cmd_lock);
+
+ return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ mutex_lock(&its->cmd_lock);
+
+ its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+ /*
+ * Try to process any pending commands. This function bails out early
+ * if the ITS is disabled or no commands have been queued.
+ */
+ vgic_its_process_commands(kvm, its);
+
+ mutex_unlock(&its->cmd_lock);
+}
+
#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
{ \
.reg_offset = off, \
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index a3ad7ff95c9b..0a4283ed9aa7 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
val = vmcr.ctlr;
break;
case GIC_CPU_PRIMASK:
- val = vmcr.pmr;
+ /*
+ * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+ * the PMR field as GICH_VMCR.VMPriMask rather than
+ * GICC_PMR.Priority, so we expose the upper five bits of
+ * priority mask to userspace using the lower bits in the
+ * unsigned long.
+ */
+ val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
+ GICV_PMR_PRIORITY_SHIFT;
break;
case GIC_CPU_BINPOINT:
val = vmcr.bpr;
@@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
vmcr.ctlr = val;
break;
case GIC_CPU_PRIMASK:
- vmcr.pmr = val;
+ /*
+ * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+ * the PMR field as GICH_VMCR.VMPriMask rather than
+ * GICC_PMR.Priority, so we expose the upper five bits of
+ * priority mask to userspace using the lower bits in the
+ * unsigned long.
+ */
+ vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
+ GICV_PMR_PRIORITY_MASK;
break;
case GIC_CPU_BINPOINT:
vmcr.bpr = val;
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 3654b4c835ef..2a5db1352722 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool new_active_state)
{
+ struct kvm_vcpu *requester_vcpu;
spin_lock(&irq->irq_lock);
+
+ /*
+ * The vcpu parameter here can mean multiple things depending on how
+ * this function is called; when handling a trap from the kernel it
+ * depends on the GIC version, and these functions are also called as
+ * part of save/restore from userspace.
+ *
+ * Therefore, we have to figure out the requester in a reliable way.
+ *
+ * When accessing VGIC state from user space, the requester_vcpu is
+ * NULL, which is fine, because we guarantee that no VCPUs are running
+ * when accessing VGIC state from user space so irq->vcpu->cpu is
+ * always -1.
+ */
+ requester_vcpu = kvm_arm_get_running_vcpu();
+
/*
* If this virtual IRQ was written into a list register, we
* have to make sure the CPU that runs the VCPU thread has
- * synced back LR state to the struct vgic_irq. We can only
- * know this for sure, when either this irq is not assigned to
- * anyone's AP list anymore, or the VCPU thread is not
- * running on any CPUs.
+ * synced back the LR state to the struct vgic_irq.
*
- * In the opposite case, we know the VCPU thread may be on its
- * way back from the guest and still has to sync back this
- * IRQ, so we release and re-acquire the spin_lock to let the
- * other thread sync back the IRQ.
+ * As long as the conditions below are true, we know the VCPU thread
+ * may be on its way back from the guest (we kicked the VCPU thread in
+ * vgic_change_active_prepare) and still has to sync back this IRQ,
+ * so we release and re-acquire the spin_lock to let the other thread
+ * sync back the IRQ.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+ irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index b834ecdf3225..a65757aab6d3 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -22,52 +22,19 @@
#include "vgic.h"
-/*
- * Call this function to convert a u64 value to an unsigned long * bitmask
- * in a way that works on both 32-bit and 64-bit LE and BE platforms.
- *
- * Warning: Calling this function may modify *val.
- */
-static unsigned long *u64_to_bitmask(u64 *val)
+static inline void vgic_v2_write_lr(int lr, u32 val)
{
-#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
- *val = (*val >> 32) | (*val << 32);
-#endif
- return (unsigned long *)val;
+ void __iomem *base = kvm_vgic_global_state.vctrl_base;
+
+ writel_relaxed(val, base + GICH_LR0 + (lr * 4));
}
-void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
+void vgic_v2_init_lrs(void)
{
- struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+ int i;
- if (cpuif->vgic_misr & GICH_MISR_EOI) {
- u64 eisr = cpuif->vgic_eisr;
- unsigned long *eisr_bmap = u64_to_bitmask(&eisr);
- int lr;
-
- for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) {
- u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE);
-
- /* Only SPIs require notification */
- if (vgic_valid_spi(vcpu->kvm, intid))
- kvm_notify_acked_irq(vcpu->kvm, 0,
- intid - VGIC_NR_PRIVATE_IRQS);
- }
- }
-
- /* check and disable underflow maintenance IRQ */
- cpuif->vgic_hcr &= ~GICH_HCR_UIE;
-
- /*
- * In the next iterations of the vcpu loop, if we sync the
- * vgic state after flushing it, but before entering the guest
- * (this happens for pending signals and vmid rollovers), then
- * make sure we don't pick up any old maintenance interrupts
- * here.
- */
- cpuif->vgic_eisr = 0;
+ for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
+ vgic_v2_write_lr(i, 0);
}
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
@@ -77,6 +44,12 @@ void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
cpuif->vgic_hcr |= GICH_HCR_UIE;
}
+static bool lr_signals_eoi_mi(u32 lr_val)
+{
+ return !(lr_val & GICH_LR_STATE) && (lr_val & GICH_LR_EOI) &&
+ !(lr_val & GICH_LR_HW);
+}
+
/*
* transfer the content of the LRs back into the corresponding ap_list:
* - active bit is transferred as is
@@ -86,14 +59,22 @@ void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
*/
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
{
- struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
int lr;
- for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
+ cpuif->vgic_hcr &= ~GICH_HCR_UIE;
+
+ for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
u32 intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
+ /* Notify fds when the guest EOI'ed a level-triggered SPI */
+ if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
+ kvm_notify_acked_irq(vcpu->kvm, 0,
+ intid - VGIC_NR_PRIVATE_IRQS);
+
irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
spin_lock(&irq->irq_lock);
@@ -126,6 +107,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
}
+
+ vgic_cpu->used_lrs = 0;
}
/*
@@ -184,6 +167,7 @@ void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
+ struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
u32 vmcr;
vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
@@ -191,15 +175,18 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_MASK;
vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
GICH_VMCR_BINPOINT_MASK;
- vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
- GICH_VMCR_PRIMASK_MASK;
+ vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
+ GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
- vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+ cpu_if->vgic_vmcr = vmcr;
}
void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
- u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+ struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+ u32 vmcr;
+
+ vmcr = cpu_if->vgic_vmcr;
vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
GICH_VMCR_CTRL_SHIFT;
@@ -207,8 +194,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_SHIFT;
vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
GICH_VMCR_BINPOINT_SHIFT;
- vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
- GICH_VMCR_PRIMASK_SHIFT;
+ vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
+ GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
}
void vgic_v2_enable(struct kvm_vcpu *vcpu)
@@ -375,3 +362,19 @@ out:
return ret;
}
+
+void vgic_v2_load(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+ struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+
+ writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR);
+}
+
+void vgic_v2_put(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+ struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+
+ cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR);
+}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index edc6ee2dc852..df1503650300 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -21,59 +21,29 @@
#include "vgic.h"
-void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
+void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
- u32 model = vcpu->kvm->arch.vgic.vgic_model;
-
- if (cpuif->vgic_misr & ICH_MISR_EOI) {
- unsigned long eisr_bmap = cpuif->vgic_eisr;
- int lr;
-
- for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) {
- u32 intid;
- u64 val = cpuif->vgic_lr[lr];
-
- if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
- intid = val & ICH_LR_VIRTUAL_ID_MASK;
- else
- intid = val & GICH_LR_VIRTUALID;
-
- WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE);
-
- /* Only SPIs require notification */
- if (vgic_valid_spi(vcpu->kvm, intid))
- kvm_notify_acked_irq(vcpu->kvm, 0,
- intid - VGIC_NR_PRIVATE_IRQS);
- }
-
- /*
- * In the next iterations of the vcpu loop, if we sync
- * the vgic state after flushing it, but before
- * entering the guest (this happens for pending
- * signals and vmid rollovers), then make sure we
- * don't pick up any old maintenance interrupts here.
- */
- cpuif->vgic_eisr = 0;
- }
- cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+ cpuif->vgic_hcr |= ICH_HCR_UIE;
}
-void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
+static bool lr_signals_eoi_mi(u64 lr_val)
{
- struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
-
- cpuif->vgic_hcr |= ICH_HCR_UIE;
+ return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) &&
+ !(lr_val & ICH_LR_HW);
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
{
- struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 model = vcpu->kvm->arch.vgic.vgic_model;
int lr;
- for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
+ cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+
+ for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
u32 intid;
struct vgic_irq *irq;
@@ -82,6 +52,12 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
intid = val & ICH_LR_VIRTUAL_ID_MASK;
else
intid = val & GICH_LR_VIRTUALID;
+
+ /* Notify fds when the guest EOI'ed a level-triggered IRQ */
+ if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
+ kvm_notify_acked_irq(vcpu->kvm, 0,
+ intid - VGIC_NR_PRIVATE_IRQS);
+
irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
if (!irq) /* An LPI could have been unmapped. */
continue;
@@ -117,6 +93,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
}
+
+ vgic_cpu->used_lrs = 0;
}
/* Requires the irq to be locked already */
@@ -173,6 +151,7 @@ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u32 vmcr;
/*
@@ -188,12 +167,15 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK;
vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK;
- vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+ cpu_if->vgic_vmcr = vmcr;
}
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
- u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u32 vmcr;
+
+ vmcr = cpu_if->vgic_vmcr;
/*
* Ignore the FIQen bit, because GIC emulation always implies
@@ -229,10 +211,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
* way, so we force SRE to 1 to demonstrate this to the guest.
+ * Also, we don't support any form of IRQ/FIQ bypass.
* This goes with the spec allowing the value to be RAO/WI.
*/
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+ vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+ ICC_SRE_EL1_DFB |
+ ICC_SRE_EL1_SRE);
vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
} else {
vgic_v3->vgic_sre = 0;
@@ -383,3 +368,24 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
return 0;
}
+
+void vgic_v3_load(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ /*
+ * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
+ * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
+ * VMCR_EL2 save/restore in the world switch.
+ */
+ if (likely(cpu_if->vgic_sre))
+ kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
+}
+
+void vgic_v3_put(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ if (likely(cpu_if->vgic_sre))
+ cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
+}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 654dfd40e449..4346bc7d08dc 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -29,7 +29,9 @@
#define DEBUG_SPINLOCK_BUG_ON(p)
#endif
-struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,};
+struct vgic_global kvm_vgic_global_state __ro_after_init = {
+ .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
+};
/*
* Locking order is always:
@@ -527,14 +529,6 @@ retry:
spin_unlock(&vgic_cpu->ap_list_lock);
}
-static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_process_maintenance(vcpu);
- else
- vgic_v3_process_maintenance(vcpu);
-}
-
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
if (kvm_vgic_global_state.type == VGIC_V2)
@@ -601,10 +595,8 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
- if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) {
- vgic_set_underflow(vcpu);
+ if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr)
vgic_sort_ap_list(vcpu);
- }
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
@@ -623,8 +615,12 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
next:
spin_unlock(&irq->irq_lock);
- if (count == kvm_vgic_global_state.nr_lr)
+ if (count == kvm_vgic_global_state.nr_lr) {
+ if (!list_is_last(&irq->ap_list,
+ &vgic_cpu->ap_list_head))
+ vgic_set_underflow(vcpu);
break;
+ }
}
vcpu->arch.vgic_cpu.used_lrs = count;
@@ -637,18 +633,30 @@ next:
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ /* An empty ap_list_head implies used_lrs == 0 */
+ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
- vgic_process_maintenance_interrupt(vcpu);
- vgic_fold_lr_state(vcpu);
+ if (vgic_cpu->used_lrs)
+ vgic_fold_lr_state(vcpu);
vgic_prune_ap_list(vcpu);
}
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
+ /*
+ * If there are no virtual interrupts active or pending for this
+ * VCPU, then there is no work to do and we can bail out without
+ * taking any lock. There is a potential race with someone injecting
+ * interrupts to the VCPU, but it is a benign race as the VCPU will
+ * either observe the new interrupt before or after doing this check,
+ * and introducing additional synchronization mechanism doesn't change
+ * this.
+ */
+ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
@@ -656,6 +664,28 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
}
+void kvm_vgic_load(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(!vgic_initialized(vcpu->kvm)))
+ return;
+
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_load(vcpu);
+ else
+ vgic_v3_load(vcpu);
+}
+
+void kvm_vgic_put(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(!vgic_initialized(vcpu->kvm)))
+ return;
+
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_put(vcpu);
+ else
+ vgic_v3_put(vcpu);
+}
+
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index db28f7cadab2..799fd651b260 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
return irq->pending_latch || irq->line_level;
}
+/*
+ * This struct provides an intermediate representation of the fields contained
+ * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
+ * state to userspace can generate either GICv2 or GICv3 CPU interface
+ * registers regardless of the hardware backed GIC used.
+ */
struct vgic_vmcr {
u32 ctlr;
u32 abpr;
u32 bpr;
- u32 pmr;
+ u32 pmr; /* Priority mask field in the GICC_PMR and
+ * ICC_PMR_EL1 priority field format */
/* Below member variable are valid only for GICv3 */
u32 grpen0;
u32 grpen1;
@@ -112,7 +119,6 @@ void vgic_kick_vcpus(struct kvm *kvm);
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
phys_addr_t addr, phys_addr_t alignment);
-void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
@@ -130,6 +136,10 @@ int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
enum vgic_type);
+void vgic_v2_init_lrs(void);
+void vgic_v2_load(struct kvm_vcpu *vcpu);
+void vgic_v2_put(struct kvm_vcpu *vcpu);
+
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
if (irq->intid < VGIC_MIN_LPI)
@@ -138,7 +148,6 @@ static inline void vgic_get_irq_kref(struct vgic_irq *irq)
kref_get(&irq->refcount);
}
-void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu);
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
@@ -150,6 +159,9 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+void vgic_v3_load(struct kvm_vcpu *vcpu);
+void vgic_v3_put(struct kvm_vcpu *vcpu);
+
int vgic_register_its_iodevs(struct kvm *kvm);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786dd9522..a8d540398bbd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
synchronize_srcu(&kvm->irq_srcu);
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- kvm->buses[bus_idx]->ioeventfd_count--;
+ if (kvm->buses[bus_idx])
+ kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
break;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3bcc9990adf7..31e40c9e81df 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int r = -EINVAL;
struct kvm_kernel_irq_routing_entry *ei;
+ int r;
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
@@ -153,26 +153,30 @@ static int setup_routing_entry(struct kvm *kvm,
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
- return r;
+ return -EINVAL;
e->gsi = ue->gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
- goto out;
+ return r;
if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
hlist_add_head(&e->link, &rt->map[e->gsi]);
- r = 0;
-out:
- return r;
+
+ return 0;
}
void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
{
}
+bool __weak kvm_arch_can_set_irq_routing(struct kvm *kvm)
+{
+ return true;
+}
+
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue,
unsigned nr,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a17d78759727..b3d151ee2a67 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,6 +165,24 @@ void vcpu_put(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(vcpu_put);
+/* TODO: merge with kvm_arch_vcpu_should_kick */
+static bool kvm_request_needs_ipi(struct kvm_vcpu *vcpu, unsigned req)
+{
+ int mode = kvm_vcpu_exiting_guest_mode(vcpu);
+
+ /*
+ * We need to wait for the VCPU to reenable interrupts and get out of
+ * READING_SHADOW_PAGE_TABLES mode.
+ */
+ if (req & KVM_REQUEST_WAIT)
+ return mode != OUTSIDE_GUEST_MODE;
+
+ /*
+ * Need to kick a running VCPU, but otherwise there is nothing to do.
+ */
+ return mode == IN_GUEST_MODE;
+}
+
static void ack_flush(void *_completed)
{
}
@@ -174,6 +192,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
int i, cpu, me;
cpumask_var_t cpus;
bool called = true;
+ bool wait = req & KVM_REQUEST_WAIT;
struct kvm_vcpu *vcpu;
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
@@ -183,17 +202,17 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
kvm_make_request(req, vcpu);
cpu = vcpu->cpu;
- /* Set ->requests bit before we read ->mode. */
- smp_mb__after_atomic();
+ if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+ continue;
if (cpus != NULL && cpu != -1 && cpu != me &&
- kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
+ kvm_request_needs_ipi(vcpu, req))
cpumask_set_cpu(cpu, cpus);
}
if (unlikely(cpus == NULL))
- smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
+ smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait);
else if (!cpumask_empty(cpus))
- smp_call_function_many(cpus, ack_flush, NULL, 1);
+ smp_call_function_many(cpus, ack_flush, NULL, wait);
else
called = false;
put_cpu();
@@ -504,7 +523,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
int i;
struct kvm_memslots *slots;
- slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+ slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots)
return NULL;
@@ -689,18 +708,6 @@ out_err_no_disable:
return ERR_PTR(r);
}
-/*
- * Avoid using vmalloc for a small buffer.
- * Should not be used when the size is statically known.
- */
-void *kvm_kvzalloc(unsigned long size)
-{
- if (size > PAGE_SIZE)
- return vzalloc(size);
- else
- return kzalloc(size, GFP_KERNEL);
-}
-
static void kvm_destroy_devices(struct kvm *kvm)
{
struct kvm_device *dev, *tmp;
@@ -727,8 +734,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- for (i = 0; i < KVM_NR_BUSES; i++)
- kvm_io_bus_destroy(kvm->buses[i]);
+ for (i = 0; i < KVM_NR_BUSES; i++) {
+ if (kvm->buses[i])
+ kvm_io_bus_destroy(kvm->buses[i]);
+ kvm->buses[i] = NULL;
+ }
kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -779,7 +789,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
- memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
+ memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL);
if (!memslot->dirty_bitmap)
return -ENOMEM;
@@ -1005,7 +1015,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
- slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+ slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots)
goto out_free;
memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
@@ -1016,8 +1026,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
old_memslots = install_new_memslots(kvm, as_id, slots);
- /* slot was deleted or moved, clear iommu mapping */
- kvm_iommu_unmap_pages(kvm, &old);
/* From this point no new shadow pages pointing to a deleted,
* or moved, memslot will be created.
*
@@ -1052,21 +1060,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
kvm_free_memslot(kvm, &old, &new);
kvfree(old_memslots);
-
- /*
- * IOMMU mapping: New slots need to be mapped. Old slots need to be
- * un-mapped and re-mapped if their base changes. Since base change
- * unmapping is handled above with slot deletion, mapping alone is
- * needed here. Anything else the iommu might care about for existing
- * slots (size changes, userspace addr changes and read-only flag
- * changes) is disallowed above, so any other attribute changes getting
- * here can be skipped.
- */
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
- r = kvm_iommu_map_pages(kvm, &new);
- return r;
- }
-
return 0;
out_slots:
@@ -1970,18 +1963,18 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
return 0;
}
-int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len)
{
- struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+ struct kvm_memslots *slots = kvm_memslots(kvm);
return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init);
+EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
-int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
- void *data, int offset, unsigned long len)
+int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, int offset, unsigned long len)
{
- struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+ struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
gpa_t gpa = ghc->gpa + offset;
@@ -1991,7 +1984,7 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_
__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
if (unlikely(!ghc->memslot))
- return kvm_vcpu_write_guest(vcpu, gpa, data, len);
+ return kvm_write_guest(kvm, gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
@@ -2003,19 +1996,19 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached);
+EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached);
-int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
{
- return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len);
+ return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached);
+EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
-int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
{
- struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+ struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
BUG_ON(len > ghc->len);
@@ -2024,7 +2017,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g
__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
if (unlikely(!ghc->memslot))
- return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len);
+ return kvm_read_guest(kvm, ghc->gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
@@ -2035,7 +2028,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached);
+EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
@@ -2209,8 +2202,7 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
-#ifndef CONFIG_S390
-void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
+bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
struct swait_queue_head *wqp;
@@ -2218,11 +2210,14 @@ void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
if (swait_active(wqp)) {
swake_up(wqp);
++vcpu->stat.halt_wakeup;
+ return true;
}
+ return false;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
+#ifndef CONFIG_S390
/*
* Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
*/
@@ -2231,7 +2226,9 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
int me;
int cpu = vcpu->cpu;
- kvm_vcpu_wake_up(vcpu);
+ if (kvm_vcpu_wake_up(vcpu))
+ return;
+
me = get_cpu();
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
if (kvm_arch_vcpu_should_kick(vcpu))
@@ -2363,7 +2360,7 @@ static int kvm_vcpu_fault(struct vm_fault *vmf)
else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
page = virt_to_page(vcpu->arch.pio_data);
#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
#endif
@@ -2932,6 +2929,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
return 1;
+#ifdef CONFIG_KVM_MMIO
+ case KVM_CAP_COALESCED_MMIO:
+ return KVM_COALESCED_MMIO_PAGE_OFFSET;
+#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
@@ -2981,7 +2982,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
break;
}
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
case KVM_REGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;
@@ -3064,6 +3065,8 @@ static long kvm_vm_ioctl(struct file *filp,
if (copy_from_user(&routing, argp, sizeof(routing)))
goto out;
r = -EINVAL;
+ if (!kvm_arch_can_set_irq_routing(kvm))
+ goto out;
if (routing.nr > KVM_MAX_IRQ_ROUTES)
goto out;
if (routing.flags)
@@ -3173,7 +3176,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
kvm = kvm_create_vm(type);
if (IS_ERR(kvm))
return PTR_ERR(kvm);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
r = kvm_coalesced_mmio_init(kvm);
if (r < 0) {
kvm_put_kvm(kvm);
@@ -3226,7 +3229,7 @@ static long kvm_dev_ioctl(struct file *filp,
#ifdef CONFIG_X86
r += PAGE_SIZE; /* pio data page */
#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
r += PAGE_SIZE; /* coalesced mmio ring page */
#endif
break;
@@ -3474,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3491,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3541,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3553,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
+ if (!bus)
+ return -ENOMEM;
+
/* exclude ioeventfd which is limited by maximum fd */
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
@@ -3572,37 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
}
/* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int i, r;
+ int i;
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
- r = -ENOENT;
+ if (!bus)
+ return;
+
for (i = 0; i < bus->dev_count; i++)
if (bus->range[i].dev == dev) {
- r = 0;
break;
}
- if (r)
- return r;
+ if (i == bus->dev_count)
+ return;
new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
sizeof(struct kvm_io_range)), GFP_KERNEL);
- if (!new_bus)
- return -ENOMEM;
+ if (!new_bus) {
+ pr_err("kvm: failed to shrink bus, removing it completely\n");
+ goto broken;
+ }
memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
new_bus->dev_count--;
memcpy(new_bus->range + i, bus->range + i + 1,
(new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+broken:
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
- return r;
+ return;
}
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3615,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ if (!bus)
+ goto out_unlock;
dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
if (dev_idx < 0)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index d32f239eb471..37d9118fd84b 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -20,6 +20,10 @@
#include <linux/vfio.h>
#include "vfio.h"
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+#include <asm/kvm_ppc.h>
+#endif
+
struct kvm_vfio_group {
struct list_head node;
struct vfio_group *vfio_group;
@@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
return ret > 0;
}
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group)
+{
+ int (*fn)(struct vfio_group *);
+ int ret = -EINVAL;
+
+ fn = symbol_get(vfio_external_user_iommu_id);
+ if (!fn)
+ return ret;
+
+ ret = fn(vfio_group);
+
+ symbol_put(vfio_external_user_iommu_id);
+
+ return ret;
+}
+
+static struct iommu_group *kvm_vfio_group_get_iommu_group(
+ struct vfio_group *group)
+{
+ int group_id = kvm_vfio_external_user_iommu_id(group);
+
+ if (group_id < 0)
+ return NULL;
+
+ return iommu_group_get_by_id(group_id);
+}
+
+static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
+ struct vfio_group *vfio_group)
+{
+ struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group);
+
+ if (WARN_ON_ONCE(!grp))
+ return;
+
+ kvm_spapr_tce_release_iommu_group(kvm, grp);
+ iommu_group_put(grp);
+}
+#endif
+
/*
* Groups can use the same or different IOMMU domains. If the same then
* adding a new group may change the coherency of groups we've previously
@@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
mutex_unlock(&kv->lock);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group);
+#endif
kvm_vfio_group_set_kvm(vfio_group, NULL);
kvm_vfio_group_put_external_user(vfio_group);
@@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
kvm_vfio_update_coherency(dev);
return ret;
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: {
+ struct kvm_vfio_spapr_tce param;
+ struct kvm_vfio *kv = dev->private;
+ struct vfio_group *vfio_group;
+ struct kvm_vfio_group *kvg;
+ struct fd f;
+ struct iommu_group *grp;
+
+ if (copy_from_user(&param, (void __user *)arg,
+ sizeof(struct kvm_vfio_spapr_tce)))
+ return -EFAULT;
+
+ f = fdget(param.groupfd);
+ if (!f.file)
+ return -EBADF;
+
+ vfio_group = kvm_vfio_group_get_external_user(f.file);
+ fdput(f);
+
+ if (IS_ERR(vfio_group))
+ return PTR_ERR(vfio_group);
+
+ grp = kvm_vfio_group_get_iommu_group(vfio_group);
+ if (WARN_ON_ONCE(!grp)) {
+ kvm_vfio_group_put_external_user(vfio_group);
+ return -EIO;
+ }
+
+ ret = -ENOENT;
+
+ mutex_lock(&kv->lock);
+
+ list_for_each_entry(kvg, &kv->group_list, node) {
+ if (kvg->vfio_group != vfio_group)
+ continue;
+
+ ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
+ param.tablefd, grp);
+ break;
+ }
+
+ mutex_unlock(&kv->lock);
+
+ iommu_group_put(grp);
+ kvm_vfio_group_put_external_user(vfio_group);
+
+ return ret;
+ }
+#endif /* CONFIG_SPAPR_TCE_IOMMU */
}
return -ENXIO;
@@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
switch (attr->attr) {
case KVM_DEV_VFIO_GROUP_ADD:
case KVM_DEV_VFIO_GROUP_DEL:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
+#endif
return 0;
}
@@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
struct kvm_vfio_group *kvg, *tmp;
list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
+#endif
kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
kvm_vfio_group_put_external_user(kvg->vfio_group);
list_del(&kvg->node);