summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/arm/arch_timer.c45
-rw-r--r--virt/kvm/arm/vgic-v2-emul.c71
-rw-r--r--virt/kvm/arm/vgic-v2.c8
-rw-r--r--virt/kvm/arm/vgic-v3-emul.c246
-rw-r--r--virt/kvm/arm/vgic-v3.c8
-rw-r--r--virt/kvm/arm/vgic.c497
-rw-r--r--virt/kvm/arm/vgic.h37
-rw-r--r--virt/kvm/coalesced_mmio.c7
-rw-r--r--virt/kvm/eventfd.c9
-rw-r--r--virt/kvm/iodev.h70
-rw-r--r--virt/kvm/irqchip.c2
-rw-r--r--virt/kvm/kvm_main.c163
12 files changed, 703 insertions, 460 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 6e54f3542126..98c95f2fcba4 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
+/*
+ * Work function for handling the backup timer that we schedule when a vcpu is
+ * no longer running, but had a timer programmed to fire in the future.
+ */
static void kvm_timer_inject_irq_work(struct work_struct *work)
{
struct kvm_vcpu *vcpu;
vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
vcpu->arch.timer_cpu.armed = false;
- kvm_timer_inject_irq(vcpu);
+
+ /*
+ * If the vcpu is blocked we want to wake it up so that it will see
+ * the timer has expired when entering the guest.
+ */
+ kvm_vcpu_kick(vcpu);
}
static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
@@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
return HRTIMER_NORESTART;
}
+bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ cycle_t cval, now;
+
+ if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
+ !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+ return false;
+
+ cval = timer->cntv_cval;
+ now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+ return cval <= now;
+}
+
/**
* kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
* @vcpu: The vcpu pointer
@@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
* populate the CPU timer again.
*/
timer_disarm(timer);
+
+ /*
+ * If the timer expired while we were not scheduled, now is the time
+ * to inject it.
+ */
+ if (kvm_timer_should_fire(vcpu))
+ kvm_timer_inject_irq(vcpu);
}
/**
@@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
cycle_t cval, now;
u64 ns;
- if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
- !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
- return;
-
- cval = timer->cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
BUG_ON(timer_is_armed(timer));
- if (cval <= now) {
+ if (kvm_timer_should_fire(vcpu)) {
/*
* Timer has already expired while we were not
* looking. Inject the interrupt and carry on.
@@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
return;
}
+ cval = timer->cntv_cval;
+ now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
&timecounter->frac);
timer_arm(timer, ns);
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 19c6210f02cf..13907970d11c 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
vcpu->vcpu_id);
}
+static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
+ vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
+ vcpu->vcpu_id);
+}
+
static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
@@ -303,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
}
-static const struct kvm_mmio_range vgic_dist_ranges[] = {
+static const struct vgic_io_range vgic_dist_ranges[] = {
{
.base = GIC_DIST_CTRL,
.len = 12,
@@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = {
.base = GIC_DIST_ACTIVE_SET,
.len = VGIC_MAX_IRQS / 8,
.bits_per_irq = 1,
- .handle_mmio = handle_mmio_raz_wi,
+ .handle_mmio = handle_mmio_set_active_reg,
},
{
.base = GIC_DIST_ACTIVE_CLEAR,
.len = VGIC_MAX_IRQS / 8,
.bits_per_irq = 1,
- .handle_mmio = handle_mmio_raz_wi,
+ .handle_mmio = handle_mmio_clear_active_reg,
},
{
.base = GIC_DIST_PRI,
@@ -388,24 +404,6 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = {
{}
};
-static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
-
- if (!is_in_range(mmio->phys_addr, mmio->len, base,
- KVM_VGIC_V2_DIST_SIZE))
- return false;
-
- /* GICv2 does not support accesses wider than 32 bits */
- if (mmio->len > 4) {
- kvm_inject_dabt(vcpu, mmio->phys_addr);
- return true;
- }
-
- return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
-}
-
static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
{
struct kvm *kvm = vcpu->kvm;
@@ -490,6 +488,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
static int vgic_v2_map_resources(struct kvm *kvm,
const struct vgic_params *params)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
int ret = 0;
if (!irqchip_in_kernel(kvm))
@@ -500,13 +499,17 @@ static int vgic_v2_map_resources(struct kvm *kvm,
if (vgic_ready(kvm))
goto out;
- if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+ if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+ IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
kvm_err("Need to set vgic cpu and dist addresses first\n");
ret = -ENXIO;
goto out;
}
+ vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+ KVM_VGIC_V2_DIST_SIZE,
+ vgic_dist_ranges, -1, &dist->dist_iodev);
+
/*
* Initialize the vgic if this hasn't already been done on demand by
* accessing the vgic state from userspace.
@@ -514,18 +517,23 @@ static int vgic_v2_map_resources(struct kvm *kvm,
ret = vgic_init(kvm);
if (ret) {
kvm_err("Unable to allocate maps\n");
- goto out;
+ goto out_unregister;
}
- ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+ ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
true);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
- goto out;
+ goto out_unregister;
}
- kvm->arch.vgic.ready = true;
+ dist->ready = true;
+ goto out;
+
+out_unregister:
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+
out:
if (ret)
kvm_vgic_destroy(kvm);
@@ -554,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
dist->vm_ops.init_model = vgic_v2_init_model;
@@ -631,7 +638,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
* CPU Interface Register accesses - these are not accessed by the VM, but by
* user space for saving and restoring VGIC state.
*/
-static const struct kvm_mmio_range vgic_cpu_ranges[] = {
+static const struct vgic_io_range vgic_cpu_ranges[] = {
{
.base = GIC_CPU_CTRL,
.len = 12,
@@ -658,12 +665,13 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
struct kvm_device_attr *attr,
u32 *reg, bool is_write)
{
- const struct kvm_mmio_range *r = NULL, *ranges;
+ const struct vgic_io_range *r = NULL, *ranges;
phys_addr_t offset;
int ret, cpuid, c;
struct kvm_vcpu *vcpu, *tmp_vcpu;
struct vgic_dist *vgic;
struct kvm_exit_mmio mmio;
+ u32 data;
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
@@ -685,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
mmio.len = 4;
mmio.is_write = is_write;
+ mmio.data = &data;
if (is_write)
mmio_data_write(&mmio, ~0, *reg);
switch (attr->group) {
@@ -699,7 +708,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
default:
BUG();
}
- r = vgic_find_range(ranges, &mmio, offset);
+ r = vgic_find_range(ranges, 4, offset);
if (unlikely(!r || !r->handle_mmio)) {
ret = -ENXIO;
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index a0a7b5d1a070..f9b9c7c51372 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
+ else
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
}
static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
}
+static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
+}
+
static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
.sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
.get_elrsr = vgic_v2_get_elrsr,
.get_eisr = vgic_v2_get_eisr,
+ .clear_eisr = vgic_v2_clear_eisr,
.get_interrupt_status = vgic_v2_get_interrupt_status,
.enable_underflow = vgic_v2_enable_underflow,
.disable_underflow = vgic_v2_disable_underflow,
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
index b3f154631515..e9c3a7a83833 100644
--- a/virt/kvm/arm/vgic-v3-emul.c
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
return false;
}
-static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
+static const struct vgic_io_range vgic_v3_dist_ranges[] = {
{
.base = GICD_CTLR,
.len = 0x04,
@@ -502,6 +502,43 @@ static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
{},
};
+static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ /* since we don't support LPIs, this register is zero for now */
+ vgic_reg_access(mmio, NULL, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+ return false;
+}
+
+static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 reg;
+ u64 mpidr;
+ struct kvm_vcpu *redist_vcpu = mmio->private;
+ int target_vcpu_id = redist_vcpu->vcpu_id;
+
+ /* the upper 32 bits contain the affinity value */
+ if ((offset & ~3) == 4) {
+ mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
+ reg = compress_mpidr(mpidr);
+
+ vgic_reg_access(mmio, &reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ return false;
+ }
+
+ reg = redist_vcpu->vcpu_id << 8;
+ if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+ reg |= GICR_TYPER_LAST;
+ vgic_reg_access(mmio, &reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ return false;
+}
+
static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
@@ -570,186 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu,
return vgic_handle_cfg_reg(reg, mmio, offset);
}
-static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
+#define SGI_base(x) ((x) + SZ_64K)
+
+static const struct vgic_io_range vgic_redist_ranges[] = {
+ {
+ .base = GICR_CTLR,
+ .len = 0x04,
+ .bits_per_irq = 0,
+ .handle_mmio = handle_mmio_ctlr_redist,
+ },
+ {
+ .base = GICR_TYPER,
+ .len = 0x08,
+ .bits_per_irq = 0,
+ .handle_mmio = handle_mmio_typer_redist,
+ },
+ {
+ .base = GICR_IIDR,
+ .len = 0x04,
+ .bits_per_irq = 0,
+ .handle_mmio = handle_mmio_iidr,
+ },
+ {
+ .base = GICR_WAKER,
+ .len = 0x04,
+ .bits_per_irq = 0,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
{
- .base = GICR_IGROUPR0,
+ .base = GICR_IDREGS,
+ .len = 0x30,
+ .bits_per_irq = 0,
+ .handle_mmio = handle_mmio_idregs,
+ },
+ {
+ .base = SGI_base(GICR_IGROUPR0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_rao_wi,
},
{
- .base = GICR_ISENABLER0,
+ .base = SGI_base(GICR_ISENABLER0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_set_enable_reg_redist,
},
{
- .base = GICR_ICENABLER0,
+ .base = SGI_base(GICR_ICENABLER0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_clear_enable_reg_redist,
},
{
- .base = GICR_ISPENDR0,
+ .base = SGI_base(GICR_ISPENDR0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_set_pending_reg_redist,
},
{
- .base = GICR_ICPENDR0,
+ .base = SGI_base(GICR_ICPENDR0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_clear_pending_reg_redist,
},
{
- .base = GICR_ISACTIVER0,
+ .base = SGI_base(GICR_ISACTIVER0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
- .base = GICR_ICACTIVER0,
+ .base = SGI_base(GICR_ICACTIVER0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
- .base = GICR_IPRIORITYR0,
+ .base = SGI_base(GICR_IPRIORITYR0),
.len = 0x20,
.bits_per_irq = 8,
.handle_mmio = handle_mmio_priority_reg_redist,
},
{
- .base = GICR_ICFGR0,
+ .base = SGI_base(GICR_ICFGR0),
.len = 0x08,
.bits_per_irq = 2,
.handle_mmio = handle_mmio_cfg_reg_redist,
},
{
- .base = GICR_IGRPMODR0,
+ .base = SGI_base(GICR_IGRPMODR0),
.len = 0x04,
.bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
- .base = GICR_NSACR,
+ .base = SGI_base(GICR_NSACR),
.len = 0x04,
.handle_mmio = handle_mmio_raz_wi,
},
{},
};
-static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- /* since we don't support LPIs, this register is zero for now */
- vgic_reg_access(mmio, NULL, offset,
- ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
- return false;
-}
-
-static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 reg;
- u64 mpidr;
- struct kvm_vcpu *redist_vcpu = mmio->private;
- int target_vcpu_id = redist_vcpu->vcpu_id;
-
- /* the upper 32 bits contain the affinity value */
- if ((offset & ~3) == 4) {
- mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
- reg = compress_mpidr(mpidr);
-
- vgic_reg_access(mmio, &reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- return false;
- }
-
- reg = redist_vcpu->vcpu_id << 8;
- if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
- reg |= GICR_TYPER_LAST;
- vgic_reg_access(mmio, &reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- return false;
-}
-
-static const struct kvm_mmio_range vgic_redist_ranges[] = {
- {
- .base = GICR_CTLR,
- .len = 0x04,
- .bits_per_irq = 0,
- .handle_mmio = handle_mmio_ctlr_redist,
- },
- {
- .base = GICR_TYPER,
- .len = 0x08,
- .bits_per_irq = 0,
- .handle_mmio = handle_mmio_typer_redist,
- },
- {
- .base = GICR_IIDR,
- .len = 0x04,
- .bits_per_irq = 0,
- .handle_mmio = handle_mmio_iidr,
- },
- {
- .base = GICR_WAKER,
- .len = 0x04,
- .bits_per_irq = 0,
- .handle_mmio = handle_mmio_raz_wi,
- },
- {
- .base = GICR_IDREGS,
- .len = 0x30,
- .bits_per_irq = 0,
- .handle_mmio = handle_mmio_idregs,
- },
- {},
-};
-
-/*
- * This function splits accesses between the distributor and the two
- * redistributor parts (private/SPI). As each redistributor is accessible
- * from any CPU, we have to determine the affected VCPU by taking the faulting
- * address into account. We then pass this VCPU to the handler function via
- * the private parameter.
- */
-#define SGI_BASE_OFFSET SZ_64K
-static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- unsigned long dbase = dist->vgic_dist_base;
- unsigned long rdbase = dist->vgic_redist_base;
- int nrcpus = atomic_read(&vcpu->kvm->online_vcpus);
- int vcpu_id;
- const struct kvm_mmio_range *mmio_range;
-
- if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) {
- return vgic_handle_mmio_range(vcpu, run, mmio,
- vgic_v3_dist_ranges, dbase);
- }
-
- if (!is_in_range(mmio->phys_addr, mmio->len, rdbase,
- GIC_V3_REDIST_SIZE * nrcpus))
- return false;
-
- vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
- rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
- mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id);
-
- if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) {
- rdbase += SGI_BASE_OFFSET;
- mmio_range = vgic_redist_sgi_ranges;
- } else {
- mmio_range = vgic_redist_ranges;
- }
- return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
-}
-
static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
{
if (vgic_queue_irq(vcpu, 0, irq)) {
@@ -766,6 +724,9 @@ static int vgic_v3_map_resources(struct kvm *kvm,
{
int ret = 0;
struct vgic_dist *dist = &kvm->arch.vgic;
+ gpa_t rdbase = dist->vgic_redist_base;
+ struct vgic_io_device *iodevs = NULL;
+ int i;
if (!irqchip_in_kernel(kvm))
return 0;
@@ -791,7 +752,41 @@ static int vgic_v3_map_resources(struct kvm *kvm,
goto out;
}
- kvm->arch.vgic.ready = true;
+ ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+ GIC_V3_DIST_SIZE, vgic_v3_dist_ranges,
+ -1, &dist->dist_iodev);
+ if (ret)
+ goto out;
+
+ iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL);
+ if (!iodevs) {
+ ret = -ENOMEM;
+ goto out_unregister;
+ }
+
+ for (i = 0; i < dist->nr_cpus; i++) {
+ ret = vgic_register_kvm_io_dev(kvm, rdbase,
+ SZ_128K, vgic_redist_ranges,
+ i, &iodevs[i]);
+ if (ret)
+ goto out_unregister;
+ rdbase += GIC_V3_REDIST_SIZE;
+ }
+
+ dist->redist_iodevs = iodevs;
+ dist->ready = true;
+ goto out;
+
+out_unregister:
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+ if (iodevs) {
+ for (i = 0; i < dist->nr_cpus; i++) {
+ if (iodevs[i].dev.ops)
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ &iodevs[i].dev);
+ }
+ }
+
out:
if (ret)
kvm_vgic_destroy(kvm);
@@ -832,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- dist->vm_ops.handle_mmio = vgic_v3_handle_mmio;
dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
dist->vm_ops.init_model = vgic_v3_init_model;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 3a62d8a9a2c6..dff06021e748 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+ else
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
}
static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
}
+static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
+}
+
static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
@@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
.sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
.get_elrsr = vgic_v3_get_elrsr,
.get_eisr = vgic_v3_get_eisr,
+ .clear_eisr = vgic_v3_clear_eisr,
.get_interrupt_status = vgic_v3_get_interrupt_status,
.enable_underflow = vgic_v3_enable_underflow,
.disable_underflow = vgic_v3_disable_underflow,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0cc6ab6005a0..8d550ff14700 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -31,6 +31,9 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
+#include <trace/events/kvm.h>
+#include <asm/kvm.h>
+#include <kvm/iodev.h>
/*
* How the whole thing works (courtesy of Christoffer Dall):
@@ -263,6 +266,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
}
+static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+}
+
static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -277,6 +287,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
}
+static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+}
+
static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -520,6 +544,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
return false;
}
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id)
+{
+ u32 *reg;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
+ if (mmio->is_write) {
+ vgic_update_state(kvm);
+ return true;
+ }
+
+ return false;
+}
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id)
+{
+ u32 *reg;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
+ if (mmio->is_write) {
+ vgic_update_state(kvm);
+ return true;
+ }
+
+ return false;
+}
+
static u32 vgic_cfg_expand(u16 val)
{
u32 res = 0;
@@ -588,16 +650,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
}
/**
- * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
* @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
*
- * Move any pending IRQs that have already been assigned to LRs back to the
+ * Move any IRQs that have already been assigned to LRs back to the
* emulated distributor state so that the complete emulated state can be read
* from the main emulation structures without investigating the LRs.
- *
- * Note that IRQs in the active state in the LRs get their pending state moved
- * to the distributor but the active state stays in the LRs, because we don't
- * track the active state on the distributor side.
*/
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
{
@@ -613,12 +671,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* 01: pending
* 10: active
* 11: pending and active
- *
- * If the LR holds only an active interrupt (not pending) then
- * just leave it alone.
*/
- if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
- continue;
+ BUG_ON(!(lr.state & LR_STATE_MASK));
+
+ /* Reestablish SGI source for pending and active IRQs */
+ if (lr.irq < VGIC_NR_SGIS)
+ add_sgi_source(vcpu, lr.irq, lr.source);
+
+ /*
+ * If the LR holds an active (10) or a pending and active (11)
+ * interrupt then move the active state to the
+ * distributor tracking bit.
+ */
+ if (lr.state & LR_STATE_ACTIVE) {
+ vgic_irq_set_active(vcpu, lr.irq);
+ lr.state &= ~LR_STATE_ACTIVE;
+ }
/*
* Reestablish the pending state on the distributor and the
@@ -626,21 +694,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* is fine, then we are only setting a few bits that were
* already set.
*/
- vgic_dist_irq_set_pending(vcpu, lr.irq);
- if (lr.irq < VGIC_NR_SGIS)
- add_sgi_source(vcpu, lr.irq, lr.source);
- lr.state &= ~LR_STATE_PENDING;
+ if (lr.state & LR_STATE_PENDING) {
+ vgic_dist_irq_set_pending(vcpu, lr.irq);
+ lr.state &= ~LR_STATE_PENDING;
+ }
+
vgic_set_lr(vcpu, i, lr);
/*
- * If there's no state left on the LR (it could still be
- * active), then the LR does not hold any useful info and can
- * be marked as free for other use.
+ * Mark the LR as free for other use.
*/
- if (!(lr.state & LR_STATE_MASK)) {
- vgic_retire_lr(i, lr.irq, vcpu);
- vgic_irq_clear_queued(vcpu, lr.irq);
- }
+ BUG_ON(lr.state & LR_STATE_MASK);
+ vgic_retire_lr(i, lr.irq, vcpu);
+ vgic_irq_clear_queued(vcpu, lr.irq);
/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
@@ -648,24 +714,21 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
}
const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- const struct kvm_mmio_range *r = ranges;
-
- while (r->len) {
- if (offset >= r->base &&
- (offset + mmio->len) <= (r->base + r->len))
- return r;
- r++;
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+ int len, gpa_t offset)
+{
+ while (ranges->len) {
+ if (offset >= ranges->base &&
+ (offset + len) <= (ranges->base + ranges->len))
+ return ranges;
+ ranges++;
}
return NULL;
}
static bool vgic_validate_access(const struct vgic_dist *dist,
- const struct kvm_mmio_range *range,
+ const struct vgic_io_range *range,
unsigned long offset)
{
int irq;
@@ -693,9 +756,8 @@ static bool vgic_validate_access(const struct vgic_dist *dist,
static bool call_range_handler(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
unsigned long offset,
- const struct kvm_mmio_range *range)
+ const struct vgic_io_range *range)
{
- u32 *data32 = (void *)mmio->data;
struct kvm_exit_mmio mmio32;
bool ret;
@@ -712,91 +774,142 @@ static bool call_range_handler(struct kvm_vcpu *vcpu,
mmio32.private = mmio->private;
mmio32.phys_addr = mmio->phys_addr + 4;
- if (mmio->is_write)
- *(u32 *)mmio32.data = data32[1];
+ mmio32.data = &((u32 *)mmio->data)[1];
ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
- if (!mmio->is_write)
- data32[1] = *(u32 *)mmio32.data;
mmio32.phys_addr = mmio->phys_addr;
- if (mmio->is_write)
- *(u32 *)mmio32.data = data32[0];
+ mmio32.data = &((u32 *)mmio->data)[0];
ret |= range->handle_mmio(vcpu, &mmio32, offset);
- if (!mmio->is_write)
- data32[0] = *(u32 *)mmio32.data;
return ret;
}
/**
- * vgic_handle_mmio_range - handle an in-kernel MMIO access
+ * vgic_handle_mmio_access - handle an in-kernel MMIO access
+ * This is called by the read/write KVM IO device wrappers below.
* @vcpu: pointer to the vcpu performing the access
- * @run: pointer to the kvm_run structure
- * @mmio: pointer to the data describing the access
- * @ranges: array of MMIO ranges in a given region
- * @mmio_base: base address of that region
+ * @this: pointer to the KVM IO device in charge
+ * @addr: guest physical address of the access
+ * @len: size of the access
+ * @val: pointer to the data region
+ * @is_write: read or write access
*
* returns true if the MMIO access could be performed
*/
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio,
- const struct kvm_mmio_range *ranges,
- unsigned long mmio_base)
+static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this, gpa_t addr,
+ int len, void *val, bool is_write)
{
- const struct kvm_mmio_range *range;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_io_device *iodev = container_of(this,
+ struct vgic_io_device, dev);
+ struct kvm_run *run = vcpu->run;
+ const struct vgic_io_range *range;
+ struct kvm_exit_mmio mmio;
bool updated_state;
- unsigned long offset;
+ gpa_t offset;
- offset = mmio->phys_addr - mmio_base;
- range = vgic_find_range(ranges, mmio, offset);
+ offset = addr - iodev->addr;
+ range = vgic_find_range(iodev->reg_ranges, len, offset);
if (unlikely(!range || !range->handle_mmio)) {
- pr_warn("Unhandled access %d %08llx %d\n",
- mmio->is_write, mmio->phys_addr, mmio->len);
- return false;
+ pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
+ return -ENXIO;
}
- spin_lock(&vcpu->kvm->arch.vgic.lock);
+ mmio.phys_addr = addr;
+ mmio.len = len;
+ mmio.is_write = is_write;
+ mmio.data = val;
+ mmio.private = iodev->redist_vcpu;
+
+ spin_lock(&dist->lock);
offset -= range->base;
if (vgic_validate_access(dist, range, offset)) {
- updated_state = call_range_handler(vcpu, mmio, offset, range);
+ updated_state = call_range_handler(vcpu, &mmio, offset, range);
} else {
- if (!mmio->is_write)
- memset(mmio->data, 0, mmio->len);
+ if (!is_write)
+ memset(val, 0, len);
updated_state = false;
}
- spin_unlock(&vcpu->kvm->arch.vgic.lock);
- kvm_prepare_mmio(run, mmio);
+ spin_unlock(&dist->lock);
+ run->mmio.is_write = is_write;
+ run->mmio.len = len;
+ run->mmio.phys_addr = addr;
+ memcpy(run->mmio.data, val, len);
+
kvm_handle_mmio_return(vcpu, run);
if (updated_state)
vgic_kick_vcpus(vcpu->kvm);
- return true;
+ return 0;
}
+static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, void *val)
+{
+ return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
+}
+
+static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, const void *val)
+{
+ return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
+ true);
+}
+
+struct kvm_io_device_ops vgic_io_ops = {
+ .read = vgic_handle_mmio_read,
+ .write = vgic_handle_mmio_write,
+};
+
/**
- * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
- * @vcpu: pointer to the vcpu performing the access
- * @run: pointer to the kvm_run structure
- * @mmio: pointer to the data describing the access
+ * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
+ * @kvm: The VM structure pointer
+ * @base: The (guest) base address for the register frame
+ * @len: Length of the register frame window
+ * @ranges: Describing the handler functions for each register
+ * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
+ * @iodev: Points to memory to be passed on to the handler
*
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- * Calls the actual handling routine for the selected VGIC model.
+ * @iodev stores the parameters of this function to be usable by the handler
+ * respectively the dispatcher function (since the KVM I/O bus framework lacks
+ * an opaque parameter). Initialization is done in this function, but the
+ * reference should be valid and unique for the whole VGIC lifetime.
+ * If the register frame is not mapped for a specific VCPU, pass -1 to
+ * @redist_vcpu_id.
*/
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+ const struct vgic_io_range *ranges,
+ int redist_vcpu_id,
+ struct vgic_io_device *iodev)
{
- if (!irqchip_in_kernel(vcpu->kvm))
- return false;
+ struct kvm_vcpu *vcpu = NULL;
+ int ret;
- /*
- * This will currently call either vgic_v2_handle_mmio() or
- * vgic_v3_handle_mmio(), which in turn will call
- * vgic_handle_mmio_range() defined above.
- */
- return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
+ if (redist_vcpu_id >= 0)
+ vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);
+
+ iodev->addr = base;
+ iodev->len = len;
+ iodev->reg_ranges = ranges;
+ iodev->redist_vcpu = vcpu;
+
+ kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
+
+ mutex_lock(&kvm->slots_lock);
+
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
+ &iodev->dev);
+ mutex_unlock(&kvm->slots_lock);
+
+ /* Mark the iodev as invalid if registration fails. */
+ if (ret)
+ iodev->dev.ops = NULL;
+
+ return ret;
}
static int vgic_nr_shared_irqs(struct vgic_dist *dist)
@@ -804,6 +917,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist)
return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
}
+static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long *active, *enabled, *act_percpu, *act_shared;
+ unsigned long active_private, active_shared;
+ int nr_shared = vgic_nr_shared_irqs(dist);
+ int vcpu_id;
+
+ vcpu_id = vcpu->vcpu_id;
+ act_percpu = vcpu->arch.vgic_cpu.active_percpu;
+ act_shared = vcpu->arch.vgic_cpu.active_shared;
+
+ active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
+ enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
+ bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
+
+ active = vgic_bitmap_get_shared_map(&dist->irq_active);
+ enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
+ bitmap_and(act_shared, active, enabled, nr_shared);
+ bitmap_and(act_shared, act_shared,
+ vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
+ nr_shared);
+
+ active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
+ active_shared = find_first_bit(act_shared, nr_shared);
+
+ return (active_private < VGIC_NR_PRIVATE_IRQS ||
+ active_shared < nr_shared);
+}
+
static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -835,7 +978,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
/*
* Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
+ * or active interrupts. Must be called with distributor lock held.
*/
void vgic_update_state(struct kvm *kvm)
{
@@ -849,10 +992,13 @@ void vgic_update_state(struct kvm *kvm)
}
kvm_for_each_vcpu(c, vcpu, kvm) {
- if (compute_pending_for_cpu(vcpu)) {
- pr_debug("CPU%d has pending interrupts\n", c);
+ if (compute_pending_for_cpu(vcpu))
set_bit(c, dist->irq_pending_on_cpu);
- }
+
+ if (compute_active_for_cpu(vcpu))
+ set_bit(c, dist->irq_active_on_cpu);
+ else
+ clear_bit(c, dist->irq_active_on_cpu);
}
}
@@ -883,6 +1029,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
return vgic_ops->get_eisr(vcpu);
}
+static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->clear_eisr(vcpu);
+}
+
static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
{
return vgic_ops->get_interrupt_status(vcpu);
@@ -922,6 +1073,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
vgic_set_lr(vcpu, lr_nr, vlr);
clear_bit(lr_nr, vgic_cpu->lr_used);
vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+ vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}
/*
@@ -949,6 +1101,26 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
}
}
+static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
+ int lr_nr, struct vgic_lr vlr)
+{
+ if (vgic_irq_is_active(vcpu, irq)) {
+ vlr.state |= LR_STATE_ACTIVE;
+ kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
+ vgic_irq_clear_active(vcpu, irq);
+ vgic_update_state(vcpu->kvm);
+ } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
+ vlr.state |= LR_STATE_PENDING;
+ kvm_debug("Set pending: 0x%x\n", vlr.state);
+ }
+
+ if (!vgic_irq_is_edge(vcpu, irq))
+ vlr.state |= LR_EOI_INT;
+
+ vgic_set_lr(vcpu, lr_nr, vlr);
+ vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
+}
+
/*
* Queue an interrupt to a CPU virtual interface. Return true on success,
* or false if it wasn't possible to queue it.
@@ -976,8 +1148,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
if (vlr.source == sgi_source_id) {
kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
- vlr.state |= LR_STATE_PENDING;
- vgic_set_lr(vcpu, lr, vlr);
+ vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
return true;
}
}
@@ -994,11 +1165,8 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
vlr.irq = irq;
vlr.source = sgi_source_id;
- vlr.state = LR_STATE_PENDING;
- if (!vgic_irq_is_edge(vcpu, irq))
- vlr.state |= LR_EOI_INT;
-
- vgic_set_lr(vcpu, lr, vlr);
+ vlr.state = 0;
+ vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
return true;
}
@@ -1030,39 +1198,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long *pa_percpu, *pa_shared;
int i, vcpu_id;
int overflow = 0;
+ int nr_shared = vgic_nr_shared_irqs(dist);
vcpu_id = vcpu->vcpu_id;
+ pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
+ pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
+
+ bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
+ VGIC_NR_PRIVATE_IRQS);
+ bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
+ nr_shared);
/*
* We may not have any pending interrupt, or the interrupts
* may have been serviced from another vcpu. In all cases,
* move along.
*/
- if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
- pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
+ if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
goto epilog;
- }
/* SGIs */
- for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
+ for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
if (!queue_sgi(vcpu, i))
overflow = 1;
}
/* PPIs */
- for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
+ for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
if (!vgic_queue_hwirq(vcpu, i))
overflow = 1;
}
/* SPIs */
- for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
+ for_each_set_bit(i, pa_shared, nr_shared) {
if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
overflow = 1;
}
+
+
+
epilog:
if (overflow) {
vgic_enable_underflow(vcpu);
@@ -1081,7 +1259,9 @@ epilog:
static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
{
u32 status = vgic_get_interrupt_status(vcpu);
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
bool level_pending = false;
+ struct kvm *kvm = vcpu->kvm;
kvm_debug("STATUS = %08x\n", status);
@@ -1098,6 +1278,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
+ spin_lock(&dist->lock);
vgic_irq_clear_queued(vcpu, vlr.irq);
WARN_ON(vlr.state & LR_STATE_MASK);
vlr.state = 0;
@@ -1116,6 +1297,17 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
*/
vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+ /*
+ * kvm_notify_acked_irq calls kvm_set_irq()
+ * to reset the IRQ level. Need to release the
+ * lock for kvm_set_irq to grab it.
+ */
+ spin_unlock(&dist->lock);
+
+ kvm_notify_acked_irq(kvm, 0,
+ vlr.irq - VGIC_NR_PRIVATE_IRQS);
+ spin_lock(&dist->lock);
+
/* Any additional pending interrupt? */
if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
vgic_cpu_irq_set(vcpu, vlr.irq);
@@ -1125,6 +1317,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
vgic_cpu_irq_clear(vcpu, vlr.irq);
}
+ spin_unlock(&dist->lock);
+
/*
* Despite being EOIed, the LR may not have
* been marked as empty.
@@ -1136,13 +1330,18 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
if (status & INT_STATUS_UNDERFLOW)
vgic_disable_underflow(vcpu);
+ /*
+ * In the next iterations of the vcpu loop, if we sync the vgic state
+ * after flushing it, but before entering the guest (this happens for
+ * pending signals and vmid rollovers), then make sure we don't pick
+ * up any old maintenance interrupts here.
+ */
+ vgic_clear_eisr(vcpu);
+
return level_pending;
}
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
+/* Sync back the VGIC state after a guest run */
static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1189,14 +1388,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
if (!irqchip_in_kernel(vcpu->kvm))
return;
- spin_lock(&dist->lock);
__kvm_vgic_sync_hwstate(vcpu);
- spin_unlock(&dist->lock);
}
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
@@ -1209,6 +1404,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return 0;
+
+ return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
@@ -1381,8 +1587,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
kfree(vgic_cpu->pending_shared);
+ kfree(vgic_cpu->active_shared);
+ kfree(vgic_cpu->pend_act_shared);
kfree(vgic_cpu->vgic_irq_lr_map);
vgic_cpu->pending_shared = NULL;
+ vgic_cpu->active_shared = NULL;
+ vgic_cpu->pend_act_shared = NULL;
vgic_cpu->vgic_irq_lr_map = NULL;
}
@@ -1392,9 +1602,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
- if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+ if (!vgic_cpu->pending_shared
+ || !vgic_cpu->active_shared
+ || !vgic_cpu->pend_act_shared
+ || !vgic_cpu->vgic_irq_lr_map) {
kvm_vgic_vcpu_destroy(vcpu);
return -ENOMEM;
}
@@ -1447,10 +1662,12 @@ void kvm_vgic_destroy(struct kvm *kvm)
kfree(dist->irq_spi_mpidr);
kfree(dist->irq_spi_target);
kfree(dist->irq_pending_on_cpu);
+ kfree(dist->irq_active_on_cpu);
dist->irq_sgi_sources = NULL;
dist->irq_spi_cpu = NULL;
dist->irq_spi_target = NULL;
dist->irq_pending_on_cpu = NULL;
+ dist->irq_active_on_cpu = NULL;
dist->nr_cpus = 0;
}
@@ -1486,6 +1703,7 @@ int vgic_init(struct kvm *kvm)
ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
@@ -1498,10 +1716,13 @@ int vgic_init(struct kvm *kvm)
GFP_KERNEL);
dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
GFP_KERNEL);
+ dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+ GFP_KERNEL);
if (!dist->irq_sgi_sources ||
!dist->irq_spi_cpu ||
!dist->irq_spi_target ||
- !dist->irq_pending_on_cpu) {
+ !dist->irq_pending_on_cpu ||
+ !dist->irq_active_on_cpu) {
ret = -ENOMEM;
goto out;
}
@@ -1583,8 +1804,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
* emulation. So check this here again. KVM_CREATE_DEVICE does
* the proper checks already.
*/
- if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
- return -ENODEV;
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
+ ret = -ENODEV;
+ goto out;
+ }
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
@@ -1827,12 +2050,9 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
return r;
}
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset)
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
{
- struct kvm_exit_mmio dev_attr_mmio;
-
- dev_attr_mmio.len = 4;
- if (vgic_find_range(ranges, &dev_attr_mmio, offset))
+ if (vgic_find_range(ranges, 4, offset))
return 0;
else
return -ENXIO;
@@ -1865,8 +2085,10 @@ static struct notifier_block vgic_cpu_nb = {
};
static const struct of_device_id vgic_ids[] = {
- { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+ { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+ { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, },
+ { .compatible = "arm,gic-400", .data = vgic_v2_probe, },
+ { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
{},
};
@@ -1914,3 +2136,38 @@ out_free_irq:
free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
return ret;
}
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries,
+ int gsi)
+{
+ return gsi;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ return pin;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id,
+ u32 irq, int level, bool line_status)
+{
+ unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
+
+ trace_kvm_set_irq(irq, level, irq_source_id);
+
+ BUG_ON(!vgic_initialized(kvm));
+
+ if (spi > kvm->arch.vgic.nr_irqs)
+ return -EINVAL;
+ return kvm_vgic_inject_irq(kvm, 0, spi, level);
+
+}
+
+/* MSI not implemented yet */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
index 1e83bdf5f499..0df74cbb6200 100644
--- a/virt/kvm/arm/vgic.h
+++ b/virt/kvm/arm/vgic.h
@@ -20,6 +20,8 @@
#ifndef __KVM_VGIC_H__
#define __KVM_VGIC_H__
+#include <kvm/iodev.h>
+
#define VGIC_ADDR_UNDEF (-1)
#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
@@ -57,6 +59,14 @@ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
+struct kvm_exit_mmio {
+ phys_addr_t phys_addr;
+ void *data;
+ u32 len;
+ bool is_write;
+ void *private;
+};
+
void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
phys_addr_t offset, int mode);
bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
@@ -74,7 +84,7 @@ void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
*((u32 *)mmio->data) = cpu_to_le32(value) & mask;
}
-struct kvm_mmio_range {
+struct vgic_io_range {
phys_addr_t base;
unsigned long len;
int bits_per_irq;
@@ -82,6 +92,11 @@ struct kvm_mmio_range {
phys_addr_t offset);
};
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+ const struct vgic_io_range *ranges,
+ int redist_id,
+ struct vgic_io_device *iodev);
+
static inline bool is_in_range(phys_addr_t addr, unsigned long len,
phys_addr_t baseaddr, unsigned long size)
{
@@ -89,14 +104,8 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len,
}
const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset);
-
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio,
- const struct kvm_mmio_range *ranges,
- unsigned long mmio_base);
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+ int len, gpa_t offset);
bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
phys_addr_t offset, int vcpu_id, int access);
@@ -107,12 +116,20 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
phys_addr_t offset, int vcpu_id);
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id);
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id);
+
bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
phys_addr_t offset);
void vgic_kick_vcpus(struct kvm *kvm);
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset);
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset);
int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 00d86427af0f..571c1ce37d15 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -8,7 +8,7 @@
*
*/
-#include "iodev.h"
+#include <kvm/iodev.h>
#include <linux/kvm_host.h>
#include <linux/slab.h>
@@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
return 1;
}
-static int coalesced_mmio_write(struct kvm_io_device *this,
- gpa_t addr, int len, const void *val)
+static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this, gpa_t addr,
+ int len, const void *val)
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 148b2392c762..9ff4193dfa49 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,7 @@
#include <linux/seqlock.h>
#include <trace/events/kvm.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
#ifdef CONFIG_HAVE_KVM_IRQFD
/*
@@ -311,6 +311,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
unsigned int events;
int idx;
+ if (!kvm_arch_intc_initialized(kvm))
+ return -EAGAIN;
+
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
return -ENOMEM;
@@ -712,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
/* MMIO/PIO writes trigger an event if the addr/val match */
static int
-ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
- const void *val)
+ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
+ int len, const void *val)
{
struct _ioeventfd *p = to_ioeventfd(this);
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
deleted file mode 100644
index 12fd3caffd2b..000000000000
--- a/virt/kvm/iodev.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef __KVM_IODEV_H__
-#define __KVM_IODEV_H__
-
-#include <linux/kvm_types.h>
-#include <asm/errno.h>
-
-struct kvm_io_device;
-
-/**
- * kvm_io_device_ops are called under kvm slots_lock.
- * read and write handlers return 0 if the transaction has been handled,
- * or non-zero to have it passed to the next device.
- **/
-struct kvm_io_device_ops {
- int (*read)(struct kvm_io_device *this,
- gpa_t addr,
- int len,
- void *val);
- int (*write)(struct kvm_io_device *this,
- gpa_t addr,
- int len,
- const void *val);
- void (*destructor)(struct kvm_io_device *this);
-};
-
-
-struct kvm_io_device {
- const struct kvm_io_device_ops *ops;
-};
-
-static inline void kvm_iodevice_init(struct kvm_io_device *dev,
- const struct kvm_io_device_ops *ops)
-{
- dev->ops = ops;
-}
-
-static inline int kvm_iodevice_read(struct kvm_io_device *dev,
- gpa_t addr, int l, void *v)
-{
- return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
-}
-
-static inline int kvm_iodevice_write(struct kvm_io_device *dev,
- gpa_t addr, int l, const void *v)
-{
- return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
-}
-
-static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
-{
- if (dev->ops->destructor)
- dev->ops->destructor(dev);
-}
-
-#endif /* __KVM_IODEV_H__ */
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 7f256f31df10..1d56a901e791 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -105,7 +105,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
i = kvm_irq_map_gsi(kvm, irq_set, irq);
srcu_read_unlock(&kvm->irq_srcu, idx);
- while(i--) {
+ while (i--) {
int r;
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
line_status);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a1093700f3a4..d3fc9399062a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -16,7 +16,7 @@
*
*/
-#include "iodev.h"
+#include <kvm/iodev.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
@@ -66,13 +66,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-unsigned int halt_poll_ns = 0;
+static unsigned int halt_poll_ns;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
/*
* Ordering of locks:
*
- * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
+ * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
DEFINE_SPINLOCK(kvm_lock);
@@ -80,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
-static int kvm_usage_count = 0;
+static int kvm_usage_count;
static atomic_t hardware_enable_failed;
struct kmem_cache *kvm_vcpu_cache;
@@ -471,7 +471,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
r = -ENOMEM;
- kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
if (!kvm->memslots)
goto out_err_no_srcu;
@@ -522,7 +522,7 @@ out_err_no_srcu:
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
- kfree(kvm->memslots);
+ kvfree(kvm->memslots);
kvm_arch_free_vm(kvm);
return ERR_PTR(r);
}
@@ -539,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size)
return kzalloc(size, GFP_KERNEL);
}
-void kvm_kvfree(const void *addr)
-{
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
-}
-
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
{
if (!memslot->dirty_bitmap)
return;
- kvm_kvfree(memslot->dirty_bitmap);
+ kvfree(memslot->dirty_bitmap);
memslot->dirty_bitmap = NULL;
}
@@ -578,7 +570,7 @@ static void kvm_free_physmem(struct kvm *kvm)
kvm_for_each_memslot(memslot, slots)
kvm_free_physmem_slot(kvm, memslot, NULL);
- kfree(kvm->memslots);
+ kvfree(kvm->memslots);
}
static void kvm_destroy_devices(struct kvm *kvm)
@@ -871,10 +863,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
- slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
- GFP_KERNEL);
+ slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
if (!slots)
goto out_free;
+ memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
slot = id_to_memslot(slots, mem->slot);
@@ -888,8 +880,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
* or moved, memslot will be created.
*
* validation of sp->gfn happens in:
- * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
- * - kvm_is_visible_gfn (mmu_check_roots)
+ * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+ * - kvm_is_visible_gfn (mmu_check_roots)
*/
kvm_arch_flush_shadow_memslot(kvm, slot);
@@ -917,7 +909,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
kvm_arch_commit_memory_region(kvm, mem, &old, change);
kvm_free_physmem_slot(kvm, &old, &new);
- kfree(old_memslots);
+ kvfree(old_memslots);
/*
* IOMMU mapping: New slots need to be mapped. Old slots need to be
@@ -936,7 +928,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
return 0;
out_slots:
- kfree(slots);
+ kvfree(slots);
out_free:
kvm_free_physmem_slot(kvm, &new, &old);
out:
@@ -1061,9 +1053,11 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
mask = xchg(&dirty_bitmap[i], 0);
dirty_bitmap_buffer[i] = mask;
- offset = i * BITS_PER_LONG;
- kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
- mask);
+ if (mask) {
+ offset = i * BITS_PER_LONG;
+ kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+ offset, mask);
+ }
}
spin_unlock(&kvm->mmu_lock);
@@ -1193,16 +1187,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
-static int kvm_read_hva(void *data, void __user *hva, int len)
-{
- return __copy_from_user(data, hva, len);
-}
-
-static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
-{
- return __copy_from_user_inatomic(data, hva, len);
-}
-
static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int write, struct page **page)
{
@@ -1481,7 +1465,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
return kvm_pfn_to_page(pfn);
}
-
EXPORT_SYMBOL_GPL(gfn_to_page);
void kvm_release_page_clean(struct page *page)
@@ -1517,6 +1500,7 @@ void kvm_set_pfn_dirty(pfn_t pfn)
{
if (!kvm_is_reserved_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
+
if (!PageReserved(page))
SetPageDirty(page);
}
@@ -1554,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
addr = gfn_to_hva_prot(kvm, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = kvm_read_hva(data, (void __user *)addr + offset, len);
+ r = __copy_from_user(data, (void __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;
@@ -1593,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
- r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
+ r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
pagefault_enable();
if (r)
return -EFAULT;
@@ -1653,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
ghc->generation = slots->generation;
ghc->len = len;
ghc->memslot = gfn_to_memslot(kvm, start_gfn);
- ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail);
- if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) {
+ ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
+ if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
ghc->hva += offset;
} else {
/*
@@ -1742,7 +1726,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
int offset = offset_in_page(gpa);
int ret;
- while ((seg = next_segment(len, offset)) != 0) {
+ while ((seg = next_segment(len, offset)) != 0) {
ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
if (ret < 0)
return ret;
@@ -1800,6 +1784,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
start = cur = ktime_get();
if (halt_poll_ns) {
ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+
do {
/*
* This sets KVM_REQ_UNHALT if an interrupt
@@ -2118,7 +2103,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
* Special cases: vcpu ioctls that are asynchronous to vcpu execution,
* so vcpu_load() would break it.
*/
- if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+ if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
#endif
@@ -2135,6 +2120,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
/* The thread running this VCPU changed. */
struct pid *oldpid = vcpu->pid;
struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+
rcu_assign_pointer(vcpu->pid, newpid);
if (oldpid)
synchronize_rcu();
@@ -2205,7 +2191,7 @@ out_free1:
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, &mp_state, sizeof mp_state))
+ if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
goto out;
r = 0;
break;
@@ -2214,7 +2200,7 @@ out_free1:
struct kvm_mp_state mp_state;
r = -EFAULT;
- if (copy_from_user(&mp_state, argp, sizeof mp_state))
+ if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
goto out;
r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
break;
@@ -2223,13 +2209,13 @@ out_free1:
struct kvm_translation tr;
r = -EFAULT;
- if (copy_from_user(&tr, argp, sizeof tr))
+ if (copy_from_user(&tr, argp, sizeof(tr)))
goto out;
r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, &tr, sizeof tr))
+ if (copy_to_user(argp, &tr, sizeof(tr)))
goto out;
r = 0;
break;
@@ -2238,7 +2224,7 @@ out_free1:
struct kvm_guest_debug dbg;
r = -EFAULT;
- if (copy_from_user(&dbg, argp, sizeof dbg))
+ if (copy_from_user(&dbg, argp, sizeof(dbg)))
goto out;
r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
break;
@@ -2252,14 +2238,14 @@ out_free1:
if (argp) {
r = -EFAULT;
if (copy_from_user(&kvm_sigmask, argp,
- sizeof kvm_sigmask))
+ sizeof(kvm_sigmask)))
goto out;
r = -EINVAL;
- if (kvm_sigmask.len != sizeof sigset)
+ if (kvm_sigmask.len != sizeof(sigset))
goto out;
r = -EFAULT;
if (copy_from_user(&sigset, sigmask_arg->sigset,
- sizeof sigset))
+ sizeof(sigset)))
goto out;
p = &sigset;
}
@@ -2321,14 +2307,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
if (argp) {
r = -EFAULT;
if (copy_from_user(&kvm_sigmask, argp,
- sizeof kvm_sigmask))
+ sizeof(kvm_sigmask)))
goto out;
r = -EINVAL;
- if (kvm_sigmask.len != sizeof csigset)
+ if (kvm_sigmask.len != sizeof(csigset))
goto out;
r = -EFAULT;
if (copy_from_user(&csigset, sigmask_arg->sigset,
- sizeof csigset))
+ sizeof(csigset)))
goto out;
sigset_from_compat(&sigset, &csigset);
r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
@@ -2492,6 +2478,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_SIGNAL_MSI:
#endif
#ifdef CONFIG_HAVE_KVM_IRQFD
+ case KVM_CAP_IRQFD:
case KVM_CAP_IRQFD_RESAMPLE:
#endif
case KVM_CAP_CHECK_EXTENSION_VM:
@@ -2524,7 +2511,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&kvm_userspace_mem, argp,
- sizeof kvm_userspace_mem))
+ sizeof(kvm_userspace_mem)))
goto out;
r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
@@ -2534,7 +2521,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_dirty_log log;
r = -EFAULT;
- if (copy_from_user(&log, argp, sizeof log))
+ if (copy_from_user(&log, argp, sizeof(log)))
goto out;
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
break;
@@ -2542,16 +2529,18 @@ static long kvm_vm_ioctl(struct file *filp,
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
case KVM_REGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;
+
r = -EFAULT;
- if (copy_from_user(&zone, argp, sizeof zone))
+ if (copy_from_user(&zone, argp, sizeof(zone)))
goto out;
r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
break;
}
case KVM_UNREGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;
+
r = -EFAULT;
- if (copy_from_user(&zone, argp, sizeof zone))
+ if (copy_from_user(&zone, argp, sizeof(zone)))
goto out;
r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
break;
@@ -2561,7 +2550,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_irqfd data;
r = -EFAULT;
- if (copy_from_user(&data, argp, sizeof data))
+ if (copy_from_user(&data, argp, sizeof(data)))
goto out;
r = kvm_irqfd(kvm, &data);
break;
@@ -2570,7 +2559,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_ioeventfd data;
r = -EFAULT;
- if (copy_from_user(&data, argp, sizeof data))
+ if (copy_from_user(&data, argp, sizeof(data)))
goto out;
r = kvm_ioeventfd(kvm, &data);
break;
@@ -2591,7 +2580,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_msi msi;
r = -EFAULT;
- if (copy_from_user(&msi, argp, sizeof msi))
+ if (copy_from_user(&msi, argp, sizeof(msi)))
goto out;
r = kvm_send_userspace_msi(kvm, &msi);
break;
@@ -2603,7 +2592,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_irq_level irq_event;
r = -EFAULT;
- if (copy_from_user(&irq_event, argp, sizeof irq_event))
+ if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
goto out;
r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
@@ -2613,7 +2602,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (ioctl == KVM_IRQ_LINE_STATUS) {
- if (copy_to_user(argp, &irq_event, sizeof irq_event))
+ if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
goto out;
}
@@ -2646,7 +2635,7 @@ static long kvm_vm_ioctl(struct file *filp,
goto out_free_irq_routing;
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
- out_free_irq_routing:
+out_free_irq_routing:
vfree(entries);
break;
}
@@ -2821,8 +2810,7 @@ static void hardware_enable_nolock(void *junk)
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
atomic_inc(&hardware_enable_failed);
- printk(KERN_INFO "kvm: enabling virtualization on "
- "CPU%d failed\n", cpu);
+ pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
}
}
@@ -2898,12 +2886,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
- printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+ pr_info("kvm: disabling virtualization on CPU%d\n",
cpu);
hardware_disable();
break;
case CPU_STARTING:
- printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+ pr_info("kvm: enabling virtualization on CPU%d\n",
cpu);
hardware_enable();
break;
@@ -2920,7 +2908,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
*
* And Intel TXT required VMX off for all cpu when system shutdown.
*/
- printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+ pr_info("kvm: exiting hardware virtualization\n");
kvm_rebooting = true;
on_each_cpu(hardware_disable_nolock, NULL, 1);
return NOTIFY_OK;
@@ -2944,7 +2932,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
}
static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
- const struct kvm_io_range *r2)
+ const struct kvm_io_range *r2)
{
if (r1->addr < r2->addr)
return -1;
@@ -2997,7 +2985,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
return off;
}
-static int __kvm_io_bus_write(struct kvm_io_bus *bus,
+static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
struct kvm_io_range *range, const void *val)
{
int idx;
@@ -3008,7 +2996,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
while (idx < bus->dev_count &&
kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
- if (!kvm_iodevice_write(bus->range[idx].dev, range->addr,
+ if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr,
range->len, val))
return idx;
idx++;
@@ -3018,7 +3006,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
}
/* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
struct kvm_io_bus *bus;
@@ -3030,14 +3018,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- r = __kvm_io_bus_write(bus, &range, val);
+ bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
/* kvm_io_bus_write_cookie - called under kvm->slots_lock */
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, const void *val, long cookie)
+int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+ gpa_t addr, int len, const void *val, long cookie)
{
struct kvm_io_bus *bus;
struct kvm_io_range range;
@@ -3047,12 +3035,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
(kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
- if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len,
+ if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len,
val))
return cookie;
@@ -3060,11 +3048,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
* cookie contained garbage; fall back to search and return the
* correct cookie value.
*/
- return __kvm_io_bus_write(bus, &range, val);
+ return __kvm_io_bus_write(vcpu, bus, &range, val);
}
-static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
- void *val)
+static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
+ struct kvm_io_range *range, void *val)
{
int idx;
@@ -3074,7 +3062,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
while (idx < bus->dev_count &&
kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
- if (!kvm_iodevice_read(bus->range[idx].dev, range->addr,
+ if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr,
range->len, val))
return idx;
idx++;
@@ -3085,7 +3073,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
EXPORT_SYMBOL_GPL(kvm_io_bus_write);
/* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
struct kvm_io_bus *bus;
@@ -3097,8 +3085,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- r = __kvm_io_bus_read(bus, &range, val);
+ bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3268,6 +3256,7 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
{
struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
if (vcpu->preempted)
vcpu->preempted = false;
@@ -3349,7 +3338,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = misc_register(&kvm_dev);
if (r) {
- printk(KERN_ERR "kvm: misc device register failed\n");
+ pr_err("kvm: misc device register failed\n");
goto out_unreg;
}
@@ -3360,7 +3349,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = kvm_init_debug();
if (r) {
- printk(KERN_ERR "kvm: create debugfs files failed\n");
+ pr_err("kvm: create debugfs files failed\n");
goto out_undebugfs;
}