diff options
Diffstat (limited to 'virt/kvm')
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 45 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v2-emul.c | 71 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v2.c | 8 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v3-emul.c | 246 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v3.c | 8 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.c | 497 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.h | 37 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 7 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 9 | ||||
-rw-r--r-- | virt/kvm/iodev.h | 70 | ||||
-rw-r--r-- | virt/kvm/irqchip.c | 2 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 163 |
12 files changed, 703 insertions, 460 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6e54f3542126..98c95f2fcba4 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Work function for handling the backup timer that we schedule when a vcpu is + * no longer running, but had a timer programmed to fire in the future. + */ static void kvm_timer_inject_irq_work(struct work_struct *work) { struct kvm_vcpu *vcpu; vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); + + /* + * If the vcpu is blocked we want to wake it up so that it will see + * the timer has expired when entering the guest. + */ + kvm_vcpu_kick(vcpu); } static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) @@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + cycle_t cval, now; + + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) + return false; + + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + return cval <= now; +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer @@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * populate the CPU timer again. */ timer_disarm(timer); + + /* + * If the timer expired while we were not scheduled, now is the time + * to inject it. + */ + if (kvm_timer_should_fire(vcpu)) + kvm_timer_inject_irq(vcpu); } /** @@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - BUG_ON(timer_is_armed(timer)); - if (cval <= now) { + if (kvm_timer_should_fire(vcpu)) { /* * Timer has already expired while we were not * looking. Inject the interrupt and carry on. @@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) return; } + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, &timecounter->frac); timer_arm(timer, ns); diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 19c6210f02cf..13907970d11c 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id); } +static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -303,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } -static const struct kvm_mmio_range vgic_dist_ranges[] = { +static const struct vgic_io_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { .base = GIC_DIST_ACTIVE_SET, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_set_active_reg, }, { .base = GIC_DIST_ACTIVE_CLEAR, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_clear_active_reg, }, { .base = GIC_DIST_PRI, @@ -388,24 +404,6 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { {} }; -static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; - - if (!is_in_range(mmio->phys_addr, mmio->len, base, - KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* GICv2 does not support accesses wider than 32 bits */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); -} - static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -490,6 +488,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) static int vgic_v2_map_resources(struct kvm *kvm, const struct vgic_params *params) { + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (!irqchip_in_kernel(kvm)) @@ -500,13 +499,17 @@ static int vgic_v2_map_resources(struct kvm *kvm, if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); ret = -ENXIO; goto out; } + vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + KVM_VGIC_V2_DIST_SIZE, + vgic_dist_ranges, -1, &dist->dist_iodev); + /* * Initialize the vgic if this hasn't already been done on demand by * accessing the vgic state from userspace. @@ -514,18 +517,23 @@ static int vgic_v2_map_resources(struct kvm *kvm, ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); - goto out; + goto out_unregister; } - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + goto out_unregister; } - kvm->arch.vgic.ready = true; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + out: if (ret) kvm_vgic_destroy(kvm); @@ -554,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; dist->vm_ops.init_model = vgic_v2_init_model; @@ -631,7 +638,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, * CPU Interface Register accesses - these are not accessed by the VM, but by * user space for saving and restoring VGIC state. */ -static const struct kvm_mmio_range vgic_cpu_ranges[] = { +static const struct vgic_io_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, .len = 12, @@ -658,12 +665,13 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { - const struct kvm_mmio_range *r = NULL, *ranges; + const struct vgic_io_range *r = NULL, *ranges; phys_addr_t offset; int ret, cpuid, c; struct kvm_vcpu *vcpu, *tmp_vcpu; struct vgic_dist *vgic; struct kvm_exit_mmio mmio; + u32 data; offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> @@ -685,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mmio.len = 4; mmio.is_write = is_write; + mmio.data = &data; if (is_write) mmio_data_write(&mmio, ~0, *reg); switch (attr->group) { @@ -699,7 +708,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, default: BUG(); } - r = vgic_find_range(ranges, &mmio, offset); + r = vgic_find_range(ranges, 4, offset); if (unlikely(!r || !r->handle_mmio)) { ret = -ENXIO; diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index a0a7b5d1a070..f9b9c7c51372 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, { if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); + else + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr); } static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) @@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; } +static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0; +} + static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) { u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; @@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = { .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, + .clear_eisr = vgic_v2_clear_eisr, .get_interrupt_status = vgic_v2_get_interrupt_status, .enable_underflow = vgic_v2_enable_underflow, .disable_underflow = vgic_v2_disable_underflow, diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index b3f154631515..e9c3a7a83833 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { +static const struct vgic_io_range vgic_v3_dist_ranges[] = { { .base = GICD_CTLR, .len = 0x04, @@ -502,6 +502,43 @@ static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { {}, }; +static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + /* since we don't support LPIs, this register is zero for now */ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + u64 mpidr; + struct kvm_vcpu *redist_vcpu = mmio->private; + int target_vcpu_id = redist_vcpu->vcpu_id; + + /* the upper 32 bits contain the affinity value */ + if ((offset & ~3) == 4) { + mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); + reg = compress_mpidr(mpidr); + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = redist_vcpu->vcpu_id << 8; + if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + reg |= GICR_TYPER_LAST; + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; +} + static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -570,186 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } -static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { +#define SGI_base(x) ((x) + SZ_64K) + +static const struct vgic_io_range vgic_redist_ranges[] = { + { + .base = GICR_CTLR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_ctlr_redist, + }, + { + .base = GICR_TYPER, + .len = 0x08, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_typer_redist, + }, + { + .base = GICR_IIDR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_iidr, + }, + { + .base = GICR_WAKER, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, { - .base = GICR_IGROUPR0, + .base = GICR_IDREGS, + .len = 0x30, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_idregs, + }, + { + .base = SGI_base(GICR_IGROUPR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_rao_wi, }, { - .base = GICR_ISENABLER0, + .base = SGI_base(GICR_ISENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg_redist, }, { - .base = GICR_ICENABLER0, + .base = SGI_base(GICR_ICENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg_redist, }, { - .base = GICR_ISPENDR0, + .base = SGI_base(GICR_ISPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg_redist, }, { - .base = GICR_ICPENDR0, + .base = SGI_base(GICR_ICPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg_redist, }, { - .base = GICR_ISACTIVER0, + .base = SGI_base(GICR_ISACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_ICACTIVER0, + .base = SGI_base(GICR_ICACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_IPRIORITYR0, + .base = SGI_base(GICR_IPRIORITYR0), .len = 0x20, .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg_redist, }, { - .base = GICR_ICFGR0, + .base = SGI_base(GICR_ICFGR0), .len = 0x08, .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg_redist, }, { - .base = GICR_IGRPMODR0, + .base = SGI_base(GICR_IGRPMODR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_NSACR, + .base = SGI_base(GICR_NSACR), .len = 0x04, .handle_mmio = handle_mmio_raz_wi, }, {}, }; -static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - /* since we don't support LPIs, this register is zero for now */ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - u64 mpidr; - struct kvm_vcpu *redist_vcpu = mmio->private; - int target_vcpu_id = redist_vcpu->vcpu_id; - - /* the upper 32 bits contain the affinity value */ - if ((offset & ~3) == 4) { - mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); - reg = compress_mpidr(mpidr); - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = redist_vcpu->vcpu_id << 8; - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - reg |= GICR_TYPER_LAST; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; -} - -static const struct kvm_mmio_range vgic_redist_ranges[] = { - { - .base = GICR_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr_redist, - }, - { - .base = GICR_TYPER, - .len = 0x08, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer_redist, - }, - { - .base = GICR_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - .base = GICR_WAKER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICR_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - {}, -}; - -/* - * This function splits accesses between the distributor and the two - * redistributor parts (private/SPI). As each redistributor is accessible - * from any CPU, we have to determine the affected VCPU by taking the faulting - * address into account. We then pass this VCPU to the handler function via - * the private parameter. - */ -#define SGI_BASE_OFFSET SZ_64K -static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long dbase = dist->vgic_dist_base; - unsigned long rdbase = dist->vgic_redist_base; - int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); - int vcpu_id; - const struct kvm_mmio_range *mmio_range; - - if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { - return vgic_handle_mmio_range(vcpu, run, mmio, - vgic_v3_dist_ranges, dbase); - } - - if (!is_in_range(mmio->phys_addr, mmio->len, rdbase, - GIC_V3_REDIST_SIZE * nrcpus)) - return false; - - vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE; - rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); - mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); - - if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) { - rdbase += SGI_BASE_OFFSET; - mmio_range = vgic_redist_sgi_ranges; - } else { - mmio_range = vgic_redist_ranges; - } - return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); -} - static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) { if (vgic_queue_irq(vcpu, 0, irq)) { @@ -766,6 +724,9 @@ static int vgic_v3_map_resources(struct kvm *kvm, { int ret = 0; struct vgic_dist *dist = &kvm->arch.vgic; + gpa_t rdbase = dist->vgic_redist_base; + struct vgic_io_device *iodevs = NULL; + int i; if (!irqchip_in_kernel(kvm)) return 0; @@ -791,7 +752,41 @@ static int vgic_v3_map_resources(struct kvm *kvm, goto out; } - kvm->arch.vgic.ready = true; + ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, + -1, &dist->dist_iodev); + if (ret) + goto out; + + iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL); + if (!iodevs) { + ret = -ENOMEM; + goto out_unregister; + } + + for (i = 0; i < dist->nr_cpus; i++) { + ret = vgic_register_kvm_io_dev(kvm, rdbase, + SZ_128K, vgic_redist_ranges, + i, &iodevs[i]); + if (ret) + goto out_unregister; + rdbase += GIC_V3_REDIST_SIZE; + } + + dist->redist_iodevs = iodevs; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + if (iodevs) { + for (i = 0; i < dist->nr_cpus; i++) { + if (iodevs[i].dev.ops) + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + &iodevs[i].dev); + } + } + out: if (ret) kvm_vgic_destroy(kvm); @@ -832,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v3_handle_mmio; dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; dist->vm_ops.init_model = vgic_v3_init_model; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 3a62d8a9a2c6..dff06021e748 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, { if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); + else + vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr); } static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) @@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; } +static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0; +} + static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) { u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; @@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = { .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, .get_elrsr = vgic_v3_get_elrsr, .get_eisr = vgic_v3_get_eisr, + .clear_eisr = vgic_v3_clear_eisr, .get_interrupt_status = vgic_v3_get_interrupt_status, .enable_underflow = vgic_v3_enable_underflow, .disable_underflow = vgic_v3_disable_underflow, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0cc6ab6005a0..8d550ff14700 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -31,6 +31,9 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> +#include <trace/events/kvm.h> +#include <asm/kvm.h> +#include <kvm/iodev.h> /* * How the whole thing works (courtesy of Christoffer Dall): @@ -263,6 +266,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } +static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); +} + static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -277,6 +287,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); +} + +static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -520,6 +544,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, return false; } +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + static u32 vgic_cfg_expand(u16 val) { u32 res = 0; @@ -588,16 +650,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, } /** - * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs * - * Move any pending IRQs that have already been assigned to LRs back to the + * Move any IRQs that have already been assigned to LRs back to the * emulated distributor state so that the complete emulated state can be read * from the main emulation structures without investigating the LRs. - * - * Note that IRQs in the active state in the LRs get their pending state moved - * to the distributor but the active state stays in the LRs, because we don't - * track the active state on the distributor side. */ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { @@ -613,12 +671,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * 01: pending * 10: active * 11: pending and active - * - * If the LR holds only an active interrupt (not pending) then - * just leave it alone. */ - if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) - continue; + BUG_ON(!(lr.state & LR_STATE_MASK)); + + /* Reestablish SGI source for pending and active IRQs */ + if (lr.irq < VGIC_NR_SGIS) + add_sgi_source(vcpu, lr.irq, lr.source); + + /* + * If the LR holds an active (10) or a pending and active (11) + * interrupt then move the active state to the + * distributor tracking bit. + */ + if (lr.state & LR_STATE_ACTIVE) { + vgic_irq_set_active(vcpu, lr.irq); + lr.state &= ~LR_STATE_ACTIVE; + } /* * Reestablish the pending state on the distributor and the @@ -626,21 +694,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set_pending(vcpu, lr.irq); - if (lr.irq < VGIC_NR_SGIS) - add_sgi_source(vcpu, lr.irq, lr.source); - lr.state &= ~LR_STATE_PENDING; + if (lr.state & LR_STATE_PENDING) { + vgic_dist_irq_set_pending(vcpu, lr.irq); + lr.state &= ~LR_STATE_PENDING; + } + vgic_set_lr(vcpu, i, lr); /* - * If there's no state left on the LR (it could still be - * active), then the LR does not hold any useful info and can - * be marked as free for other use. + * Mark the LR as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) { - vgic_retire_lr(i, lr.irq, vcpu); - vgic_irq_clear_queued(vcpu, lr.irq); - } + BUG_ON(lr.state & LR_STATE_MASK); + vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -648,24 +714,21 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - const struct kvm_mmio_range *r = ranges; - - while (r->len) { - if (offset >= r->base && - (offset + mmio->len) <= (r->base + r->len)) - return r; - r++; +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + int len, gpa_t offset) +{ + while (ranges->len) { + if (offset >= ranges->base && + (offset + len) <= (ranges->base + ranges->len)) + return ranges; + ranges++; } return NULL; } static bool vgic_validate_access(const struct vgic_dist *dist, - const struct kvm_mmio_range *range, + const struct vgic_io_range *range, unsigned long offset) { int irq; @@ -693,9 +756,8 @@ static bool vgic_validate_access(const struct vgic_dist *dist, static bool call_range_handler(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, unsigned long offset, - const struct kvm_mmio_range *range) + const struct vgic_io_range *range) { - u32 *data32 = (void *)mmio->data; struct kvm_exit_mmio mmio32; bool ret; @@ -712,91 +774,142 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, mmio32.private = mmio->private; mmio32.phys_addr = mmio->phys_addr + 4; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[1]; + mmio32.data = &((u32 *)mmio->data)[1]; ret = range->handle_mmio(vcpu, &mmio32, offset + 4); - if (!mmio->is_write) - data32[1] = *(u32 *)mmio32.data; mmio32.phys_addr = mmio->phys_addr; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[0]; + mmio32.data = &((u32 *)mmio->data)[0]; ret |= range->handle_mmio(vcpu, &mmio32, offset); - if (!mmio->is_write) - data32[0] = *(u32 *)mmio32.data; return ret; } /** - * vgic_handle_mmio_range - handle an in-kernel MMIO access + * vgic_handle_mmio_access - handle an in-kernel MMIO access + * This is called by the read/write KVM IO device wrappers below. * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * @ranges: array of MMIO ranges in a given region - * @mmio_base: base address of that region + * @this: pointer to the KVM IO device in charge + * @addr: guest physical address of the access + * @len: size of the access + * @val: pointer to the data region + * @is_write: read or write access * * returns true if the MMIO access could be performed */ -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, - unsigned long mmio_base) +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, void *val, bool is_write) { - const struct kvm_mmio_range *range; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_io_device *iodev = container_of(this, + struct vgic_io_device, dev); + struct kvm_run *run = vcpu->run; + const struct vgic_io_range *range; + struct kvm_exit_mmio mmio; bool updated_state; - unsigned long offset; + gpa_t offset; - offset = mmio->phys_addr - mmio_base; - range = vgic_find_range(ranges, mmio, offset); + offset = addr - iodev->addr; + range = vgic_find_range(iodev->reg_ranges, len, offset); if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; + pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len); + return -ENXIO; } - spin_lock(&vcpu->kvm->arch.vgic.lock); + mmio.phys_addr = addr; + mmio.len = len; + mmio.is_write = is_write; + mmio.data = val; + mmio.private = iodev->redist_vcpu; + + spin_lock(&dist->lock); offset -= range->base; if (vgic_validate_access(dist, range, offset)) { - updated_state = call_range_handler(vcpu, mmio, offset, range); + updated_state = call_range_handler(vcpu, &mmio, offset, range); } else { - if (!mmio->is_write) - memset(mmio->data, 0, mmio->len); + if (!is_write) + memset(val, 0, len); updated_state = false; } - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); + spin_unlock(&dist->lock); + run->mmio.is_write = is_write; + run->mmio.len = len; + run->mmio.phys_addr = addr; + memcpy(run->mmio.data, val, len); + kvm_handle_mmio_return(vcpu, run); if (updated_state) vgic_kick_vcpus(vcpu->kvm); - return true; + return 0; } +static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); +} + +static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, + true); +} + +struct kvm_io_device_ops vgic_io_ops = { + .read = vgic_handle_mmio_read, + .write = vgic_handle_mmio_write, +}; + /** - * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access + * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus + * @kvm: The VM structure pointer + * @base: The (guest) base address for the register frame + * @len: Length of the register frame window + * @ranges: Describing the handler functions for each register + * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call + * @iodev: Points to memory to be passed on to the handler * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - * Calls the actual handling routine for the selected VGIC model. + * @iodev stores the parameters of this function to be usable by the handler + * respectively the dispatcher function (since the KVM I/O bus framework lacks + * an opaque parameter). Initialization is done in this function, but the + * reference should be valid and unique for the whole VGIC lifetime. + * If the register frame is not mapped for a specific VCPU, pass -1 to + * @redist_vcpu_id. */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_vcpu_id, + struct vgic_io_device *iodev) { - if (!irqchip_in_kernel(vcpu->kvm)) - return false; + struct kvm_vcpu *vcpu = NULL; + int ret; - /* - * This will currently call either vgic_v2_handle_mmio() or - * vgic_v3_handle_mmio(), which in turn will call - * vgic_handle_mmio_range() defined above. - */ - return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); + if (redist_vcpu_id >= 0) + vcpu = kvm_get_vcpu(kvm, redist_vcpu_id); + + iodev->addr = base; + iodev->len = len; + iodev->reg_ranges = ranges; + iodev->redist_vcpu = vcpu; + + kvm_iodevice_init(&iodev->dev, &vgic_io_ops); + + mutex_lock(&kvm->slots_lock); + + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len, + &iodev->dev); + mutex_unlock(&kvm->slots_lock); + + /* Mark the iodev as invalid if registration fails. */ + if (ret) + iodev->dev.ops = NULL; + + return ret; } static int vgic_nr_shared_irqs(struct vgic_dist *dist) @@ -804,6 +917,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist) return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; } +static int compute_active_for_cpu(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *active, *enabled, *act_percpu, *act_shared; + unsigned long active_private, active_shared; + int nr_shared = vgic_nr_shared_irqs(dist); + int vcpu_id; + + vcpu_id = vcpu->vcpu_id; + act_percpu = vcpu->arch.vgic_cpu.active_percpu; + act_shared = vcpu->arch.vgic_cpu.active_shared; + + active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id); + enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); + bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS); + + active = vgic_bitmap_get_shared_map(&dist->irq_active); + enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); + bitmap_and(act_shared, active, enabled, nr_shared); + bitmap_and(act_shared, act_shared, + vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), + nr_shared); + + active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS); + active_shared = find_first_bit(act_shared, nr_shared); + + return (active_private < VGIC_NR_PRIVATE_IRQS || + active_shared < nr_shared); +} + static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -835,7 +978,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) /* * Update the interrupt state and determine which CPUs have pending - * interrupts. Must be called with distributor lock held. + * or active interrupts. Must be called with distributor lock held. */ void vgic_update_state(struct kvm *kvm) { @@ -849,10 +992,13 @@ void vgic_update_state(struct kvm *kvm) } kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) { - pr_debug("CPU%d has pending interrupts\n", c); + if (compute_pending_for_cpu(vcpu)) set_bit(c, dist->irq_pending_on_cpu); - } + + if (compute_active_for_cpu(vcpu)) + set_bit(c, dist->irq_active_on_cpu); + else + clear_bit(c, dist->irq_active_on_cpu); } } @@ -883,6 +1029,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) return vgic_ops->get_eisr(vcpu); } +static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu) +{ + vgic_ops->clear_eisr(vcpu); +} + static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) { return vgic_ops->get_interrupt_status(vcpu); @@ -922,6 +1073,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) vgic_set_lr(vcpu, lr_nr, vlr); clear_bit(lr_nr, vgic_cpu->lr_used); vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -949,6 +1101,26 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) } } +static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, + int lr_nr, struct vgic_lr vlr) +{ + if (vgic_irq_is_active(vcpu, irq)) { + vlr.state |= LR_STATE_ACTIVE; + kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); + vgic_irq_clear_active(vcpu, irq); + vgic_update_state(vcpu->kvm); + } else if (vgic_dist_irq_is_pending(vcpu, irq)) { + vlr.state |= LR_STATE_PENDING; + kvm_debug("Set pending: 0x%x\n", vlr.state); + } + + if (!vgic_irq_is_edge(vcpu, irq)) + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr_nr, vlr); + vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); +} + /* * Queue an interrupt to a CPU virtual interface. Return true on success, * or false if it wasn't possible to queue it. @@ -976,8 +1148,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) if (vlr.source == sgi_source_id) { kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vlr.state |= LR_STATE_PENDING; - vgic_set_lr(vcpu, lr, vlr); + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } } @@ -994,11 +1165,8 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) vlr.irq = irq; vlr.source = sgi_source_id; - vlr.state = LR_STATE_PENDING; - if (!vgic_irq_is_edge(vcpu, irq)) - vlr.state |= LR_EOI_INT; - - vgic_set_lr(vcpu, lr, vlr); + vlr.state = 0; + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } @@ -1030,39 +1198,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *pa_percpu, *pa_shared; int i, vcpu_id; int overflow = 0; + int nr_shared = vgic_nr_shared_irqs(dist); vcpu_id = vcpu->vcpu_id; + pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu; + pa_shared = vcpu->arch.vgic_cpu.pend_act_shared; + + bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu, + VGIC_NR_PRIVATE_IRQS); + bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared, + nr_shared); /* * We may not have any pending interrupt, or the interrupts * may have been serviced from another vcpu. In all cases, * move along. */ - if (!kvm_vgic_vcpu_pending_irq(vcpu)) { - pr_debug("CPU%d has no pending interrupt\n", vcpu_id); + if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu)) goto epilog; - } /* SGIs */ - for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { + for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) { if (!queue_sgi(vcpu, i)) overflow = 1; } /* PPIs */ - for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { + for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) { if (!vgic_queue_hwirq(vcpu, i)) overflow = 1; } /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { + for_each_set_bit(i, pa_shared, nr_shared) { if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) overflow = 1; } + + + epilog: if (overflow) { vgic_enable_underflow(vcpu); @@ -1081,7 +1259,9 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { u32 status = vgic_get_interrupt_status(vcpu); + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; bool level_pending = false; + struct kvm *kvm = vcpu->kvm; kvm_debug("STATUS = %08x\n", status); @@ -1098,6 +1278,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_lr vlr = vgic_get_lr(vcpu, lr); WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); + spin_lock(&dist->lock); vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; @@ -1116,6 +1297,17 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) */ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + /* + * kvm_notify_acked_irq calls kvm_set_irq() + * to reset the IRQ level. Need to release the + * lock for kvm_set_irq to grab it. + */ + spin_unlock(&dist->lock); + + kvm_notify_acked_irq(kvm, 0, + vlr.irq - VGIC_NR_PRIVATE_IRQS); + spin_lock(&dist->lock); + /* Any additional pending interrupt? */ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { vgic_cpu_irq_set(vcpu, vlr.irq); @@ -1125,6 +1317,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) vgic_cpu_irq_clear(vcpu, vlr.irq); } + spin_unlock(&dist->lock); + /* * Despite being EOIed, the LR may not have * been marked as empty. @@ -1136,13 +1330,18 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) if (status & INT_STATUS_UNDERFLOW) vgic_disable_underflow(vcpu); + /* + * In the next iterations of the vcpu loop, if we sync the vgic state + * after flushing it, but before entering the guest (this happens for + * pending signals and vmid rollovers), then make sure we don't pick + * up any old maintenance interrupts here. + */ + vgic_clear_eisr(vcpu); + return level_pending; } -/* - * Sync back the VGIC state after a guest run. The distributor lock is - * needed so we don't get preempted in the middle of the state processing. - */ +/* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1189,14 +1388,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - if (!irqchip_in_kernel(vcpu->kvm)) return; - spin_lock(&dist->lock); __kvm_vgic_sync_hwstate(vcpu); - spin_unlock(&dist->lock); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) @@ -1209,6 +1404,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } +int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); +} + + void vgic_kick_vcpus(struct kvm *kvm) { struct kvm_vcpu *vcpu; @@ -1381,8 +1587,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; kfree(vgic_cpu->pending_shared); + kfree(vgic_cpu->active_shared); + kfree(vgic_cpu->pend_act_shared); kfree(vgic_cpu->vgic_irq_lr_map); vgic_cpu->pending_shared = NULL; + vgic_cpu->active_shared = NULL; + vgic_cpu->pend_act_shared = NULL; vgic_cpu->vgic_irq_lr_map = NULL; } @@ -1392,9 +1602,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); - if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { + if (!vgic_cpu->pending_shared + || !vgic_cpu->active_shared + || !vgic_cpu->pend_act_shared + || !vgic_cpu->vgic_irq_lr_map) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } @@ -1447,10 +1662,12 @@ void kvm_vgic_destroy(struct kvm *kvm) kfree(dist->irq_spi_mpidr); kfree(dist->irq_spi_target); kfree(dist->irq_pending_on_cpu); + kfree(dist->irq_active_on_cpu); dist->irq_sgi_sources = NULL; dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; dist->irq_pending_on_cpu = NULL; + dist->irq_active_on_cpu = NULL; dist->nr_cpus = 0; } @@ -1486,6 +1703,7 @@ int vgic_init(struct kvm *kvm) ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); @@ -1498,10 +1716,13 @@ int vgic_init(struct kvm *kvm) GFP_KERNEL); dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), GFP_KERNEL); + dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), + GFP_KERNEL); if (!dist->irq_sgi_sources || !dist->irq_spi_cpu || !dist->irq_spi_target || - !dist->irq_pending_on_cpu) { + !dist->irq_pending_on_cpu || + !dist->irq_active_on_cpu) { ret = -ENOMEM; goto out; } @@ -1583,8 +1804,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) * emulation. So check this here again. KVM_CREATE_DEVICE does * the proper checks already. */ - if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) - return -ENODEV; + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) { + ret = -ENODEV; + goto out; + } /* * Any time a vcpu is run, vcpu_load is called which tries to grab the @@ -1827,12 +2050,9 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return r; } -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) { - struct kvm_exit_mmio dev_attr_mmio; - - dev_attr_mmio.len = 4; - if (vgic_find_range(ranges, &dev_attr_mmio, offset)) + if (vgic_find_range(ranges, 4, offset)) return 0; else return -ENXIO; @@ -1865,8 +2085,10 @@ static struct notifier_block vgic_cpu_nb = { }; static const struct of_device_id vgic_ids[] = { - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-400", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; @@ -1914,3 +2136,38 @@ out_free_irq: free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, + int gsi) +{ + return gsi; +} + +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + return pin; +} + +int kvm_set_irq(struct kvm *kvm, int irq_source_id, + u32 irq, int level, bool line_status) +{ + unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; + + trace_kvm_set_irq(irq, level, irq_source_id); + + BUG_ON(!vgic_initialized(kvm)); + + if (spi > kvm->arch.vgic.nr_irqs) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi, level); + +} + +/* MSI not implemented yet */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + return 0; +} diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 1e83bdf5f499..0df74cbb6200 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -20,6 +20,8 @@ #ifndef __KVM_VGIC_H__ #define __KVM_VGIC_H__ +#include <kvm/iodev.h> + #define VGIC_ADDR_UNDEF (-1) #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) @@ -57,6 +59,14 @@ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); +struct kvm_exit_mmio { + phys_addr_t phys_addr; + void *data; + u32 len; + bool is_write; + void *private; +}; + void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, phys_addr_t offset, int mode); bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, @@ -74,7 +84,7 @@ void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } -struct kvm_mmio_range { +struct vgic_io_range { phys_addr_t base; unsigned long len; int bits_per_irq; @@ -82,6 +92,11 @@ struct kvm_mmio_range { phys_addr_t offset); }; +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_id, + struct vgic_io_device *iodev); + static inline bool is_in_range(phys_addr_t addr, unsigned long len, phys_addr_t baseaddr, unsigned long size) { @@ -89,14 +104,8 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len, } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset); - -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, - unsigned long mmio_base); +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + int len, gpa_t offset); bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id, int access); @@ -107,12 +116,20 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id); +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, phys_addr_t offset); void vgic_kick_vcpus(struct kvm *kvm); -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset); +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset); int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 00d86427af0f..571c1ce37d15 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -8,7 +8,7 @@ * */ -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> #include <linux/slab.h> @@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) return 1; } -static int coalesced_mmio_write(struct kvm_io_device *this, - gpa_t addr, int len, const void *val) +static int coalesced_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 148b2392c762..9ff4193dfa49 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,7 +36,7 @@ #include <linux/seqlock.h> #include <trace/events/kvm.h> -#include "iodev.h" +#include <kvm/iodev.h> #ifdef CONFIG_HAVE_KVM_IRQFD /* @@ -311,6 +311,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) unsigned int events; int idx; + if (!kvm_arch_intc_initialized(kvm)) + return -EAGAIN; + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) return -ENOMEM; @@ -712,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) /* MMIO/PIO writes trigger an event if the addr/val match */ static int -ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct _ioeventfd *p = to_ioeventfd(this); diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h deleted file mode 100644 index 12fd3caffd2b..000000000000 --- a/virt/kvm/iodev.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef __KVM_IODEV_H__ -#define __KVM_IODEV_H__ - -#include <linux/kvm_types.h> -#include <asm/errno.h> - -struct kvm_io_device; - -/** - * kvm_io_device_ops are called under kvm slots_lock. - * read and write handlers return 0 if the transaction has been handled, - * or non-zero to have it passed to the next device. - **/ -struct kvm_io_device_ops { - int (*read)(struct kvm_io_device *this, - gpa_t addr, - int len, - void *val); - int (*write)(struct kvm_io_device *this, - gpa_t addr, - int len, - const void *val); - void (*destructor)(struct kvm_io_device *this); -}; - - -struct kvm_io_device { - const struct kvm_io_device_ops *ops; -}; - -static inline void kvm_iodevice_init(struct kvm_io_device *dev, - const struct kvm_io_device_ops *ops) -{ - dev->ops = ops; -} - -static inline int kvm_iodevice_read(struct kvm_io_device *dev, - gpa_t addr, int l, void *v) -{ - return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; -} - -static inline int kvm_iodevice_write(struct kvm_io_device *dev, - gpa_t addr, int l, const void *v) -{ - return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; -} - -static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) -{ - if (dev->ops->destructor) - dev->ops->destructor(dev); -} - -#endif /* __KVM_IODEV_H__ */ diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 7f256f31df10..1d56a901e791 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -105,7 +105,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); - while(i--) { + while (i--) { int r; r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, line_status); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a1093700f3a4..d3fc9399062a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -16,7 +16,7 @@ * */ -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> #include <linux/kvm.h> @@ -66,13 +66,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -unsigned int halt_poll_ns = 0; +static unsigned int halt_poll_ns; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* * Ordering of locks: * - * kvm->lock --> kvm->slots_lock --> kvm->irq_lock + * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ DEFINE_SPINLOCK(kvm_lock); @@ -80,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; -static int kvm_usage_count = 0; +static int kvm_usage_count; static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; @@ -471,7 +471,7 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); r = -ENOMEM; - kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!kvm->memslots) goto out_err_no_srcu; @@ -522,7 +522,7 @@ out_err_no_srcu: out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); - kfree(kvm->memslots); + kvfree(kvm->memslots); kvm_arch_free_vm(kvm); return ERR_PTR(r); } @@ -539,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size) return kzalloc(size, GFP_KERNEL); } -void kvm_kvfree(const void *addr) -{ - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); -} - static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) { if (!memslot->dirty_bitmap) return; - kvm_kvfree(memslot->dirty_bitmap); + kvfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; } @@ -578,7 +570,7 @@ static void kvm_free_physmem(struct kvm *kvm) kvm_for_each_memslot(memslot, slots) kvm_free_physmem_slot(kvm, memslot, NULL); - kfree(kvm->memslots); + kvfree(kvm->memslots); } static void kvm_destroy_devices(struct kvm *kvm) @@ -871,10 +863,10 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); + slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { slot = id_to_memslot(slots, mem->slot); @@ -888,8 +880,8 @@ int __kvm_set_memory_region(struct kvm *kvm, * or moved, memslot will be created. * * validation of sp->gfn happens in: - * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_roots) + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_roots) */ kvm_arch_flush_shadow_memslot(kvm, slot); @@ -917,7 +909,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(kvm, &old, &new); - kfree(old_memslots); + kvfree(old_memslots); /* * IOMMU mapping: New slots need to be mapped. Old slots need to be @@ -936,7 +928,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return 0; out_slots: - kfree(slots); + kvfree(slots); out_free: kvm_free_physmem_slot(kvm, &new, &old); out: @@ -1061,9 +1053,11 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, mask = xchg(&dirty_bitmap[i], 0); dirty_bitmap_buffer[i] = mask; - offset = i * BITS_PER_LONG; - kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, - mask); + if (mask) { + offset = i * BITS_PER_LONG; + kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, + offset, mask); + } } spin_unlock(&kvm->mmu_lock); @@ -1193,16 +1187,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return gfn_to_hva_memslot_prot(slot, gfn, writable); } -static int kvm_read_hva(void *data, void __user *hva, int len) -{ - return __copy_from_user(data, hva, len); -} - -static int kvm_read_hva_atomic(void *data, void __user *hva, int len) -{ - return __copy_from_user_inatomic(data, hva, len); -} - static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { @@ -1481,7 +1465,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) return kvm_pfn_to_page(pfn); } - EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) @@ -1517,6 +1500,7 @@ void kvm_set_pfn_dirty(pfn_t pfn) { if (!kvm_is_reserved_pfn(pfn)) { struct page *page = pfn_to_page(pfn); + if (!PageReserved(page)) SetPageDirty(page); } @@ -1554,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; - r = kvm_read_hva(data, (void __user *)addr + offset, len); + r = __copy_from_user(data, (void __user *)addr + offset, len); if (r) return -EFAULT; return 0; @@ -1593,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); - r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); + r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); pagefault_enable(); if (r) return -EFAULT; @@ -1653,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, ghc->generation = slots->generation; ghc->len = len; ghc->memslot = gfn_to_memslot(kvm, start_gfn); - ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); - if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); + if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { ghc->hva += offset; } else { /* @@ -1742,7 +1726,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) int offset = offset_in_page(gpa); int ret; - while ((seg = next_segment(len, offset)) != 0) { + while ((seg = next_segment(len, offset)) != 0) { ret = kvm_clear_guest_page(kvm, gfn, offset, seg); if (ret < 0) return ret; @@ -1800,6 +1784,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) start = cur = ktime_get(); if (halt_poll_ns) { ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns); + do { /* * This sets KVM_REQ_UNHALT if an interrupt @@ -2118,7 +2103,7 @@ static long kvm_vcpu_ioctl(struct file *filp, * Special cases: vcpu ioctls that are asynchronous to vcpu execution, * so vcpu_load() would break it. */ - if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) + if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT) return kvm_arch_vcpu_ioctl(filp, ioctl, arg); #endif @@ -2135,6 +2120,7 @@ static long kvm_vcpu_ioctl(struct file *filp, /* The thread running this VCPU changed. */ struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + rcu_assign_pointer(vcpu->pid, newpid); if (oldpid) synchronize_rcu(); @@ -2205,7 +2191,7 @@ out_free1: if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &mp_state, sizeof mp_state)) + if (copy_to_user(argp, &mp_state, sizeof(mp_state))) goto out; r = 0; break; @@ -2214,7 +2200,7 @@ out_free1: struct kvm_mp_state mp_state; r = -EFAULT; - if (copy_from_user(&mp_state, argp, sizeof mp_state)) + if (copy_from_user(&mp_state, argp, sizeof(mp_state))) goto out; r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); break; @@ -2223,13 +2209,13 @@ out_free1: struct kvm_translation tr; r = -EFAULT; - if (copy_from_user(&tr, argp, sizeof tr)) + if (copy_from_user(&tr, argp, sizeof(tr))) goto out; r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &tr, sizeof tr)) + if (copy_to_user(argp, &tr, sizeof(tr))) goto out; r = 0; break; @@ -2238,7 +2224,7 @@ out_free1: struct kvm_guest_debug dbg; r = -EFAULT; - if (copy_from_user(&dbg, argp, sizeof dbg)) + if (copy_from_user(&dbg, argp, sizeof(dbg))) goto out; r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); break; @@ -2252,14 +2238,14 @@ out_free1: if (argp) { r = -EFAULT; if (copy_from_user(&kvm_sigmask, argp, - sizeof kvm_sigmask)) + sizeof(kvm_sigmask))) goto out; r = -EINVAL; - if (kvm_sigmask.len != sizeof sigset) + if (kvm_sigmask.len != sizeof(sigset)) goto out; r = -EFAULT; if (copy_from_user(&sigset, sigmask_arg->sigset, - sizeof sigset)) + sizeof(sigset))) goto out; p = &sigset; } @@ -2321,14 +2307,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, if (argp) { r = -EFAULT; if (copy_from_user(&kvm_sigmask, argp, - sizeof kvm_sigmask)) + sizeof(kvm_sigmask))) goto out; r = -EINVAL; - if (kvm_sigmask.len != sizeof csigset) + if (kvm_sigmask.len != sizeof(csigset)) goto out; r = -EFAULT; if (copy_from_user(&csigset, sigmask_arg->sigset, - sizeof csigset)) + sizeof(csigset))) goto out; sigset_from_compat(&sigset, &csigset); r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); @@ -2492,6 +2478,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_SIGNAL_MSI: #endif #ifdef CONFIG_HAVE_KVM_IRQFD + case KVM_CAP_IRQFD: case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: @@ -2524,7 +2511,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_userspace_mem, argp, - sizeof kvm_userspace_mem)) + sizeof(kvm_userspace_mem))) goto out; r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); @@ -2534,7 +2521,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_dirty_log log; r = -EFAULT; - if (copy_from_user(&log, argp, sizeof log)) + if (copy_from_user(&log, argp, sizeof(log))) goto out; r = kvm_vm_ioctl_get_dirty_log(kvm, &log); break; @@ -2542,16 +2529,18 @@ static long kvm_vm_ioctl(struct file *filp, #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET case KVM_REGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof zone)) + if (copy_from_user(&zone, argp, sizeof(zone))) goto out; r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); break; } case KVM_UNREGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof zone)) + if (copy_from_user(&zone, argp, sizeof(zone))) goto out; r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); break; @@ -2561,7 +2550,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_irqfd data; r = -EFAULT; - if (copy_from_user(&data, argp, sizeof data)) + if (copy_from_user(&data, argp, sizeof(data))) goto out; r = kvm_irqfd(kvm, &data); break; @@ -2570,7 +2559,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_ioeventfd data; r = -EFAULT; - if (copy_from_user(&data, argp, sizeof data)) + if (copy_from_user(&data, argp, sizeof(data))) goto out; r = kvm_ioeventfd(kvm, &data); break; @@ -2591,7 +2580,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_msi msi; r = -EFAULT; - if (copy_from_user(&msi, argp, sizeof msi)) + if (copy_from_user(&msi, argp, sizeof(msi))) goto out; r = kvm_send_userspace_msi(kvm, &msi); break; @@ -2603,7 +2592,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_irq_level irq_event; r = -EFAULT; - if (copy_from_user(&irq_event, argp, sizeof irq_event)) + if (copy_from_user(&irq_event, argp, sizeof(irq_event))) goto out; r = kvm_vm_ioctl_irq_line(kvm, &irq_event, @@ -2613,7 +2602,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (ioctl == KVM_IRQ_LINE_STATUS) { - if (copy_to_user(argp, &irq_event, sizeof irq_event)) + if (copy_to_user(argp, &irq_event, sizeof(irq_event))) goto out; } @@ -2646,7 +2635,7 @@ static long kvm_vm_ioctl(struct file *filp, goto out_free_irq_routing; r = kvm_set_irq_routing(kvm, entries, routing.nr, routing.flags); - out_free_irq_routing: +out_free_irq_routing: vfree(entries); break; } @@ -2821,8 +2810,7 @@ static void hardware_enable_nolock(void *junk) if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); atomic_inc(&hardware_enable_failed); - printk(KERN_INFO "kvm: enabling virtualization on " - "CPU%d failed\n", cpu); + pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu); } } @@ -2898,12 +2886,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: - printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", + pr_info("kvm: disabling virtualization on CPU%d\n", cpu); hardware_disable(); break; case CPU_STARTING: - printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", + pr_info("kvm: enabling virtualization on CPU%d\n", cpu); hardware_enable(); break; @@ -2920,7 +2908,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * * And Intel TXT required VMX off for all cpu when system shutdown. */ - printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + pr_info("kvm: exiting hardware virtualization\n"); kvm_rebooting = true; on_each_cpu(hardware_disable_nolock, NULL, 1); return NOTIFY_OK; @@ -2944,7 +2932,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) } static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, - const struct kvm_io_range *r2) + const struct kvm_io_range *r2) { if (r1->addr < r2->addr) return -1; @@ -2997,7 +2985,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, return off; } -static int __kvm_io_bus_write(struct kvm_io_bus *bus, +static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, struct kvm_io_range *range, const void *val) { int idx; @@ -3008,7 +2996,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3018,7 +3006,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, } /* kvm_io_bus_write - called under kvm->slots_lock */ -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { struct kvm_io_bus *bus; @@ -3030,14 +3018,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_write(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie) +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie) { struct kvm_io_bus *bus; struct kvm_io_range range; @@ -3047,12 +3035,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) - if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, + if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, val)) return cookie; @@ -3060,11 +3048,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, * cookie contained garbage; fall back to search and return the * correct cookie value. */ - return __kvm_io_bus_write(bus, &range, val); + return __kvm_io_bus_write(vcpu, bus, &range, val); } -static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, - void *val) +static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, + struct kvm_io_range *range, void *val) { int idx; @@ -3074,7 +3062,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3085,7 +3073,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, EXPORT_SYMBOL_GPL(kvm_io_bus_write); /* kvm_io_bus_read - called under kvm->slots_lock */ -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { struct kvm_io_bus *bus; @@ -3097,8 +3085,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_read(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3268,6 +3256,7 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) vcpu->preempted = false; @@ -3349,7 +3338,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = misc_register(&kvm_dev); if (r) { - printk(KERN_ERR "kvm: misc device register failed\n"); + pr_err("kvm: misc device register failed\n"); goto out_unreg; } @@ -3360,7 +3349,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = kvm_init_debug(); if (r) { - printk(KERN_ERR "kvm: create debugfs files failed\n"); + pr_err("kvm: create debugfs files failed\n"); goto out_undebugfs; } |