diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-08-02 22:11:27 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-08-02 22:11:27 +0200 |
commit | 221bb8a46e230b9824204ae86537183d9991ff2a (patch) | |
tree | 92510d72285b2285be7cb87288bf088cb28af4c1 /virt/kvm | |
parent | Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s39... (diff) | |
parent | KVM: PPC: Introduce KVM_CAP_PPC_HTM (diff) | |
download | linux-221bb8a46e230b9824204ae86537183d9991ff2a.tar.xz linux-221bb8a46e230b9824204ae86537183d9991ff2a.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
- ARM: GICv3 ITS emulation and various fixes. Removal of the
old VGIC implementation.
- s390: support for trapping software breakpoints, nested
virtualization (vSIE), the STHYI opcode, initial extensions
for CPU model support.
- MIPS: support for MIPS64 hosts (32-bit guests only) and lots
of cleanups, preliminary to this and the upcoming support for
hardware virtualization extensions.
- x86: support for execute-only mappings in nested EPT; reduced
vmexit latency for TSC deadline timer (by about 30%) on Intel
hosts; support for more than 255 vCPUs.
- PPC: bugfixes.
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits)
KVM: PPC: Introduce KVM_CAP_PPC_HTM
MIPS: Select HAVE_KVM for MIPS64_R{2,6}
MIPS: KVM: Reset CP0_PageMask during host TLB flush
MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX()
MIPS: KVM: Sign extend MFC0/RDHWR results
MIPS: KVM: Fix 64-bit big endian dynamic translation
MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase
MIPS: KVM: Use 64-bit CP0_EBase when appropriate
MIPS: KVM: Set CP0_Status.KX on MIPS64
MIPS: KVM: Make entry code MIPS64 friendly
MIPS: KVM: Use kmap instead of CKSEG0ADDR()
MIPS: KVM: Use virt_to_phys() to get commpage PFN
MIPS: Fix definition of KSEGX() for 64-bit
KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD
kvm: x86: nVMX: maintain internal copy of current VMCS
KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
KVM: arm64: vgic-its: Simplify MAPI error handling
KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers
KVM: arm64: vgic-its: Turn device_id validation into generic ID validation
...
Diffstat (limited to 'virt/kvm')
-rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v2-sr.c | 15 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v2-emul.c | 856 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v2.c | 274 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v3-emul.c | 1074 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v3.c | 279 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.c | 2417 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.h | 140 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c | 9 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 1500 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-kvm-device.c | 22 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v2.c | 10 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 247 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 64 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.h | 31 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c | 12 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 29 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 119 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 38 | ||||
-rw-r--r-- | virt/kvm/irqchip.c | 7 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 110 |
21 files changed, 2119 insertions, 5137 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e5d6108f5e85..b0cc1a34db27 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -16,9 +16,6 @@ config HAVE_KVM_EVENTFD bool select EVENTFD -config KVM_APIC_ARCHITECTURE - bool - config KVM_MMIO bool diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index 3a3a699b7489..7cffd9338c49 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -21,18 +21,11 @@ #include <asm/kvm_hyp.h> -#ifdef CONFIG_KVM_NEW_VGIC -extern struct vgic_global kvm_vgic_global_state; -#define vgic_v2_params kvm_vgic_global_state -#else -extern struct vgic_params vgic_v2_params; -#endif - static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; u32 eisr0, eisr1; int i; bool expect_mi; @@ -74,7 +67,7 @@ static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; u32 elrsr0, elrsr1; elrsr0 = readl_relaxed(base + GICH_ELRSR0); @@ -93,7 +86,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; for (i = 0; i < nr_lr; i++) { @@ -147,7 +140,7 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_dist *vgic = &kvm->arch.vgic; void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; u64 live_lrs = 0; diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c deleted file mode 100644 index 1b0bee095427..000000000000 --- a/virt/kvm/arm/vgic-v2-emul.c +++ /dev/null @@ -1,856 +0,0 @@ -/* - * Contains GICv2 specific emulation code, was in vgic.c before. - * - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/uaccess.h> - -#include <linux/irqchip/arm-gic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -#include "vgic.h" - -#define GICC_ARCH_VERSION_V2 0x2 - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); -static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) -{ - return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; -} - -static bool handle_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - u32 word_offset = offset & 3; - - switch (offset & ~3) { - case 0: /* GICD_CTLR */ - reg = vcpu->kvm->arch.vgic.enabled; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = reg & 1; - vgic_update_state(vcpu->kvm); - return true; - } - break; - - case 4: /* GICD_TYPER */ - reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - - case 8: /* GICD_IIDR */ - reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - } - - return false; -} - -static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, ACCESS_WRITE_SETBIT); -} - -static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT); -} - -static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -#define GICD_ITARGETSR_SIZE 32 -#define GICD_CPUTARGETS_BITS 8 -#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) -static u32 vgic_get_target_reg(struct kvm *kvm, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - int i; - u32 val = 0; - - irq -= VGIC_NR_PRIVATE_IRQS; - - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); - - return val; -} - -static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 target; - - irq -= VGIC_NR_PRIVATE_IRQS; - - /* - * Pick the LSB in each byte. This ensures we target exactly - * one vcpu per IRQ. If the byte is null, assume we target - * CPU0. - */ - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { - int shift = i * GICD_CPUTARGETS_BITS; - - target = ffs((val >> shift) & 0xffU); - target = target ? (target - 1) : 0; - dist->irq_spi_cpu[irq + i] = target; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - if (c == target) - set_bit(irq + i, bmap); - else - clear_bit(irq + i, bmap); - } - } -} - -static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - /* We treat the banked interrupts targets as read-only */ - if (offset < 32) { - u32 roreg; - - roreg = 1 << vcpu->vcpu_id; - roreg |= roreg << 8; - roreg |= roreg << 16; - - vgic_reg_access(mmio, &roreg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 *reg; - - reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - - return vgic_handle_cfg_reg(reg, mmio, offset); -} - -static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_dispatch_sgi(vcpu, reg); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ -static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int sgi; - int min_sgi = (offset & ~0x3); - int max_sgi = min_sgi + 3; - int vcpu_id = vcpu->vcpu_id; - u32 reg = 0; - - /* Copy source SGIs from distributor side */ - for (sgi = min_sgi; sgi <= max_sgi; sgi++) { - u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi); - - reg |= ((u32)sources) << (8 * (sgi - min_sgi)); - } - - mmio_data_write(mmio, ~0, reg); - return false; -} - -static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, bool set) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int sgi; - int min_sgi = (offset & ~0x3); - int max_sgi = min_sgi + 3; - int vcpu_id = vcpu->vcpu_id; - u32 reg; - bool updated = false; - - reg = mmio_data_read(mmio, ~0); - - /* Clear pending SGIs on the distributor */ - for (sgi = min_sgi; sgi <= max_sgi; sgi++) { - u8 mask = reg >> (8 * (sgi - min_sgi)); - u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); - - if (set) { - if ((*src & mask) != mask) - updated = true; - *src |= mask; - } else { - if (*src & mask) - updated = true; - *src &= ~mask; - } - } - - if (updated) - vgic_update_state(vcpu->kvm); - - return updated; -} - -static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (!mmio->is_write) - return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); - else - return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); -} - -static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (!mmio->is_write) - return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); - else - return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); -} - -static const struct vgic_io_range vgic_dist_ranges[] = { - { - .base = GIC_DIST_SOFTINT, - .len = 4, - .handle_mmio = handle_mmio_sgi_reg, - }, - { - .base = GIC_DIST_CTRL, - .len = 12, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_misc, - }, - { - .base = GIC_DIST_IGROUP, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ENABLE_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_enable_reg, - }, - { - .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_enable_reg, - }, - { - .base = GIC_DIST_PENDING_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_pending_reg, - }, - { - .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_pending_reg, - }, - { - .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_active_reg, - }, - { - .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_active_reg, - }, - { - .base = GIC_DIST_PRI, - .len = VGIC_MAX_IRQS, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_priority_reg, - }, - { - .base = GIC_DIST_TARGET, - .len = VGIC_MAX_IRQS, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_target_reg, - }, - { - .base = GIC_DIST_CONFIG, - .len = VGIC_MAX_IRQS / 4, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_cfg_reg, - }, - { - .base = GIC_DIST_SGI_PENDING_CLEAR, - .len = VGIC_NR_SGIS, - .handle_mmio = handle_mmio_sgi_clear, - }, - { - .base = GIC_DIST_SGI_PENDING_SET, - .len = VGIC_NR_SGIS, - .handle_mmio = handle_mmio_sgi_set, - }, - {} -}; - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int nrcpus = atomic_read(&kvm->online_vcpus); - u8 target_cpus; - int sgi, mode, c, vcpu_id; - - vcpu_id = vcpu->vcpu_id; - - sgi = reg & 0xf; - target_cpus = (reg >> 16) & 0xff; - mode = (reg >> 24) & 3; - - switch (mode) { - case 0: - if (!target_cpus) - return; - break; - - case 1: - target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; - break; - - case 2: - target_cpus = 1 << vcpu_id; - break; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (target_cpus & 1) { - /* Flag the SGI as pending */ - vgic_dist_irq_set_pending(vcpu, sgi); - *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; - kvm_debug("SGI%d from CPU%d to CPU%d\n", - sgi, vcpu_id, c); - } - - target_cpus >>= 1; - } -} - -static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long sources; - int vcpu_id = vcpu->vcpu_id; - int c; - - sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); - - for_each_set_bit(c, &sources, dist->nr_cpus) { - if (vgic_queue_irq(vcpu, c, irq)) - clear_bit(c, &sources); - } - - *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; - - /* - * If the sources bitmap has been cleared it means that we - * could queue all the SGIs onto link registers (see the - * clear_bit above), and therefore we are done with them in - * our emulated gic and can get rid of them. - */ - if (!sources) { - vgic_dist_irq_clear_pending(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - -/** - * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs - * @kvm: pointer to the kvm struct - * - * Map the virtual CPU interface into the VM before running any VCPUs. We - * can't do this at creation time, because user space must first set the - * virtual CPU interface address in the guest physical address space. - */ -static int vgic_v2_map_resources(struct kvm *kvm, - const struct vgic_params *params) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - int ret = 0; - - if (!irqchip_in_kernel(kvm)) - return 0; - - mutex_lock(&kvm->lock); - - if (vgic_ready(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; - } - - vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, - KVM_VGIC_V2_DIST_SIZE, - vgic_dist_ranges, -1, &dist->dist_iodev); - - /* - * Initialize the vgic if this hasn't already been done on demand by - * accessing the vgic state from userspace. - */ - ret = vgic_init(kvm); - if (ret) { - kvm_err("Unable to allocate maps\n"); - goto out_unregister; - } - - ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, - params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, - true); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out_unregister; - } - - dist->ready = true; - goto out; - -out_unregister: - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); - -out: - if (ret) - kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); - return ret; -} - -static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source; -} - -static int vgic_v2_init_model(struct kvm *kvm) -{ - int i; - - for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); - - return 0; -} - -void vgic_v2_init_emulation(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; - dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; - dist->vm_ops.init_model = vgic_v2_init_model; - dist->vm_ops.map_resources = vgic_v2_map_resources; - - kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; -} - -static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - bool updated = false; - struct vgic_vmcr vmcr; - u32 *vmcr_field; - u32 reg; - - vgic_get_vmcr(vcpu, &vmcr); - - switch (offset & ~0x3) { - case GIC_CPU_CTRL: - vmcr_field = &vmcr.ctlr; - break; - case GIC_CPU_PRIMASK: - vmcr_field = &vmcr.pmr; - break; - case GIC_CPU_BINPOINT: - vmcr_field = &vmcr.bpr; - break; - case GIC_CPU_ALIAS_BINPOINT: - vmcr_field = &vmcr.abpr; - break; - default: - BUG(); - } - - if (!mmio->is_write) { - reg = *vmcr_field; - mmio_data_write(mmio, ~0, reg); - } else { - reg = mmio_data_read(mmio, ~0); - if (reg != *vmcr_field) { - *vmcr_field = reg; - vgic_set_vmcr(vcpu, &vmcr); - updated = true; - } - } - return updated; -} - -static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); -} - -static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - if (mmio->is_write) - return false; - - /* GICC_IIDR */ - reg = (PRODUCT_ID_KVM << 20) | - (GICC_ARCH_VERSION_V2 << 16) | - (IMPLEMENTER_ARM << 0); - mmio_data_write(mmio, ~0, reg); - return false; -} - -/* - * CPU Interface Register accesses - these are not accessed by the VM, but by - * user space for saving and restoring VGIC state. - */ -static const struct vgic_io_range vgic_cpu_ranges[] = { - { - .base = GIC_CPU_CTRL, - .len = 12, - .handle_mmio = handle_cpu_mmio_misc, - }, - { - .base = GIC_CPU_ALIAS_BINPOINT, - .len = 4, - .handle_mmio = handle_mmio_abpr, - }, - { - .base = GIC_CPU_ACTIVEPRIO, - .len = 16, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_CPU_IDENT, - .len = 4, - .handle_mmio = handle_cpu_mmio_ident, - }, -}; - -static int vgic_attr_regs_access(struct kvm_device *dev, - struct kvm_device_attr *attr, - u32 *reg, bool is_write) -{ - const struct vgic_io_range *r = NULL, *ranges; - phys_addr_t offset; - int ret, cpuid, c; - struct kvm_vcpu *vcpu, *tmp_vcpu; - struct vgic_dist *vgic; - struct kvm_exit_mmio mmio; - u32 data; - - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; - - mutex_lock(&dev->kvm->lock); - - ret = vgic_init(dev->kvm); - if (ret) - goto out; - - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { - ret = -EINVAL; - goto out; - } - - vcpu = kvm_get_vcpu(dev->kvm, cpuid); - vgic = &dev->kvm->arch.vgic; - - mmio.len = 4; - mmio.is_write = is_write; - mmio.data = &data; - if (is_write) - mmio_data_write(&mmio, ~0, *reg); - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - mmio.phys_addr = vgic->vgic_dist_base + offset; - ranges = vgic_dist_ranges; - break; - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - mmio.phys_addr = vgic->vgic_cpu_base + offset; - ranges = vgic_cpu_ranges; - break; - default: - BUG(); - } - r = vgic_find_range(ranges, 4, offset); - - if (unlikely(!r || !r->handle_mmio)) { - ret = -ENXIO; - goto out; - } - - - spin_lock(&vgic->lock); - - /* - * Ensure that no other VCPU is running by checking the vcpu->cpu - * field. If no other VPCUs are running we can safely access the VGIC - * state, because even if another VPU is run after this point, that - * VCPU will not touch the vgic state, because it will block on - * getting the vgic->lock in kvm_vgic_sync_hwstate(). - */ - kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { - if (unlikely(tmp_vcpu->cpu != -1)) { - ret = -EBUSY; - goto out_vgic_unlock; - } - } - - /* - * Move all pending IRQs from the LRs on all VCPUs so the pending - * state can be properly represented in the register state accessible - * through this API. - */ - kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) - vgic_unqueue_irqs(tmp_vcpu); - - offset -= r->base; - r->handle_mmio(vcpu, &mmio, offset); - - if (!is_write) - *reg = mmio_data_read(&mmio, ~0); - - ret = 0; -out_vgic_unlock: - spin_unlock(&vgic->lock); -out: - mutex_unlock(&dev->kvm->lock); - return ret; -} - -static int vgic_v2_create(struct kvm_device *dev, u32 type) -{ - return kvm_vgic_create(dev->kvm, type); -} - -static void vgic_v2_destroy(struct kvm_device *dev) -{ - kfree(dev); -} - -static int vgic_v2_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_attr_regs_access(dev, attr, ®, true); - } - - } - - return -ENXIO; -} - -static int vgic_v2_get_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg = 0; - - ret = vgic_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - return put_user(reg, uaddr); - } - - } - - return -ENXIO; -} - -static int vgic_v2_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - phys_addr_t offset; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - case KVM_VGIC_V2_ADDR_TYPE_CPU: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - return vgic_has_attr_regs(vgic_dist_ranges, offset); - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - return vgic_has_attr_regs(vgic_cpu_ranges, offset); - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: - return 0; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - } - return -ENXIO; -} - -struct kvm_device_ops kvm_arm_vgic_v2_ops = { - .name = "kvm-arm-vgic-v2", - .create = vgic_v2_create, - .destroy = vgic_v2_destroy, - .set_attr = vgic_v2_set_attr, - .get_attr = vgic_v2_get_attr, - .has_attr = vgic_v2_has_attr, -}; diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c deleted file mode 100644 index 334cd7a89106..000000000000 --- a/virt/kvm/arm/vgic-v2.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> - -#include <linux/irqchip/arm-gic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; - - lr_desc.irq = val & GICH_LR_VIRTUALID; - if (lr_desc.irq <= 15) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - else - lr_desc.source = 0; - lr_desc.state = 0; - - if (val & GICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - if (val & GICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & GICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - if (val & GICH_LR_HW) { - lr_desc.state |= LR_HW; - lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT; - } - - return lr_desc; -} - -static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u32 lr_val; - - lr_val = lr_desc.irq; - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= GICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= GICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= GICH_LR_EOI; - - if (lr_desc.state & LR_HW) { - lr_val |= GICH_LR_HW; - lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT; - } - - if (lr_desc.irq < VGIC_NR_SGIS) - lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); - - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; - - if (!(lr_desc.state & LR_STATE_MASK)) - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); - else - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr); -} - -static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; -} - -static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; -} - -static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0; -} - -static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; - u32 ret = 0; - - if (misr & GICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & GICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; -} - -static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; -} - -static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; - - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; - vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; - vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; -} - -static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; - vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; - vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; -} - -static void vgic_v2_enable(struct kvm_vcpu *vcpu) -{ - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; - - /* Get the show on the road... */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; -} - -static const struct vgic_ops vgic_v2_ops = { - .get_lr = vgic_v2_get_lr, - .set_lr = vgic_v2_set_lr, - .get_elrsr = vgic_v2_get_elrsr, - .get_eisr = vgic_v2_get_eisr, - .clear_eisr = vgic_v2_clear_eisr, - .get_interrupt_status = vgic_v2_get_interrupt_status, - .enable_underflow = vgic_v2_enable_underflow, - .disable_underflow = vgic_v2_disable_underflow, - .get_vmcr = vgic_v2_get_vmcr, - .set_vmcr = vgic_v2_set_vmcr, - .enable = vgic_v2_enable, -}; - -struct vgic_params __section(.hyp.text) vgic_v2_params; - -static void vgic_cpu_init_lrs(void *params) -{ - struct vgic_params *vgic = params; - int i; - - for (i = 0; i < vgic->nr_lr; i++) - writel_relaxed(0, vgic->vctrl_base + GICH_LR0 + (i * 4)); -} - -/** - * vgic_v2_probe - probe for a GICv2 compatible interrupt controller - * @gic_kvm_info: pointer to the GIC description - * @ops: address of a pointer to the GICv2 operations - * @params: address of a pointer to HW-specific parameters - * - * Returns 0 if a GICv2 has been found, with the low level operations - * in *ops and the HW parameters in *params. Returns an error code - * otherwise. - */ -int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, - const struct vgic_ops **ops, - const struct vgic_params **params) -{ - int ret; - struct vgic_params *vgic = &vgic_v2_params; - const struct resource *vctrl_res = &gic_kvm_info->vctrl; - const struct resource *vcpu_res = &gic_kvm_info->vcpu; - - memset(vgic, 0, sizeof(*vgic)); - - if (!gic_kvm_info->maint_irq) { - kvm_err("error getting vgic maintenance irq\n"); - ret = -ENXIO; - goto out; - } - vgic->maint_irq = gic_kvm_info->maint_irq; - - if (!gic_kvm_info->vctrl.start) { - kvm_err("GICH not present in the firmware table\n"); - ret = -ENXIO; - goto out; - } - - vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start, - resource_size(&gic_kvm_info->vctrl)); - if (!vgic->vctrl_base) { - kvm_err("Cannot ioremap GICH\n"); - ret = -ENOMEM; - goto out; - } - - vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR); - vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic->vctrl_base, - vgic->vctrl_base + resource_size(vctrl_res), - vctrl_res->start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - if (!PAGE_ALIGNED(vcpu_res->start)) { - kvm_err("GICV physical address 0x%llx not page aligned\n", - (unsigned long long)vcpu_res->start); - ret = -ENXIO; - goto out_unmap; - } - - if (!PAGE_ALIGNED(resource_size(vcpu_res))) { - kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(vcpu_res), - PAGE_SIZE); - ret = -ENXIO; - goto out_unmap; - } - - vgic->can_emulate_gicv2 = true; - kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); - - vgic->vcpu_base = vcpu_res->start; - - kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n", - gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq); - - vgic->type = VGIC_V2; - vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; - - on_each_cpu(vgic_cpu_init_lrs, vgic, 1); - - *ops = &vgic_v2_ops; - *params = vgic; - goto out; - -out_unmap: - iounmap(vgic->vctrl_base); -out: - return ret; -} diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c deleted file mode 100644 index e661e7fb9d91..000000000000 --- a/virt/kvm/arm/vgic-v3-emul.c +++ /dev/null @@ -1,1074 +0,0 @@ -/* - * GICv3 distributor and redistributor emulation - * - * GICv3 emulation is currently only supported on a GICv3 host (because - * we rely on the hardware's CPU interface virtualization support), but - * supports both hardware with or without the optional GICv2 backwards - * compatibility features. - * - * Limitations of the emulation: - * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore) - * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI. - * - We do not support the message based interrupts (MBIs) triggered by - * writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0. - * - We do not support the (optional) backwards compatibility feature. - * GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports - * the compatiblity feature, you can use a GICv2 in the guest, though. - * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI. - * - Priorities are not emulated (same as the GICv2 emulation). Linux - * as a guest is fine with this, because it does not use priorities. - * - We only support Group1 interrupts. Again Linux uses only those. - * - * Copyright (C) 2014 ARM Ltd. - * Author: Andre Przywara <andre.przywara@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> - -#include <linux/irqchip/arm-gic-v3.h> -#include <kvm/arm_vgic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -#include "vgic.h" - -static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg = 0xffffffff; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg = 0; - - /* - * Force ARE and DS to 1, the guest cannot change this. - * For the time being we only support Group1 interrupts. - */ - if (vcpu->kvm->arch.vgic.enabled) - reg = GICD_CTLR_ENABLE_SS_G1; - reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1); - vgic_update_state(vcpu->kvm); - return true; - } - return false; -} - -/* - * As this implementation does not provide compatibility - * with GICv2 (ARE==1), we report zero CPUs in bits [5..7]. - * Also LPIs and MBIs are not supported, so we set the respective bits to 0. - * Also we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs). - */ -#define INTERRUPT_ID_BITS 10 -static bool handle_mmio_typer(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - - reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1; - - reg |= (INTERRUPT_ID_BITS - 1) << 19; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static bool handle_mmio_iidr(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - - reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, - ACCESS_WRITE_SETBIT); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, - ACCESS_WRITE_CLEARBIT); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_active_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_clear_active_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg; - - if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) { - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg; - - if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) { - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - - return vgic_handle_cfg_reg(reg, mmio, offset); -} - -/* - * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word) - * when we store the target MPIDR written by the guest. - */ -static u32 compress_mpidr(unsigned long mpidr) -{ - u32 ret; - - ret = MPIDR_AFFINITY_LEVEL(mpidr, 0); - ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8; - ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16; - ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24; - - return ret; -} - -static unsigned long uncompress_mpidr(u32 value) -{ - unsigned long mpidr; - - mpidr = ((value >> 0) & 0xFF) << MPIDR_LEVEL_SHIFT(0); - mpidr |= ((value >> 8) & 0xFF) << MPIDR_LEVEL_SHIFT(1); - mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2); - mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3); - - return mpidr; -} - -/* - * Lookup the given MPIDR value to get the vcpu_id (if there is one) - * and store that in the irq_spi_cpu[] array. - * This limits the number of VCPUs to 255 for now, extending the data - * type (or storing kvm_vcpu pointers) should lift the limit. - * Store the original MPIDR value in an extra array to support read-as-written. - * Unallocated MPIDRs are translated to a special value and caught - * before any array accesses. - */ -static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int spi; - u32 reg; - int vcpu_id; - unsigned long *bmap, mpidr; - - /* - * The upper 32 bits of each 64 bit register are zero, - * as we don't support Aff3. - */ - if ((offset & 4)) { - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; - } - - /* This region only covers SPIs, so no handling of private IRQs here. */ - spi = offset / 8; - - /* get the stored MPIDR for this IRQ */ - mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]); - reg = mpidr; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - - if (!mmio->is_write) - return false; - - /* - * Now clear the currently assigned vCPU from the map, making room - * for the new one to be written below - */ - vcpu = kvm_mpidr_to_vcpu(kvm, mpidr); - if (likely(vcpu)) { - vcpu_id = vcpu->vcpu_id; - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); - __clear_bit(spi, bmap); - } - - dist->irq_spi_mpidr[spi] = compress_mpidr(reg); - vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK); - - /* - * The spec says that non-existent MPIDR values should not be - * forwarded to any existent (v)CPU, but should be able to become - * pending anyway. We simply keep the irq_spi_target[] array empty, so - * the interrupt will never be injected. - * irq_spi_cpu[irq] gets a magic value in this case. - */ - if (likely(vcpu)) { - vcpu_id = vcpu->vcpu_id; - dist->irq_spi_cpu[spi] = vcpu_id; - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); - __set_bit(spi, bmap); - } else { - dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED; - } - - vgic_update_state(kvm); - - return true; -} - -/* - * We should be careful about promising too much when a guest reads - * this register. Don't claim to be like any hardware implementation, - * but just report the GIC as version 3 - which is what a Linux guest - * would check. - */ -static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg = 0; - - switch (offset + GICD_IDREGS) { - case GICD_PIDR2: - reg = 0x3b; - break; - } - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static const struct vgic_io_range vgic_v3_dist_ranges[] = { - { - .base = GICD_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr, - }, - { - .base = GICD_TYPER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer, - }, - { - .base = GICD_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - /* this register is optional, it is RAZ/WI if not implemented */ - .base = GICD_STATUSR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this write only register is WI when TYPER.MBIS=0 */ - .base = GICD_SETSPI_NSR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this write only register is WI when TYPER.MBIS=0 */ - .base = GICD_CLRSPI_NSR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_SETSPI_SR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_CLRSPI_SR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICD_IGROUPR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_rao_wi, - }, - { - .base = GICD_ISENABLER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_enable_reg_dist, - }, - { - .base = GICD_ICENABLER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_enable_reg_dist, - }, - { - .base = GICD_ISPENDR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_pending_reg_dist, - }, - { - .base = GICD_ICPENDR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_pending_reg_dist, - }, - { - .base = GICD_ISACTIVER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_active_reg_dist, - }, - { - .base = GICD_ICACTIVER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_active_reg_dist, - }, - { - .base = GICD_IPRIORITYR, - .len = 0x400, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_priority_reg_dist, - }, - { - /* TARGETSRn is RES0 when ARE=1 */ - .base = GICD_ITARGETSR, - .len = 0x400, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICD_ICFGR, - .len = 0x100, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_cfg_reg_dist, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_IGRPMODR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_NSACR, - .len = 0x100, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when ARE=1 */ - .base = GICD_SGIR, - .len = 0x04, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when ARE=1 */ - .base = GICD_CPENDSGIR, - .len = 0x10, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when ARE=1 */ - .base = GICD_SPENDSGIR, - .len = 0x10, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICD_IROUTER + 0x100, - .len = 0x1ee0, - .bits_per_irq = 64, - .handle_mmio = handle_mmio_route_reg, - }, - { - .base = GICD_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - {}, -}; - -static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - /* since we don't support LPIs, this register is zero for now */ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - u64 mpidr; - struct kvm_vcpu *redist_vcpu = mmio->private; - int target_vcpu_id = redist_vcpu->vcpu_id; - - /* the upper 32 bits contain the affinity value */ - if ((offset & ~3) == 4) { - mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); - reg = compress_mpidr(mpidr); - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = redist_vcpu->vcpu_id << 8; - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - reg |= GICR_TYPER_LAST; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id, - ACCESS_WRITE_SETBIT); -} - -static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id, - ACCESS_WRITE_CLEARBIT); -} - -static bool handle_mmio_set_active_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_clear_active_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - u32 *reg; - - reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - redist_vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - redist_vcpu->vcpu_id, offset >> 1); - - return vgic_handle_cfg_reg(reg, mmio, offset); -} - -#define SGI_base(x) ((x) + SZ_64K) - -static const struct vgic_io_range vgic_redist_ranges[] = { - { - .base = GICR_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr_redist, - }, - { - .base = GICR_TYPER, - .len = 0x08, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer_redist, - }, - { - .base = GICR_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - .base = GICR_WAKER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICR_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - { - .base = SGI_base(GICR_IGROUPR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_rao_wi, - }, - { - .base = SGI_base(GICR_ISENABLER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_enable_reg_redist, - }, - { - .base = SGI_base(GICR_ICENABLER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_enable_reg_redist, - }, - { - .base = SGI_base(GICR_ISPENDR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_pending_reg_redist, - }, - { - .base = SGI_base(GICR_ICPENDR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_pending_reg_redist, - }, - { - .base = SGI_base(GICR_ISACTIVER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_active_reg_redist, - }, - { - .base = SGI_base(GICR_ICACTIVER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_active_reg_redist, - }, - { - .base = SGI_base(GICR_IPRIORITYR0), - .len = 0x20, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_priority_reg_redist, - }, - { - .base = SGI_base(GICR_ICFGR0), - .len = 0x08, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_cfg_reg_redist, - }, - { - .base = SGI_base(GICR_IGRPMODR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = SGI_base(GICR_NSACR), - .len = 0x04, - .handle_mmio = handle_mmio_raz_wi, - }, - {}, -}; - -static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - if (vgic_queue_irq(vcpu, 0, irq)) { - vgic_dist_irq_clear_pending(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - -static int vgic_v3_map_resources(struct kvm *kvm, - const struct vgic_params *params) -{ - int ret = 0; - struct vgic_dist *dist = &kvm->arch.vgic; - gpa_t rdbase = dist->vgic_redist_base; - struct vgic_io_device *iodevs = NULL; - int i; - - if (!irqchip_in_kernel(kvm)) - return 0; - - mutex_lock(&kvm->lock); - - if (vgic_ready(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { - kvm_err("Need to set vgic distributor addresses first\n"); - ret = -ENXIO; - goto out; - } - - /* - * For a VGICv3 we require the userland to explicitly initialize - * the VGIC before we need to use it. - */ - if (!vgic_initialized(kvm)) { - ret = -EBUSY; - goto out; - } - - ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, - GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, - -1, &dist->dist_iodev); - if (ret) - goto out; - - iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL); - if (!iodevs) { - ret = -ENOMEM; - goto out_unregister; - } - - for (i = 0; i < dist->nr_cpus; i++) { - ret = vgic_register_kvm_io_dev(kvm, rdbase, - SZ_128K, vgic_redist_ranges, - i, &iodevs[i]); - if (ret) - goto out_unregister; - rdbase += GIC_V3_REDIST_SIZE; - } - - dist->redist_iodevs = iodevs; - dist->ready = true; - goto out; - -out_unregister: - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); - if (iodevs) { - for (i = 0; i < dist->nr_cpus; i++) { - if (iodevs[i].dev.ops) - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &iodevs[i].dev); - } - } - -out: - if (ret) - kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); - return ret; -} - -static int vgic_v3_init_model(struct kvm *kvm) -{ - int i; - u32 mpidr; - struct vgic_dist *dist = &kvm->arch.vgic; - int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; - - dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]), - GFP_KERNEL); - - if (!dist->irq_spi_mpidr) - return -ENOMEM; - - /* Initialize the target VCPUs for each IRQ to VCPU 0 */ - mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0))); - for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) { - dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0; - dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr; - vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1); - } - - return 0; -} - -/* GICv3 does not keep track of SGI sources anymore. */ -static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) -{ -} - -void vgic_v3_init_emulation(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; - dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; - dist->vm_ops.init_model = vgic_v3_init_model; - dist->vm_ops.map_resources = vgic_v3_map_resources; - - kvm->arch.max_vcpus = KVM_MAX_VCPUS; -} - -/* - * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI - * generation register ICC_SGI1R_EL1) with a given VCPU. - * If the VCPU's MPIDR matches, return the level0 affinity, otherwise - * return -1. - */ -static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) -{ - unsigned long affinity; - int level0; - - /* - * Split the current VCPU's MPIDR into affinity level 0 and the - * rest as this is what we have to compare against. - */ - affinity = kvm_vcpu_get_mpidr_aff(vcpu); - level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); - affinity &= ~MPIDR_LEVEL_MASK; - - /* bail out if the upper three levels don't match */ - if (sgi_aff != affinity) - return -1; - - /* Is this VCPU's bit set in the mask ? */ - if (!(sgi_cpu_mask & BIT(level0))) - return -1; - - return level0; -} - -#define SGI_AFFINITY_LEVEL(reg, level) \ - ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ - >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) - -/** - * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs - * @vcpu: The VCPU requesting a SGI - * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU - * - * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. - * This will trap in sys_regs.c and call this function. - * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the - * target processors as well as a bitmask of 16 Aff0 CPUs. - * If the interrupt routing mode bit is not set, we iterate over all VCPUs to - * check for matching ones. If this bit is set, we signal all, but not the - * calling VCPU. - */ -void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct kvm_vcpu *c_vcpu; - struct vgic_dist *dist = &kvm->arch.vgic; - u16 target_cpus; - u64 mpidr; - int sgi, c; - int vcpu_id = vcpu->vcpu_id; - bool broadcast; - int updated = 0; - - sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; - broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); - target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; - mpidr = SGI_AFFINITY_LEVEL(reg, 3); - mpidr |= SGI_AFFINITY_LEVEL(reg, 2); - mpidr |= SGI_AFFINITY_LEVEL(reg, 1); - - /* - * We take the dist lock here, because we come from the sysregs - * code path and not from the MMIO one (which already takes the lock). - */ - spin_lock(&dist->lock); - - /* - * We iterate over all VCPUs to find the MPIDRs matching the request. - * If we have handled one CPU, we clear it's bit to detect early - * if we are already finished. This avoids iterating through all - * VCPUs when most of the times we just signal a single VCPU. - */ - kvm_for_each_vcpu(c, c_vcpu, kvm) { - - /* Exit early if we have dealt with all requested CPUs */ - if (!broadcast && target_cpus == 0) - break; - - /* Don't signal the calling VCPU */ - if (broadcast && c == vcpu_id) - continue; - - if (!broadcast) { - int level0; - - level0 = match_mpidr(mpidr, target_cpus, c_vcpu); - if (level0 == -1) - continue; - - /* remove this matching VCPU from the mask */ - target_cpus &= ~BIT(level0); - } - - /* Flag the SGI as pending */ - vgic_dist_irq_set_pending(c_vcpu, sgi); - updated = 1; - kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); - } - if (updated) - vgic_update_state(vcpu->kvm); - spin_unlock(&dist->lock); - if (updated) - vgic_kick_vcpus(vcpu->kvm); -} - -static int vgic_v3_create(struct kvm_device *dev, u32 type) -{ - return kvm_vgic_create(dev->kvm, type); -} - -static void vgic_v3_destroy(struct kvm_device *dev) -{ - kfree(dev); -} - -static int vgic_v3_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - return -ENXIO; - } - - return -ENXIO; -} - -static int vgic_v3_get_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - return -ENXIO; - } - - return -ENXIO; -} - -static int vgic_v3_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - case KVM_VGIC_V2_ADDR_TYPE_CPU: - return -ENXIO; - case KVM_VGIC_V3_ADDR_TYPE_DIST: - case KVM_VGIC_V3_ADDR_TYPE_REDIST: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - return -ENXIO; - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: - return 0; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - } - return -ENXIO; -} - -struct kvm_device_ops kvm_arm_vgic_v3_ops = { - .name = "kvm-arm-vgic-v3", - .create = vgic_v3_create, - .destroy = vgic_v3_destroy, - .set_attr = vgic_v3_set_attr, - .get_attr = vgic_v3_get_attr, - .has_attr = vgic_v3_has_attr, -}; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c deleted file mode 100644 index 75b02fa86436..000000000000 --- a/virt/kvm/arm/vgic-v3.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (C) 2013 ARM Limited, All Rights Reserved. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> - -#include <linux/irqchip/arm-gic-v3.h> -#include <linux/irqchip/arm-gic-common.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> - -static u32 ich_vtr_el2; - -static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr]; - - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - lr_desc.irq = val & ICH_LR_VIRTUAL_ID_MASK; - else - lr_desc.irq = val & GICH_LR_VIRTUALID; - - lr_desc.source = 0; - if (lr_desc.irq <= 15 && - vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - - lr_desc.state = 0; - - if (val & ICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - if (val & ICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & ICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - if (val & ICH_LR_HW) { - lr_desc.state |= LR_HW; - lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0); - } - - return lr_desc; -} - -static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u64 lr_val; - - lr_val = lr_desc.irq; - - /* - * Currently all guest IRQs are Group1, as Group0 would result - * in a FIQ in the guest, which it wouldn't expect. - * Eventually we want to make this configurable, so we may revisit - * this in the future. - */ - switch (vcpu->kvm->arch.vgic.vgic_model) { - case KVM_DEV_TYPE_ARM_VGIC_V3: - lr_val |= ICH_LR_GROUP; - break; - case KVM_DEV_TYPE_ARM_VGIC_V2: - if (lr_desc.irq < VGIC_NR_SGIS) - lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; - break; - default: - BUG(); - } - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= ICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= ICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= ICH_LR_EOI; - if (lr_desc.state & LR_HW) { - lr_val |= ICH_LR_HW; - lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; - } - - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = lr_val; - - if (!(lr_desc.state & LR_STATE_MASK)) - vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); - else - vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr); -} - -static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr; -} - -static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; -} - -static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0; -} - -static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; - u32 ret = 0; - - if (misr & ICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & ICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; - - vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; - vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; - vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; - vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; -} - -static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE; -} - -static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE; -} - -static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; - vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; - vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; - vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; - - vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; -} - -static void vgic_v3_enable(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; - - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_v3->vgic_vmcr = 0; - vgic_v3->vgic_elrsr = ~0; - - /* - * If we are emulating a GICv3, we do it in an non-GICv2-compatible - * way, so we force SRE to 1 to demonstrate this to the guest. - * This goes with the spec allowing the value to be RAO/WI. - */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; - else - vgic_v3->vgic_sre = 0; - - /* Get the show on the road... */ - vgic_v3->vgic_hcr = ICH_HCR_EN; -} - -static const struct vgic_ops vgic_v3_ops = { - .get_lr = vgic_v3_get_lr, - .set_lr = vgic_v3_set_lr, - .get_elrsr = vgic_v3_get_elrsr, - .get_eisr = vgic_v3_get_eisr, - .clear_eisr = vgic_v3_clear_eisr, - .get_interrupt_status = vgic_v3_get_interrupt_status, - .enable_underflow = vgic_v3_enable_underflow, - .disable_underflow = vgic_v3_disable_underflow, - .get_vmcr = vgic_v3_get_vmcr, - .set_vmcr = vgic_v3_set_vmcr, - .enable = vgic_v3_enable, -}; - -static struct vgic_params vgic_v3_params; - -static void vgic_cpu_init_lrs(void *params) -{ - kvm_call_hyp(__vgic_v3_init_lrs); -} - -/** - * vgic_v3_probe - probe for a GICv3 compatible interrupt controller - * @gic_kvm_info: pointer to the GIC description - * @ops: address of a pointer to the GICv3 operations - * @params: address of a pointer to HW-specific parameters - * - * Returns 0 if a GICv3 has been found, with the low level operations - * in *ops and the HW parameters in *params. Returns an error code - * otherwise. - */ -int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, - const struct vgic_ops **ops, - const struct vgic_params **params) -{ - int ret = 0; - struct vgic_params *vgic = &vgic_v3_params; - const struct resource *vcpu_res = &gic_kvm_info->vcpu; - - vgic->maint_irq = gic_kvm_info->maint_irq; - - ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); - - /* - * The ListRegs field is 5 bits, but there is a architectural - * maximum of 16 list registers. Just ignore bit 4... - */ - vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; - vgic->can_emulate_gicv2 = false; - - if (!vcpu_res->start) { - kvm_info("GICv3: no GICV resource entry\n"); - vgic->vcpu_base = 0; - } else if (!PAGE_ALIGNED(vcpu_res->start)) { - pr_warn("GICV physical address 0x%llx not page aligned\n", - (unsigned long long)vcpu_res->start); - vgic->vcpu_base = 0; - } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) { - pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(vcpu_res), - PAGE_SIZE); - } else { - vgic->vcpu_base = vcpu_res->start; - vgic->can_emulate_gicv2 = true; - kvm_register_device_ops(&kvm_arm_vgic_v2_ops, - KVM_DEV_TYPE_ARM_VGIC_V2); - } - if (vgic->vcpu_base == 0) - kvm_info("disabling GICv2 emulation\n"); - kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3); - - vgic->vctrl_base = NULL; - vgic->type = VGIC_V3; - vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; - - kvm_info("GICV base=0x%llx, IRQ=%d\n", - vgic->vcpu_base, vgic->maint_irq); - - on_each_cpu(vgic_cpu_init_lrs, vgic, 1); - - *ops = &vgic_v3_ops; - *params = vgic; - - return ret; -} diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c deleted file mode 100644 index 67cb5e948be2..000000000000 --- a/virt/kvm/arm/vgic.c +++ /dev/null @@ -1,2417 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/rculist.h> -#include <linux/uaccess.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> -#include <trace/events/kvm.h> -#include <asm/kvm.h> -#include <kvm/iodev.h> -#include <linux/irqchip/arm-gic-common.h> - -#define CREATE_TRACE_POINTS -#include "trace.h" - -/* - * How the whole thing works (courtesy of Christoffer Dall): - * - * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending on the CPU interface. - * - Interrupts that are pending on the distributor are stored on the - * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land - * ioctls and guest mmio ops, and other in-kernel peripherals such as the - * arch. timers). - * - Every time the bitmap changes, the irq_pending_on_cpu oracle is - * recalculated - * - To calculate the oracle, we need info for each cpu from - * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_pending & dist->irq_enable - * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn - * registers, stored on each vcpu. We only keep one bit of - * information per interrupt, making sure that only one vcpu can - * accept the interrupt. - * - If any of the above state changes, we must recalculate the oracle. - * - The same is true when injecting an interrupt, except that we only - * consider a single interrupt at a time. The irq_spi_cpu array - * contains the target CPU for each SPI. - * - * The handling of level interrupts adds some extra complexity. We - * need to track when the interrupt has been EOIed, so we can sample - * the 'line' again. This is achieved as such: - * - * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_queued is set. As long as this bit is set, the line - * will be ignored for further interrupts. The interrupt is injected - * into the vcpu with the GICH_LR_EOI bit set (generate a - * maintenance interrupt on EOI). - * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_queued. This allows the - * interrupt line to be sampled again. - * - Note that level-triggered interrupts can also be set to pending from - * writes to GICD_ISPENDRn and lowering the external input line does not - * cause the interrupt to become inactive in such a situation. - * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become - * inactive as long as the external input line is held high. - * - * - * Initialization rules: there are multiple stages to the vgic - * initialization, both for the distributor and the CPU interfaces. - * - * Distributor: - * - * - kvm_vgic_early_init(): initialization of static data that doesn't - * depend on any sizing information or emulation type. No allocation - * is allowed there. - * - * - vgic_init(): allocation and initialization of the generic data - * structures that depend on sizing information (number of CPUs, - * number of interrupts). Also initializes the vcpu specific data - * structures. Can be executed lazily for GICv2. - * [to be renamed to kvm_vgic_init??] - * - * CPU Interface: - * - * - kvm_vgic_cpu_early_init(): initialization of static data that - * doesn't depend on any sizing information or emulation type. No - * allocation is allowed there. - */ - -#include "vgic.h" - -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu); -static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); -static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); -static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); -static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, - int virt_irq); -static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); - -static const struct vgic_ops *vgic_ops; -static const struct vgic_params *vgic; - -static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) -{ - vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source); -} - -static bool queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq); -} - -int kvm_vgic_map_resources(struct kvm *kvm) -{ - return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic); -} - -/* - * struct vgic_bitmap contains a bitmap made of unsigned longs, but - * extracts u32s out of them. - * - * This does not work on 64-bit BE systems, because the bitmap access - * will store two consecutive 32-bit words with the higher-addressed - * register's bits at the lower index and the lower-addressed register's - * bits at the higher index. - * - * Therefore, swizzle the register index when accessing the 32-bit word - * registers to access the right register's value. - */ -#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64 -#define REG_OFFSET_SWIZZLE 1 -#else -#define REG_OFFSET_SWIZZLE 0 -#endif - -static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) -{ - int nr_longs; - - nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); - - b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); - if (!b->private) - return -ENOMEM; - - b->shared = b->private + nr_cpus; - - return 0; -} - -static void vgic_free_bitmap(struct vgic_bitmap *b) -{ - kfree(b->private); - b->private = NULL; - b->shared = NULL; -} - -/* - * Call this function to convert a u64 value to an unsigned long * bitmask - * in a way that works on both 32-bit and 64-bit LE and BE platforms. - * - * Warning: Calling this function may modify *val. - */ -static unsigned long *u64_to_bitmask(u64 *val) -{ -#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 - *val = (*val >> 32) | (*val << 32); -#endif - return (unsigned long *)val; -} - -u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) -{ - offset >>= 2; - if (!offset) - return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; - else - return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); -} - -static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, - int cpuid, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->private + cpuid); - - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); -} - -void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val) -{ - unsigned long *reg; - - if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->private + cpuid; - } else { - reg = x->shared; - irq -= VGIC_NR_PRIVATE_IRQS; - } - - if (val) - set_bit(irq, reg); - else - clear_bit(irq, reg); -} - -static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) -{ - return x->private + cpuid; -} - -unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) -{ - return x->shared; -} - -static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) -{ - int size; - - size = nr_cpus * VGIC_NR_PRIVATE_IRQS; - size += nr_irqs - VGIC_NR_PRIVATE_IRQS; - - x->private = kzalloc(size, GFP_KERNEL); - if (!x->private) - return -ENOMEM; - - x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); - return 0; -} - -static void vgic_free_bytemap(struct vgic_bytemap *b) -{ - kfree(b->private); - b->private = NULL; - b->shared = NULL; -} - -u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) -{ - u32 *reg; - - if (offset < VGIC_NR_PRIVATE_IRQS) { - reg = x->private; - offset += cpuid * VGIC_NR_PRIVATE_IRQS; - } else { - reg = x->shared; - offset -= VGIC_NR_PRIVATE_IRQS; - } - - return reg + (offset / sizeof(u32)); -} - -#define VGIC_CFG_LEVEL 0 -#define VGIC_CFG_EDGE 1 - -static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int irq_val; - - irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq); - return irq_val == VGIC_CFG_EDGE; -} - -static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); -} - -static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); -} - -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); -} - -static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); -} - -static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); -} - -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); -} - -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); -} - -static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); -} - -static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); -} - -static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); -} - -static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); -} - -static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); - if (!vgic_dist_irq_get_level(vcpu, irq)) { - vgic_dist_irq_clear_pending(vcpu, irq); - if (!compute_pending_for_cpu(vcpu)) - clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); - } -} - -static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); -} - -void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); -} - -void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); -} - -static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - set_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - clear_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) -{ - return !vgic_irq_is_queued(vcpu, irq); -} - -/** - * vgic_reg_access - access vgic register - * @mmio: pointer to the data describing the mmio access - * @reg: pointer to the virtual backing of vgic distributor data - * @offset: least significant 2 bits used for word offset - * @mode: ACCESS_ mode (see defines above) - * - * Helper to make vgic register access easier using one of the access - * modes defined for vgic register access - * (read,raz,write-ignored,setbit,clearbit,write) - */ -void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode) -{ - int word_offset = (offset & 3) * 8; - u32 mask = (1UL << (mmio->len * 8)) - 1; - u32 regval; - - /* - * Any alignment fault should have been delivered to the guest - * directly (ARM ARM B3.12.7 "Prioritization of aborts"). - */ - - if (reg) { - regval = *reg; - } else { - BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED)); - regval = 0; - } - - if (mmio->is_write) { - u32 data = mmio_data_read(mmio, mask) << word_offset; - switch (ACCESS_WRITE_MASK(mode)) { - case ACCESS_WRITE_IGNORED: - return; - - case ACCESS_WRITE_SETBIT: - regval |= data; - break; - - case ACCESS_WRITE_CLEARBIT: - regval &= ~data; - break; - - case ACCESS_WRITE_VALUE: - regval = (regval & ~(mask << word_offset)) | data; - break; - } - *reg = regval; - } else { - switch (ACCESS_READ_MASK(mode)) { - case ACCESS_READ_RAZ: - regval = 0; - /* fall through */ - - case ACCESS_READ_VALUE: - mmio_data_write(mmio, mask, regval >> word_offset); - } - } -} - -bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id, int access) -{ - u32 *reg; - int mode = ACCESS_READ_VALUE | access; - struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id); - - reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, mode); - if (mmio->is_write) { - if (access & ACCESS_WRITE_CLEARBIT) { - if (offset < 4) /* Force SGI enabled */ - *reg |= 0xffff; - vgic_retire_disabled_irqs(target_vcpu); - } - vgic_update_state(kvm); - return true; - } - - return false; -} - -bool vgic_handle_set_pending_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *reg, orig; - u32 level_mask; - int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset); - level_mask = (~(*reg)); - - /* Mark both level and edge triggered irqs as pending */ - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); - orig = *reg; - vgic_reg_access(mmio, reg, offset, mode); - - if (mmio->is_write) { - /* Set the soft-pending flag only for level-triggered irqs */ - reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, - vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, mode); - *reg &= level_mask; - - /* Ignore writes to SGIs */ - if (offset < 2) { - *reg &= ~0xffff; - *reg |= orig & 0xffff; - } - - vgic_update_state(kvm); - return true; - } - - return false; -} - -bool vgic_handle_clear_pending_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *level_active; - u32 *reg, orig; - int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); - orig = *reg; - vgic_reg_access(mmio, reg, offset, mode); - if (mmio->is_write) { - /* Re-set level triggered level-active interrupts */ - level_active = vgic_bitmap_get_reg(&dist->irq_level, - vcpu_id, offset); - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); - *reg |= *level_active; - - /* Ignore writes to SGIs */ - if (offset < 2) { - *reg &= ~0xffff; - *reg |= orig & 0xffff; - } - - /* Clear soft-pending flags */ - reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, - vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, mode); - - vgic_update_state(kvm); - return true; - } - return false; -} - -bool vgic_handle_set_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *reg; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - - if (mmio->is_write) { - vgic_update_state(kvm); - return true; - } - - return false; -} - -bool vgic_handle_clear_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *reg; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - - if (mmio->is_write) { - vgic_update_state(kvm); - return true; - } - - return false; -} - -static u32 vgic_cfg_expand(u16 val) -{ - u32 res = 0; - int i; - - /* - * Turn a 16bit value like abcd...mnop into a 32bit word - * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is. - */ - for (i = 0; i < 16; i++) - res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1); - - return res; -} - -static u16 vgic_cfg_compress(u32 val) -{ - u16 res = 0; - int i; - - /* - * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like - * abcd...mnop which is what we really care about. - */ - for (i = 0; i < 16; i++) - res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i; - - return res; -} - -/* - * The distributor uses 2 bits per IRQ for the CFG register, but the - * LSB is always 0. As such, we only keep the upper bit, and use the - * two above functions to compress/expand the bits - */ -bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 val; - - if (offset & 4) - val = *reg >> 16; - else - val = *reg & 0xffff; - - val = vgic_cfg_expand(val); - vgic_reg_access(mmio, &val, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - /* Ignore writes to read-only SGI and PPI bits */ - if (offset < 8) - return false; - - val = vgic_cfg_compress(val); - if (offset & 4) { - *reg &= 0xffff; - *reg |= val << 16; - } else { - *reg &= 0xffff << 16; - *reg |= val; - } - } - - return false; -} - -/** - * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor - * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs - * - * Move any IRQs that have already been assigned to LRs back to the - * emulated distributor state so that the complete emulated state can be read - * from the main emulation structures without investigating the LRs. - */ -void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) -{ - u64 elrsr = vgic_get_elrsr(vcpu); - unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); - int i; - - for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) { - struct vgic_lr lr = vgic_get_lr(vcpu, i); - - /* - * There are three options for the state bits: - * - * 01: pending - * 10: active - * 11: pending and active - */ - BUG_ON(!(lr.state & LR_STATE_MASK)); - - /* Reestablish SGI source for pending and active IRQs */ - if (lr.irq < VGIC_NR_SGIS) - add_sgi_source(vcpu, lr.irq, lr.source); - - /* - * If the LR holds an active (10) or a pending and active (11) - * interrupt then move the active state to the - * distributor tracking bit. - */ - if (lr.state & LR_STATE_ACTIVE) - vgic_irq_set_active(vcpu, lr.irq); - - /* - * Reestablish the pending state on the distributor and the - * CPU interface and mark the LR as free for other use. - */ - vgic_retire_lr(i, vcpu); - - /* Finally update the VGIC state. */ - vgic_update_state(vcpu->kvm); - } -} - -const -struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - int len, gpa_t offset) -{ - while (ranges->len) { - if (offset >= ranges->base && - (offset + len) <= (ranges->base + ranges->len)) - return ranges; - ranges++; - } - - return NULL; -} - -static bool vgic_validate_access(const struct vgic_dist *dist, - const struct vgic_io_range *range, - unsigned long offset) -{ - int irq; - - if (!range->bits_per_irq) - return true; /* Not an irq-based access */ - - irq = offset * 8 / range->bits_per_irq; - if (irq >= dist->nr_irqs) - return false; - - return true; -} - -/* - * Call the respective handler function for the given range. - * We split up any 64 bit accesses into two consecutive 32 bit - * handler calls and merge the result afterwards. - * We do this in a little endian fashion regardless of the host's - * or guest's endianness, because the GIC is always LE and the rest of - * the code (vgic_reg_access) also puts it in a LE fashion already. - * At this point we have already identified the handle function, so - * range points to that one entry and offset is relative to this. - */ -static bool call_range_handler(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - unsigned long offset, - const struct vgic_io_range *range) -{ - struct kvm_exit_mmio mmio32; - bool ret; - - if (likely(mmio->len <= 4)) - return range->handle_mmio(vcpu, mmio, offset); - - /* - * Any access bigger than 4 bytes (that we currently handle in KVM) - * is actually 8 bytes long, caused by a 64-bit access - */ - - mmio32.len = 4; - mmio32.is_write = mmio->is_write; - mmio32.private = mmio->private; - - mmio32.phys_addr = mmio->phys_addr + 4; - mmio32.data = &((u32 *)mmio->data)[1]; - ret = range->handle_mmio(vcpu, &mmio32, offset + 4); - - mmio32.phys_addr = mmio->phys_addr; - mmio32.data = &((u32 *)mmio->data)[0]; - ret |= range->handle_mmio(vcpu, &mmio32, offset); - - return ret; -} - -/** - * vgic_handle_mmio_access - handle an in-kernel MMIO access - * This is called by the read/write KVM IO device wrappers below. - * @vcpu: pointer to the vcpu performing the access - * @this: pointer to the KVM IO device in charge - * @addr: guest physical address of the access - * @len: size of the access - * @val: pointer to the data region - * @is_write: read or write access - * - * returns true if the MMIO access could be performed - */ -static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, gpa_t addr, - int len, void *val, bool is_write) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct vgic_io_device *iodev = container_of(this, - struct vgic_io_device, dev); - const struct vgic_io_range *range; - struct kvm_exit_mmio mmio; - bool updated_state; - gpa_t offset; - - offset = addr - iodev->addr; - range = vgic_find_range(iodev->reg_ranges, len, offset); - if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len); - return -ENXIO; - } - - mmio.phys_addr = addr; - mmio.len = len; - mmio.is_write = is_write; - mmio.data = val; - mmio.private = iodev->redist_vcpu; - - spin_lock(&dist->lock); - offset -= range->base; - if (vgic_validate_access(dist, range, offset)) { - updated_state = call_range_handler(vcpu, &mmio, offset, range); - } else { - if (!is_write) - memset(val, 0, len); - updated_state = false; - } - spin_unlock(&dist->lock); - - if (updated_state) - vgic_kick_vcpus(vcpu->kvm); - - return 0; -} - -static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, - gpa_t addr, int len, void *val) -{ - return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); -} - -static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, - gpa_t addr, int len, const void *val) -{ - return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, - true); -} - -static struct kvm_io_device_ops vgic_io_ops = { - .read = vgic_handle_mmio_read, - .write = vgic_handle_mmio_write, -}; - -/** - * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus - * @kvm: The VM structure pointer - * @base: The (guest) base address for the register frame - * @len: Length of the register frame window - * @ranges: Describing the handler functions for each register - * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call - * @iodev: Points to memory to be passed on to the handler - * - * @iodev stores the parameters of this function to be usable by the handler - * respectively the dispatcher function (since the KVM I/O bus framework lacks - * an opaque parameter). Initialization is done in this function, but the - * reference should be valid and unique for the whole VGIC lifetime. - * If the register frame is not mapped for a specific VCPU, pass -1 to - * @redist_vcpu_id. - */ -int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, - const struct vgic_io_range *ranges, - int redist_vcpu_id, - struct vgic_io_device *iodev) -{ - struct kvm_vcpu *vcpu = NULL; - int ret; - - if (redist_vcpu_id >= 0) - vcpu = kvm_get_vcpu(kvm, redist_vcpu_id); - - iodev->addr = base; - iodev->len = len; - iodev->reg_ranges = ranges; - iodev->redist_vcpu = vcpu; - - kvm_iodevice_init(&iodev->dev, &vgic_io_ops); - - mutex_lock(&kvm->slots_lock); - - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len, - &iodev->dev); - mutex_unlock(&kvm->slots_lock); - - /* Mark the iodev as invalid if registration fails. */ - if (ret) - iodev->dev.ops = NULL; - - return ret; -} - -static int vgic_nr_shared_irqs(struct vgic_dist *dist) -{ - return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; -} - -static int compute_active_for_cpu(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *active, *enabled, *act_percpu, *act_shared; - unsigned long active_private, active_shared; - int nr_shared = vgic_nr_shared_irqs(dist); - int vcpu_id; - - vcpu_id = vcpu->vcpu_id; - act_percpu = vcpu->arch.vgic_cpu.active_percpu; - act_shared = vcpu->arch.vgic_cpu.active_shared; - - active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id); - enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); - bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS); - - active = vgic_bitmap_get_shared_map(&dist->irq_active); - enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(act_shared, active, enabled, nr_shared); - bitmap_and(act_shared, act_shared, - vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - nr_shared); - - active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS); - active_shared = find_first_bit(act_shared, nr_shared); - - return (active_private < VGIC_NR_PRIVATE_IRQS || - active_shared < nr_shared); -} - -static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *pending, *enabled, *pend_percpu, *pend_shared; - unsigned long pending_private, pending_shared; - int nr_shared = vgic_nr_shared_irqs(dist); - int vcpu_id; - - vcpu_id = vcpu->vcpu_id; - pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; - pend_shared = vcpu->arch.vgic_cpu.pending_shared; - - if (!dist->enabled) { - bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS); - bitmap_zero(pend_shared, nr_shared); - return 0; - } - - pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); - enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); - bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - - pending = vgic_bitmap_get_shared_map(&dist->irq_pending); - enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, nr_shared); - bitmap_and(pend_shared, pend_shared, - vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - nr_shared); - - pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, nr_shared); - return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < vgic_nr_shared_irqs(dist)); -} - -/* - * Update the interrupt state and determine which CPUs have pending - * or active interrupts. Must be called with distributor lock held. - */ -void vgic_update_state(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int c; - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) - set_bit(c, dist->irq_pending_on_cpu); - - if (compute_active_for_cpu(vcpu)) - set_bit(c, dist->irq_active_on_cpu); - else - clear_bit(c, dist->irq_active_on_cpu); - } -} - -static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - return vgic_ops->get_lr(vcpu, lr); -} - -static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr vlr) -{ - vgic_ops->set_lr(vcpu, lr, vlr); -} - -static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) -{ - return vgic_ops->get_elrsr(vcpu); -} - -static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) -{ - return vgic_ops->get_eisr(vcpu); -} - -static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu) -{ - vgic_ops->clear_eisr(vcpu); -} - -static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) -{ - return vgic_ops->get_interrupt_status(vcpu); -} - -static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) -{ - vgic_ops->enable_underflow(vcpu); -} - -static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) -{ - vgic_ops->disable_underflow(vcpu); -} - -void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - vgic_ops->get_vmcr(vcpu, vmcr); -} - -void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - vgic_ops->set_vmcr(vcpu, vmcr); -} - -static inline void vgic_enable(struct kvm_vcpu *vcpu) -{ - vgic_ops->enable(vcpu); -} - -static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) -{ - struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); - - vgic_irq_clear_queued(vcpu, vlr.irq); - - /* - * We must transfer the pending state back to the distributor before - * retiring the LR, otherwise we may loose edge-triggered interrupts. - */ - if (vlr.state & LR_STATE_PENDING) { - vgic_dist_irq_set_pending(vcpu, vlr.irq); - vlr.hwirq = 0; - } - - vlr.state = 0; - vgic_set_lr(vcpu, lr_nr, vlr); -} - -static bool dist_active_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); -} - -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) -{ - int i; - - for (i = 0; i < vgic->nr_lr; i++) { - struct vgic_lr vlr = vgic_get_lr(vcpu, i); - - if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE) - return true; - } - - return vgic_irq_is_active(vcpu, virt_irq); -} - -/* - * An interrupt may have been disabled after being made pending on the - * CPU interface (the classic case is a timer running while we're - * rebooting the guest - the interrupt would kick as soon as the CPU - * interface gets enabled, with deadly consequences). - * - * The solution is to examine already active LRs, and check the - * interrupt is still enabled. If not, just retire it. - */ -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) -{ - u64 elrsr = vgic_get_elrsr(vcpu); - unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); - int lr; - - for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { - struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - - if (!vgic_irq_is_enabled(vcpu, vlr.irq)) - vgic_retire_lr(lr, vcpu); - } -} - -static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, - int lr_nr, struct vgic_lr vlr) -{ - if (vgic_irq_is_active(vcpu, irq)) { - vlr.state |= LR_STATE_ACTIVE; - kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); - vgic_irq_clear_active(vcpu, irq); - vgic_update_state(vcpu->kvm); - } else { - WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq)); - vlr.state |= LR_STATE_PENDING; - kvm_debug("Set pending: 0x%x\n", vlr.state); - } - - if (!vgic_irq_is_edge(vcpu, irq)) - vlr.state |= LR_EOI_INT; - - if (vlr.irq >= VGIC_NR_SGIS) { - struct irq_phys_map *map; - map = vgic_irq_map_search(vcpu, irq); - - if (map) { - vlr.hwirq = map->phys_irq; - vlr.state |= LR_HW; - vlr.state &= ~LR_EOI_INT; - - /* - * Make sure we're not going to sample this - * again, as a HW-backed interrupt cannot be - * in the PENDING_ACTIVE stage. - */ - vgic_irq_set_queued(vcpu, irq); - } - } - - vgic_set_lr(vcpu, lr_nr, vlr); -} - -/* - * Queue an interrupt to a CPU virtual interface. Return true on success, - * or false if it wasn't possible to queue it. - * sgi_source must be zero for any non-SGI interrupts. - */ -bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - u64 elrsr = vgic_get_elrsr(vcpu); - unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); - struct vgic_lr vlr; - int lr; - - /* Sanitize the input... */ - BUG_ON(sgi_source_id & ~7); - BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= dist->nr_irqs); - - kvm_debug("Queue IRQ%d\n", irq); - - /* Do we have an active interrupt for the same CPUID? */ - for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { - vlr = vgic_get_lr(vcpu, lr); - if (vlr.irq == irq && vlr.source == sgi_source_id) { - kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); - vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); - return true; - } - } - - /* Try to use another LR for this interrupt */ - lr = find_first_bit(elrsr_ptr, vgic->nr_lr); - if (lr >= vgic->nr_lr) - return false; - - kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - - vlr.irq = irq; - vlr.source = sgi_source_id; - vlr.state = 0; - vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); - - return true; -} - -static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) -{ - if (!vgic_can_sample_irq(vcpu, irq)) - return true; /* level interrupt, already queued */ - - if (vgic_queue_irq(vcpu, 0, irq)) { - if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear_pending(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - } else { - vgic_irq_set_queued(vcpu, irq); - } - - return true; - } - - return false; -} - -/* - * Fill the list registers with pending interrupts before running the - * guest. - */ -static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *pa_percpu, *pa_shared; - int i, vcpu_id; - int overflow = 0; - int nr_shared = vgic_nr_shared_irqs(dist); - - vcpu_id = vcpu->vcpu_id; - - pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu; - pa_shared = vcpu->arch.vgic_cpu.pend_act_shared; - - bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu, - VGIC_NR_PRIVATE_IRQS); - bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared, - nr_shared); - /* - * We may not have any pending interrupt, or the interrupts - * may have been serviced from another vcpu. In all cases, - * move along. - */ - if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu)) - goto epilog; - - /* SGIs */ - for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) { - if (!queue_sgi(vcpu, i)) - overflow = 1; - } - - /* PPIs */ - for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) { - if (!vgic_queue_hwirq(vcpu, i)) - overflow = 1; - } - - /* SPIs */ - for_each_set_bit(i, pa_shared, nr_shared) { - if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) - overflow = 1; - } - - - - -epilog: - if (overflow) { - vgic_enable_underflow(vcpu); - } else { - vgic_disable_underflow(vcpu); - /* - * We're about to run this VCPU, and we've consumed - * everything the distributor had in store for - * us. Claim we don't have anything pending. We'll - * adjust that if needed while exiting. - */ - clear_bit(vcpu_id, dist->irq_pending_on_cpu); - } -} - -static int process_queued_irq(struct kvm_vcpu *vcpu, - int lr, struct vgic_lr vlr) -{ - int pending = 0; - - /* - * If the IRQ was EOIed (called from vgic_process_maintenance) or it - * went from active to non-active (called from vgic_sync_hwirq) it was - * also ACKed and we we therefore assume we can clear the soft pending - * state (should it had been set) for this interrupt. - * - * Note: if the IRQ soft pending state was set after the IRQ was - * acked, it actually shouldn't be cleared, but we have no way of - * knowing that unless we start trapping ACKs when the soft-pending - * state is set. - */ - vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); - - /* - * Tell the gic to start sampling this interrupt again. - */ - vgic_irq_clear_queued(vcpu, vlr.irq); - - /* Any additional pending interrupt? */ - if (vgic_irq_is_edge(vcpu, vlr.irq)) { - BUG_ON(!(vlr.state & LR_HW)); - pending = vgic_dist_irq_is_pending(vcpu, vlr.irq); - } else { - if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { - vgic_cpu_irq_set(vcpu, vlr.irq); - pending = 1; - } else { - vgic_dist_irq_clear_pending(vcpu, vlr.irq); - vgic_cpu_irq_clear(vcpu, vlr.irq); - } - } - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - vlr.state = 0; - vlr.hwirq = 0; - vgic_set_lr(vcpu, lr, vlr); - - return pending; -} - -static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) -{ - u32 status = vgic_get_interrupt_status(vcpu); - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct kvm *kvm = vcpu->kvm; - int level_pending = 0; - - kvm_debug("STATUS = %08x\n", status); - - if (status & INT_STATUS_EOI) { - /* - * Some level interrupts have been EOIed. Clear their - * active bit. - */ - u64 eisr = vgic_get_eisr(vcpu); - unsigned long *eisr_ptr = u64_to_bitmask(&eisr); - int lr; - - for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { - struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - - WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); - WARN_ON(vlr.state & LR_STATE_MASK); - - - /* - * kvm_notify_acked_irq calls kvm_set_irq() - * to reset the IRQ level, which grabs the dist->lock - * so we call this before taking the dist->lock. - */ - kvm_notify_acked_irq(kvm, 0, - vlr.irq - VGIC_NR_PRIVATE_IRQS); - - spin_lock(&dist->lock); - level_pending |= process_queued_irq(vcpu, lr, vlr); - spin_unlock(&dist->lock); - } - } - - if (status & INT_STATUS_UNDERFLOW) - vgic_disable_underflow(vcpu); - - /* - * In the next iterations of the vcpu loop, if we sync the vgic state - * after flushing it, but before entering the guest (this happens for - * pending signals and vmid rollovers), then make sure we don't pick - * up any old maintenance interrupts here. - */ - vgic_clear_eisr(vcpu); - - return level_pending; -} - -/* - * Save the physical active state, and reset it to inactive. - * - * Return true if there's a pending forwarded interrupt to queue. - */ -static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool level_pending; - - if (!(vlr.state & LR_HW)) - return false; - - if (vlr.state & LR_STATE_ACTIVE) - return false; - - spin_lock(&dist->lock); - level_pending = process_queued_irq(vcpu, lr, vlr); - spin_unlock(&dist->lock); - return level_pending; -} - -/* Sync back the VGIC state after a guest run */ -static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - u64 elrsr; - unsigned long *elrsr_ptr; - int lr, pending; - bool level_pending; - - level_pending = vgic_process_maintenance(vcpu); - - /* Deal with HW interrupts, and clear mappings for empty LRs */ - for (lr = 0; lr < vgic->nr_lr; lr++) { - struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - - level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); - BUG_ON(vlr.irq >= dist->nr_irqs); - } - - /* Check if we still have something up our sleeve... */ - elrsr = vgic_get_elrsr(vcpu); - elrsr_ptr = u64_to_bitmask(&elrsr); - pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); - if (level_pending || pending < vgic->nr_lr) - set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); -} - -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_flush_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - __kvm_vgic_sync_hwstate(vcpu); -} - -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); -} - -void vgic_kick_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - int c; - - /* - * We've injected an interrupt, time to find out who deserves - * a good kick... - */ - kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) - kvm_vcpu_kick(vcpu); - } -} - -static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) -{ - int edge_triggered = vgic_irq_is_edge(vcpu, irq); - - /* - * Only inject an interrupt if: - * - edge triggered and we have a rising edge - * - level triggered and we change level - */ - if (edge_triggered) { - int state = vgic_dist_irq_is_pending(vcpu, irq); - return level > state; - } else { - int state = vgic_dist_irq_get_level(vcpu, irq); - return level != state; - } -} - -static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int edge_triggered, level_triggered; - int enabled; - bool ret = true, can_inject = true; - - trace_vgic_update_irq_pending(cpuid, irq_num, level); - - if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) - return -EINVAL; - - spin_lock(&dist->lock); - - vcpu = kvm_get_vcpu(kvm, cpuid); - edge_triggered = vgic_irq_is_edge(vcpu, irq_num); - level_triggered = !edge_triggered; - - if (!vgic_validate_injection(vcpu, irq_num, level)) { - ret = false; - goto out; - } - - if (irq_num >= VGIC_NR_PRIVATE_IRQS) { - cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; - if (cpuid == VCPU_NOT_ALLOCATED) { - /* Pretend we use CPU0, and prevent injection */ - cpuid = 0; - can_inject = false; - } - vcpu = kvm_get_vcpu(kvm, cpuid); - } - - kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - - if (level) { - if (level_triggered) - vgic_dist_irq_set_level(vcpu, irq_num); - vgic_dist_irq_set_pending(vcpu, irq_num); - } else { - if (level_triggered) { - vgic_dist_irq_clear_level(vcpu, irq_num); - if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) { - vgic_dist_irq_clear_pending(vcpu, irq_num); - vgic_cpu_irq_clear(vcpu, irq_num); - if (!compute_pending_for_cpu(vcpu)) - clear_bit(cpuid, dist->irq_pending_on_cpu); - } - } - - ret = false; - goto out; - } - - enabled = vgic_irq_is_enabled(vcpu, irq_num); - - if (!enabled || !can_inject) { - ret = false; - goto out; - } - - if (!vgic_can_sample_irq(vcpu, irq_num)) { - /* - * Level interrupt in progress, will be picked up - * when EOId. - */ - ret = false; - goto out; - } - - if (level) { - vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, dist->irq_pending_on_cpu); - } - -out: - spin_unlock(&dist->lock); - - if (ret) { - /* kick the specified vcpu */ - kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid)); - } - - return 0; -} - -static int vgic_lazy_init(struct kvm *kvm) -{ - int ret = 0; - - if (unlikely(!vgic_initialized(kvm))) { - /* - * We only provide the automatic initialization of the VGIC - * for the legacy case of a GICv2. Any other type must - * be explicitly initialized once setup with the respective - * KVM device call. - */ - if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) - return -EBUSY; - - mutex_lock(&kvm->lock); - ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); - } - - return ret; -} - -/** - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @irq_num: The IRQ number that is assigned to the device. This IRQ - * must not be mapped to a HW interrupt. - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: raise the input signal - * false: lower the input signal - * - * The GIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level) -{ - struct irq_phys_map *map; - int ret; - - ret = vgic_lazy_init(kvm); - if (ret) - return ret; - - map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num); - if (map) - return -EINVAL; - - return vgic_update_irq_pending(kvm, cpuid, irq_num, level); -} - -/** - * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @virt_irq: The virtual IRQ to be injected - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: raise the input signal - * false: lower the input signal - * - * The GIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, - unsigned int virt_irq, bool level) -{ - int ret; - - ret = vgic_lazy_init(kvm); - if (ret) - return ret; - - return vgic_update_irq_pending(kvm, cpuid, virt_irq, level); -} - -static irqreturn_t vgic_maintenance_handler(int irq, void *data) -{ - /* - * We cannot rely on the vgic maintenance interrupt to be - * delivered synchronously. This means we can only use it to - * exit the VM, and we perform the handling of EOIed - * interrupts on the exit path (see vgic_process_maintenance). - */ - return IRQ_HANDLED; -} - -static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu, - int virt_irq) -{ - if (virt_irq < VGIC_NR_PRIVATE_IRQS) - return &vcpu->arch.vgic_cpu.irq_phys_map_list; - else - return &vcpu->kvm->arch.vgic.irq_phys_map_list; -} - -/** - * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ - * @vcpu: The VCPU pointer - * @virt_irq: The virtual IRQ number for the guest - * @phys_irq: The hardware IRQ number of the host - * - * Establish a mapping between a guest visible irq (@virt_irq) and a - * hardware irq (@phys_irq). On injection, @virt_irq will be associated with - * the physical interrupt represented by @phys_irq. This mapping can be - * established multiple times as long as the parameters are the same. - * - * Returns 0 on success or an error value otherwise. - */ -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); - struct irq_phys_map *map; - struct irq_phys_map_entry *entry; - int ret = 0; - - /* Create a new mapping */ - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - spin_lock(&dist->irq_phys_map_lock); - - /* Try to match an existing mapping */ - map = vgic_irq_map_search(vcpu, virt_irq); - if (map) { - /* Make sure this mapping matches */ - if (map->phys_irq != phys_irq) - ret = -EINVAL; - - /* Found an existing, valid mapping */ - goto out; - } - - map = &entry->map; - map->virt_irq = virt_irq; - map->phys_irq = phys_irq; - - list_add_tail_rcu(&entry->entry, root); - -out: - spin_unlock(&dist->irq_phys_map_lock); - /* If we've found a hit in the existing list, free the useless - * entry */ - if (ret || map != &entry->map) - kfree(entry); - return ret; -} - -static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, - int virt_irq) -{ - struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); - struct irq_phys_map_entry *entry; - struct irq_phys_map *map; - - rcu_read_lock(); - - list_for_each_entry_rcu(entry, root, entry) { - map = &entry->map; - if (map->virt_irq == virt_irq) { - rcu_read_unlock(); - return map; - } - } - - rcu_read_unlock(); - - return NULL; -} - -static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) -{ - struct irq_phys_map_entry *entry; - - entry = container_of(rcu, struct irq_phys_map_entry, rcu); - kfree(entry); -} - -/** - * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping - * @vcpu: The VCPU pointer - * @virt_irq: The virtual IRQ number to be unmapped - * - * Remove an existing mapping between virtual and physical interrupts. - */ -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct irq_phys_map_entry *entry; - struct list_head *root; - - root = vgic_get_irq_phys_map_list(vcpu, virt_irq); - - spin_lock(&dist->irq_phys_map_lock); - - list_for_each_entry(entry, root, entry) { - if (entry->map.virt_irq == virt_irq) { - list_del_rcu(&entry->entry); - call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); - break; - } - } - - spin_unlock(&dist->irq_phys_map_lock); - - return 0; -} - -static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct irq_phys_map_entry *entry; - - spin_lock(&dist->irq_phys_map_lock); - - list_for_each_entry(entry, root, entry) { - list_del_rcu(&entry->entry); - call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); - } - - spin_unlock(&dist->irq_phys_map_lock); -} - -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - kfree(vgic_cpu->pending_shared); - kfree(vgic_cpu->active_shared); - kfree(vgic_cpu->pend_act_shared); - vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); - vgic_cpu->pending_shared = NULL; - vgic_cpu->active_shared = NULL; - vgic_cpu->pend_act_shared = NULL; -} - -static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); - int sz = nr_longs * sizeof(unsigned long); - vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); - - if (!vgic_cpu->pending_shared - || !vgic_cpu->active_shared - || !vgic_cpu->pend_act_shared) { - kvm_vgic_vcpu_destroy(vcpu); - return -ENOMEM; - } - - return 0; -} - -/** - * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage - * - * No memory allocation should be performed here, only static init. - */ -void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list); -} - -/** - * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW - * - * The host's GIC naturally limits the maximum amount of VCPUs a guest - * can use. - */ -int kvm_vgic_get_max_vcpus(void) -{ - return vgic->max_gic_vcpus; -} - -void kvm_vgic_destroy(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i; - - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); - - vgic_free_bitmap(&dist->irq_enabled); - vgic_free_bitmap(&dist->irq_level); - vgic_free_bitmap(&dist->irq_pending); - vgic_free_bitmap(&dist->irq_soft_pend); - vgic_free_bitmap(&dist->irq_queued); - vgic_free_bitmap(&dist->irq_cfg); - vgic_free_bytemap(&dist->irq_priority); - if (dist->irq_spi_target) { - for (i = 0; i < dist->nr_cpus; i++) - vgic_free_bitmap(&dist->irq_spi_target[i]); - } - kfree(dist->irq_sgi_sources); - kfree(dist->irq_spi_cpu); - kfree(dist->irq_spi_mpidr); - kfree(dist->irq_spi_target); - kfree(dist->irq_pending_on_cpu); - kfree(dist->irq_active_on_cpu); - vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list); - dist->irq_sgi_sources = NULL; - dist->irq_spi_cpu = NULL; - dist->irq_spi_target = NULL; - dist->irq_pending_on_cpu = NULL; - dist->irq_active_on_cpu = NULL; - dist->nr_cpus = 0; -} - -/* - * Allocate and initialize the various data structures. Must be called - * with kvm->lock held! - */ -int vgic_init(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int nr_cpus, nr_irqs; - int ret, i, vcpu_id; - - if (vgic_initialized(kvm)) - return 0; - - nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); - if (!nr_cpus) /* No vcpus? Can't be good... */ - return -ENODEV; - - /* - * If nobody configured the number of interrupts, use the - * legacy one. - */ - if (!dist->nr_irqs) - dist->nr_irqs = VGIC_NR_IRQS_LEGACY; - - nr_irqs = dist->nr_irqs; - - ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); - ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); - - if (ret) - goto out; - - dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); - dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); - dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, - GFP_KERNEL); - dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), - GFP_KERNEL); - dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), - GFP_KERNEL); - if (!dist->irq_sgi_sources || - !dist->irq_spi_cpu || - !dist->irq_spi_target || - !dist->irq_pending_on_cpu || - !dist->irq_active_on_cpu) { - ret = -ENOMEM; - goto out; - } - - for (i = 0; i < nr_cpus; i++) - ret |= vgic_init_bitmap(&dist->irq_spi_target[i], - nr_cpus, nr_irqs); - - if (ret) - goto out; - - ret = kvm->arch.vgic.vm_ops.init_model(kvm); - if (ret) - goto out; - - kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { - ret = vgic_vcpu_init_maps(vcpu, nr_irqs); - if (ret) { - kvm_err("VGIC: Failed to allocate vcpu memory\n"); - break; - } - - /* - * Enable and configure all SGIs to be edge-triggere and - * configure all PPIs as level-triggered. - */ - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - if (i < VGIC_NR_SGIS) { - /* SGIs */ - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, - VGIC_CFG_EDGE); - } else if (i < VGIC_NR_PRIVATE_IRQS) { - /* PPIs */ - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, - VGIC_CFG_LEVEL); - } - } - - vgic_enable(vcpu); - } - -out: - if (ret) - kvm_vgic_destroy(kvm); - - return ret; -} - -static int init_vgic_model(struct kvm *kvm, int type) -{ - switch (type) { - case KVM_DEV_TYPE_ARM_VGIC_V2: - vgic_v2_init_emulation(kvm); - break; -#ifdef CONFIG_KVM_ARM_VGIC_V3 - case KVM_DEV_TYPE_ARM_VGIC_V3: - vgic_v3_init_emulation(kvm); - break; -#endif - default: - return -ENODEV; - } - - if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) - return -E2BIG; - - return 0; -} - -/** - * kvm_vgic_early_init - Earliest possible vgic initialization stage - * - * No memory allocation should be performed here, only static init. - */ -void kvm_vgic_early_init(struct kvm *kvm) -{ - spin_lock_init(&kvm->arch.vgic.lock); - spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock); - INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list); -} - -int kvm_vgic_create(struct kvm *kvm, u32 type) -{ - int i, vcpu_lock_idx = -1, ret; - struct kvm_vcpu *vcpu; - - mutex_lock(&kvm->lock); - - if (irqchip_in_kernel(kvm)) { - ret = -EEXIST; - goto out; - } - - /* - * This function is also called by the KVM_CREATE_IRQCHIP handler, - * which had no chance yet to check the availability of the GICv2 - * emulation. So check this here again. KVM_CREATE_DEVICE does - * the proper checks already. - */ - if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) { - ret = -ENODEV; - goto out; - } - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run while we create the vgic. - */ - ret = -EBUSY; - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!mutex_trylock(&vcpu->mutex)) - goto out_unlock; - vcpu_lock_idx = i; - } - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu->arch.has_run_once) - goto out_unlock; - } - ret = 0; - - ret = init_vgic_model(kvm, type); - if (ret) - goto out_unlock; - - kvm->arch.vgic.in_kernel = true; - kvm->arch.vgic.vgic_model = type; - kvm->arch.vgic.vctrl_base = vgic->vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; - -out_unlock: - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&vcpu->mutex); - } - -out: - mutex_unlock(&kvm->lock); - return ret; -} - -static int vgic_ioaddr_overlap(struct kvm *kvm) -{ - phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; - phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; - - if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) - return 0; - if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) || - (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist)) - return -EBUSY; - return 0; -} - -static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t size) -{ - int ret; - - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - if (addr + size < addr) - return -EINVAL; - - *ioaddr = addr; - ret = vgic_ioaddr_overlap(kvm); - if (ret) - *ioaddr = VGIC_ADDR_UNDEF; - - return ret; -} - -/** - * kvm_vgic_addr - set or get vgic VM base addresses - * @kvm: pointer to the vm struct - * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX - * @addr: pointer to address value - * @write: if true set the address in the VM address space, if false read the - * address - * - * Set or get the vgic base addresses for the distributor and the virtual CPU - * interface in the VM physical address space. These addresses are properties - * of the emulated core/SoC and therefore user space initially knows this - * information. - */ -int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) -{ - int r = 0; - struct vgic_dist *vgic = &kvm->arch.vgic; - int type_needed; - phys_addr_t *addr_ptr, block_size; - phys_addr_t alignment; - - mutex_lock(&kvm->lock); - switch (type) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; - addr_ptr = &vgic->vgic_dist_base; - block_size = KVM_VGIC_V2_DIST_SIZE; - alignment = SZ_4K; - break; - case KVM_VGIC_V2_ADDR_TYPE_CPU: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; - addr_ptr = &vgic->vgic_cpu_base; - block_size = KVM_VGIC_V2_CPU_SIZE; - alignment = SZ_4K; - break; -#ifdef CONFIG_KVM_ARM_VGIC_V3 - case KVM_VGIC_V3_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; - addr_ptr = &vgic->vgic_dist_base; - block_size = KVM_VGIC_V3_DIST_SIZE; - alignment = SZ_64K; - break; - case KVM_VGIC_V3_ADDR_TYPE_REDIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; - addr_ptr = &vgic->vgic_redist_base; - block_size = KVM_VGIC_V3_REDIST_SIZE; - alignment = SZ_64K; - break; -#endif - default: - r = -ENODEV; - goto out; - } - - if (vgic->vgic_model != type_needed) { - r = -ENODEV; - goto out; - } - - if (write) { - if (!IS_ALIGNED(*addr, alignment)) - r = -EINVAL; - else - r = vgic_ioaddr_assign(kvm, addr_ptr, *addr, - block_size); - } else { - *addr = *addr_ptr; - } - -out: - mutex_unlock(&kvm->lock); - return r; -} - -int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - int r; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - if (copy_from_user(&addr, uaddr, sizeof(addr))) - return -EFAULT; - - r = kvm_vgic_addr(dev->kvm, type, &addr, true); - return (r == -ENODEV) ? -ENXIO : r; - } - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 val; - int ret = 0; - - if (get_user(val, uaddr)) - return -EFAULT; - - /* - * We require: - * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs - * - at most 1024 interrupts - * - a multiple of 32 interrupts - */ - if (val < (VGIC_NR_PRIVATE_IRQS + 32) || - val > VGIC_MAX_IRQS || - (val & 31)) - return -EINVAL; - - mutex_lock(&dev->kvm->lock); - - if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) - ret = -EBUSY; - else - dev->kvm->arch.vgic.nr_irqs = val; - - mutex_unlock(&dev->kvm->lock); - - return ret; - } - case KVM_DEV_ARM_VGIC_GRP_CTRL: { - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - r = vgic_init(dev->kvm); - return r; - } - break; - } - } - - return -ENXIO; -} - -int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - int r = -ENXIO; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - r = kvm_vgic_addr(dev->kvm, type, &addr, false); - if (r) - return (r == -ENODEV) ? -ENXIO : r; - - if (copy_to_user(uaddr, &addr, sizeof(addr))) - return -EFAULT; - break; - } - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - - r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); - break; - } - - } - - return r; -} - -int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) -{ - if (vgic_find_range(ranges, 4, offset)) - return 0; - else - return -ENXIO; -} - -static int vgic_starting_cpu(unsigned int cpu) -{ - enable_percpu_irq(vgic->maint_irq, 0); - return 0; -} - -static int vgic_dying_cpu(unsigned int cpu) -{ - disable_percpu_irq(vgic->maint_irq); - return 0; -} - -static int kvm_vgic_probe(void) -{ - const struct gic_kvm_info *gic_kvm_info; - int ret; - - gic_kvm_info = gic_get_kvm_info(); - if (!gic_kvm_info) - return -ENODEV; - - switch (gic_kvm_info->type) { - case GIC_V2: - ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic); - break; - case GIC_V3: - ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic); - break; - default: - ret = -ENODEV; - } - - return ret; -} - -int kvm_vgic_hyp_init(void) -{ - int ret; - - ret = kvm_vgic_probe(); - if (ret) { - kvm_err("error: KVM vGIC probing failed\n"); - return ret; - } - - ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); - return ret; - } - - cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_STARTING, - "AP_KVM_ARM_VGIC_STARTING", vgic_starting_cpu, - vgic_dying_cpu); - return 0; -} - -int kvm_irq_map_gsi(struct kvm *kvm, - struct kvm_kernel_irq_routing_entry *entries, - int gsi) -{ - return 0; -} - -int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - return pin; -} - -int kvm_set_irq(struct kvm *kvm, int irq_source_id, - u32 irq, int level, bool line_status) -{ - unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; - - trace_kvm_set_irq(irq, level, irq_source_id); - - BUG_ON(!vgic_initialized(kvm)); - - return kvm_vgic_inject_irq(kvm, 0, spi, level); -} - -/* MSI not implemented yet */ -int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, - int level, bool line_status) -{ - return 0; -} diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h deleted file mode 100644 index 0df74cbb6200..000000000000 --- a/virt/kvm/arm/vgic.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2012-2014 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * Derived from virt/kvm/arm/vgic.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __KVM_VGIC_H__ -#define __KVM_VGIC_H__ - -#include <kvm/iodev.h> - -#define VGIC_ADDR_UNDEF (-1) -#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) - -#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ -#define IMPLEMENTER_ARM 0x43b - -#define ACCESS_READ_VALUE (1 << 0) -#define ACCESS_READ_RAZ (0 << 0) -#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) -#define ACCESS_WRITE_IGNORED (0 << 1) -#define ACCESS_WRITE_SETBIT (1 << 1) -#define ACCESS_WRITE_CLEARBIT (2 << 1) -#define ACCESS_WRITE_VALUE (3 << 1) -#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) - -#define VCPU_NOT_ALLOCATED ((u8)-1) - -unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x); - -void vgic_update_state(struct kvm *kvm); -int vgic_init_common_maps(struct kvm *kvm); - -u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset); -u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset); - -void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq); -void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq); -void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq); -void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val); - -void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); - -bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); -void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); - -struct kvm_exit_mmio { - phys_addr_t phys_addr; - void *data; - u32 len; - bool is_write; - void *private; -}; - -void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode); -bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); - -static inline -u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) -{ - return le32_to_cpu(*((u32 *)mmio->data)) & mask; -} - -static inline -void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) -{ - *((u32 *)mmio->data) = cpu_to_le32(value) & mask; -} - -struct vgic_io_range { - phys_addr_t base; - unsigned long len; - int bits_per_irq; - bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); -}; - -int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, - const struct vgic_io_range *ranges, - int redist_id, - struct vgic_io_device *iodev); - -static inline bool is_in_range(phys_addr_t addr, unsigned long len, - phys_addr_t baseaddr, unsigned long size) -{ - return (addr >= baseaddr) && (addr + len <= baseaddr + size); -} - -const -struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - int len, gpa_t offset); - -bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id, int access); - -bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_set_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_clear_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, - phys_addr_t offset); - -void vgic_kick_vcpus(struct kvm *kvm); - -int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset); -int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); -int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); - -int vgic_init(struct kvm *kvm); -void vgic_v2_init_emulation(struct kvm *kvm); -void vgic_v3_init_emulation(struct kvm *kvm); - -#endif diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 2c7f0d5a62ea..1e30ce08700d 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -157,6 +157,9 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); int i; + INIT_LIST_HEAD(&dist->lpi_list_head); + spin_lock_init(&dist->lpi_list_lock); + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); if (!dist->spis) return -ENOMEM; @@ -177,6 +180,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) spin_lock_init(&irq->irq_lock); irq->vcpu = NULL; irq->target_vcpu = vcpu0; + kref_init(&irq->refcount); if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) irq->targets = 0; else @@ -211,6 +215,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) irq->vcpu = NULL; irq->target_vcpu = vcpu; irq->targets = 1U << vcpu->vcpu_id; + kref_init(&irq->refcount); if (vgic_irq_is_sgi(i)) { /* SGIs */ irq->enabled = 1; @@ -253,6 +258,9 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; + if (vgic_has_its(kvm)) + dist->msis_require_devid = true; + kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_init(vcpu); @@ -271,7 +279,6 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) dist->initialized = false; kfree(dist->spis); - kfree(dist->redist_iodevs); dist->nr_spis = 0; mutex_unlock(&kvm->lock); diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c new file mode 100644 index 000000000000..07411cf967b9 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -0,0 +1,1500 @@ +/* + * GICv3 ITS emulation + * + * Copyright (C) 2015,2016 ARM Ltd. + * Author: Andre Przywara <andre.przywara@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/uaccess.h> + +#include <linux/irqchip/arm-gic-v3.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* + * Creates a new (reference to a) struct vgic_irq for a given LPI. + * If this LPI is already mapped on another ITS, we increase its refcount + * and return a pointer to the existing structure. + * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. + * This function returns a pointer to the _unlocked_ structure. + */ +static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + + /* In this case there is no put, since we keep the reference. */ + if (irq) + return irq; + + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); + if (!irq) + return NULL; + + INIT_LIST_HEAD(&irq->lpi_list); + INIT_LIST_HEAD(&irq->ap_list); + spin_lock_init(&irq->irq_lock); + + irq->config = VGIC_CONFIG_EDGE; + kref_init(&irq->refcount); + irq->intid = intid; + + spin_lock(&dist->lpi_list_lock); + + /* + * There could be a race with another vgic_add_lpi(), so we need to + * check that we don't add a second list entry with the same LPI. + */ + list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { + if (oldirq->intid != intid) + continue; + + /* Someone was faster with adding this LPI, lets use that. */ + kfree(irq); + irq = oldirq; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() on the returned pointer once it's + * finished with the IRQ. + */ + vgic_get_irq_kref(irq); + + goto out_unlock; + } + + list_add_tail(&irq->lpi_list, &dist->lpi_list_head); + dist->lpi_list_count++; + +out_unlock: + spin_unlock(&dist->lpi_list_lock); + + return irq; +} + +struct its_device { + struct list_head dev_list; + + /* the head for the list of ITTEs */ + struct list_head itt_head; + u32 device_id; +}; + +#define COLLECTION_NOT_MAPPED ((u32)~0) + +struct its_collection { + struct list_head coll_list; + + u32 collection_id; + u32 target_addr; +}; + +#define its_is_collection_mapped(coll) ((coll) && \ + ((coll)->target_addr != COLLECTION_NOT_MAPPED)) + +struct its_itte { + struct list_head itte_list; + + struct vgic_irq *irq; + struct its_collection *collection; + u32 lpi; + u32 event_id; +}; + +/* + * Find and returns a device in the device table for an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_device *find_its_device(struct vgic_its *its, u32 device_id) +{ + struct its_device *device; + + list_for_each_entry(device, &its->device_list, dev_list) + if (device_id == device->device_id) + return device; + + return NULL; +} + +/* + * Find and returns an interrupt translation table entry (ITTE) for a given + * Device ID/Event ID pair on an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_itte *find_itte(struct vgic_its *its, u32 device_id, + u32 event_id) +{ + struct its_device *device; + struct its_itte *itte; + + device = find_its_device(its, device_id); + if (device == NULL) + return NULL; + + list_for_each_entry(itte, &device->itt_head, itte_list) + if (itte->event_id == event_id) + return itte; + + return NULL; +} + +/* To be used as an iterator this macro misses the enclosing parentheses */ +#define for_each_lpi_its(dev, itte, its) \ + list_for_each_entry(dev, &(its)->device_list, dev_list) \ + list_for_each_entry(itte, &(dev)->itt_head, itte_list) + +/* + * We only implement 48 bits of PA at the moment, although the ITS + * supports more. Let's be restrictive here. + */ +#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) +#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) +#define PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) +#define PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) + +#define GIC_LPI_OFFSET 8192 + +/* + * Finds and returns a collection in the ITS collection table. + * Must be called with the its_lock mutex held. + */ +static struct its_collection *find_collection(struct vgic_its *its, int coll_id) +{ + struct its_collection *collection; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + if (coll_id == collection->collection_id) + return collection; + } + + return NULL; +} + +#define LPI_PROP_ENABLE_BIT(p) ((p) & LPI_PROP_ENABLED) +#define LPI_PROP_PRIORITY(p) ((p) & 0xfc) + +/* + * Reads the configuration data for a given LPI from guest memory and + * updates the fields in struct vgic_irq. + * If filter_vcpu is not NULL, applies only if the IRQ is targeting this + * VCPU. Unconditionally applies if filter_vcpu is NULL. + */ +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu) +{ + u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); + u8 prop; + int ret; + + ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); + + if (ret) + return ret; + + spin_lock(&irq->irq_lock); + + if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { + irq->priority = LPI_PROP_PRIORITY(prop); + irq->enabled = LPI_PROP_ENABLE_BIT(prop); + + vgic_queue_irq_unlock(kvm, irq); + } else { + spin_unlock(&irq->irq_lock); + } + + return 0; +} + +/* + * Create a snapshot of the current LPI list, so that we can enumerate all + * LPIs without holding any lock. + * Returns the array length and puts the kmalloc'ed array into intid_ptr. + */ +static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + u32 *intids; + int irq_count = dist->lpi_list_count, i = 0; + + /* + * We use the current value of the list length, which may change + * after the kmalloc. We don't care, because the guest shouldn't + * change anything while the command handling is still running, + * and in the worst case we would miss a new IRQ, which one wouldn't + * expect to be covered by this command anyway. + */ + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); + if (!intids) + return -ENOMEM; + + spin_lock(&dist->lpi_list_lock); + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + /* We don't need to "get" the IRQ, as we hold the list lock. */ + intids[i] = irq->intid; + if (++i == irq_count) + break; + } + spin_unlock(&dist->lpi_list_lock); + + *intid_ptr = intids; + return irq_count; +} + +/* + * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI + * is targeting) to the VGIC's view, which deals with target VCPUs. + * Needs to be called whenever either the collection for a LPIs has + * changed or the collection itself got retargeted. + */ +static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte) +{ + struct kvm_vcpu *vcpu; + + if (!its_is_collection_mapped(itte->collection)) + return; + + vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + + spin_lock(&itte->irq->irq_lock); + itte->irq->target_vcpu = vcpu; + spin_unlock(&itte->irq->irq_lock); +} + +/* + * Updates the target VCPU for every LPI targeting this collection. + * Must be called with the its_lock mutex held. + */ +static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, + struct its_collection *coll) +{ + struct its_device *device; + struct its_itte *itte; + + for_each_lpi_its(device, itte, its) { + if (!itte->collection || coll != itte->collection) + continue; + + update_affinity_itte(kvm, itte); + } +} + +static u32 max_lpis_propbaser(u64 propbaser) +{ + int nr_idbits = (propbaser & 0x1f) + 1; + + return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS); +} + +/* + * Scan the whole LPI pending table and sync the pending bit in there + * with our own data structures. This relies on the LPI being + * mapped before. + */ +static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) +{ + gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + struct vgic_irq *irq; + int last_byte_offset = -1; + int ret = 0; + u32 *intids; + int nr_irqs, i; + + nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids); + if (nr_irqs < 0) + return nr_irqs; + + for (i = 0; i < nr_irqs; i++) { + int byte_offset, bit_nr; + u8 pendmask; + + byte_offset = intids[i] / BITS_PER_BYTE; + bit_nr = intids[i] % BITS_PER_BYTE; + + /* + * For contiguously allocated LPIs chances are we just read + * this very same byte in the last iteration. Reuse that. + */ + if (byte_offset != last_byte_offset) { + ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset, + &pendmask, 1); + if (ret) { + kfree(intids); + return ret; + } + last_byte_offset = byte_offset; + } + + irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + spin_lock(&irq->irq_lock); + irq->pending = pendmask & (1U << bit_nr); + vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); + } + + kfree(intids); + + return ret; +} + +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + its->enabled = !!(val & GITS_CTLR_ENABLE); +} + +static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u64 reg = GITS_TYPER_PLPIS; + + /* + * We use linear CPU numbers for redistributor addressing, + * so GITS_TYPER.PTA is 0. + * Also we force all PROPBASER registers to be the same, so + * CommonLPIAff is 0 as well. + * To avoid memory waste in the guest, we keep the number of IDBits and + * DevBits low - as least for the time being. + */ + reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT; + reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT; + + return extract_bytes(reg, addr & 7, len); +} + +static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); +} + +static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GITS_PIDR0: + return 0x92; /* part number, bits[7:0] */ + case GITS_PIDR1: + return 0xb4; /* part number, bits[11:8] */ + case GITS_PIDR2: + return GIC_PIDR2_ARCH_GICv3 | 0x0b; + case GITS_PIDR4: + return 0x40; /* This is a 64K software visible page */ + /* The following are the ID registers for (any) GIC. */ + case GITS_CIDR0: + return 0x0d; + case GITS_CIDR1: + return 0xf0; + case GITS_CIDR2: + return 0x05; + case GITS_CIDR3: + return 0xb1; + } + + return 0; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + */ +static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) +{ + struct its_itte *itte; + + if (!its->enabled) + return; + + itte = find_itte(its, devid, eventid); + /* Triggering an unmapped IRQ gets silently dropped. */ + if (itte && its_is_collection_mapped(itte->collection)) { + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) { + spin_lock(&itte->irq->irq_lock); + itte->irq->pending = true; + vgic_queue_irq_unlock(kvm, itte->irq); + } + } +} + +/* + * Queries the KVM IO bus framework to get the ITS pointer from the given + * doorbell address. + * We then call vgic_its_trigger_msi() with the decoded data. + */ +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + u64 address; + struct kvm_io_device *kvm_io_dev; + struct vgic_io_device *iodev; + + if (!vgic_has_its(kvm)) + return -ENODEV; + + if (!(msi->flags & KVM_MSI_VALID_DEVID)) + return -EINVAL; + + address = (u64)msi->address_hi << 32 | msi->address_lo; + + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); + if (!kvm_io_dev) + return -ENODEV; + + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + + mutex_lock(&iodev->its->its_lock); + vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); + mutex_unlock(&iodev->its->its_lock); + + return 0; +} + +/* Requires the its_lock to be held. */ +static void its_free_itte(struct kvm *kvm, struct its_itte *itte) +{ + list_del(&itte->itte_list); + + /* This put matches the get in vgic_add_lpi. */ + vgic_put_irq(kvm, itte->irq); + + kfree(itte); +} + +static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) +{ + return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1); +} + +#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) +#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) +#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) +#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) +#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) +#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) +#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) + +/* + * The DISCARD command frees an Interrupt Translation Table Entry (ITTE). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_itte *itte; + + + itte = find_itte(its, device_id, event_id); + if (itte && itte->collection) { + /* + * Though the spec talks about removing the pending state, we + * don't bother here since we clear the ITTE anyway and the + * pending state is a property of the ITTE struct. + */ + its_free_itte(kvm, itte); + return 0; + } + + return E_ITS_DISCARD_UNMAPPED_INTERRUPT; +} + +/* + * The MOVI command moves an ITTE to a different collection. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct kvm_vcpu *vcpu; + struct its_itte *itte; + struct its_collection *collection; + + itte = find_itte(its, device_id, event_id); + if (!itte) + return E_ITS_MOVI_UNMAPPED_INTERRUPT; + + if (!its_is_collection_mapped(itte->collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + itte->collection = collection; + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + spin_lock(&itte->irq->irq_lock); + itte->irq->target_vcpu = vcpu; + spin_unlock(&itte->irq->irq_lock); + + return 0; +} + +/* + * Check whether an ID can be stored into the corresponding guest table. + * For a direct table this is pretty easy, but gets a bit nasty for + * indirect tables. We check whether the resulting guest physical address + * is actually valid (covered by a memslot and guest accessbible). + * For this we have to read the respective first level entry. + */ +static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) +{ + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + int index; + u64 indirect_ptr; + gfn_t gfn; + + if (!(baser & GITS_BASER_INDIRECT)) { + phys_addr_t addr; + + if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser))) + return false; + + addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser); + gfn = addr >> PAGE_SHIFT; + + return kvm_is_visible_gfn(its->dev->kvm, gfn); + } + + /* calculate and check the index into the 1st level */ + index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); + if (index >= (l1_tbl_size / sizeof(u64))) + return false; + + /* Each 1st level entry is represented by a 64-bit value. */ + if (kvm_read_guest(its->dev->kvm, + BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), + &indirect_ptr, sizeof(indirect_ptr))) + return false; + + indirect_ptr = le64_to_cpu(indirect_ptr); + + /* check the valid bit of the first level entry */ + if (!(indirect_ptr & BIT_ULL(63))) + return false; + + /* + * Mask the guest physical address and calculate the frame number. + * Any address beyond our supported 48 bits of PA will be caught + * by the actual check in the final step. + */ + indirect_ptr &= GENMASK_ULL(51, 16); + + /* Find the address of the actual entry */ + index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); + indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser); + gfn = indirect_ptr >> PAGE_SHIFT; + + return kvm_is_visible_gfn(its->dev->kvm, gfn); +} + +static int vgic_its_alloc_collection(struct vgic_its *its, + struct its_collection **colp, + u32 coll_id) +{ + struct its_collection *collection; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id)) + return E_ITS_MAPC_COLLECTION_OOR; + + collection = kzalloc(sizeof(*collection), GFP_KERNEL); + + collection->collection_id = coll_id; + collection->target_addr = COLLECTION_NOT_MAPPED; + + list_add_tail(&collection->coll_list, &its->collection_list); + *colp = collection; + + return 0; +} + +static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) +{ + struct its_collection *collection; + struct its_device *device; + struct its_itte *itte; + + /* + * Clearing the mapping for that collection ID removes the + * entry from the list. If there wasn't any before, we can + * go home early. + */ + collection = find_collection(its, coll_id); + if (!collection) + return; + + for_each_lpi_its(device, itte, its) + if (itte->collection && + itte->collection->collection_id == coll_id) + itte->collection = NULL; + + list_del(&collection->coll_list); + kfree(collection); +} + +/* + * The MAPTI and MAPI commands map LPIs to ITTEs. + * Must be called with its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_itte *itte; + struct its_device *device; + struct its_collection *collection, *new_coll = NULL; + int lpi_nr; + + device = find_its_device(its, device_id); + if (!device) + return E_ITS_MAPTI_UNMAPPED_DEVICE; + + if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) + lpi_nr = its_cmd_get_physical_id(its_cmd); + else + lpi_nr = event_id; + if (lpi_nr < GIC_LPI_OFFSET || + lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) + return E_ITS_MAPTI_PHYSICALID_OOR; + + collection = find_collection(its, coll_id); + if (!collection) { + int ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + new_coll = collection; + } + + itte = find_itte(its, device_id, event_id); + if (!itte) { + itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); + if (!itte) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + return -ENOMEM; + } + + itte->event_id = event_id; + list_add_tail(&itte->itte_list, &device->itt_head); + } + + itte->collection = collection; + itte->lpi = lpi_nr; + itte->irq = vgic_add_lpi(kvm, lpi_nr); + update_affinity_itte(kvm, itte); + + /* + * We "cache" the configuration table entries in out struct vgic_irq's. + * However we only have those structs for mapped IRQs, so we read in + * the respective config data from memory here upon mapping the LPI. + */ + update_lpi_config(kvm, itte->irq, NULL); + + return 0; +} + +/* Requires the its_lock to be held. */ +static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) +{ + struct its_itte *itte, *temp; + + /* + * The spec says that unmapping a device with still valid + * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, + * since we cannot leave the memory unreferenced. + */ + list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list) + its_free_itte(kvm, itte); + + list_del(&device->dev_list); + kfree(device); +} + +/* + * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + bool valid = its_cmd_get_validbit(its_cmd); + struct its_device *device; + + if (!vgic_its_check_id(its, its->baser_device_table, device_id)) + return E_ITS_MAPD_DEVICE_OOR; + + device = find_its_device(its, device_id); + + /* + * The spec says that calling MAPD on an already mapped device + * invalidates all cached data for this device. We implement this + * by removing the mapping and re-establishing it. + */ + if (device) + vgic_its_unmap_device(kvm, device); + + /* + * The spec does not say whether unmapping a not-mapped device + * is an error, so we are done in any case. + */ + if (!valid) + return 0; + + device = kzalloc(sizeof(struct its_device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + device->device_id = device_id; + INIT_LIST_HEAD(&device->itt_head); + + list_add_tail(&device->dev_list, &its->device_list); + + return 0; +} + +/* + * The MAPC command maps collection IDs to redistributors. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u16 coll_id; + u32 target_addr; + struct its_collection *collection; + bool valid; + + valid = its_cmd_get_validbit(its_cmd); + coll_id = its_cmd_get_collection(its_cmd); + target_addr = its_cmd_get_target_addr(its_cmd); + + if (target_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MAPC_PROCNUM_OOR; + + if (!valid) { + vgic_its_free_collection(its, coll_id); + } else { + collection = find_collection(its, coll_id); + + if (!collection) { + int ret; + + ret = vgic_its_alloc_collection(its, &collection, + coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + } else { + collection->target_addr = target_addr; + update_affinity_collection(kvm, its, collection); + } + } + + return 0; +} + +/* + * The CLEAR command removes the pending state for a particular LPI. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_itte *itte; + + + itte = find_itte(its, device_id, event_id); + if (!itte) + return E_ITS_CLEAR_UNMAPPED_INTERRUPT; + + itte->irq->pending = false; + + return 0; +} + +/* + * The INV command syncs the configuration bits from the memory table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_itte *itte; + + + itte = find_itte(its, device_id, event_id); + if (!itte) + return E_ITS_INV_UNMAPPED_INTERRUPT; + + return update_lpi_config(kvm, itte->irq, NULL); +} + +/* + * The INVALL command requests flushing of all IRQ data in this collection. + * Find the VCPU mapped to that collection, then iterate over the VM's list + * of mapped LPIs and update the configuration for each IRQ which targets + * the specified vcpu. The configuration will be read from the in-memory + * configuration table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_collection *collection; + struct kvm_vcpu *vcpu; + struct vgic_irq *irq; + u32 *intids; + int irq_count, i; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_INVALL_UNMAPPED_COLLECTION; + + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq_count = vgic_copy_lpi_list(kvm, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; + update_lpi_config(kvm, irq, vcpu); + vgic_put_irq(kvm, irq); + } + + kfree(intids); + + return 0; +} + +/* + * The MOVALL command moves the pending state of all IRQs targeting one + * redistributor to another. We don't hold the pending state in the VCPUs, + * but in the IRQs instead, so there is really not much to do for us here. + * However the spec says that no IRQ must target the old redistributor + * afterwards, so we make sure that no LPI is using the associated target_vcpu. + * This command affects all LPIs in the system that target that redistributor. + */ +static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + u32 target1_addr = its_cmd_get_target_addr(its_cmd); + u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); + struct kvm_vcpu *vcpu1, *vcpu2; + struct vgic_irq *irq; + + if (target1_addr >= atomic_read(&kvm->online_vcpus) || + target2_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MOVALL_PROCNUM_OOR; + + if (target1_addr == target2_addr) + return 0; + + vcpu1 = kvm_get_vcpu(kvm, target1_addr); + vcpu2 = kvm_get_vcpu(kvm, target2_addr); + + spin_lock(&dist->lpi_list_lock); + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + spin_lock(&irq->irq_lock); + + if (irq->target_vcpu == vcpu1) + irq->target_vcpu = vcpu2; + + spin_unlock(&irq->irq_lock); + } + + spin_unlock(&dist->lpi_list_lock); + + return 0; +} + +/* + * The INT command injects the LPI associated with that DevID/EvID pair. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 msi_data = its_cmd_get_id(its_cmd); + u64 msi_devid = its_cmd_get_deviceid(its_cmd); + + vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); + + return 0; +} + +/* + * This function is called with the its_cmd lock held, but the ITS data + * structure lock dropped. + */ +static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + int ret = -ENODEV; + + mutex_lock(&its->its_lock); + switch (its_cmd_get_command(its_cmd)) { + case GITS_CMD_MAPD: + ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd); + break; + case GITS_CMD_MAPC: + ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd); + break; + case GITS_CMD_MAPI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MAPTI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MOVI: + ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd); + break; + case GITS_CMD_DISCARD: + ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd); + break; + case GITS_CMD_CLEAR: + ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd); + break; + case GITS_CMD_MOVALL: + ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd); + break; + case GITS_CMD_INT: + ret = vgic_its_cmd_handle_int(kvm, its, its_cmd); + break; + case GITS_CMD_INV: + ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd); + break; + case GITS_CMD_INVALL: + ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd); + break; + case GITS_CMD_SYNC: + /* we ignore this command: we are in sync all of the time */ + ret = 0; + break; + } + mutex_unlock(&its->its_lock); + + return ret; +} + +static u64 vgic_sanitise_its_baser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK, + GITS_BASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK, + GITS_BASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK, + GITS_BASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */ + reg &= ~GENMASK_ULL(15, 12); + + /* We support only one (ITS) page size: 64K */ + reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; + + return reg; +} + +static u64 vgic_sanitise_its_cbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK, + GITS_CBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK, + GITS_CBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK, + GITS_CBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* + * Sanitise the physical address to be 64k aligned. + * Also limit the physical addresses to 48 bits. + */ + reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12)); + + return reg; +} + +static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cbaser, addr & 7, len); +} + +static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* When GITS_CTLR.Enable is 1, this register is RO. */ + if (its->enabled) + return; + + mutex_lock(&its->cmd_lock); + its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val); + its->cbaser = vgic_sanitise_its_cbaser(its->cbaser); + its->creadr = 0; + /* + * CWRITER is architecturally UNKNOWN on reset, but we need to reset + * it to CREADR to make sure we start with an empty command buffer. + */ + its->cwriter = its->creadr; + mutex_unlock(&its->cmd_lock); +} + +#define ITS_CMD_BUFFER_SIZE(baser) ((((baser) & 0xff) + 1) << 12) +#define ITS_CMD_SIZE 32 +#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + gpa_t cbaser; + u64 cmd_buf[4]; + u32 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + + its->cwriter = reg; + cbaser = CBASER_ADDRESS(its->cbaser); + + while (its->cwriter != its->creadr) { + int ret = kvm_read_guest(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); + /* + * If kvm_read_guest() fails, this could be due to the guest + * programming a bogus value in CBASER or something else going + * wrong from which we cannot easily recover. + * According to section 6.3.2 in the GICv3 spec we can just + * ignore that command then. + */ + if (!ret) + vgic_its_handle_command(kvm, its, cmd_buf); + + its->creadr += ITS_CMD_SIZE; + if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) + its->creadr = 0; + } + + mutex_unlock(&its->cmd_lock); +} + +static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cwriter, addr & 0x7, len); +} + +static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->creadr, addr & 0x7, len); +} + +#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) +static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u64 reg; + + switch (BASER_INDEX(addr)) { + case 0: + reg = its->baser_device_table; + break; + case 1: + reg = its->baser_coll_table; + break; + default: + reg = 0; + break; + } + + return extract_bytes(reg, addr & 7, len); +} + +#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56)) +static void vgic_mmio_write_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 entry_size, device_type; + u64 reg, *regptr, clearbits = 0; + + /* When GITS_CTLR.Enable is 1, we ignore write accesses. */ + if (its->enabled) + return; + + switch (BASER_INDEX(addr)) { + case 0: + regptr = &its->baser_device_table; + entry_size = 8; + device_type = GITS_BASER_TYPE_DEVICE; + break; + case 1: + regptr = &its->baser_coll_table; + entry_size = 8; + device_type = GITS_BASER_TYPE_COLLECTION; + clearbits = GITS_BASER_INDIRECT; + break; + default: + return; + } + + reg = update_64bit_reg(*regptr, addr & 7, len, val); + reg &= ~GITS_BASER_RO_MASK; + reg &= ~clearbits; + + reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; + reg |= device_type << GITS_BASER_TYPE_SHIFT; + reg = vgic_sanitise_its_baser(reg); + + *regptr = reg; +} + +#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ +} + +static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +static struct vgic_register_region its_registers[] = { + REGISTER_ITS_DESC(GITS_CTLR, + vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_IIDR, + vgic_mmio_read_its_iidr, its_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_TYPER, + vgic_mmio_read_its_typer, its_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CBASER, + vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CWRITER, + vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CREADR, + vgic_mmio_read_its_creadr, its_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_BASER, + vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_IDREGS_BASE, + vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30, + VGIC_ACCESS_32bit), +}; + +/* This is called on setting the LPI enable bit in the redistributor. */ +void vgic_enable_lpis(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ)) + its_sync_lpi_pending_table(vcpu); +} + +static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its) +{ + struct vgic_io_device *iodev = &its->iodev; + int ret; + + if (its->initialized) + return 0; + + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) + return -ENXIO; + + iodev->regions = its_registers; + iodev->nr_regions = ARRAY_SIZE(its_registers); + kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); + + iodev->base_addr = its->vgic_its_base; + iodev->iodev_type = IODEV_ITS; + iodev->its = its; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, + KVM_VGIC_V3_ITS_SIZE, &iodev->dev); + mutex_unlock(&kvm->slots_lock); + + if (!ret) + its->initialized = true; + + return ret; +} + +#define INITIAL_BASER_VALUE \ + (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ + ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | \ + GITS_BASER_PAGE_SIZE_64K) + +#define INITIAL_PROPBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)) + +static int vgic_its_create(struct kvm_device *dev, u32 type) +{ + struct vgic_its *its; + + if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) + return -ENODEV; + + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); + if (!its) + return -ENOMEM; + + mutex_init(&its->its_lock); + mutex_init(&its->cmd_lock); + + its->vgic_its_base = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&its->device_list); + INIT_LIST_HEAD(&its->collection_list); + + dev->kvm->arch.vgic.has_its = true; + its->initialized = false; + its->enabled = false; + its->dev = dev; + + its->baser_device_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT); + its->baser_coll_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT); + dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE; + + dev->private = its; + + return 0; +} + +static void vgic_its_destroy(struct kvm_device *kvm_dev) +{ + struct kvm *kvm = kvm_dev->kvm; + struct vgic_its *its = kvm_dev->private; + struct its_device *dev; + struct its_itte *itte; + struct list_head *dev_cur, *dev_temp; + struct list_head *cur, *temp; + + /* + * We may end up here without the lists ever having been initialized. + * Check this and bail out early to avoid dereferencing a NULL pointer. + */ + if (!its->device_list.next) + return; + + mutex_lock(&its->its_lock); + list_for_each_safe(dev_cur, dev_temp, &its->device_list) { + dev = container_of(dev_cur, struct its_device, dev_list); + list_for_each_safe(cur, temp, &dev->itt_head) { + itte = (container_of(cur, struct its_itte, itte_list)); + its_free_itte(kvm, itte); + } + list_del(dev_cur); + kfree(dev); + } + + list_for_each_safe(cur, temp, &its->collection_list) { + list_del(cur); + kfree(container_of(cur, struct its_collection, coll_list)); + } + mutex_unlock(&its->its_lock); + + kfree(its); +} + +static int vgic_its_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_ITS_ADDR_TYPE: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } + break; + } + return -ENXIO; +} + +static int vgic_its_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct vgic_its *its = dev->private; + int ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + u64 addr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (its->initialized) + return -EBUSY; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, + addr, SZ_64K); + if (ret) + return ret; + + its->vgic_its_base = addr; + + return 0; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return vgic_its_init_its(dev->kvm, its); + } + break; + } + return -ENXIO; +} + +static int vgic_its_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + struct vgic_its *its = dev->private; + u64 addr = its->vgic_its_base; + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + break; + default: + return -ENXIO; + } + } + + return 0; +} + +static struct kvm_device_ops kvm_arm_vgic_its_ops = { + .name = "kvm-arm-vgic-its", + .create = vgic_its_create, + .destroy = vgic_its_destroy, + .set_attr = vgic_its_set_attr, + .get_attr = vgic_its_get_attr, + .has_attr = vgic_its_has_attr, +}; + +int kvm_vgic_register_its_device(void) +{ + return kvm_register_device_ops(&kvm_arm_vgic_its_ops, + KVM_DEV_TYPE_ARM_VGIC_ITS); +} diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 0130c4b147b7..1813f93b5cde 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -21,8 +21,8 @@ /* common helpers */ -static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment) +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment) { if (addr & ~KVM_PHYS_MASK) return -E2BIG; @@ -210,20 +210,27 @@ static void vgic_destroy(struct kvm_device *dev) kfree(dev); } -void kvm_register_vgic_device(unsigned long type) +int kvm_register_vgic_device(unsigned long type) { + int ret = -ENODEV; + switch (type) { case KVM_DEV_TYPE_ARM_VGIC_V2: - kvm_register_device_ops(&kvm_arm_vgic_v2_ops, - KVM_DEV_TYPE_ARM_VGIC_V2); + ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); break; #ifdef CONFIG_KVM_ARM_VGIC_V3 case KVM_DEV_TYPE_ARM_VGIC_V3: - kvm_register_device_ops(&kvm_arm_vgic_v3_ops, - KVM_DEV_TYPE_ARM_VGIC_V3); + ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, + KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) + break; + ret = kvm_vgic_register_its_device(); break; #endif } + + return ret; } /** vgic_attr_regs_access: allows user space to read/write VGIC registers @@ -428,4 +435,3 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = { }; #endif /* CONFIG_KVM_ARM_VGIC_V3 */ - diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index a21393637e4b..b44b359cbbad 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, irq->source |= 1U << source_vcpu->vcpu_id; vgic_queue_irq_unlock(source_vcpu->kvm, irq); + vgic_put_irq(source_vcpu->kvm, irq); } } @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); val |= (u64)irq->targets << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); } return val; @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); val |= (u64)irq->source << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); } return val; } @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, irq->pending = false; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, } else { spin_unlock(&irq->irq_lock); } + vgic_put_irq(vcpu->kvm, irq); } } @@ -429,6 +437,7 @@ int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, struct vgic_io_device dev = { .regions = vgic_v2_cpu_registers, .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), + .iodev_type = IODEV_CPUIF, }; return vgic_uaccess(vcpu, &dev, is_write, offset, val); @@ -440,6 +449,7 @@ int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, struct vgic_io_device dev = { .regions = vgic_v2_dist_registers, .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), + .iodev_type = IODEV_DIST, }; return vgic_uaccess(vcpu, &dev, is_write, offset, val); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a0c515a412a7..ff668e0dd586 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -23,12 +23,35 @@ #include "vgic-mmio.h" /* extract @num bytes at @offset bytes offset in data */ -static unsigned long extract_bytes(unsigned long data, unsigned int offset, - unsigned int num) +unsigned long extract_bytes(unsigned long data, unsigned int offset, + unsigned int num) { return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); } +/* allows updates of any half of a 64-bit register (or the whole thing) */ +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val) +{ + int lower = (offset & 4) * 8; + int upper = lower + 8 * len - 1; + + reg &= ~GENMASK_ULL(upper, lower); + val &= GENMASK_ULL(len * 8 - 1, 0); + + return reg | ((u64)val << lower); +} + +bool vgic_has_its(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + return dist->has_its; +} + static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -43,7 +66,12 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, case GICD_TYPER: value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; value = (value >> 5) - 1; - value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; + if (vgic_has_its(vcpu->kvm)) { + value |= (INTERRUPT_ID_BITS_ITS - 1) << 19; + value |= GICD_TYPER_LPIS; + } else { + value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; + } break; case GICD_IIDR: value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); @@ -80,15 +108,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, { int intid = VGIC_ADDR_TO_INTID(addr, 64); struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + unsigned long ret = 0; if (!irq) return 0; /* The upper word is RAZ for us. */ - if (addr & 4) - return 0; + if (!(addr & 4)) + ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); - return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); + vgic_put_irq(vcpu->kvm, irq); + return ret; } static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, @@ -96,15 +126,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, unsigned long val) { int intid = VGIC_ADDR_TO_INTID(addr, 64); - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); - - if (!irq) - return; + struct vgic_irq *irq; /* The upper word is WI for us since we don't implement Aff3. */ if (addr & 4) return; + irq = vgic_get_irq(vcpu->kvm, NULL, intid); + + if (!irq) + return; + spin_lock(&irq->irq_lock); /* We only care about and preserve Aff0, Aff1 and Aff2. */ @@ -112,6 +144,32 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); +} + +static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0; +} + + +static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + bool was_enabled = vgic_cpu->lpis_enabled; + + if (!vgic_has_its(vcpu->kvm)) + return; + + vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; + + if (!was_enabled && vgic_cpu->lpis_enabled) + vgic_enable_lpis(vcpu); } static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, @@ -125,6 +183,8 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, value |= ((target_vcpu_id & 0xffff) << 8); if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) value |= GICR_TYPER_LAST; + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; return extract_bytes(value, addr & 7, len); } @@ -147,6 +207,142 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, return 0; } +/* We want to avoid outer shareable. */ +u64 vgic_sanitise_shareability(u64 field) +{ + switch (field) { + case GIC_BASER_OuterShareable: + return GIC_BASER_InnerShareable; + default: + return field; + } +} + +/* Avoid any inner non-cacheable mapping. */ +u64 vgic_sanitise_inner_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_nCnB: + case GIC_BASER_CACHE_nC: + return GIC_BASER_CACHE_RaWb; + default: + return field; + } +} + +/* Non-cacheable or same-as-inner are OK. */ +u64 vgic_sanitise_outer_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_SameAsInner: + case GIC_BASER_CACHE_nC: + return field; + default: + return GIC_BASER_CACHE_nC; + } +} + +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)) +{ + u64 field = (reg & field_mask) >> field_shift; + + field = sanitise_fn(field) << field_shift; + return (reg & ~field_mask) | field; +} + +#define PROPBASER_RES0_MASK \ + (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5)) +#define PENDBASER_RES0_MASK \ + (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) | \ + GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0)) + +static u64 vgic_sanitise_pendbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK, + GICR_PENDBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK, + GICR_PENDBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK, + GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PENDBASER_RES0_MASK; + reg &= ~GENMASK_ULL(51, 48); + + return reg; +} + +static u64 vgic_sanitise_propbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK, + GICR_PROPBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK, + GICR_PROPBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK, + GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PROPBASER_RES0_MASK; + reg &= ~GENMASK_ULL(51, 48); + return reg; +} + +static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return extract_bytes(dist->propbaser, addr & 7, len); +} + +static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 propbaser = dist->propbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); + propbaser = vgic_sanitise_propbaser(propbaser); + + dist->propbaser = propbaser; +} + +static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return extract_bytes(vgic_cpu->pendbaser, addr & 7, len); +} + +static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 pendbaser = vgic_cpu->pendbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); + pendbaser = vgic_sanitise_pendbaser(pendbaser); + + vgic_cpu->pendbaser = pendbaser; +} + /* * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the * redistributors, while SPIs are covered by registers in the distributor @@ -218,7 +414,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { static const struct vgic_register_region vgic_v3_rdbase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_CTLR, - vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, @@ -227,10 +423,10 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, - vgic_mmio_read_raz, vgic_mmio_write_wi, 8, + vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, - vgic_mmio_read_raz, vgic_mmio_write_wi, 8, + vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, @@ -285,24 +481,18 @@ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) { - int nr_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_vcpu *vcpu; - struct vgic_io_device *devices; int c, ret = 0; - devices = kmalloc(sizeof(struct vgic_io_device) * nr_vcpus * 2, - GFP_KERNEL); - if (!devices) - return -ENOMEM; - kvm_for_each_vcpu(c, vcpu, kvm) { gpa_t rd_base = redist_base_address + c * SZ_64K * 2; gpa_t sgi_base = rd_base + SZ_64K; - struct vgic_io_device *rd_dev = &devices[c * 2]; - struct vgic_io_device *sgi_dev = &devices[c * 2 + 1]; + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); rd_dev->base_addr = rd_base; + rd_dev->iodev_type = IODEV_REDIST; rd_dev->regions = vgic_v3_rdbase_registers; rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); rd_dev->redist_vcpu = vcpu; @@ -317,6 +507,7 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); sgi_dev->base_addr = sgi_base; + sgi_dev->iodev_type = IODEV_REDIST; sgi_dev->regions = vgic_v3_sgibase_registers; sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); sgi_dev->redist_vcpu = vcpu; @@ -335,14 +526,15 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) if (ret) { /* The current c failed, so we start with the previous one. */ for (c--; c >= 0; c--) { + struct vgic_cpu *vgic_cpu; + + vcpu = kvm_get_vcpu(kvm, c); + vgic_cpu = &vcpu->arch.vgic_cpu; kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &devices[c * 2].dev); + &vgic_cpu->rd_iodev.dev); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &devices[c * 2 + 1].dev); + &vgic_cpu->sgi_iodev.dev); } - kfree(devices); - } else { - kvm->arch.vgic.redist_iodevs = devices; } return ret; @@ -451,5 +643,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) irq->pending = true; vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); } } diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 9f6fab74dce7..3bad3c5ed431 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, if (irq->enabled) value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, spin_lock(&irq->irq_lock); irq->enabled = true; vgic_queue_irq_unlock(vcpu->kvm, irq); + + vgic_put_irq(vcpu->kvm, irq); } } @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, irq->enabled = false; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, if (irq->pending) value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, irq->soft_pending = true; vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); } } @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, } spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, if (irq->active) value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); + vgic_put_irq(vcpu->kvm, irq); } vgic_change_active_finish(vcpu, intid); } @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); + vgic_put_irq(vcpu->kvm, irq); } vgic_change_active_finish(vcpu, intid); } @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); val |= (u64)irq->priority << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); } return val; @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, /* Narrow the priority range to what we actually support */ irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); spin_unlock(&irq->irq_lock); + + vgic_put_irq(vcpu->kvm, irq); } } @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, if (irq->config == VGIC_CONFIG_EDGE) value |= (2U << (i * 2)); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, int i; for (i = 0; i < len * 4; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq; /* * The configuration cannot be changed for SGIs in general, @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, if (intid + i < VGIC_NR_PRIVATE_IRQS) continue; + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock(&irq->irq_lock); + if (test_bit(i * 2 + 1, &val)) { irq->config = VGIC_CONFIG_EDGE; } else { irq->config = VGIC_CONFIG_LEVEL; irq->pending = irq->line_level | irq->soft_pending; } + spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -450,8 +473,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, { struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); const struct vgic_register_region *region; - struct kvm_vcpu *r_vcpu; - unsigned long data; + unsigned long data = 0; region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); @@ -460,8 +482,21 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, return 0; } - r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; - data = region->read(r_vcpu, addr, len); + switch (iodev->iodev_type) { + case IODEV_CPUIF: + data = region->read(vcpu, addr, len); + break; + case IODEV_DIST: + data = region->read(vcpu, addr, len); + break; + case IODEV_REDIST: + data = region->read(iodev->redist_vcpu, addr, len); + break; + case IODEV_ITS: + data = region->its_read(vcpu->kvm, iodev->its, addr, len); + break; + } + vgic_data_host_to_mmio_bus(val, len, data); return 0; } @@ -471,7 +506,6 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, { struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); const struct vgic_register_region *region; - struct kvm_vcpu *r_vcpu; unsigned long data = vgic_data_mmio_bus_to_host(val, len); region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, @@ -482,8 +516,21 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, if (!check_region(region, addr, len)) return 0; - r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; - region->write(r_vcpu, addr, len, data); + switch (iodev->iodev_type) { + case IODEV_CPUIF: + region->write(vcpu, addr, len, data); + break; + case IODEV_DIST: + region->write(vcpu, addr, len, data); + break; + case IODEV_REDIST: + region->write(iodev->redist_vcpu, addr, len, data); + break; + case IODEV_ITS: + region->its_write(vcpu->kvm, iodev->its, addr, len, data); + break; + } + return 0; } @@ -513,6 +560,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, } io_device->base_addr = dist_base_address; + io_device->iodev_type = IODEV_DIST; io_device->redist_vcpu = NULL; mutex_lock(&kvm->slots_lock); diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 850901482aec..0b3ecf9d100e 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -21,10 +21,19 @@ struct vgic_register_region { unsigned int len; unsigned int bits_per_irq; unsigned int access_flags; - unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len); - void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, - unsigned long val); + union { + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + unsigned long (*its_read)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len); + }; + union { + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + void (*its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; }; extern struct kvm_io_device_ops kvm_io_gic_ops; @@ -87,6 +96,12 @@ unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, unsigned long data); +unsigned long extract_bytes(unsigned long data, unsigned int offset, + unsigned int num); + +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val); + unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); @@ -147,4 +162,12 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); +#ifdef CONFIG_KVM_ARM_VGIC_V3 +u64 vgic_sanitise_outer_cacheability(u64 reg); +u64 vgic_sanitise_inner_cacheability(u64 reg); +u64 vgic_sanitise_shareability(u64 reg); +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)); +#endif + #endif diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index e31405ee5515..0bf6709d1006 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) } spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -332,20 +333,25 @@ int vgic_v2_probe(const struct gic_kvm_info *info) vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device\n"); + iounmap(kvm_vgic_global_state.vctrl_base); + return ret; + } + ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, kvm_vgic_global_state.vctrl_base + resource_size(&info->vctrl), info->vctrl.start); - if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); iounmap(kvm_vgic_global_state.vctrl_base); return ret; } kvm_vgic_global_state.can_emulate_gicv2 = true; - kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); - kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.type = VGIC_V2; kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 346b4ad12b49..0506543df38a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -81,6 +81,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) else intid = val & GICH_LR_VIRTUALID; irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + if (!irq) /* An LPI could have been unmapped. */ + continue; spin_lock(&irq->irq_lock); @@ -113,6 +115,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) } spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -190,6 +193,11 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; } +#define INITIAL_PENDBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) + void vgic_v3_enable(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; @@ -207,10 +215,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) * way, so we force SRE to 1 to demonstrate this to the guest. * This goes with the spec allowing the value to be RAO/WI. */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; - else + vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; + } else { vgic_v3->vgic_sre = 0; + } /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; @@ -296,6 +306,7 @@ out: int vgic_v3_probe(const struct gic_kvm_info *info) { u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); + int ret; /* * The ListRegs field is 5 bits, but there is a architectural @@ -319,12 +330,22 @@ int vgic_v3_probe(const struct gic_kvm_info *info) } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.can_emulate_gicv2 = true; - kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device.\n"); + return ret; + } kvm_info("vgic-v2@%llx\n", info->vcpu.start); } + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3 KVM device.\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); + return ret; + } + if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); - kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); kvm_vgic_global_state.vctrl_base = NULL; kvm_vgic_global_state.type = VGIC_V3; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 69b61abefa19..39f3358c6d91 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -33,10 +33,17 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state; /* * Locking order is always: - * vgic_cpu->ap_list_lock - * vgic_irq->irq_lock + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock + * kvm->lpi_list_lock + * vgic_irq->irq_lock * - * (that is, always take the ap_list_lock before the struct vgic_irq lock). + * If you need to take multiple locks, always take the upper lock first, + * then the lower ones, e.g. first take the its_lock, then the irq_lock. + * If you are already holding a lock and need to take a higher one, you + * have to drop the lower ranking lock first and re-aquire it after having + * taken the upper one. * * When taking more than one ap_list_lock at the same time, always take the * lowest numbered VCPU's ap_list_lock first, so: @@ -45,6 +52,41 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state; * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); */ +/* + * Iterate over the VM's list of mapped LPIs to find the one with a + * matching interrupt ID and return a reference to the IRQ structure. + */ +static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = NULL; + + spin_lock(&dist->lpi_list_lock); + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (irq->intid != intid) + continue; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() later once it's finished with the IRQ. + */ + vgic_get_irq_kref(irq); + goto out_unlock; + } + irq = NULL; + +out_unlock: + spin_unlock(&dist->lpi_list_lock); + + return irq; +} + +/* + * This looks up the virtual interrupt ID to get the corresponding + * struct vgic_irq. It also increases the refcount, so any caller is expected + * to call vgic_put_irq() once it's finished with this IRQ. + */ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid) { @@ -56,14 +98,43 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, if (intid <= VGIC_MAX_SPI) return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; - /* LPIs are not yet covered */ + /* LPIs */ if (intid >= VGIC_MIN_LPI) - return NULL; + return vgic_get_lpi(kvm, intid); WARN(1, "Looking up struct vgic_irq for reserved INTID"); return NULL; } +/* + * We can't do anything in here, because we lack the kvm pointer to + * lock and remove the item from the lpi_list. So we keep this function + * empty and use the return value of kref_put() to trigger the freeing. + */ +static void vgic_irq_release(struct kref *ref) +{ +} + +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist; + + if (irq->intid < VGIC_MIN_LPI) + return; + + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + dist = &kvm->arch.vgic; + + spin_lock(&dist->lpi_list_lock); + list_del(&irq->lpi_list); + dist->lpi_list_count--; + spin_unlock(&dist->lpi_list_lock); + + kfree(irq); +} + /** * kvm_vgic_target_oracle - compute the target vcpu for an irq * @@ -236,6 +307,11 @@ retry: goto retry; } + /* + * Grab a reference to the irq to reflect the fact that it is + * now in the ap_list. + */ + vgic_get_irq_kref(irq); list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); irq->vcpu = vcpu; @@ -269,14 +345,17 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, if (!irq) return -EINVAL; - if (irq->hw != mapped_irq) + if (irq->hw != mapped_irq) { + vgic_put_irq(kvm, irq); return -EINVAL; + } spin_lock(&irq->irq_lock); if (!vgic_validate_injection(irq, level)) { /* Nothing to see here, move along... */ spin_unlock(&irq->irq_lock); + vgic_put_irq(kvm, irq); return 0; } @@ -288,6 +367,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, } vgic_queue_irq_unlock(kvm, irq); + vgic_put_irq(kvm, irq); return 0; } @@ -330,25 +410,28 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) irq->hwintid = phys_irq; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); return 0; } int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); - - BUG_ON(!irq); + struct vgic_irq *irq; if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; + irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + BUG_ON(!irq); + spin_lock(&irq->irq_lock); irq->hw = false; irq->hwintid = 0; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); return 0; } @@ -386,6 +469,15 @@ retry: list_del(&irq->ap_list); irq->vcpu = NULL; spin_unlock(&irq->irq_lock); + + /* + * This vgic_put_irq call matches the + * vgic_get_irq_kref in vgic_queue_irq_unlock, + * where we added the LPI to the ap_list. As + * we remove the irq from the list, we drop + * also drop the refcount. + */ + vgic_put_irq(vcpu->kvm, irq); continue; } @@ -614,6 +706,15 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) spin_lock(&irq->irq_lock); map_is_active = irq->hw && irq->active; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); return map_is_active; } + +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + if (vgic_has_its(kvm)) + return vgic_its_inject_msi(kvm, msi); + else + return -ENODEV; +} diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 7b300ca370b7..1d8e21d5c13f 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -25,6 +25,7 @@ #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) #define INTERRUPT_ID_BITS_SPIS 10 +#define INTERRUPT_ID_BITS_ITS 16 #define VGIC_PRI_BITS 5 #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) @@ -38,9 +39,13 @@ struct vgic_vmcr { struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq); void vgic_kick_vcpus(struct kvm *kvm); +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment); + void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); @@ -59,6 +64,14 @@ int vgic_v2_map_resources(struct kvm *kvm); int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type); +static inline void vgic_get_irq_kref(struct vgic_irq *irq) +{ + if (irq->intid < VGIC_MIN_LPI) + return; + + kref_get(&irq->refcount); +} + #ifdef CONFIG_KVM_ARM_VGIC_V3 void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); @@ -71,6 +84,10 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); +bool vgic_has_its(struct kvm *kvm); +int kvm_vgic_register_its_device(void); +void vgic_enable_lpis(struct kvm_vcpu *vcpu); +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); #else static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) { @@ -122,9 +139,28 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm, { return -ENODEV; } + +static inline bool vgic_has_its(struct kvm *kvm) +{ + return false; +} + +static inline int kvm_vgic_register_its_device(void) +{ + return -ENODEV; +} + +static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu) +{ +} + +static inline int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + return -ENODEV; +} #endif -void kvm_register_vgic_device(unsigned long type); +int kvm_register_vgic_device(unsigned long type); int vgic_lazy_init(struct kvm *kvm); int vgic_init(struct kvm *kvm); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 8db197bb6c7a..df99e9c3b64d 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -135,7 +135,8 @@ void kvm_free_irq_routing(struct kvm *kvm) free_irq_routing_table(rt); } -static int setup_routing_entry(struct kvm_irq_routing_table *rt, +static int setup_routing_entry(struct kvm *kvm, + struct kvm_irq_routing_table *rt, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { @@ -154,7 +155,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, e->gsi = ue->gsi; e->type = ue->type; - r = kvm_set_routing_entry(e, ue); + r = kvm_set_routing_entry(kvm, e, ue); if (r) goto out; if (e->type == KVM_IRQ_ROUTING_IRQCHIP) @@ -211,7 +212,7 @@ int kvm_set_irq_routing(struct kvm *kvm, kfree(e); goto out; } - r = setup_routing_entry(new, e, ue); + r = setup_routing_entry(kvm, new, e, ue); if (r) { kfree(e); goto out; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2e791367c576..cc081ccfcaa3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1444,6 +1444,52 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; } +static int hva_to_pfn_remapped(struct vm_area_struct *vma, + unsigned long addr, bool *async, + bool write_fault, kvm_pfn_t *p_pfn) +{ + unsigned long pfn; + int r; + + r = follow_pfn(vma, addr, &pfn); + if (r) { + /* + * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does + * not call the fault handler, so do it here. + */ + bool unlocked = false; + r = fixup_user_fault(current, current->mm, addr, + (write_fault ? FAULT_FLAG_WRITE : 0), + &unlocked); + if (unlocked) + return -EAGAIN; + if (r) + return r; + + r = follow_pfn(vma, addr, &pfn); + if (r) + return r; + + } + + + /* + * Get a reference here because callers of *hva_to_pfn* and + * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the + * returned pfn. This is only needed if the VMA has VM_MIXEDMAP + * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will + * simply do nothing for reserved pfns. + * + * Whoever called remap_pfn_range is also going to call e.g. + * unmap_mapping_range before the underlying pages are freed, + * causing a call to our MMU notifier. + */ + kvm_get_pfn(pfn); + + *p_pfn = pfn; + return 0; +} + /* * Pin guest page in memory and return its pfn. * @addr: host virtual address which maps memory to the guest @@ -1463,7 +1509,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, { struct vm_area_struct *vma; kvm_pfn_t pfn = 0; - int npages; + int npages, r; /* we can do it either atomically or asynchronously, not both */ BUG_ON(atomic && async); @@ -1485,14 +1531,17 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, goto exit; } +retry: vma = find_vma_intersection(current->mm, addr, addr + 1); if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; - else if ((vma->vm_flags & VM_PFNMAP)) { - pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - BUG_ON(!kvm_is_reserved_pfn(pfn)); + else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { + r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn); + if (r == -EAGAIN) + goto retry; + if (r < 0) + pfn = KVM_PFN_ERR_FAULT; } else { if (async && vma_is_valid(vma, write_fault)) *async = true; @@ -2348,9 +2397,20 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (id >= KVM_MAX_VCPU_ID) return -EINVAL; + mutex_lock(&kvm->lock); + if (kvm->created_vcpus == KVM_MAX_VCPUS) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + + kvm->created_vcpus++; + mutex_unlock(&kvm->lock); + vcpu = kvm_arch_vcpu_create(kvm, id); - if (IS_ERR(vcpu)) - return PTR_ERR(vcpu); + if (IS_ERR(vcpu)) { + r = PTR_ERR(vcpu); + goto vcpu_decrement; + } preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); @@ -2359,14 +2419,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) goto vcpu_destroy; mutex_lock(&kvm->lock); - if (!kvm_vcpu_compatible(vcpu)) { - r = -EINVAL; - goto unlock_vcpu_destroy; - } - if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { - r = -EINVAL; - goto unlock_vcpu_destroy; - } if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; goto unlock_vcpu_destroy; @@ -2399,6 +2451,10 @@ unlock_vcpu_destroy: mutex_unlock(&kvm->lock); vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); +vcpu_decrement: + mutex_lock(&kvm->lock); + kvm->created_vcpus--; + mutex_unlock(&kvm->lock); return r; } @@ -3487,6 +3543,30 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, return r; } +struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + gpa_t addr) +{ + struct kvm_io_bus *bus; + int dev_idx, srcu_idx; + struct kvm_io_device *iodev = NULL; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + + dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); + if (dev_idx < 0) + goto out_unlock; + + iodev = bus->range[dev_idx].dev; + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return iodev; +} +EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev); + static int kvm_debugfs_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt) |