diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2014-12-15 13:06:40 +0100 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2014-12-15 13:06:40 +0100 |
commit | 333bce5aac9e8cb7f6b27e0122a224d17be4dd5d (patch) | |
tree | 7a171a51d9a999f90dd6f17f2b7dd0997dbc1a38 | |
parent | KVM: x86: em_ret_far overrides cpl (diff) | |
parent | arm/arm64: KVM: Require in-kernel vgic for the arch timers (diff) | |
download | linux-333bce5aac9e8cb7f6b27e0122a224d17be4dd5d.tar.xz linux-333bce5aac9e8cb7f6b27e0122a224d17be4dd5d.zip |
Merge tag 'kvm-arm-for-3.19-take2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
Second round of changes for KVM for arm/arm64 for v3.19; fixes reboot
problems, clarifies VCPU init, and fixes a regression concerning the
VGIC init flow.
Conflicts:
arch/ia64/kvm/kvm-ia64.c [deleted in HEAD and modified in kvmarm]
-rw-r--r-- | Documentation/virtual/kvm/api.txt | 17 | ||||
-rw-r--r-- | arch/arm/include/asm/kvm_emulate.h | 5 | ||||
-rw-r--r-- | arch/arm/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/kvm_mmu.h | 6 | ||||
-rw-r--r-- | arch/arm/kvm/arm.c | 78 | ||||
-rw-r--r-- | arch/arm/kvm/guest.c | 26 | ||||
-rw-r--r-- | arch/arm/kvm/mmio.c | 15 | ||||
-rw-r--r-- | arch/arm/kvm/mmu.c | 99 | ||||
-rw-r--r-- | arch/arm/kvm/psci.c | 18 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_emulate.h | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 3 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_mmu.h | 6 | ||||
-rw-r--r-- | arch/arm64/kvm/guest.c | 26 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 6 | ||||
-rw-r--r-- | include/kvm/arm_arch_timer.h | 10 | ||||
-rw-r--r-- | include/kvm/arm_vgic.h | 12 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 3 | ||||
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 30 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.c | 116 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 16 |
20 files changed, 335 insertions, 164 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 80bfe59fc992..0007fef4ed81 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2478,9 +2478,15 @@ return ENOEXEC for that vcpu. Note that because some registers reflect machine topology, all vcpus should be created before this ioctl is invoked. +Userspace can call this function multiple times for a given vcpu, including +after the vcpu has been run. This will reset the vcpu to its initial +state. All calls to this function after the initial call must use the same +target and same set of feature flags, otherwise EINVAL will be returned. + Possible features: - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. - Depends on KVM_CAP_ARM_PSCI. + Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on + and execute guest code when KVM_RUN is called. - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. @@ -2976,6 +2982,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes the system-level event type. The 'flags' field describes architecture specific flags for the system-level event. +Valid values for 'type' are: + KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the + VM. Userspace is not obliged to honour this, and if it does honour + this does not need to destroy the VM synchronously (ie it may call + KVM_RUN again before shutdown finally occurs). + KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM. + As with SHUTDOWN, userspace can choose to ignore the request, or + to schedule the reset to occur in the future and may call KVM_RUN again. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index b9db269c6e61..66ce17655bb9 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr = HCR_GUEST_MASK; +} + static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 53036e21756b..254e0650e48b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,8 +150,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index acb0d5712716..63e0ecc04901 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); /* diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9e193c8a959e..2d6d91001062 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) int err; struct kvm_vcpu *vcpu; + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) { + err = -EBUSY; + goto out; + } + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) { err = -ENOMEM; @@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; int ret; if (likely(vcpu->arch.has_run_once)) @@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; /* - * Initialize the VGIC before running a vcpu the first time on - * this VM. + * Map the VGIC hardware resources before running a vcpu the first + * time on this VM. */ - if (unlikely(!vgic_initialized(vcpu->kvm))) { - ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); if (ret) return ret; } + /* + * Enable the arch timers only if we have an in-kernel VGIC + * and it has been properly initialized, since we cannot handle + * interrupts from the virtual timer with a userspace gic. + */ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + kvm_timer_enable(kvm); + return 0; } @@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same target. + */ + if (vcpu->arch.target != -1 && vcpu->arch.target != init->target) + return -EINVAL; + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + bool set = (init->features[i / 32] & (1 << (i % 32))); + + if (set && i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same feature set. + */ + if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES && + test_bit(i, vcpu->arch.features) != set) + return -EINVAL; + + if (set) + set_bit(i, vcpu->arch.features); + } + + vcpu->arch.target = phys_target; + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + + static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { @@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, return ret; /* + * Ensure a rebooted VM will fault in RAM pages and detect if the + * guest MMU is turned off and flush the caches as needed. + */ + if (vcpu->arch.has_run_once) + stage2_unmap_vm(vcpu->kvm); + + vcpu_reset_hcr(vcpu); + + /* * Handle the "start in power-off" case by marking the VCPU as paused. */ - if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) vcpu->arch.pause = true; + else + vcpu->arch.pause = false; return 0; } diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index cc0b78769bd8..384bab67c462 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } @@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void) } } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - - /* We can only cope with guest==host and only on A15/A7 (for now). */ - if (init->target != kvm_target_cpu()) - return -EINVAL; - - vcpu->arch.target = init->target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (test_bit(i, (void *)init->features)) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 4cb5a93182e9..5d3bfc0eb3f0 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); - trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : - KVM_TRACE_MMIO_READ_UNSATISFIED, - mmio.len, fault_ipa, - (mmio.is_write) ? data : 0); + if (mmio.is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), + mmio.len); - if (mmio.is_write) + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, + fault_ipa, data); mmio_write_buf(mmio.data, mmio.len, data); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + fault_ipa, 0); + } if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 57a403a5c22b..3756dd3e85c2 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -611,6 +611,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) unmap_range(kvm, kvm->arch.pgd, start, size); } +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any reguler RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + /** * kvm_free_stage2_pgd - free all stage-2 tables * @kvm: The KVM struct pointer for the VM. @@ -834,6 +899,11 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) return kvm_vcpu_dabt_iswrite(vcpu); } +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -847,6 +917,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct vm_area_struct *vma; pfn_t pfn; pgprot_t mem_type = PAGE_S2; + bool fault_ipa_uncached; write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { @@ -904,7 +975,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_pfn(pfn)) return -EFAULT; - if (kvm_is_mmio_pfn(pfn)) + if (kvm_is_device_pfn(pfn)) mem_type = PAGE_S2_DEVICE; spin_lock(&kvm->mmu_lock); @@ -913,6 +984,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!hugetlb && !force_pte) hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT; + if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); @@ -920,7 +993,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, + fault_ipa_uncached); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -928,7 +1002,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, + fault_ipa_uncached); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } @@ -1288,11 +1363,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = vm_end; } while (hva < reg_end); - if (ret) { - spin_lock(&kvm->mmu_lock); + spin_lock(&kvm->mmu_lock); + if (ret) unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); - spin_unlock(&kvm->mmu_lock); - } + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); return ret; } @@ -1304,6 +1380,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { + /* + * Readonly memslots are not incoherent with the caches by definition, + * but in practice, they are used mostly to emulate ROMs or NOR flashes + * that the guest may consider devices and hence map as uncached. + * To prevent incoherency issues in these cases, tag all readonly + * regions as incoherent. + */ + if (slot->flags & KVM_MEM_READONLY) + slot->flags |= KVM_MEMSLOT_INCOHERENT; return 0; } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 09cf37737ee2..58cb3248d277 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -15,6 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/preempt.h> #include <linux/kvm_host.h> #include <linux/wait.h> @@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) { + int i; + struct kvm_vcpu *tmp; + + /* + * The KVM ABI specifies that a system event exit may call KVM_RUN + * again and may perform shutdown/reboot at a later time that when the + * actual request is made. Since we are implementing PSCI and a + * caller of PSCI reboot and shutdown expects that the system shuts + * down or reboots immediately, let's make sure that VCPUs are not run + * after this call is handled and before the VCPUs have been + * re-initialized. + */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + tmp->arch.pause = true; + kvm_vcpu_kick(tmp); + } + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5674a55b5518..8127e45e2637 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2012c4ba8d67..0b7dfdb931df 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -165,8 +165,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); @@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); +void force_vm_exit(const cpumask_t *mask); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 0caf7a59f6a1..14a74f136272 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, @@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); if (!icache_is_aliasing()) { /* PIPT */ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 76794692c20b..9535bd555d1d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; return 0; } @@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - int phys_target = kvm_target_cpu(); - - if (init->target != phys_target) - return -EINVAL; - - vcpu->arch.target = phys_target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (init->features[i / 32] & (1 << (i % 32))) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4ea0dcb0b21b..10fbed126b11 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -629,7 +629,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) * kvm mmu, before reclaiming the page, we should * unmap it from mmu first. */ - WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn))); + WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn))); if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); @@ -2460,7 +2460,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_PAGE_SIZE_MASK; if (tdp_enabled) spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, - kvm_is_mmio_pfn(pfn)); + kvm_is_reserved_pfn(pfn)); if (host_writable) spte |= SPTE_HOST_WRITEABLE; @@ -2736,7 +2736,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, * PT_PAGE_TABLE_LEVEL and there would be no adjustment done * here. */ - if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) && + if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompound(pfn_to_page(pfn)) && !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) { diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ad9db6045b2f..b3f45a578344 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -60,7 +60,8 @@ struct arch_timer_cpu { #ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); -int kvm_timer_init(struct kvm *kvm); +void kvm_timer_enable(struct kvm *kvm); +void kvm_timer_init(struct kvm *kvm); void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); @@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void) return 0; }; -static inline int kvm_timer_init(struct kvm *kvm) -{ - return 0; -} - +static inline void kvm_timer_enable(struct kvm *kvm) {} +static inline void kvm_timer_init(struct kvm *kvm) {} static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq) {} static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 206dcc3b3f7a..ac4888dc86bc 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -274,7 +274,7 @@ struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); -int kvm_vgic_init(struct kvm *kvm); +int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -287,7 +287,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) -#define vgic_initialized(k) ((k)->arch.vgic.ready) +#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) +#define vgic_ready(k) ((k)->arch.vgic.ready) int vgic_v2_probe(struct device_node *vgic_node, const struct vgic_ops **ops, @@ -321,7 +322,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, return -ENXIO; } -static inline int kvm_vgic_init(struct kvm *kvm) +static inline int kvm_vgic_map_resources(struct kvm *kvm) { return 0; } @@ -373,6 +374,11 @@ static inline bool vgic_initialized(struct kvm *kvm) { return true; } + +static inline bool vgic_ready(struct kvm *kvm) +{ + return true; +} #endif #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 193bca68372d..26f106022c88 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -43,6 +43,7 @@ * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INCOHERENT (1UL << 17) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 @@ -712,7 +713,7 @@ void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); -bool kvm_is_mmio_pfn(pfn_t pfn); +bool kvm_is_reserved_pfn(pfn_t pfn); struct kvm_irq_ack_notifier { struct hlist_node link; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 22fa819a9b6a..1c0772b340d8 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer) static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) { + int ret; struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer->irq->irq, - timer->irq->level); + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer->irq->irq, + timer->irq->level); + WARN_ON(ret); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) timer_disarm(timer); } -int kvm_timer_init(struct kvm *kvm) +void kvm_timer_enable(struct kvm *kvm) { - if (timecounter && wqueue) { - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); + if (kvm->arch.timer.enabled) + return; + + /* + * There is a potential race here between VCPUs starting for the first + * time, which may be enabling the timer multiple times. That doesn't + * hurt though, because we're just setting a variable to the same + * variable that it already was. The important thing is that all + * VCPUs have the enabled variable set, before entering the guest, if + * the arch timers are enabled. + */ + if (timecounter && wqueue) kvm->arch.timer.enabled = 1; - } +} - return 0; +void kvm_timer_init(struct kvm *kvm) +{ + kvm->arch.timer.cntvoff = kvm_phys_timer_read(); } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 3aaca49de325..e373b76c5420 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -91,6 +91,7 @@ #define ACCESS_WRITE_VALUE (3 << 1) #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) +static int vgic_init(struct kvm *kvm); static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); @@ -1607,7 +1608,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) } } -static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, +static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -1643,9 +1644,10 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, vgic_dist_irq_clear_level(vcpu, irq_num); if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) vgic_dist_irq_clear_pending(vcpu, irq_num); - } else { - vgic_dist_irq_clear_pending(vcpu, irq_num); } + + ret = false; + goto out; } enabled = vgic_irq_is_enabled(vcpu, irq_num); @@ -1672,7 +1674,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, out: spin_unlock(&dist->lock); - return ret; + return ret ? cpuid : -EINVAL; } /** @@ -1692,11 +1694,26 @@ out: int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (likely(vgic_initialized(kvm)) && - vgic_update_irq_pending(kvm, cpuid, irq_num, level)) - vgic_kick_vcpus(kvm); + int ret = 0; + int vcpu_id; - return 0; + if (unlikely(!vgic_initialized(kvm))) { + mutex_lock(&kvm->lock); + ret = vgic_init(kvm); + mutex_unlock(&kvm->lock); + + if (ret) + goto out; + } + + vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); + if (vcpu_id >= 0) { + /* kick the specified vcpu */ + kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id)); + } + +out: + return ret; } static irqreturn_t vgic_maintenance_handler(int irq, void *data) @@ -1726,39 +1743,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); + vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } - return 0; -} - -/** - * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state - * @vcpu: pointer to the vcpu struct - * - * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to - * this vcpu and enable the VGIC for this VCPU - */ -static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i; - - for (i = 0; i < dist->nr_irqs; i++) { - if (i < VGIC_NR_PPIS) - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, VGIC_CFG_EDGE); - - vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; - } + memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); /* * Store the number of LRs per vcpu, so we don't have to go @@ -1767,7 +1759,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) */ vgic_cpu->nr_lr = vgic->nr_lr; - vgic_enable(vcpu); + return 0; } void kvm_vgic_destroy(struct kvm *kvm) @@ -1798,20 +1790,21 @@ void kvm_vgic_destroy(struct kvm *kvm) dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; dist->irq_pending_on_cpu = NULL; + dist->nr_cpus = 0; } /* * Allocate and initialize the various data structures. Must be called * with kvm->lock held! */ -static int vgic_init_maps(struct kvm *kvm) +static int vgic_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; int nr_cpus, nr_irqs; - int ret, i; + int ret, i, vcpu_id; - if (dist->nr_cpus) /* Already allocated */ + if (vgic_initialized(kvm)) return 0; nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); @@ -1859,16 +1852,28 @@ static int vgic_init_maps(struct kvm *kvm) if (ret) goto out; - kvm_for_each_vcpu(i, vcpu, kvm) { + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + + kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { ret = vgic_vcpu_init_maps(vcpu, nr_irqs); if (ret) { kvm_err("VGIC: Failed to allocate vcpu memory\n"); break; } - } - for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); + for (i = 0; i < dist->nr_irqs; i++) { + if (i < VGIC_NR_PPIS) + vgic_bitmap_set_irq_val(&dist->irq_enabled, + vcpu->vcpu_id, i, 1); + if (i < VGIC_NR_PRIVATE_IRQS) + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, + VGIC_CFG_EDGE); + } + + vgic_enable(vcpu); + } out: if (ret) @@ -1878,25 +1883,23 @@ out: } /** - * kvm_vgic_init - Initialize global VGIC state before running any VCPUs + * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct * * Map the virtual CPU interface into the VM before running any VCPUs. We * can't do this at creation time, because user space must first set the - * virtual CPU interface address in the guest physical address space. Also - * initialize the ITARGETSRn regs to 0 on the emulated distributor. + * virtual CPU interface address in the guest physical address space. */ -int kvm_vgic_init(struct kvm *kvm) +int kvm_vgic_map_resources(struct kvm *kvm) { - struct kvm_vcpu *vcpu; - int ret = 0, i; + int ret = 0; if (!irqchip_in_kernel(kvm)) return 0; mutex_lock(&kvm->lock); - if (vgic_initialized(kvm)) + if (vgic_ready(kvm)) goto out; if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || @@ -1906,7 +1909,11 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - ret = vgic_init_maps(kvm); + /* + * Initialize the vgic if this hasn't already been done on demand by + * accessing the vgic state from userspace. + */ + ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); goto out; @@ -1920,9 +1927,6 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_init(vcpu); - kvm->arch.vgic.ready = true; out: if (ret) @@ -2167,7 +2171,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - ret = vgic_init_maps(dev->kvm); + ret = vgic_init(dev->kvm); if (ret) goto out; @@ -2289,7 +2293,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) mutex_lock(&dev->kvm->lock); - if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) + if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) ret = -EBUSY; else dev->kvm->arch.vgic.nr_irqs = val; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c5c186af823b..f5283438ee05 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -107,10 +107,10 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; -bool kvm_is_mmio_pfn(pfn_t pfn) +bool kvm_is_reserved_pfn(pfn_t pfn) { if (pfn_valid(pfn)) - return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); + return PageReserved(pfn_to_page(pfn)); return true; } @@ -1301,7 +1301,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, else if ((vma->vm_flags & VM_PFNMAP)) { pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - BUG_ON(!kvm_is_mmio_pfn(pfn)); + BUG_ON(!kvm_is_reserved_pfn(pfn)); } else { if (async && vma_is_valid(vma, write_fault)) *async = true; @@ -1407,7 +1407,7 @@ static struct page *kvm_pfn_to_page(pfn_t pfn) if (is_error_noslot_pfn(pfn)) return KVM_ERR_PTR_BAD_PAGE; - if (kvm_is_mmio_pfn(pfn)) { + if (kvm_is_reserved_pfn(pfn)) { WARN_ON(1); return KVM_ERR_PTR_BAD_PAGE; } @@ -1436,7 +1436,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) + if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -1457,7 +1457,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn) void kvm_set_pfn_dirty(pfn_t pfn) { - if (!kvm_is_mmio_pfn(pfn)) { + if (!kvm_is_reserved_pfn(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) SetPageDirty(page); @@ -1467,14 +1467,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(pfn_t pfn) { - if (!kvm_is_mmio_pfn(pfn)) + if (!kvm_is_reserved_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); void kvm_get_pfn(pfn_t pfn) { - if (!kvm_is_mmio_pfn(pfn)) + if (!kvm_is_reserved_pfn(pfn)) get_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_get_pfn); |