diff options
Diffstat (limited to 'arch/arm64/kvm/arm.c')
-rw-r--r-- | arch/arm64/kvm/arm.c | 164 |
1 files changed, 134 insertions, 30 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index cedc3ba2c098..400bb0fe2745 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -97,6 +97,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->lock); break; + case KVM_CAP_ARM_SYSTEM_SUSPEND: + r = 0; + set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); + break; default: r = -EINVAL; break; @@ -153,9 +157,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); /* The maximum number of VCPUs is limited by the host's GIC model */ - kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); + kvm->max_vcpus = kvm_arm_default_max_vcpus(); set_default_spectre(kvm); + kvm_arm_init_hypercalls(kvm); return ret; out_free_stage2_pgd: @@ -210,6 +215,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: + case KVM_CAP_ARM_SYSTEM_SUSPEND: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -230,7 +236,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: if (kvm) - r = kvm->arch.max_vcpus; + r = kvm->max_vcpus; else r = kvm_arm_default_max_vcpus(); break; @@ -306,7 +312,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) return -EBUSY; - if (id >= kvm->arch.max_vcpus) + if (id >= kvm->max_vcpus) return -EINVAL; return 0; @@ -356,11 +362,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_arm_vcpu_destroy(vcpu); } -int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) -{ - return kvm_timer_is_pending(vcpu); -} - void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { @@ -432,20 +433,34 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -static void vcpu_power_off(struct kvm_vcpu *vcpu) +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } +bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; +} + +static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + kvm_vcpu_kick(vcpu); +} + +static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = vcpu->arch.mp_state; return 0; } @@ -457,10 +472,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + vcpu->arch.mp_state = *mp_state; break; case KVM_MP_STATE_STOPPED: - vcpu_power_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); + break; + case KVM_MP_STATE_SUSPENDED: + kvm_arm_vcpu_suspend(vcpu); break; default: ret = -EINVAL; @@ -480,7 +498,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) - && !v->arch.power_off && !v->arch.pause); + && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) @@ -592,15 +610,15 @@ void kvm_arm_resume_guest(struct kvm *kvm) } } -static void vcpu_req_sleep(struct kvm_vcpu *vcpu) +static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu) { struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) &&(!vcpu->arch.pause), + (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* Awaken to handle a signal, request we sleep again later. */ kvm_make_request(KVM_REQ_SLEEP, vcpu); } @@ -639,6 +657,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); kvm_vcpu_halt(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_WFIT; kvm_clear_request(KVM_REQ_UNHALT, vcpu); preempt_disable(); @@ -646,11 +665,53 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); } -static void check_vcpu_requests(struct kvm_vcpu *vcpu) +static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_vcpu_suspended(vcpu)) + return 1; + + kvm_vcpu_wfi(vcpu); + + /* + * The suspend state is sticky; we do not leave it until userspace + * explicitly marks the vCPU as runnable. Request that we suspend again + * later. + */ + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + + /* + * Check to make sure the vCPU is actually runnable. If so, exit to + * userspace informing it of the wakeup condition. + */ + if (kvm_arch_vcpu_runnable(vcpu)) { + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + return 0; + } + + /* + * Otherwise, we were unblocked to process a different event, such as a + * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to + * process the event. + */ + return 1; +} + +/** + * check_vcpu_requests - check and handle pending vCPU requests + * @vcpu: the VCPU pointer + * + * Return: 1 if we should enter the guest + * 0 if we should exit to userspace + * < 0 if we should exit to userspace, where the return value indicates + * an error + */ +static int check_vcpu_requests(struct kvm_vcpu *vcpu) { if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) - vcpu_req_sleep(vcpu); + kvm_vcpu_sleep(vcpu); if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) kvm_reset_vcpu(vcpu); @@ -675,7 +736,12 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) kvm_pmu_handle_pmcr(vcpu, __vcpu_sys_reg(vcpu, PMCR_EL0)); + + if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) + return kvm_vcpu_suspend(vcpu); } + + return 1; } static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) @@ -792,7 +858,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (!ret) ret = 1; - check_vcpu_requests(vcpu); + if (ret > 0) + ret = check_vcpu_requests(vcpu); /* * Preparing the interrupts to be injected also @@ -816,6 +883,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vgic_flush_hwstate(vcpu); + kvm_pmu_update_vcpu_events(vcpu); + /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. @@ -1125,9 +1194,9 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * Handle the "start in power-off" case. */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - vcpu_power_off(vcpu); + kvm_arm_vcpu_power_off(vcpu); else - vcpu->arch.power_off = false; + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; return 0; } @@ -1485,7 +1554,6 @@ static void cpu_prepare_hyp_mode(int cpu) tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; params->tcr_el2 = tcr; - params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); if (is_protected_kvm_enabled()) params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; @@ -1763,8 +1831,6 @@ static int init_subsystems(void) kvm_register_perf_callbacks(NULL); - kvm_sys_reg_table_init(); - out: if (err || !is_protected_kvm_enabled()) on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); @@ -1935,14 +2001,46 @@ static int init_hyp_mode(void) * Map the Hyp stack pages */ for_each_possible_cpu(cpu) { + struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); - err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, - PAGE_HYP); + unsigned long hyp_addr; + + /* + * Allocate a contiguous HYP private VA range for the stack + * and guard page. The allocation is also aligned based on + * the order of its size. + */ + err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + if (err) { + kvm_err("Cannot allocate hyp stack guard page\n"); + goto out_err; + } + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE, + __pa(stack_page), PAGE_HYP); if (err) { kvm_err("Cannot map hyp stack\n"); goto out_err; } + + /* + * Save the stack PA in nvhe_init_params. This will be needed + * to recreate the stack mapping in protected nVHE mode. + * __hyp_pa() won't do the right thing there, since the stack + * has been mapped in the flexible private VA space. + */ + params->stack_pa = __pa(stack_page); + + params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } for_each_possible_cpu(cpu) { @@ -2091,6 +2189,12 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + err = kvm_sys_reg_table_init(); + if (err) { + kvm_info("Error initializing system register tables"); + return err; + } + in_hyp_mode = is_kernel_in_hyp_mode(); if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || |