summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm/arm.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 23:20:14 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 23:20:14 +0200
commitbf9095424d027e942e1d1ee74977e17b7df8e455 (patch)
tree57659cf68b7df09005bc5ada4d315d66472cebf3 /arch/arm64/kvm/arm.c
parentMerge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel... (diff)
parentKVM: x86: Fix the intel_pt PMI handling wrongly considered from guest (diff)
downloadlinux-bf9095424d027e942e1d1ee74977e17b7df8e455.tar.xz
linux-bf9095424d027e942e1d1ee74977e17b7df8e455.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "S390: - ultravisor communication device driver - fix TEID on terminating storage key ops RISC-V: - Added Sv57x4 support for G-stage page table - Added range based local HFENCE functions - Added remote HFENCE functions based on VCPU requests - Added ISA extension registers in ONE_REG interface - Updated KVM RISC-V maintainers entry to cover selftests support ARM: - Add support for the ARMv8.6 WFxT extension - Guard pages for the EL2 stacks - Trap and emulate AArch32 ID registers to hide unsupported features - Ability to select and save/restore the set of hypercalls exposed to the guest - Support for PSCI-initiated suspend in collaboration with userspace - GICv3 register-based LPI invalidation support - Move host PMU event merging into the vcpu data structure - GICv3 ITS save/restore fixes - The usual set of small-scale cleanups and fixes x86: - New ioctls to get/set TSC frequency for a whole VM - Allow userspace to opt out of hypercall patching - Only do MSR filtering for MSRs accessed by rdmsr/wrmsr AMD SEV improvements: - Add KVM_EXIT_SHUTDOWN metadata for SEV-ES - V_TSC_AUX support Nested virtualization improvements for AMD: - Support for "nested nested" optimizations (nested vVMLOAD/VMSAVE, nested vGIF) - Allow AVIC to co-exist with a nested guest running - Fixes for LBR virtualizations when a nested guest is running, and nested LBR virtualization support - PAUSE filtering for nested hypervisors Guest support: - Decoupling of vcpu_is_preempted from PV spinlocks" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (199 commits) KVM: x86: Fix the intel_pt PMI handling wrongly considered from guest KVM: selftests: x86: Sync the new name of the test case to .gitignore Documentation: kvm: reorder ARM-specific section about KVM_SYSTEM_EVENT_SUSPEND x86, kvm: use correct GFP flags for preemption disabled KVM: LAPIC: Drop pending LAPIC timer injection when canceling the timer x86/kvm: Alloc dummy async #PF token outside of raw spinlock KVM: x86: avoid calling x86 emulator without a decoded instruction KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak x86/fpu: KVM: Set the base guest FPU uABI size to sizeof(struct kvm_xsave) s390/uv_uapi: depend on CONFIG_S390 KVM: selftests: x86: Fix test failure on arch lbr capable platforms KVM: LAPIC: Trace LAPIC timer expiration on every vmentry KVM: s390: selftest: Test suppression indication on key prot exception KVM: s390: Don't indicate suppression on dirtying, failing memop selftests: drivers/s390x: Add uvdevice tests drivers/s390/char: Add Ultravisor io device MAINTAINERS: Update KVM RISC-V entry to cover selftests support RISC-V: KVM: Introduce ISA extension register RISC-V: KVM: Cleanup stale TLB entries when host CPU changes RISC-V: KVM: Add remote HFENCE functions based on VCPU requests ...
Diffstat (limited to 'arch/arm64/kvm/arm.c')
-rw-r--r--arch/arm64/kvm/arm.c164
1 files changed, 134 insertions, 30 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index cedc3ba2c098..400bb0fe2745 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -97,6 +97,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
}
mutex_unlock(&kvm->lock);
break;
+ case KVM_CAP_ARM_SYSTEM_SUSPEND:
+ r = 0;
+ set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
+ break;
default:
r = -EINVAL;
break;
@@ -153,9 +157,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
/* The maximum number of VCPUs is limited by the host's GIC model */
- kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
+ kvm->max_vcpus = kvm_arm_default_max_vcpus();
set_default_spectre(kvm);
+ kvm_arm_init_hypercalls(kvm);
return ret;
out_free_stage2_pgd:
@@ -210,6 +215,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
+ case KVM_CAP_ARM_SYSTEM_SUSPEND:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -230,7 +236,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
if (kvm)
- r = kvm->arch.max_vcpus;
+ r = kvm->max_vcpus;
else
r = kvm_arm_default_max_vcpus();
break;
@@ -306,7 +312,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
return -EBUSY;
- if (id >= kvm->arch.max_vcpus)
+ if (id >= kvm->max_vcpus)
return -EINVAL;
return 0;
@@ -356,11 +362,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_arm_vcpu_destroy(vcpu);
}
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
- return kvm_timer_is_pending(vcpu);
-}
-
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
@@ -432,20 +433,34 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
}
-static void vcpu_power_off(struct kvm_vcpu *vcpu)
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = true;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
+bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+}
+
+static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+ kvm_make_request(KVM_REQ_SUSPEND, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- if (vcpu->arch.power_off)
- mp_state->mp_state = KVM_MP_STATE_STOPPED;
- else
- mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ *mp_state = vcpu->arch.mp_state;
return 0;
}
@@ -457,10 +472,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state = *mp_state;
break;
case KVM_MP_STATE_STOPPED:
- vcpu_power_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
+ break;
+ case KVM_MP_STATE_SUSPENDED:
+ kvm_arm_vcpu_suspend(vcpu);
break;
default:
ret = -EINVAL;
@@ -480,7 +498,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
- && !v->arch.power_off && !v->arch.pause);
+ && !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
@@ -592,15 +610,15 @@ void kvm_arm_resume_guest(struct kvm *kvm)
}
}
-static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu)
{
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
rcuwait_wait_event(wait,
- (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+ (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
- if (vcpu->arch.power_off || vcpu->arch.pause) {
+ if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) {
/* Awaken to handle a signal, request we sleep again later. */
kvm_make_request(KVM_REQ_SLEEP, vcpu);
}
@@ -639,6 +657,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_enable();
kvm_vcpu_halt(vcpu);
+ vcpu->arch.flags &= ~KVM_ARM64_WFIT;
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable();
@@ -646,11 +665,53 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_enable();
}
-static void check_vcpu_requests(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arm_vcpu_suspended(vcpu))
+ return 1;
+
+ kvm_vcpu_wfi(vcpu);
+
+ /*
+ * The suspend state is sticky; we do not leave it until userspace
+ * explicitly marks the vCPU as runnable. Request that we suspend again
+ * later.
+ */
+ kvm_make_request(KVM_REQ_SUSPEND, vcpu);
+
+ /*
+ * Check to make sure the vCPU is actually runnable. If so, exit to
+ * userspace informing it of the wakeup condition.
+ */
+ if (kvm_arch_vcpu_runnable(vcpu)) {
+ memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP;
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ return 0;
+ }
+
+ /*
+ * Otherwise, we were unblocked to process a different event, such as a
+ * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to
+ * process the event.
+ */
+ return 1;
+}
+
+/**
+ * check_vcpu_requests - check and handle pending vCPU requests
+ * @vcpu: the VCPU pointer
+ *
+ * Return: 1 if we should enter the guest
+ * 0 if we should exit to userspace
+ * < 0 if we should exit to userspace, where the return value indicates
+ * an error
+ */
+static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
- vcpu_req_sleep(vcpu);
+ kvm_vcpu_sleep(vcpu);
if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
kvm_reset_vcpu(vcpu);
@@ -675,7 +736,12 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
kvm_pmu_handle_pmcr(vcpu,
__vcpu_sys_reg(vcpu, PMCR_EL0));
+
+ if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
+ return kvm_vcpu_suspend(vcpu);
}
+
+ return 1;
}
static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
@@ -792,7 +858,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (!ret)
ret = 1;
- check_vcpu_requests(vcpu);
+ if (ret > 0)
+ ret = check_vcpu_requests(vcpu);
/*
* Preparing the interrupts to be injected also
@@ -816,6 +883,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_vgic_flush_hwstate(vcpu);
+ kvm_pmu_update_vcpu_events(vcpu);
+
/*
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
@@ -1125,9 +1194,9 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* Handle the "start in power-off" case.
*/
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- vcpu_power_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
else
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
return 0;
}
@@ -1485,7 +1554,6 @@ static void cpu_prepare_hyp_mode(int cpu)
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
params->tcr_el2 = tcr;
- params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
params->pgd_pa = kvm_mmu_get_httbr();
if (is_protected_kvm_enabled())
params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
@@ -1763,8 +1831,6 @@ static int init_subsystems(void)
kvm_register_perf_callbacks(NULL);
- kvm_sys_reg_table_init();
-
out:
if (err || !is_protected_kvm_enabled())
on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
@@ -1935,14 +2001,46 @@ static int init_hyp_mode(void)
* Map the Hyp stack pages
*/
for_each_possible_cpu(cpu) {
+ struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
- err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
- PAGE_HYP);
+ unsigned long hyp_addr;
+
+ /*
+ * Allocate a contiguous HYP private VA range for the stack
+ * and guard page. The allocation is also aligned based on
+ * the order of its size.
+ */
+ err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ if (err) {
+ kvm_err("Cannot allocate hyp stack guard page\n");
+ goto out_err;
+ }
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
+ __pa(stack_page), PAGE_HYP);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
}
+
+ /*
+ * Save the stack PA in nvhe_init_params. This will be needed
+ * to recreate the stack mapping in protected nVHE mode.
+ * __hyp_pa() won't do the right thing there, since the stack
+ * has been mapped in the flexible private VA space.
+ */
+ params->stack_pa = __pa(stack_page);
+
+ params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
for_each_possible_cpu(cpu) {
@@ -2091,6 +2189,12 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
+ err = kvm_sys_reg_table_init();
+ if (err) {
+ kvm_info("Error initializing system register tables");
+ return err;
+ }
+
in_hyp_mode = is_kernel_in_hyp_mode();
if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||