From 0acc7239c20a8401b8968c2adace8f7c9b0295ae Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:44 +0000 Subject: KVM: arm64: Avoid vcpu->mutex v. kvm->lock inversion in CPU_ON KVM/arm64 had the lock ordering backwards on vcpu->mutex and kvm->lock from the very beginning. One such example is the way vCPU resets are handled: the kvm->lock is acquired while handling a guest CPU_ON PSCI call. Add a dedicated lock to serialize writes to kvm_vcpu_arch::{mp_state, reset_state}. Promote all accessors of mp_state to {READ,WRITE}_ONCE() as readers do not acquire the mp_state_lock. While at it, plug yet another race by taking the mp_state_lock in the KVM_SET_MP_STATE ioctl handler. As changes to MP state are now guarded with a dedicated lock, drop the kvm->lock acquisition from the PSCI CPU_ON path. Similarly, move the reader of reset_state outside of the kvm->lock and instead protect it with the mp_state_lock. Note that writes to reset_state::reset have been demoted to regular stores as both readers and writers acquire the mp_state_lock. While the kvm->lock inversion still exists in kvm_reset_vcpu(), at least now PSCI CPU_ON no longer depends on it for serializing vCPU reset. Cc: stable@vger.kernel.org Tested-by: Jeremy Linton Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-2-oliver.upton@linux.dev --- arch/arm64/kvm/psci.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kvm/psci.c') diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 7fbc4c1b9df0..5767e6baa61a 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct vcpu_reset_state *reset_state; struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; + int ret = PSCI_RET_SUCCESS; unsigned long cpu_id; cpu_id = smccc_get_arg1(source_vcpu); @@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; + + spin_lock(&vcpu->arch.mp_state_lock); if (!kvm_arm_vcpu_stopped(vcpu)) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) - return PSCI_RET_ALREADY_ON; + ret = PSCI_RET_ALREADY_ON; else - return PSCI_RET_INVALID_PARAMS; + ret = PSCI_RET_INVALID_PARAMS; + + goto out_unlock; } reset_state = &vcpu->arch.reset_state; @@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ reset_state->r0 = smccc_get_arg3(source_vcpu); - WRITE_ONCE(reset_state->reset, true); + reset_state->reset = true; kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); /* @@ -108,7 +113,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; kvm_vcpu_wake_up(vcpu); - return PSCI_RET_SUCCESS; +out_unlock: + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) @@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) * after this call is handled and before the VCPUs have been * re-initialized. */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&tmp->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&tmp->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); @@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; int ret = 1; @@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) kvm_psci_narrow_to_32bit(vcpu); fallthrough; case PSCI_0_2_FN64_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: kvm_psci_narrow_to_32bit(vcpu); @@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; @@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; default: val = PSCI_RET_NOT_SUPPORTED; -- cgit v1.2.3