summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
authorIsaku Yamahata <isaku.yamahata@intel.com>2022-12-01 00:09:28 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2022-12-29 21:48:34 +0100
commit0bf50497f03b3d892c470c7d1a10a3e9c3c95821 (patch)
treede5da829c15886daf80443c3e80235b0278a5465 /virt/kvm
parentKVM: Ensure CPU is stable during low level hardware enable/disable (diff)
downloadlinux-0bf50497f03b3d892c470c7d1a10a3e9c3c95821.tar.xz
linux-0bf50497f03b3d892c470c7d1a10a3e9c3c95821.zip
KVM: Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock
Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock now that KVM hooks CPU hotplug during the ONLINE phase, which can sleep. Previously, KVM hooked the STARTING phase, which is not allowed to sleep and thus could not take kvm_lock (a mutex). This effectively allows the task that's initiating hardware enabling/disabling to preempted and/or migrated. Note, the Documentation/virt/kvm/locking.rst statement that kvm_count_lock is "raw" because hardware enabling/disabling needs to be atomic with respect to migration is wrong on multiple fronts. First, while regular spinlocks can be preempted, the task holding the lock cannot be migrated. Second, preventing migration is not required. on_each_cpu() disables preemption, which ensures that cpus_hardware_enabled correctly reflects hardware state. The task may be preempted/migrated between bumping kvm_usage_count and invoking on_each_cpu(), but that's perfectly ok as kvm_usage_count is still protected, e.g. other tasks that call hardware_enable_all() will be blocked until the preempted/migrated owner exits its critical section. KVM does have lockless accesses to kvm_usage_count in the suspend/resume flows, but those are safe because all tasks must be frozen prior to suspending CPUs, and a task cannot be frozen while it holds one or more locks (userspace tasks are frozen via a fake signal). Preemption doesn't need to be explicitly disabled in the hotplug path. The hotplug thread is pinned to the CPU that's being hotplugged, and KVM only cares about having a stable CPU, i.e. to ensure hardware is enabled on the correct CPU. Lockep, i.e. check_preemption_disabled(), plays nice with this state too, as is_percpu_thread() is true for the hotplug thread. Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> Co-developed-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20221130230934.1014142-45-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/kvm_main.c36
1 files changed, 24 insertions, 12 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0c3b9d6fa1b1..f6caee790dc8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,7 +100,6 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
*/
DEFINE_MUTEX(kvm_lock);
-static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -5123,17 +5122,18 @@ static int kvm_online_cpu(unsigned int cpu)
* be enabled. Otherwise running VMs would encounter unrecoverable
* errors when scheduled to this CPU.
*/
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
if (kvm_usage_count) {
WARN_ON_ONCE(atomic_read(&hardware_enable_failed));
hardware_enable_nolock(NULL);
+
if (atomic_read(&hardware_enable_failed)) {
atomic_set(&hardware_enable_failed, 0);
ret = -EIO;
}
}
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
return ret;
}
@@ -5149,10 +5149,10 @@ static void hardware_disable_nolock(void *junk)
static int kvm_offline_cpu(unsigned int cpu)
{
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
if (kvm_usage_count)
hardware_disable_nolock(NULL);
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -5168,9 +5168,9 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
cpus_read_lock();
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
cpus_read_unlock();
}
@@ -5187,7 +5187,7 @@ static int hardware_enable_all(void)
* enable hardware multiple times.
*/
cpus_read_lock();
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -5200,7 +5200,7 @@ static int hardware_enable_all(void)
}
}
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
cpus_read_unlock();
return r;
@@ -5806,6 +5806,17 @@ static void kvm_init_debug(void)
static int kvm_suspend(void)
{
+ /*
+ * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
+ * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
+ * is stable. Assert that kvm_lock is not held to ensure the system
+ * isn't suspended while KVM is enabling hardware. Hardware enabling
+ * can be preempted, but the task cannot be frozen until it has dropped
+ * all locks (userspace tasks are frozen via a fake signal).
+ */
+ lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_irqs_disabled();
+
if (kvm_usage_count)
hardware_disable_nolock(NULL);
return 0;
@@ -5813,10 +5824,11 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
- if (kvm_usage_count) {
- lockdep_assert_not_held(&kvm_count_lock);
+ lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_irqs_disabled();
+
+ if (kvm_usage_count)
hardware_enable_nolock(NULL);
- }
}
static struct syscore_ops kvm_syscore_ops = {