summary | refs | log | tree | commit | diff | stats
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r-- virt/kvm/Kconfig 1
-rw-r--r-- virt/kvm/arm/aarch32.c 97
-rw-r--r-- virt/kvm/arm/arch_timer.c 452
-rw-r--r-- virt/kvm/arm/arm.c 79
-rw-r--r-- virt/kvm/arm/hyp/timer-sr.c 74
-rw-r--r-- virt/kvm/arm/trace.h 1
-rw-r--r-- virt/kvm/arm/vgic/trace.h 1
-rw-r--r-- virt/kvm/arm/vgic/vgic-its.c 272
-rw-r--r-- virt/kvm/arm/vgic/vgic-mmio-v2.c 22
-rw-r--r-- virt/kvm/arm/vgic/vgic-mmio-v3.c 17
-rw-r--r-- virt/kvm/arm/vgic/vgic-mmio.c 44
-rw-r--r-- virt/kvm/arm/vgic/vgic-v2.c 5
-rw-r--r-- virt/kvm/arm/vgic/vgic-v3.c 12
-rw-r--r-- virt/kvm/arm/vgic/vgic.c 62
-rw-r--r-- virt/kvm/arm/vgic/vgic.h 3
-rw-r--r-- virt/kvm/coalesced_mmio.c 1
-rw-r--r-- virt/kvm/coalesced_mmio.h 1
-rw-r--r-- virt/kvm/eventfd.c 2
-rw-r--r-- virt/kvm/kvm_main.c 8
-rw-r--r-- virt/kvm/vfio.h 1
20 files changed, 745 insertions, 410 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index b0cc1a34db27..70691c08e1ed 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# KVM common configuration items and defaults
config HAVE_KVM
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index 79c7c357804b..8bc479fa37e6 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -25,11 +25,6 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
-#ifndef CONFIG_ARM64
-#define COMPAT_PSR_T_BIT PSR_T_BIT
-#define COMPAT_PSR_IT_MASK PSR_IT_MASK
-#endif
-
/*
* stolen from arch/arm/kernel/opcodes.c
*
@@ -150,3 +145,95 @@ void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
*vcpu_pc(vcpu) += 4;
kvm_adjust_itstate(vcpu);
}
+
+/*
+ * Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
+ */
+static const u8 return_offsets[8][2] = {
+ [0] = { 0, 0 }, /* Reset, unused */
+ [1] = { 4, 2 }, /* Undefined */
+ [2] = { 0, 0 }, /* SVC, unused */
+ [3] = { 4, 4 }, /* Prefetch abort */
+ [4] = { 8, 8 }, /* Data abort */
+ [5] = { 0, 0 }, /* HVC, unused */
+ [6] = { 4, 4 }, /* IRQ, unused */
+ [7] = { 4, 4 }, /* FIQ, unused */
+};
+
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+ unsigned long cpsr;
+ unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+ bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
+ u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+
+ cpsr = mode | COMPAT_PSR_I_BIT;
+
+ if (sctlr & (1 << 30))
+ cpsr |= COMPAT_PSR_T_BIT;
+ if (sctlr & (1 << 25))
+ cpsr |= COMPAT_PSR_E_BIT;
+
+ *vcpu_cpsr(vcpu) = cpsr;
+
+ /* Note: These now point to the banked copies */
+ *vcpu_spsr(vcpu) = new_spsr_value;
+ *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+ /* Branch to exception vector */
+ if (sctlr & (1 << 13))
+ vect_offset += 0xffff0000;
+ else /* always have security exceptions */
+ vect_offset += vcpu_cp15(vcpu, c12_VBAR);
+
+ *vcpu_pc(vcpu) = vect_offset;
+}
+
+void kvm_inject_undef32(struct kvm_vcpu *vcpu)
+{
+ prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+ unsigned long addr)
+{
+ u32 vect_offset;
+ u32 *far, *fsr;
+ bool is_lpae;
+
+ if (is_pabt) {
+ vect_offset = 12;
+ far = &vcpu_cp15(vcpu, c6_IFAR);
+ fsr = &vcpu_cp15(vcpu, c5_IFSR);
+ } else { /* !iabt */
+ vect_offset = 16;
+ far = &vcpu_cp15(vcpu, c6_DFAR);
+ fsr = &vcpu_cp15(vcpu, c5_DFSR);
+ }
+
+ prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset);
+
+ *far = addr;
+
+ /* Give the guest an IMPLEMENTATION DEFINED exception */
+ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
+ if (is_lpae)
+ *fsr = 1 << 9 | 0x34;
+ else
+ *fsr = 0x14;
+}
+
+void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ inject_abt32(vcpu, false, addr);
+}
+
+void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ inject_abt32(vcpu, true, addr);
+}
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 8e89d63005c7..4db54ff08d9e 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -46,49 +46,68 @@ static const struct kvm_irq_level default_vtimer_irq = {
.level = 1,
};
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
- vcpu_vtimer(vcpu)->active_cleared_last = false;
-}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+ struct arch_timer_context *timer_ctx);
+static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
u64 kvm_phys_timer_read(void)
{
return timecounter->cc->read(timecounter->cc);
}
-static bool timer_is_armed(struct arch_timer_cpu *timer)
+static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
- return timer->armed;
+ hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
+ HRTIMER_MODE_ABS);
}
-/* timer_arm: as in "arm the timer", not as in ARM the company */
-static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
+static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
{
- timer->armed = true;
- hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
- HRTIMER_MODE_ABS);
+ hrtimer_cancel(hrt);
+ if (work)
+ cancel_work_sync(work);
}
-static void timer_disarm(struct arch_timer_cpu *timer)
+static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
{
- if (timer_is_armed(timer)) {
- hrtimer_cancel(&timer->timer);
- cancel_work_sync(&timer->expired);
- timer->armed = false;
- }
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ /*
+ * When using a userspace irqchip with the architected timers, we must
+ * prevent continuously exiting from the guest, and therefore mask the
+ * physical interrupt by disabling it on the host interrupt controller
+ * when the virtual level is high, such that the guest can make
+ * forward progress. Once we detect the output level being
+ * de-asserted, we unmask the interrupt again so that we exit from the
+ * guest when the timer fires.
+ */
+ if (vtimer->irq.level)
+ disable_percpu_irq(host_vtimer_irq);
+ else
+ enable_percpu_irq(host_vtimer_irq, 0);
}
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+ struct arch_timer_context *vtimer;
+
+ if (!vcpu) {
+ pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
+ return IRQ_NONE;
+ }
+ vtimer = vcpu_vtimer(vcpu);
+
+ if (!vtimer->irq.level) {
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ if (kvm_timer_irq_can_fire(vtimer))
+ kvm_timer_update_irq(vcpu, true, vtimer);
+ }
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ kvm_vtimer_update_mask_user(vcpu);
- /*
- * We disable the timer in the world switch and let it be
- * handled by kvm_timer_sync_hwstate(). Getting a timer
- * interrupt at this point is a sure sign of some major
- * breakage.
- */
- pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
return IRQ_HANDLED;
}
@@ -158,13 +177,13 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
return min(min_virt, min_phys);
}
-static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
+static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
struct arch_timer_cpu *timer;
struct kvm_vcpu *vcpu;
u64 ns;
- timer = container_of(hrt, struct arch_timer_cpu, timer);
+ timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
/*
@@ -182,7 +201,33 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
return HRTIMER_NORESTART;
}
-bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
+static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
+{
+ struct arch_timer_context *ptimer;
+ struct arch_timer_cpu *timer;
+ struct kvm_vcpu *vcpu;
+ u64 ns;
+
+ timer = container_of(hrt, struct arch_timer_cpu, phys_timer);
+ vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
+ ptimer = vcpu_ptimer(vcpu);
+
+ /*
+ * Check that the timer has really expired from the guest's
+ * PoV (NTP on the host may have forced it to expire
+ * early). If not ready, schedule for a later time.
+ */
+ ns = kvm_timer_compute_delta(ptimer);
+ if (unlikely(ns)) {
+ hrtimer_forward_now(hrt, ns_to_ktime(ns));
+ return HRTIMER_RESTART;
+ }
+
+ kvm_timer_update_irq(vcpu, true, ptimer);
+ return HRTIMER_NORESTART;
+}
+
+static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
u64 cval, now;
@@ -195,6 +240,25 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
return cval <= now;
}
+bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ if (vtimer->irq.level || ptimer->irq.level)
+ return true;
+
+ /*
+ * When this is called from within the wait loop of kvm_vcpu_block(),
+ * the software view of the timer state is up to date (timer->loaded
+ * is false), and so we can simply check if the timer should fire now.
+ */
+ if (!vtimer->loaded && kvm_timer_should_fire(vtimer))
+ return true;
+
+ return kvm_timer_should_fire(ptimer);
+}
+
/*
* Reflect the timer output level into the kvm_run structure
*/
@@ -218,7 +282,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
{
int ret;
- timer_ctx->active_cleared_last = false;
timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
@@ -232,9 +295,29 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
}
}
+/* Schedule the background timer for the emulated timer. */
+static void phys_timer_emulate(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ /*
+ * If the timer can fire now we have just raised the IRQ line and we
+ * don't need to have a soft timer scheduled for the future. If the
+ * timer cannot fire at all, then we also don't need a soft timer.
+ */
+ if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
+ soft_timer_cancel(&timer->phys_timer, NULL);
+ return;
+ }
+
+ soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer));
+}
+
/*
- * Check if there was a change in the timer state (should we raise or lower
- * the line level to the GIC).
+ * Check if there was a change in the timer state, so that we should either
+ * raise or lower the line level to the GIC or schedule a background timer to
+ * emulate the physical timer.
*/
static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
{
@@ -242,12 +325,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- /*
- * If userspace modified the timer registers via SET_ONE_REG before
- * the vgic was initialized, we mustn't set the vtimer->irq.level value
- * because the guest would never see the interrupt. Instead wait
- * until we call this function from kvm_timer_flush_hwstate.
- */
if (unlikely(!timer->enabled))
return;
@@ -256,22 +333,32 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
+
+ phys_timer_emulate(vcpu);
}
-/* Schedule the background timer for the emulated timer. */
-static void kvm_timer_emulate(struct kvm_vcpu *vcpu,
- struct arch_timer_context *timer_ctx)
+static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ unsigned long flags;
- if (kvm_timer_should_fire(timer_ctx))
- return;
+ local_irq_save(flags);
- if (!kvm_timer_irq_can_fire(timer_ctx))
- return;
+ if (!vtimer->loaded)
+ goto out;
- /* The timer has not yet expired, schedule a background timer */
- timer_arm(timer, kvm_timer_compute_delta(timer_ctx));
+ if (timer->enabled) {
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+ }
+
+ /* Disable the virtual timer */
+ write_sysreg_el0(0, cntv_ctl);
+
+ vtimer->loaded = false;
+out:
+ local_irq_restore(flags);
}
/*
@@ -285,7 +372,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- BUG_ON(timer_is_armed(timer));
+ vtimer_save_state(vcpu);
/*
* No need to schedule a background timer if any guest timer has
@@ -306,70 +393,97 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
* The guest timers have not yet expired, schedule a background timer.
* Set the earliest expiration time among the guest timers.
*/
- timer_arm(timer, kvm_timer_earliest_exp(vcpu));
+ soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
+}
+
+static void vtimer_restore_state(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if (vtimer->loaded)
+ goto out;
+
+ if (timer->enabled) {
+ write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
+ isb();
+ write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
+ }
+
+ vtimer->loaded = true;
+out:
+ local_irq_restore(flags);
}
void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- timer_disarm(timer);
+
+ vtimer_restore_state(vcpu);
+
+ soft_timer_cancel(&timer->bg_timer, &timer->expired);
+}
+
+static void set_cntvoff(u64 cntvoff)
+{
+ u32 low = lower_32_bits(cntvoff);
+ u32 high = upper_32_bits(cntvoff);
+
+ /*
+ * Since kvm_call_hyp doesn't fully support the ARM PCS especially on
+ * 32-bit systems, but rather passes register by register shifted one
+ * place (we put the function address in r0/x0), we cannot simply pass
+ * a 64-bit value as an argument, but have to split the value in two
+ * 32-bit halves.
+ */
+ kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}
-static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
int ret;
- /*
- * If we enter the guest with the virtual input level to the VGIC
- * asserted, then we have already told the VGIC what we need to, and
- * we don't need to exit from the guest until the guest deactivates
- * the already injected interrupt, so therefore we should set the
- * hardware active state to prevent unnecessary exits from the guest.
- *
- * Also, if we enter the guest with the virtual timer interrupt active,
- * then it must be active on the physical distributor, because we set
- * the HW bit and the guest must be able to deactivate the virtual and
- * physical interrupt at the same time.
- *
- * Conversely, if the virtual input level is deasserted and the virtual
- * interrupt is not active, then always clear the hardware active state
- * to ensure that hardware interrupts from the timer triggers a guest
- * exit.
- */
phys_active = vtimer->irq.level ||
- kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
- /*
- * We want to avoid hitting the (re)distributor as much as
- * possible, as this is a potentially expensive MMIO access
- * (not to mention locks in the irq layer), and a solution for
- * this is to cache the "active" state in memory.
- *
- * Things to consider: we cannot cache an "active set" state,
- * because the HW can change this behind our back (it becomes
- * "clear" in the HW). We must then restrict the caching to
- * the "clear" state.
- *
- * The cache is invalidated on:
- * - vcpu put, indicating that the HW cannot be trusted to be
- * in a sane state on the next vcpu load,
- * - any change in the interrupt state
- *
- * Usage conditions:
- * - cached value is "active clear"
- * - value to be programmed is "active clear"
- */
- if (vtimer->active_cleared_last && !phys_active)
- return;
+ kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
ret = irq_set_irqchip_state(host_vtimer_irq,
IRQCHIP_STATE_ACTIVE,
phys_active);
WARN_ON(ret);
+}
- vtimer->active_cleared_last = !phys_active;
+static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+{
+ kvm_vtimer_update_mask_user(vcpu);
+}
+
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ if (unlikely(!timer->enabled))
+ return;
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ kvm_timer_vcpu_load_user(vcpu);
+ else
+ kvm_timer_vcpu_load_vgic(vcpu);
+
+ set_cntvoff(vtimer->cntvoff);
+
+ vtimer_restore_state(vcpu);
+
+ if (has_vhe())
+ disable_el1_phys_timer_access();
+
+ /* Set the background timer for the physical timer emulation. */
+ phys_timer_emulate(vcpu);
}
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -389,48 +503,60 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
ptimer->irq.level != plevel;
}
-static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
+void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ if (unlikely(!timer->enabled))
+ return;
+
+ if (has_vhe())
+ enable_el1_phys_timer_access();
+
+ vtimer_save_state(vcpu);
/*
- * To prevent continuously exiting from the guest, we mask the
- * physical interrupt such that the guest can make forward progress.
- * Once we detect the output level being deasserted, we unmask the
- * interrupt again so that we exit from the guest when the timer
- * fires.
- */
- if (vtimer->irq.level)
- disable_percpu_irq(host_vtimer_irq);
- else
- enable_percpu_irq(host_vtimer_irq, 0);
+ * Cancel the physical timer emulation, because the only case where we
+ * need it after a vcpu_put is in the context of a sleeping VCPU, and
+ * in that case we already factor in the deadline for the physical
+ * timer when scheduling the bg_timer.
+ *
+ * In any case, we re-schedule the hrtimer for the physical timer when
+ * coming back to the VCPU thread in kvm_timer_vcpu_load().
+ */
+ soft_timer_cancel(&timer->phys_timer, NULL);
+
+ /*
+ * The kernel may decide to run userspace after calling vcpu_put, so
+ * we reset cntvoff to 0 to ensure a consistent read between user
+ * accesses to the virtual counter and kernel access to the physical
+ * counter.
+ */
+ set_cntvoff(0);
}
-/**
- * kvm_timer_flush_hwstate - prepare timers before running the vcpu
- * @vcpu: The vcpu pointer
- *
- * Check if the virtual timer has expired while we were running in the host,
- * and inject an interrupt if that was the case, making sure the timer is
- * masked or disabled on the host so that we keep executing. Also schedule a
- * software timer for the physical timer if it is enabled.
- */
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
+static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- if (unlikely(!timer->enabled))
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+ kvm_vtimer_update_mask_user(vcpu);
return;
+ }
- kvm_timer_update_state(vcpu);
-
- /* Set the background timer for the physical timer emulation. */
- kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
-
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_timer_flush_hwstate_user(vcpu);
- else
- kvm_timer_flush_hwstate_vgic(vcpu);
+ /*
+ * If the guest disabled the timer without acking the interrupt, then
+ * we must make sure the physical and virtual active states are in
+ * sync by deactivating the physical interrupt, because otherwise we
+ * wouldn't see the next timer interrupt in the host.
+ */
+ if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
+ int ret;
+ ret = irq_set_irqchip_state(host_vtimer_irq,
+ IRQCHIP_STATE_ACTIVE,
+ false);
+ WARN_ON(ret);
+ }
}
/**
@@ -442,19 +568,21 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
*/
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- /*
- * This is to cancel the background timer for the physical timer
- * emulation if it is set.
- */
- timer_disarm(timer);
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
/*
- * The guest could have modified the timer registers or the timer
- * could have expired, update the timer state.
+ * If we entered the guest with the vtimer output asserted we have to
+ * check if the guest has modified the timer so that we should lower
+ * the line at this point.
*/
- kvm_timer_update_state(vcpu);
+ if (vtimer->irq.level) {
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+ if (!kvm_timer_should_fire(vtimer)) {
+ kvm_timer_update_irq(vcpu, false, vtimer);
+ unmask_vtimer_irq(vcpu);
+ }
+ }
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -505,8 +633,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
vcpu_ptimer(vcpu)->cntvoff = 0;
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
- hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- timer->timer.function = kvm_timer_expire;
+ hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ timer->bg_timer.function = kvm_bg_timer_expire;
+
+ hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ timer->phys_timer.function = kvm_phys_timer_expire;
vtimer->irq.irq = default_vtimer_irq.irq;
ptimer->irq.irq = default_ptimer_irq.irq;
@@ -520,10 +651,11 @@ static void kvm_timer_init_interrupt(void *info)
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
switch (regid) {
case KVM_REG_ARM_TIMER_CTL:
- vtimer->cnt_ctl = value;
+ vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
break;
case KVM_REG_ARM_TIMER_CNT:
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
@@ -531,6 +663,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
case KVM_REG_ARM_TIMER_CVAL:
vtimer->cnt_cval = value;
break;
+ case KVM_REG_ARM_PTIMER_CTL:
+ ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
+ break;
+ case KVM_REG_ARM_PTIMER_CVAL:
+ ptimer->cnt_cval = value;
+ break;
+
default:
return -1;
}
@@ -539,17 +678,38 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
return 0;
}
+static u64 read_timer_ctl(struct arch_timer_context *timer)
+{
+ /*
+ * Set ISTATUS bit if it's expired.
+ * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
+ * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
+ * regardless of ENABLE bit for our implementation convenience.
+ */
+ if (!kvm_timer_compute_delta(timer))
+ return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT;
+ else
+ return timer->cnt_ctl;
+}
+
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
switch (regid) {
case KVM_REG_ARM_TIMER_CTL:
- return vtimer->cnt_ctl;
+ return read_timer_ctl(vtimer);
case KVM_REG_ARM_TIMER_CNT:
return kvm_phys_timer_read() - vtimer->cntvoff;
case KVM_REG_ARM_TIMER_CVAL:
return vtimer->cnt_cval;
+ case KVM_REG_ARM_PTIMER_CTL:
+ return read_timer_ctl(ptimer);
+ case KVM_REG_ARM_PTIMER_CVAL:
+ return ptimer->cnt_cval;
+ case KVM_REG_ARM_PTIMER_CNT:
+ return kvm_phys_timer_read();
}
return (u64)-1;
}
@@ -602,11 +762,20 @@ int kvm_timer_hyp_init(void)
return err;
}
+ err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
+ if (err) {
+ kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+ goto out_free_irq;
+ }
+
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
"kvm/arm/timer:starting", kvm_timer_starting_cpu,
kvm_timer_dying_cpu);
+ return 0;
+out_free_irq:
+ free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
return err;
}
@@ -615,7 +784,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- timer_disarm(timer);
+ soft_timer_cancel(&timer->bg_timer, &timer->expired);
+ soft_timer_cancel(&timer->phys_timer, NULL);
kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}
@@ -691,7 +861,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return ret;
no_vgic:
+ preempt_disable();
timer->enabled = 1;
+ kvm_timer_vcpu_load_vgic(vcpu);
+ preempt_enable();
+
return 0;
}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index b9f68e4add71..772bf74ac2e9 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -307,8 +307,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvm_timer_should_fire(vcpu_vtimer(vcpu)) ||
- kvm_timer_should_fire(vcpu_ptimer(vcpu));
+ return kvm_timer_is_pending(vcpu);
}
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
@@ -354,18 +353,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
kvm_arm_set_running_vcpu(vcpu);
-
kvm_vgic_load(vcpu);
+ kvm_timer_vcpu_load(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
vcpu->cpu = -1;
kvm_arm_set_running_vcpu(NULL);
- kvm_timer_vcpu_put(vcpu);
}
static void vcpu_power_off(struct kvm_vcpu *vcpu)
@@ -652,13 +651,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
preempt_disable();
- kvm_pmu_flush_hwstate(vcpu);
+ /* Flush FP/SIMD state that can't survive guest entry/exit */
+ kvm_fpsimd_flush_cpu_state();
- kvm_timer_flush_hwstate(vcpu);
- kvm_vgic_flush_hwstate(vcpu);
+ kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
+ kvm_vgic_flush_hwstate(vcpu);
+
/*
* If we have a singal pending, or need to notify a userspace
* irqchip about timer or PMU level changes, then we exit (and
@@ -683,10 +684,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
- local_irq_enable();
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
+ local_irq_enable();
preempt_enable();
continue;
}
@@ -710,6 +711,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_arm_clear_debug(vcpu);
/*
+ * We must sync the PMU state before the vgic state so
+ * that the vgic can properly sample the updated state of the
+ * interrupt line.
+ */
+ kvm_pmu_sync_hwstate(vcpu);
+
+ /*
+ * Sync the vgic state before syncing the timer state because
+ * the timer code needs to know if the virtual timer
+ * interrupts are active.
+ */
+ kvm_vgic_sync_hwstate(vcpu);
+
+ /*
+ * Sync the timer hardware state before enabling interrupts as
+ * we don't want vtimer interrupts to race with syncing the
+ * timer virtual interrupt state.
+ */
+ kvm_timer_sync_hwstate(vcpu);
+
+ /*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
* pending, as we haven't serviced it yet!
@@ -732,16 +754,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
guest_exit();
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
- /*
- * We must sync the PMU and timer state before the vgic state so
- * that the vgic can properly sample the updated state of the
- * interrupt line.
- */
- kvm_pmu_sync_hwstate(vcpu);
- kvm_timer_sync_hwstate(vcpu);
-
- kvm_vgic_sync_hwstate(vcpu);
-
preempt_enable();
ret = handle_exit(vcpu, run, ret);
@@ -1326,21 +1338,12 @@ static void teardown_hyp_mode(void)
{
int cpu;
- if (is_kernel_in_hyp_mode())
- return;
-
free_hyp_pgds();
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
hyp_cpu_pm_exit();
}
-static int init_vhe_mode(void)
-{
- kvm_info("VHE mode initialized successfully\n");
- return 0;
-}
-
/**
* Inits Hyp-mode on all online CPUs
*/
@@ -1421,8 +1424,6 @@ static int init_hyp_mode(void)
}
}
- kvm_info("Hyp mode initialized successfully\n");
-
return 0;
out_err:
@@ -1456,6 +1457,7 @@ int kvm_arch_init(void *opaque)
{
int err;
int ret, cpu;
+ bool in_hyp_mode;
if (!is_hyp_mode_available()) {
kvm_err("HYP mode not available\n");
@@ -1474,21 +1476,28 @@ int kvm_arch_init(void *opaque)
if (err)
return err;
- if (is_kernel_in_hyp_mode())
- err = init_vhe_mode();
- else
+ in_hyp_mode = is_kernel_in_hyp_mode();
+
+ if (!in_hyp_mode) {
err = init_hyp_mode();
- if (err)
- goto out_err;
+ if (err)
+ goto out_err;
+ }
err = init_subsystems();
if (err)
goto out_hyp;
+ if (in_hyp_mode)
+ kvm_info("VHE mode initialized successfully\n");
+ else
+ kvm_info("Hyp mode initialized successfully\n");
+
return 0;
out_hyp:
- teardown_hyp_mode();
+ if (!in_hyp_mode)
+ teardown_hyp_mode();
out_err:
teardown_common_resources();
return err;
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index 4734915ab71f..f39861639f08 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -21,58 +21,48 @@
#include <asm/kvm_hyp.h>
-/* vcpu is already in the HYP VA space */
-void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
+void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
+{
+ u64 cntvoff = (u64)cntvoff_high << 32 | cntvoff_low;
+ write_sysreg(cntvoff, cntvoff_el2);
+}
+
+void __hyp_text enable_el1_phys_timer_access(void)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
u64 val;
- if (timer->enabled) {
- vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
- vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
- }
+ /* Allow physical timer/counter access for the host */
+ val = read_sysreg(cnthctl_el2);
+ val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+ write_sysreg(val, cnthctl_el2);
+}
- /* Disable the virtual timer */
- write_sysreg_el0(0, cntv_ctl);
+void __hyp_text disable_el1_phys_timer_access(void)
+{
+ u64 val;
/*
+ * Disallow physical timer access for the guest
+ * Physical counter access is allowed
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~CNTHCTL_EL1PCEN;
+ val |= CNTHCTL_EL1PCTEN;
+ write_sysreg(val, cnthctl_el2);
+}
+
+void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
+{
+ /*
* We don't need to do this for VHE since the host kernel runs in EL2
* with HCR_EL2.TGE ==1, which makes those bits have no impact.
*/
- if (!has_vhe()) {
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
- }
-
- /* Clear cntvoff for the host */
- write_sysreg(0, cntvoff_el2);
+ if (!has_vhe())
+ enable_el1_phys_timer_access();
}
-void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
+void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- u64 val;
-
- /* Those bits are already configured at boot on VHE-system */
- if (!has_vhe()) {
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
- }
-
- if (timer->enabled) {
- write_sysreg(vtimer->cntvoff, cntvoff_el2);
- write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
- isb();
- write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
- }
+ if (!has_vhe())
+ disable_el1_phys_timer_access();
}
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index f7dc5ddd6847..e53b596f483b 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H
diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h
index ed3229282888..55fed77a9f73 100644
--- a/virt/kvm/arm/vgic/trace.h
+++ b/virt/kvm/arm/vgic/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_VGIC_H
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index f51c1e1b3f70..d2a99ab0ade7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -278,6 +278,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
u8 prop;
int ret;
+ unsigned long flags;
ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
&prop, 1);
@@ -285,15 +286,15 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
if (ret)
return ret;
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
if (!filter_vcpu || filter_vcpu == irq->target_vcpu) {
irq->priority = LPI_PROP_PRIORITY(prop);
irq->enabled = LPI_PROP_ENABLE_BIT(prop);
- vgic_queue_irq_unlock(kvm, irq);
+ vgic_queue_irq_unlock(kvm, irq, flags);
} else {
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
}
return 0;
@@ -393,6 +394,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
int ret = 0;
u32 *intids;
int nr_irqs, i;
+ unsigned long flags;
nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
if (nr_irqs < 0)
@@ -420,9 +422,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
}
irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = pendmask & (1U << bit_nr);
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -515,6 +517,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
{
struct kvm_vcpu *vcpu;
struct its_ite *ite;
+ unsigned long flags;
if (!its->enabled)
return -EBUSY;
@@ -530,9 +533,9 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
if (!vcpu->arch.vgic_cpu.lpis_enabled)
return -EBUSY;
- spin_lock(&ite->irq->irq_lock);
+ spin_lock_irqsave(&ite->irq->irq_lock, flags);
ite->irq->pending_latch = true;
- vgic_queue_irq_unlock(kvm, ite->irq);
+ vgic_queue_irq_unlock(kvm, ite->irq, flags);
return 0;
}
@@ -894,7 +897,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
}
/* Requires the its_lock to be held. */
-static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
+static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
{
struct its_ite *ite, *temp;
@@ -910,6 +913,24 @@ static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
kfree(device);
}
+/* its lock must be held */
+static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its)
+{
+ struct its_device *cur, *temp;
+
+ list_for_each_entry_safe(cur, temp, &its->device_list, dev_list)
+ vgic_its_free_device(kvm, cur);
+}
+
+/* its lock must be held */
+static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its)
+{
+ struct its_collection *cur, *temp;
+
+ list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list)
+ vgic_its_free_collection(its, cur->collection_id);
+}
+
/* Must be called with its_lock mutex held */
static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
u32 device_id, gpa_t itt_addr,
@@ -957,7 +978,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
* by removing the mapping and re-establishing it.
*/
if (device)
- vgic_its_unmap_device(kvm, device);
+ vgic_its_free_device(kvm, device);
/*
* The spec does not say whether unmapping a not-mapped device
@@ -1410,7 +1431,7 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
unsigned long val)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
- u64 entry_size, device_type;
+ u64 entry_size, table_type;
u64 reg, *regptr, clearbits = 0;
/* When GITS_CTLR.Enable is 1, we ignore write accesses. */
@@ -1421,12 +1442,12 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
case 0:
regptr = &its->baser_device_table;
entry_size = abi->dte_esz;
- device_type = GITS_BASER_TYPE_DEVICE;
+ table_type = GITS_BASER_TYPE_DEVICE;
break;
case 1:
regptr = &its->baser_coll_table;
entry_size = abi->cte_esz;
- device_type = GITS_BASER_TYPE_COLLECTION;
+ table_type = GITS_BASER_TYPE_COLLECTION;
clearbits = GITS_BASER_INDIRECT;
break;
default:
@@ -1438,10 +1459,24 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
reg &= ~clearbits;
reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT;
- reg |= device_type << GITS_BASER_TYPE_SHIFT;
+ reg |= table_type << GITS_BASER_TYPE_SHIFT;
reg = vgic_sanitise_its_baser(reg);
*regptr = reg;
+
+ if (!(reg & GITS_BASER_VALID)) {
+ /* Take the its_lock to prevent a race with a save/restore */
+ mutex_lock(&its->its_lock);
+ switch (table_type) {
+ case GITS_BASER_TYPE_DEVICE:
+ vgic_its_free_device_list(kvm, its);
+ break;
+ case GITS_BASER_TYPE_COLLECTION:
+ vgic_its_free_collection_list(kvm, its);
+ break;
+ }
+ mutex_unlock(&its->its_lock);
+ }
}
static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
@@ -1466,6 +1501,16 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
{
mutex_lock(&its->cmd_lock);
+ /*
+ * It is UNPREDICTABLE to enable the ITS if any of the CBASER or
+ * device/collection BASER are invalid
+ */
+ if (!its->enabled && (val & GITS_CTLR_ENABLE) &&
+ (!(its->baser_device_table & GITS_BASER_VALID) ||
+ !(its->baser_coll_table & GITS_BASER_VALID) ||
+ !(its->cbaser & GITS_CBASER_VALID)))
+ goto out;
+
its->enabled = !!(val & GITS_CTLR_ENABLE);
/*
@@ -1474,6 +1519,7 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
*/
vgic_its_process_commands(kvm, its);
+out:
mutex_unlock(&its->cmd_lock);
}
@@ -1612,46 +1658,17 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
}
-static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev)
-{
- struct its_ite *ite, *tmp;
-
- list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list)
- its_free_ite(kvm, ite);
- list_del(&dev->dev_list);
- kfree(dev);
-}
-
static void vgic_its_destroy(struct kvm_device *kvm_dev)
{
struct kvm *kvm = kvm_dev->kvm;
struct vgic_its *its = kvm_dev->private;
- struct list_head *cur, *temp;
-
- /*
- * We may end up here without the lists ever having been initialized.
- * Check this and bail out early to avoid dereferencing a NULL pointer.
- */
- if (!its->device_list.next)
- return;
mutex_lock(&its->its_lock);
- list_for_each_safe(cur, temp, &its->device_list) {
- struct its_device *dev;
- dev = list_entry(cur, struct its_device, dev_list);
- vgic_its_free_device(kvm, dev);
- }
+ vgic_its_free_device_list(kvm, its);
+ vgic_its_free_collection_list(kvm, its);
- list_for_each_safe(cur, temp, &its->collection_list) {
- struct its_collection *coll;
-
- coll = list_entry(cur, struct its_collection, coll_list);
- list_del(cur);
- kfree(coll);
- }
mutex_unlock(&its->its_lock);
-
kfree(its);
}
@@ -1801,37 +1818,33 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
int start_id, entry_fn_t fn, void *opaque)
{
- void *entry = kzalloc(esz, GFP_KERNEL);
struct kvm *kvm = its->dev->kvm;
unsigned long len = size;
int id = start_id;
gpa_t gpa = base;
+ char entry[esz];
int ret;
+ memset(entry, 0, esz);
+
while (len > 0) {
int next_offset;
size_t byte_offset;
ret = kvm_read_guest(kvm, gpa, entry, esz);
if (ret)
- goto out;
+ return ret;
next_offset = fn(its, id, entry, opaque);
- if (next_offset <= 0) {
- ret = next_offset;
- goto out;
- }
+ if (next_offset <= 0)
+ return next_offset;
byte_offset = next_offset * esz;
id += next_offset;
gpa += byte_offset;
len -= byte_offset;
}
- ret = 1;
-
-out:
- kfree(entry);
- return ret;
+ return 1;
}
/**
@@ -1940,6 +1953,14 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
return 0;
}
+/**
+ * vgic_its_restore_itt - restore the ITT of a device
+ *
+ * @its: its handle
+ * @dev: device handle
+ *
+ * Return 0 on success, < 0 on error
+ */
static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
@@ -1951,6 +1972,10 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
ret = scan_its_table(its, base, max_size, ite_esz, 0,
vgic_its_restore_ite, dev);
+ /* scan_its_table returns +1 if all ITEs are invalid */
+ if (ret > 0)
+ ret = 0;
+
return ret;
}
@@ -2048,11 +2073,12 @@ static int vgic_its_device_cmp(void *priv, struct list_head *a,
static int vgic_its_save_device_tables(struct vgic_its *its)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+ u64 baser = its->baser_device_table;
struct its_device *dev;
int dte_esz = abi->dte_esz;
- u64 baser;
- baser = its->baser_device_table;
+ if (!(baser & GITS_BASER_VALID))
+ return 0;
list_sort(NULL, &its->device_list, vgic_its_device_cmp);
@@ -2107,10 +2133,7 @@ static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr,
ret = scan_its_table(its, gpa, SZ_64K, dte_esz,
l2_start_id, vgic_its_restore_dte, NULL);
- if (ret <= 0)
- return ret;
-
- return 1;
+ return ret;
}
/**
@@ -2140,8 +2163,9 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
vgic_its_restore_dte, NULL);
}
+ /* scan_its_table returns +1 if all entries are invalid */
if (ret > 0)
- ret = -EINVAL;
+ ret = 0;
return ret;
}
@@ -2198,17 +2222,17 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
static int vgic_its_save_collection_table(struct vgic_its *its)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+ u64 baser = its->baser_coll_table;
+ gpa_t gpa = BASER_ADDRESS(baser);
struct its_collection *collection;
u64 val;
- gpa_t gpa;
size_t max_size, filled = 0;
int ret, cte_esz = abi->cte_esz;
- gpa = BASER_ADDRESS(its->baser_coll_table);
- if (!gpa)
+ if (!(baser & GITS_BASER_VALID))
return 0;
- max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
+ max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
list_for_each_entry(collection, &its->collection_list, coll_list) {
ret = vgic_its_save_cte(its, collection, gpa, cte_esz);
@@ -2239,17 +2263,18 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
static int vgic_its_restore_collection_table(struct vgic_its *its)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+ u64 baser = its->baser_coll_table;
int cte_esz = abi->cte_esz;
size_t max_size, read = 0;
gpa_t gpa;
int ret;
- if (!(its->baser_coll_table & GITS_BASER_VALID))
+ if (!(baser & GITS_BASER_VALID))
return 0;
- gpa = BASER_ADDRESS(its->baser_coll_table);
+ gpa = BASER_ADDRESS(baser);
- max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
+ max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
while (read < max_size) {
ret = vgic_its_restore_cte(its, gpa, cte_esz);
@@ -2258,6 +2283,10 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
gpa += cte_esz;
read += cte_esz;
}
+
+ if (ret > 0)
+ return 0;
+
return ret;
}
@@ -2267,29 +2296,13 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
*/
static int vgic_its_save_tables_v0(struct vgic_its *its)
{
- struct kvm *kvm = its->dev->kvm;
int ret;
- mutex_lock(&kvm->lock);
- mutex_lock(&its->its_lock);
-
- if (!lock_all_vcpus(kvm)) {
- mutex_unlock(&its->its_lock);
- mutex_unlock(&kvm->lock);
- return -EBUSY;
- }
-
ret = vgic_its_save_device_tables(its);
if (ret)
- goto out;
-
- ret = vgic_its_save_collection_table(its);
+ return ret;
-out:
- unlock_all_vcpus(kvm);
- mutex_unlock(&its->its_lock);
- mutex_unlock(&kvm->lock);
- return ret;
+ return vgic_its_save_collection_table(its);
}
/**
@@ -2299,29 +2312,13 @@ out:
*/
static int vgic_its_restore_tables_v0(struct vgic_its *its)
{
- struct kvm *kvm = its->dev->kvm;
int ret;
- mutex_lock(&kvm->lock);
- mutex_lock(&its->its_lock);
-
- if (!lock_all_vcpus(kvm)) {
- mutex_unlock(&its->its_lock);
- mutex_unlock(&kvm->lock);
- return -EBUSY;
- }
-
ret = vgic_its_restore_collection_table(its);
if (ret)
- goto out;
-
- ret = vgic_its_restore_device_tables(its);
-out:
- unlock_all_vcpus(kvm);
- mutex_unlock(&its->its_lock);
- mutex_unlock(&kvm->lock);
+ return ret;
- return ret;
+ return vgic_its_restore_device_tables(its);
}
static int vgic_its_commit_v0(struct vgic_its *its)
@@ -2340,6 +2337,19 @@ static int vgic_its_commit_v0(struct vgic_its *its)
return 0;
}
+static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its)
+{
+ /* We need to keep the ABI specific field values */
+ its->baser_coll_table &= ~GITS_BASER_VALID;
+ its->baser_device_table &= ~GITS_BASER_VALID;
+ its->cbaser = 0;
+ its->creadr = 0;
+ its->cwriter = 0;
+ its->enabled = 0;
+ vgic_its_free_device_list(kvm, its);
+ vgic_its_free_collection_list(kvm, its);
+}
+
static int vgic_its_has_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
@@ -2354,6 +2364,8 @@ static int vgic_its_has_attr(struct kvm_device *dev,
switch (attr->attr) {
case KVM_DEV_ARM_VGIC_CTRL_INIT:
return 0;
+ case KVM_DEV_ARM_ITS_CTRL_RESET:
+ return 0;
case KVM_DEV_ARM_ITS_SAVE_TABLES:
return 0;
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
@@ -2366,6 +2378,41 @@ static int vgic_its_has_attr(struct kvm_device *dev,
return -ENXIO;
}
+static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
+{
+ const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+ int ret = 0;
+
+ if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
+ return 0;
+
+ mutex_lock(&kvm->lock);
+ mutex_lock(&its->its_lock);
+
+ if (!lock_all_vcpus(kvm)) {
+ mutex_unlock(&its->its_lock);
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
+
+ switch (attr) {
+ case KVM_DEV_ARM_ITS_CTRL_RESET:
+ vgic_its_reset(kvm, its);
+ break;
+ case KVM_DEV_ARM_ITS_SAVE_TABLES:
+ ret = abi->save_tables(its);
+ break;
+ case KVM_DEV_ARM_ITS_RESTORE_TABLES:
+ ret = abi->restore_tables(its);
+ break;
+ }
+
+ unlock_all_vcpus(kvm);
+ mutex_unlock(&its->its_lock);
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
static int vgic_its_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
@@ -2391,19 +2438,8 @@ static int vgic_its_set_attr(struct kvm_device *dev,
return vgic_register_its_iodev(dev->kvm, its, addr);
}
- case KVM_DEV_ARM_VGIC_GRP_CTRL: {
- const struct vgic_its_abi *abi = vgic_its_get_abi(its);
-
- switch (attr->attr) {
- case KVM_DEV_ARM_VGIC_CTRL_INIT:
- /* Nothing to do */
- return 0;
- case KVM_DEV_ARM_ITS_SAVE_TABLES:
- return abi->save_tables(its);
- case KVM_DEV_ARM_ITS_RESTORE_TABLES:
- return abi->restore_tables(its);
- }
- }
+ case KVM_DEV_ARM_VGIC_GRP_CTRL:
+ return vgic_its_ctrl(dev->kvm, its, attr->attr);
case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
u64 __user *uaddr = (u64 __user *)(long)attr->addr;
u64 reg;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index b3d4a10f09a1..e21e2f49b005 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -74,6 +74,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
int mode = (val >> 24) & 0x03;
int c;
struct kvm_vcpu *vcpu;
+ unsigned long flags;
switch (mode) {
case 0x0: /* as specified by targets */
@@ -97,11 +98,11 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
irq->source |= 1U << source_vcpu->vcpu_id;
- vgic_queue_irq_unlock(source_vcpu->kvm, irq);
+ vgic_queue_irq_unlock(source_vcpu->kvm, irq, flags);
vgic_put_irq(source_vcpu->kvm, irq);
}
}
@@ -131,6 +132,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0);
int i;
+ unsigned long flags;
/* GICD_ITARGETSR[0-7] are read-only */
if (intid < VGIC_NR_PRIVATE_IRQS)
@@ -140,13 +142,13 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i);
int target;
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->targets = (val >> (i * 8)) & cpu_mask;
target = irq->targets ? __ffs(irq->targets) : 0;
irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
@@ -174,17 +176,18 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
{
u32 intid = addr & 0x0f;
int i;
+ unsigned long flags;
for (i = 0; i < len; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->source &= ~((val >> (i * 8)) & 0xff);
if (!irq->source)
irq->pending_latch = false;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
@@ -195,19 +198,20 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
{
u32 intid = addr & 0x0f;
int i;
+ unsigned long flags;
for (i = 0; i < len; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->source |= (val >> (i * 8)) & 0xff;
if (irq->source) {
irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
} else {
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
}
vgic_put_irq(vcpu->kvm, irq);
}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 408ef06638fc..83786108829e 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -129,6 +129,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
{
int intid = VGIC_ADDR_TO_INTID(addr, 64);
struct vgic_irq *irq;
+ unsigned long flags;
/* The upper word is WI for us since we don't implement Aff3. */
if (addr & 4)
@@ -139,13 +140,13 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
if (!irq)
return;
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
/* We only care about and preserve Aff0, Aff1 and Aff2. */
irq->mpidr = val & GENMASK(23, 0);
irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -241,11 +242,12 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
+ unsigned long flags;
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
if (test_bit(i, &val)) {
/*
* pending_latch is set irrespective of irq type
@@ -253,10 +255,10 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
* restore irq config before pending info.
*/
irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
} else {
irq->pending_latch = false;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
}
vgic_put_irq(vcpu->kvm, irq);
@@ -799,6 +801,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
int sgi, c;
int vcpu_id = vcpu->vcpu_id;
bool broadcast;
+ unsigned long flags;
sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
@@ -837,10 +840,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index c1e4bdd66131..deb51ee16a3d 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -69,13 +69,14 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
+ unsigned long flags;
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->enabled = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -87,15 +88,16 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
+ unsigned long flags;
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->enabled = false;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
@@ -126,14 +128,15 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
+ unsigned long flags;
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
@@ -144,15 +147,16 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
+ unsigned long flags;
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = false;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
@@ -181,7 +185,8 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool new_active_state)
{
struct kvm_vcpu *requester_vcpu;
- spin_lock(&irq->irq_lock);
+ unsigned long flags;
+ spin_lock_irqsave(&irq->irq_lock, flags);
/*
* The vcpu parameter here can mean multiple things depending on how
@@ -216,9 +221,9 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->active = new_active_state;
if (new_active_state)
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
else
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
}
/*
@@ -352,14 +357,15 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
int i;
+ unsigned long flags;
for (i = 0; i < len; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
/* Narrow the priority range to what we actually support */
irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -390,6 +396,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
int i;
+ unsigned long flags;
for (i = 0; i < len * 4; i++) {
struct vgic_irq *irq;
@@ -404,14 +411,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
continue;
irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
if (test_bit(i * 2 + 1, &val))
irq->config = VGIC_CONFIG_EDGE;
else
irq->config = VGIC_CONFIG_LEVEL;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
@@ -443,6 +450,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
{
int i;
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+ unsigned long flags;
for (i = 0; i < 32; i++) {
struct vgic_irq *irq;
@@ -459,12 +467,12 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
* restore irq config before line level.
*/
new_level = !!(val & (1U << i));
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->line_level = new_level;
if (new_level)
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
else
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index e4187e52bb26..80897102da26 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -62,6 +62,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
int lr;
+ unsigned long flags;
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
@@ -77,7 +78,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
/* Always preserve the active bit */
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
@@ -104,7 +105,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
irq->pending_latch = false;
}
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 96ea597db0e7..863351c090d8 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -44,6 +44,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 model = vcpu->kvm->arch.vgic.vgic_model;
int lr;
+ unsigned long flags;
cpuif->vgic_hcr &= ~ICH_HCR_UIE;
@@ -66,7 +67,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
if (!irq) /* An LPI could have been unmapped. */
continue;
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
/* Always preserve the active bit */
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
@@ -94,7 +95,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
irq->pending_latch = false;
}
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -278,6 +279,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
bool status;
u8 val;
int ret;
+ unsigned long flags;
retry:
vcpu = irq->target_vcpu;
@@ -296,13 +298,13 @@ retry:
status = val & (1 << bit_nr);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->target_vcpu != vcpu) {
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
goto retry;
}
irq->pending_latch = status;
- vgic_queue_irq_unlock(vcpu->kvm, irq);
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
if (status) {
/* clear consumed data */
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index fed717e07938..e54ef2fdf73d 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -53,6 +53,10 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* vcpuX->vcpu_id < vcpuY->vcpu_id:
* spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
* spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
+ *
+ * Since the VGIC must support injecting virtual interrupts from ISRs, we have
+ * to use the spin_lock_irqsave/spin_unlock_irqrestore versions of outer
+ * spinlocks for any lock that may be taken while injecting an interrupt.
*/
/*
@@ -261,7 +265,8 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne
* Needs to be entered with the IRQ lock already held, but will return
* with all locks dropped.
*/
-bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq)
+bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+ unsigned long flags)
{
struct kvm_vcpu *vcpu;
@@ -279,7 +284,7 @@ retry:
* not need to be inserted into an ap_list and there is also
* no more work for us to do.
*/
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
/*
* We have to kick the VCPU here, because we could be
@@ -301,11 +306,11 @@ retry:
* We must unlock the irq lock to take the ap_list_lock where
* we are going to insert this new pending interrupt.
*/
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
/* someone can do stuff here, which we re-check below */
- spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
spin_lock(&irq->irq_lock);
/*
@@ -322,9 +327,9 @@ retry:
if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
spin_unlock(&irq->irq_lock);
- spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
goto retry;
}
@@ -337,7 +342,7 @@ retry:
irq->vcpu = vcpu;
spin_unlock(&irq->irq_lock);
- spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
@@ -367,6 +372,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
{
struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
+ unsigned long flags;
int ret;
trace_vgic_update_irq_pending(cpuid, intid, level);
@@ -383,11 +389,11 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
if (!irq)
return -EINVAL;
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
if (!vgic_validate_injection(irq, level, owner)) {
/* Nothing to see here, move along... */
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(kvm, irq);
return 0;
}
@@ -397,7 +403,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
else
irq->pending_latch = true;
- vgic_queue_irq_unlock(kvm, irq);
+ vgic_queue_irq_unlock(kvm, irq, flags);
vgic_put_irq(kvm, irq);
return 0;
@@ -406,15 +412,16 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+ unsigned long flags;
BUG_ON(!irq);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->hw = true;
irq->hwintid = phys_irq;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
return 0;
@@ -423,6 +430,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
{
struct vgic_irq *irq;
+ unsigned long flags;
if (!vgic_initialized(vcpu->kvm))
return -EAGAIN;
@@ -430,12 +438,12 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
BUG_ON(!irq);
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->hw = false;
irq->hwintid = 0;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
return 0;
@@ -486,9 +494,10 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq, *tmp;
+ unsigned long flags;
retry:
- spin_lock(&vgic_cpu->ap_list_lock);
+ spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
@@ -528,7 +537,7 @@ retry:
/* This interrupt looks like it has to be migrated. */
spin_unlock(&irq->irq_lock);
- spin_unlock(&vgic_cpu->ap_list_lock);
+ spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
/*
* Ensure locking order by always locking the smallest
@@ -542,7 +551,7 @@ retry:
vcpuB = vcpu;
}
- spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
+ spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
SINGLE_DEPTH_NESTING);
spin_lock(&irq->irq_lock);
@@ -566,11 +575,11 @@ retry:
spin_unlock(&irq->irq_lock);
spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
- spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
+ spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
goto retry;
}
- spin_unlock(&vgic_cpu->ap_list_lock);
+ spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
}
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
@@ -703,6 +712,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
+ DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
+
spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
vgic_flush_lr_state(vcpu);
spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
@@ -735,11 +746,12 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
bool pending = false;
+ unsigned long flags;
if (!vcpu->kvm->arch.vgic.enabled)
return false;
- spin_lock(&vgic_cpu->ap_list_lock);
+ spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
@@ -750,7 +762,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
break;
}
- spin_unlock(&vgic_cpu->ap_list_lock);
+ spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
return pending;
}
@@ -776,10 +788,14 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
bool map_is_active;
+ unsigned long flags;
- spin_lock(&irq->irq_lock);
+ if (!vgic_initialized(vcpu->kvm))
+ return false;
+
+ spin_lock_irqsave(&irq->irq_lock, flags);
map_is_active = irq->hw && irq->active;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
return map_is_active;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index bf9ceab67c77..4f8aecb07ae6 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -140,7 +140,8 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
-bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
+bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+ unsigned long flags);
void vgic_kick_vcpus(struct kvm *kvm);
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 571c1ce37d15..9e65feb6fa58 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* KVM coalesced MMIO
*
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 6bca74ca5331..36f84264ed25 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_COALESCED_MMIO_H__
#define __KVM_COALESCED_MMIO_H__
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c608ab495282..f2ac53ab8243 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -565,8 +565,6 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
return -EINVAL;
- if (args->gsi >= KVM_MAX_IRQ_ROUTES)
- return -EINVAL;
if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
return kvm_irqfd_deassign(kvm, args);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 99bfe50a0589..f169ecc4f2e8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -122,7 +122,6 @@ static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
__visible bool kvm_rebooting;
@@ -1679,11 +1678,12 @@ void kvm_release_page_dirty(struct page *page)
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
+void kvm_release_pfn_dirty(kvm_pfn_t pfn)
{
kvm_set_pfn_dirty(pfn);
kvm_release_pfn_clean(pfn);
}
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
@@ -2302,7 +2302,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
} else if (pass && i > last_boosted_vcpu)
break;
- if (!ACCESS_ONCE(vcpu->preempted))
+ if (!READ_ONCE(vcpu->preempted))
continue;
if (vcpu == me)
continue;
@@ -4007,7 +4007,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
if (!vcpu_align)
vcpu_align = __alignof__(struct kvm_vcpu);
kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
- 0, NULL);
+ SLAB_ACCOUNT, NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
goto out_free_3;
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
index ab88c7dc0514..e130a4a03530 100644
--- a/virt/kvm/vfio.h
+++ b/virt/kvm/vfio.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_VFIO_H
#define __KVM_VFIO_H