summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/arm/arch_timer.c15
-rw-r--r--virt/kvm/arm/arm.c51
-rw-r--r--virt/kvm/arm/mmu.c87
-rw-r--r--virt/kvm/arm/vgic/vgic-debug.c50
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c24
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c27
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c66
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c72
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c56
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h25
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c10
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c13
-rw-r--r--virt/kvm/arm/vgic/vgic.c19
-rw-r--r--virt/kvm/arm/vgic/vgic.h7
-rw-r--r--virt/kvm/kvm_main.c50
15 files changed, 443 insertions, 129 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index bd3d57f40f1b..17cecc96f735 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -295,9 +295,9 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu)
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
- * If the timer can fire now we have just raised the IRQ line and we
- * don't need to have a soft timer scheduled for the future. If the
- * timer cannot fire at all, then we also don't need a soft timer.
+ * If the timer can fire now, we don't need to have a soft timer
+ * scheduled for the future. If the timer cannot fire at all,
+ * then we also don't need a soft timer.
*/
if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
soft_timer_cancel(&timer->phys_timer, NULL);
@@ -332,10 +332,10 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
level = kvm_timer_should_fire(vtimer);
kvm_timer_update_irq(vcpu, level, vtimer);
+ phys_timer_emulate(vcpu);
+
if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
-
- phys_timer_emulate(vcpu);
}
static void vtimer_save_state(struct kvm_vcpu *vcpu)
@@ -487,6 +487,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
if (unlikely(!timer->enabled))
return;
@@ -502,6 +503,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
/* Set the background timer for the physical timer emulation. */
phys_timer_emulate(vcpu);
+
+ /* If the timer fired while we weren't running, inject it now */
+ if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+ kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
}
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 108250e4d376..c92053bc3f96 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -30,6 +30,7 @@
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
+#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
@@ -380,6 +381,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_timer_vcpu_load(vcpu);
kvm_vcpu_load_sysregs(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
+
+ if (single_task_running())
+ vcpu_clear_wfe_traps(vcpu);
+ else
+ vcpu_set_wfe_traps(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1044,6 +1050,32 @@ static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
return ret;
}
+static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ memset(events, 0, sizeof(*events));
+
+ return __kvm_arm_vcpu_get_events(vcpu, events);
+}
+
+static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ int i;
+
+ /* check whether the reserved field is zero */
+ for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
+ if (events->reserved[i])
+ return -EINVAL;
+
+ /* check whether the pad field is zero */
+ for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
+ if (events->exception.pad[i])
+ return -EINVAL;
+
+ return __kvm_arm_vcpu_set_events(vcpu, events);
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1124,6 +1156,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_arm_vcpu_has_attr(vcpu, &attr);
break;
}
+ case KVM_GET_VCPU_EVENTS: {
+ struct kvm_vcpu_events events;
+
+ if (kvm_arm_vcpu_get_events(vcpu, &events))
+ return -EINVAL;
+
+ if (copy_to_user(argp, &events, sizeof(events)))
+ return -EFAULT;
+
+ return 0;
+ }
+ case KVM_SET_VCPU_EVENTS: {
+ struct kvm_vcpu_events events;
+
+ if (copy_from_user(&events, argp, sizeof(events)))
+ return -EFAULT;
+
+ return kvm_arm_vcpu_set_events(vcpu, &events);
+ }
default:
r = -EINVAL;
}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 1d90d79706bd..91aaf73b00df 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -177,6 +177,35 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
put_page(virt_to_page(pmd));
}
+static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte)
+{
+ WRITE_ONCE(*ptep, new_pte);
+ dsb(ishst);
+}
+
+static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd)
+{
+ WRITE_ONCE(*pmdp, new_pmd);
+ dsb(ishst);
+}
+
+static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep)
+{
+ kvm_set_pmd(pmdp, kvm_mk_pmd(ptep));
+}
+
+static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp)
+{
+ WRITE_ONCE(*pudp, kvm_mk_pud(pmdp));
+ dsb(ishst);
+}
+
+static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp)
+{
+ WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp));
+ dsb(ishst);
+}
+
/*
* Unmapping vs dcache management:
*
@@ -196,6 +225,10 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
* This is why right after unmapping a page/section and invalidating
* the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
* the IO subsystem will never hit in the cache.
+ *
+ * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
+ * we then fully enforce cacheability of RAM, no matter what the guest
+ * does.
*/
static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
@@ -576,7 +609,6 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
pte = pte_offset_kernel(pmd, addr);
kvm_set_pte(pte, pfn_pte(pfn, prot));
get_page(virt_to_page(pte));
- kvm_flush_dcache_to_poc(pte, sizeof(*pte));
pfn++;
} while (addr += PAGE_SIZE, addr != end);
}
@@ -601,9 +633,8 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
kvm_err("Cannot allocate Hyp pte\n");
return -ENOMEM;
}
- pmd_populate_kernel(NULL, pmd, pte);
+ kvm_pmd_populate(pmd, pte);
get_page(virt_to_page(pmd));
- kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
}
next = pmd_addr_end(addr, end);
@@ -634,9 +665,8 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
kvm_err("Cannot allocate Hyp pmd\n");
return -ENOMEM;
}
- pud_populate(NULL, pud, pmd);
+ kvm_pud_populate(pud, pmd);
get_page(virt_to_page(pud));
- kvm_flush_dcache_to_poc(pud, sizeof(*pud));
}
next = pud_addr_end(addr, end);
@@ -671,9 +701,8 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
err = -ENOMEM;
goto out;
}
- pgd_populate(NULL, pgd, pud);
+ kvm_pgd_populate(pgd, pud);
get_page(virt_to_page(pgd));
- kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
}
next = pgd_addr_end(addr, end);
@@ -1015,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
pmd = stage2_get_pmd(kvm, cache, addr);
VM_BUG_ON(!pmd);
- /*
- * Mapping in huge pages should only happen through a fault. If a
- * page is merged into a transparent huge page, the individual
- * subpages of that huge page should be unmapped through MMU
- * notifiers before we get here.
- *
- * Merging of CompoundPages is not supported; they should become
- * splitting first, unmapped, merged, and mapped back in on-demand.
- */
- VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
old_pmd = *pmd;
if (pmd_present(old_pmd)) {
+ /*
+ * Multiple vcpus faulting on the same PMD entry, can
+ * lead to them sequentially updating the PMD with the
+ * same value. Following the break-before-make
+ * (pmd_clear() followed by tlb_flush()) process can
+ * hinder forward progress due to refaults generated
+ * on missing translations.
+ *
+ * Skip updating the page table if the entry is
+ * unchanged.
+ */
+ if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+ return 0;
+
+ /*
+ * Mapping in huge pages should only happen through a
+ * fault. If a page is merged into a transparent huge
+ * page, the individual subpages of that huge page
+ * should be unmapped through MMU notifiers before we
+ * get here.
+ *
+ * Merging of CompoundPages is not supported; they
+ * should become splitting first, unmapped, merged,
+ * and mapped back in on-demand.
+ */
+ VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
+
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
@@ -1090,7 +1135,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
pte = mmu_memory_cache_alloc(cache);
- pmd_populate_kernel(NULL, pmd, pte);
+ kvm_pmd_populate(pmd, pte);
get_page(virt_to_page(pmd));
}
@@ -1102,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
if (pte_present(old_pte)) {
+ /* Skip page table update if there is no change */
+ if (pte_val(old_pte) == pte_val(*new_pte))
+ return 0;
+
kvm_set_pte(pte, __pte(0));
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
index c589d4c2b478..07aa900bac56 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -36,9 +36,12 @@
struct vgic_state_iter {
int nr_cpus;
int nr_spis;
+ int nr_lpis;
int dist_id;
int vcpu_id;
int intid;
+ int lpi_idx;
+ u32 *lpi_array;
};
static void iter_next(struct vgic_state_iter *iter)
@@ -52,6 +55,12 @@ static void iter_next(struct vgic_state_iter *iter)
if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
++iter->vcpu_id < iter->nr_cpus)
iter->intid = 0;
+
+ if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
+ if (iter->lpi_idx < iter->nr_lpis)
+ iter->intid = iter->lpi_array[iter->lpi_idx];
+ iter->lpi_idx++;
+ }
}
static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
@@ -63,6 +72,11 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
iter->nr_cpus = nr_cpus;
iter->nr_spis = kvm->arch.vgic.nr_spis;
+ if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
+ if (iter->nr_lpis < 0)
+ iter->nr_lpis = 0;
+ }
/* Fast forward to the right position if needed */
while (pos--)
@@ -73,7 +87,8 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
{
return iter->dist_id > 0 &&
iter->vcpu_id == iter->nr_cpus &&
- (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis;
+ iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
+ iter->lpi_idx > iter->nr_lpis;
}
static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
@@ -130,6 +145,7 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
mutex_lock(&kvm->lock);
iter = kvm->arch.vgic.iter;
+ kfree(iter->lpi_array);
kfree(iter);
kvm->arch.vgic.iter = NULL;
mutex_unlock(&kvm->lock);
@@ -137,17 +153,20 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
{
+ bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
+
seq_printf(s, "Distributor\n");
seq_printf(s, "===========\n");
- seq_printf(s, "vgic_model:\t%s\n",
- (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
- "GICv3" : "GICv2");
+ seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
+ if (v3)
+ seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
seq_printf(s, "enabled:\t%d\n", dist->enabled);
seq_printf(s, "\n");
seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
+ seq_printf(s, "G=group\n");
}
static void print_header(struct seq_file *s, struct vgic_irq *irq,
@@ -162,8 +181,8 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
}
seq_printf(s, "\n");
- seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
- seq_printf(s, "---------------------------------------------------------------\n");
+ seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCG HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
+ seq_printf(s, "----------------------------------------------------------------\n");
}
static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
@@ -174,15 +193,17 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
type = "SGI";
else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
type = "PPI";
- else
+ else if (irq->intid < VGIC_MAX_SPI)
type = "SPI";
+ else
+ type = "LPI";
if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
print_header(s, irq, vcpu);
seq_printf(s, " %s %4d "
" %2d "
- "%d%d%d%d%d%d "
+ "%d%d%d%d%d%d%d "
"%8d "
"%8x "
" %2x "
@@ -197,12 +218,12 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
irq->enabled,
irq->hw,
irq->config == VGIC_CONFIG_LEVEL,
+ irq->group,
irq->hwintid,
irq->mpidr,
irq->source,
irq->priority,
(irq->vcpu) ? irq->vcpu->vcpu_id : -1);
-
}
static int vgic_debug_show(struct seq_file *s, void *v)
@@ -221,17 +242,20 @@ static int vgic_debug_show(struct seq_file *s, void *v)
if (!kvm->arch.vgic.initialized)
return 0;
- if (iter->vcpu_id < iter->nr_cpus) {
+ if (iter->vcpu_id < iter->nr_cpus)
vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
- irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid];
- } else {
- irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
+
+ irq = vgic_get_irq(kvm, vcpu, iter->intid);
+ if (!irq) {
+ seq_printf(s, " LPI %4d freed\n", iter->intid);
+ return 0;
}
spin_lock_irqsave(&irq->irq_lock, flags);
print_irq_state(s, irq, vcpu);
spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(kvm, irq);
return 0;
}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 2673efce65f3..c0c0b88af1d5 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -175,10 +175,13 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
irq->vcpu = NULL;
irq->target_vcpu = vcpu0;
kref_init(&irq->refcount);
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
irq->targets = 0;
- else
+ irq->group = 0;
+ } else {
irq->mpidr = 0;
+ irq->group = 1;
+ }
}
return 0;
}
@@ -227,6 +230,18 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
/* PPIs */
irq->config = VGIC_CONFIG_LEVEL;
}
+
+ /*
+ * GICv3 can only be created via the KVM_DEVICE_CREATE API and
+ * so we always know the emulation type at this point as it's
+ * either explicitly configured as GICv3, or explicitly
+ * configured as GICv2, or not configured yet which also
+ * implies GICv2.
+ */
+ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ irq->group = 1;
+ else
+ irq->group = 0;
}
if (!irqchip_in_kernel(vcpu->kvm))
@@ -271,6 +286,10 @@ int vgic_init(struct kvm *kvm)
if (vgic_initialized(kvm))
return 0;
+ /* Are we also in the middle of creating a VCPU? */
+ if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+ return -EBUSY;
+
/* freeze the number of spis */
if (!dist->nr_spis)
dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
@@ -294,6 +313,7 @@ int vgic_init(struct kvm *kvm)
vgic_debug_init(kvm);
+ dist->implementation_rev = 2;
dist->initialized = true;
out:
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4ed79c939fb4..12502251727e 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -71,6 +71,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
kref_init(&irq->refcount);
irq->intid = intid;
irq->target_vcpu = vcpu;
+ irq->group = 1;
spin_lock_irqsave(&dist->lpi_list_lock, flags);
@@ -168,8 +169,14 @@ struct vgic_its_abi {
int (*commit)(struct vgic_its *its);
};
+#define ABI_0_ESZ 8
+#define ESZ_MAX ABI_0_ESZ
+
static const struct vgic_its_abi its_table_abi_versions[] = {
- [0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8,
+ [0] = {
+ .cte_esz = ABI_0_ESZ,
+ .dte_esz = ABI_0_ESZ,
+ .ite_esz = ABI_0_ESZ,
.save_tables = vgic_its_save_tables_v0,
.restore_tables = vgic_its_restore_tables_v0,
.commit = vgic_its_commit_v0,
@@ -183,7 +190,7 @@ inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
return &its_table_abi_versions[its->abi_rev];
}
-int vgic_its_set_abi(struct vgic_its *its, int rev)
+static int vgic_its_set_abi(struct vgic_its *its, u32 rev)
{
const struct vgic_its_abi *abi;
@@ -312,9 +319,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
* enumerate those LPIs without holding any lock.
* Returns their number and puts the kmalloc'ed array into intid_ptr.
*/
-static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
+int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq;
unsigned long flags;
u32 *intids;
@@ -337,7 +344,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
if (i == irq_count)
break;
/* We don't need to "get" the IRQ, as we hold the list lock. */
- if (irq->target_vcpu != vcpu)
+ if (vcpu && irq->target_vcpu != vcpu)
continue;
intids[i++] = irq->intid;
}
@@ -429,7 +436,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
unsigned long flags;
u8 pendmask;
- nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
+ nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
if (nr_irqs < 0)
return nr_irqs;
@@ -1154,7 +1161,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
- irq_count = vgic_copy_lpi_list(vcpu, &intids);
+ irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
if (irq_count < 0)
return irq_count;
@@ -1202,7 +1209,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
vcpu1 = kvm_get_vcpu(kvm, target1_addr);
vcpu2 = kvm_get_vcpu(kvm, target2_addr);
- irq_count = vgic_copy_lpi_list(vcpu1, &intids);
+ irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
if (irq_count < 0)
return irq_count;
@@ -1881,14 +1888,14 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
* Return: < 0 on error, 0 if last element was identified, 1 otherwise
* (the last element may not be found on second level tables)
*/
-static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
+static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
int start_id, entry_fn_t fn, void *opaque)
{
struct kvm *kvm = its->dev->kvm;
unsigned long len = size;
int id = start_id;
gpa_t gpa = base;
- char entry[esz];
+ char entry[ESZ_MAX];
int ret;
memset(entry, 0, esz);
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index ffc587bf4742..738b65d2d0e7 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -22,22 +22,33 @@
#include "vgic.h"
#include "vgic-mmio.h"
+/*
+ * The Revision field in the IIDR have the following meanings:
+ *
+ * Revision 1: Report GICv2 interrupts as group 0 instead of group 1
+ * Revision 2: Interrupt groups are guest-configurable and signaled using
+ * their configured groups.
+ */
+
static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
+ struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
u32 value;
switch (addr & 0x0c) {
case GIC_DIST_CTRL:
- value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
+ value = vgic->enabled ? GICD_ENABLE : 0;
break;
case GIC_DIST_CTR:
- value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+ value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
value = (value >> 5) - 1;
value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
break;
case GIC_DIST_IIDR:
- value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+ value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+ (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
+ (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
break;
default:
return 0;
@@ -66,6 +77,42 @@ static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
}
}
+static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ switch (addr & 0x0c) {
+ case GIC_DIST_IIDR:
+ if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
+ return -EINVAL;
+
+ /*
+ * If we observe a write to GICD_IIDR we know that userspace
+ * has been updated and has had a chance to cope with older
+ * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting
+ * interrupts as group 1, and therefore we now allow groups to
+ * be user writable. Doing this by default would break
+ * migration from old kernels to new kernels with legacy
+ * userspace.
+ */
+ vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
+ return 0;
+ }
+
+ vgic_mmio_write_v2_misc(vcpu, addr, len, val);
+ return 0;
+}
+
+static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ if (vcpu->kvm->arch.vgic.v2_groups_user_writable)
+ vgic_mmio_write_group(vcpu, addr, len, val);
+
+ return 0;
+}
+
static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
@@ -352,17 +399,22 @@ static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu,
if (n > vgic_v3_max_apr_idx(vcpu))
return;
+
+ n = array_index_nospec(n, 4);
+
/* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
vgicv3->vgic_ap1r[n] = val;
}
}
static const struct vgic_register_region vgic_v2_dist_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
- vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
- VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL,
+ vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc,
+ NULL, vgic_mmio_uaccess_write_v2_misc,
+ 12, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
- vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
+ vgic_mmio_read_group, vgic_mmio_write_group,
+ NULL, vgic_mmio_uaccess_write_v2_group, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 287784095b5b..a2a175b08b17 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -59,19 +59,27 @@ bool vgic_supports_direct_msis(struct kvm *kvm)
return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
}
+/*
+ * The Revision field in the IIDR have the following meanings:
+ *
+ * Revision 2: Interrupt groups are guest-configurable and signaled using
+ * their configured groups.
+ */
+
static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
+ struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
u32 value = 0;
switch (addr & 0x0c) {
case GICD_CTLR:
- if (vcpu->kvm->arch.vgic.enabled)
+ if (vgic->enabled)
value |= GICD_CTLR_ENABLE_SS_G1;
value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
break;
case GICD_TYPER:
- value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+ value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
value = (value >> 5) - 1;
if (vgic_has_its(vcpu->kvm)) {
value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
@@ -81,7 +89,9 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
}
break;
case GICD_IIDR:
- value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+ value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+ (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
+ (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
break;
default:
return 0;
@@ -110,6 +120,20 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
}
}
+static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ switch (addr & 0x0c) {
+ case GICD_IIDR:
+ if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
+ return -EINVAL;
+ }
+
+ vgic_mmio_write_v3_misc(vcpu, addr, len, val);
+ return 0;
+}
+
static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
@@ -246,9 +270,9 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
return value;
}
-static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
+static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
@@ -273,6 +297,8 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
vgic_put_irq(vcpu->kvm, irq);
}
+
+ return 0;
}
/* We want to avoid outer shareable. */
@@ -444,14 +470,15 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
}
static const struct vgic_register_region vgic_v3_dist_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
- vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
- VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR,
+ vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc,
+ NULL, vgic_mmio_uaccess_write_v3_misc,
+ 16, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICD_STATUSR,
vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
- vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
+ vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
@@ -465,7 +492,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
+ vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
vgic_mmio_read_active, vgic_mmio_write_sactive,
@@ -524,7 +551,7 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
+ vgic_mmio_read_group, vgic_mmio_write_group, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
@@ -538,7 +565,7 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_sactive,
@@ -873,7 +900,8 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
/**
* vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
* @vcpu: The VCPU requesting a SGI
- * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
+ * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU
+ * @allow_group1: Does the sysreg access allow generation of G1 SGIs
*
* With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
* This will trap in sys_regs.c and call this function.
@@ -883,7 +911,7 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
* check for matching ones. If this bit is set, we signal all, but not the
* calling VCPU.
*/
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *c_vcpu;
@@ -932,9 +960,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ /*
+ * An access targetting Group0 SGIs can only generate
+ * those, while an access targetting Group1 SGIs can
+ * generate interrupts of either group.
+ */
+ if (!irq->group || allow_group1) {
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ } else {
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
+ }
+
vgic_put_irq(vcpu->kvm, irq);
}
}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index ff9655cfeb2f..f56ff1cf52ec 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -40,6 +40,51 @@ void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
/* Ignore */
}
+int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val)
+{
+ /* Ignore */
+ return 0;
+}
+
+unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ u32 value = 0;
+ int i;
+
+ /* Loop over all IRQs affected by this read */
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ if (irq->group)
+ value |= BIT(i);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return value;
+}
+
+void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+ unsigned long flags;
+
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock_irqsave(&irq->irq_lock, flags);
+ irq->group = !!(val & BIT(i));
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
/*
* Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
* of the enabled bit, so there is only one function for both here.
@@ -363,11 +408,12 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
mutex_unlock(&vcpu->kvm->lock);
}
-void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
__vgic_mmio_write_cactive(vcpu, addr, len, val);
+ return 0;
}
static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
@@ -399,11 +445,12 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
mutex_unlock(&vcpu->kvm->lock);
}
-void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
__vgic_mmio_write_sactive(vcpu, addr, len, val);
+ return 0;
}
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
@@ -735,10 +782,9 @@ static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
if (region->uaccess_write)
- region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
- else
- region->write(r_vcpu, addr, sizeof(u32), *val);
+ return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
+ region->write(r_vcpu, addr, sizeof(u32), *val);
return 0;
}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 5693f6df45ec..a07f90acdaec 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -37,8 +37,8 @@ struct vgic_register_region {
unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
unsigned int len);
union {
- void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
- unsigned int len, unsigned long val);
+ int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val);
int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its,
gpa_t addr, unsigned int len,
unsigned long val);
@@ -134,6 +134,15 @@ unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
unsigned int len, unsigned long val);
+int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val);
+
+unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len);
+
+void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val);
+
unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
@@ -167,13 +176,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
-void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
+int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
-void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val);
+int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index a5f2e44f1c33..69b892abd7dc 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -62,7 +62,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
int lr;
- unsigned long flags;
+
+ DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
@@ -83,7 +84,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
- spin_lock_irqsave(&irq->irq_lock, flags);
+ spin_lock(&irq->irq_lock);
/* Always preserve the active bit */
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
@@ -126,7 +127,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_irq_set_phys_active(irq, false);
}
- spin_unlock_irqrestore(&irq->irq_lock, flags);
+ spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -159,6 +160,9 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
}
+ if (irq->group)
+ val |= GICH_LR_GROUP1;
+
if (irq->hw) {
val |= GICH_LR_HW;
val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index cdce653e3c47..9c0dd234ebe8 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -46,7 +46,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 model = vcpu->kvm->arch.vgic.vgic_model;
int lr;
- unsigned long flags;
+
+ DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
cpuif->vgic_hcr &= ~ICH_HCR_UIE;
@@ -75,7 +76,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
if (!irq) /* An LPI could have been unmapped. */
continue;
- spin_lock_irqsave(&irq->irq_lock, flags);
+ spin_lock(&irq->irq_lock);
/* Always preserve the active bit */
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
@@ -118,7 +119,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_irq_set_phys_active(irq, false);
}
- spin_unlock_irqrestore(&irq->irq_lock, flags);
+ spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -197,11 +198,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
irq->line_level = false;
- /*
- * We currently only support Group1 interrupts, which is a
- * known defect. This needs to be addressed at some point.
- */
- if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ if (irq->group)
val |= ICH_LR_GROUP;
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 33c8325c8f35..7cfdfbc910e0 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -28,12 +28,6 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
-#else
-#define DEBUG_SPINLOCK_BUG_ON(p)
-#endif
-
struct vgic_global kvm_vgic_global_state __ro_after_init = {
.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};
@@ -599,10 +593,11 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq, *tmp;
- unsigned long flags;
+
+ DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
retry:
- spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
+ spin_lock(&vgic_cpu->ap_list_lock);
list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
@@ -643,7 +638,7 @@ retry:
/* This interrupt looks like it has to be migrated. */
spin_unlock(&irq->irq_lock);
- spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+ spin_unlock(&vgic_cpu->ap_list_lock);
/*
* Ensure locking order by always locking the smallest
@@ -657,7 +652,7 @@ retry:
vcpuB = vcpu;
}
- spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
+ spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
SINGLE_DEPTH_NESTING);
spin_lock(&irq->irq_lock);
@@ -682,7 +677,7 @@ retry:
spin_unlock(&irq->irq_lock);
spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
- spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
+ spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
if (target_vcpu_needs_kick) {
kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
@@ -692,7 +687,7 @@ retry:
goto retry;
}
- spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+ spin_unlock(&vgic_cpu->ap_list_lock);
}
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index ead00b2072b2..a90024718ca4 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -103,6 +103,12 @@
#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52)
#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
+#else
+#define DEBUG_SPINLOCK_BUG_ON(p)
+#endif
+
/* Requires the irq_lock to be held by the caller. */
static inline bool irq_is_pending(struct vgic_irq *irq)
{
@@ -305,6 +311,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
(base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
}
+int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3d233ebfbee9..f986e31fa68c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -140,9 +140,10 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
-__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end, bool blockable)
{
+ return 0;
}
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
@@ -273,7 +274,8 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
* barrier here.
*/
- if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+ if (!kvm_arch_flush_remote_tlb(kvm)
+ || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
@@ -359,13 +361,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
srcu_read_unlock(&kvm->srcu, idx);
}
-static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
+ int ret;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
@@ -383,9 +387,11 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
spin_unlock(&kvm->mmu_lock);
- kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+ ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable);
srcu_read_unlock(&kvm->srcu, idx);
+
+ return ret;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -1169,7 +1175,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
n = kvm_dirty_bitmap_bytes(memslot);
- dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+ dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
memset(dirty_bitmap_buffer, 0, n);
spin_lock(&kvm->mmu_lock);
@@ -1342,18 +1348,16 @@ static inline int check_user_page_hwpoison(unsigned long addr)
}
/*
- * The atomic path to get the writable pfn which will be stored in @pfn,
- * true indicates success, otherwise false is returned.
+ * The fast path to get the writable pfn which will be stored in @pfn,
+ * true indicates success, otherwise false is returned. It's also the
+ * only part that runs if we can are in atomic context.
*/
-static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
- bool write_fault, bool *writable, kvm_pfn_t *pfn)
+static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
+ bool *writable, kvm_pfn_t *pfn)
{
struct page *page[1];
int npages;
- if (!(async || atomic))
- return false;
-
/*
* Fast pin a writable pfn only if it is a write fault request
* or the caller allows to map a writable pfn for a read fault
@@ -1497,7 +1501,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
/* we can do it either atomically or asynchronously, not both */
BUG_ON(atomic && async);
- if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
+ if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
return pfn;
if (atomic)
@@ -2127,16 +2131,22 @@ static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
+ int ret = -EINTR;
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
+
if (kvm_arch_vcpu_runnable(vcpu)) {
kvm_make_request(KVM_REQ_UNHALT, vcpu);
- return -EINTR;
+ goto out;
}
if (kvm_cpu_has_pending_timer(vcpu))
- return -EINTR;
+ goto out;
if (signal_pending(current))
- return -EINTR;
+ goto out;
- return 0;
+ ret = 0;
+out:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ return ret;
}
/*
@@ -2563,7 +2573,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (arg)
goto out;
oldpid = rcu_access_pointer(vcpu->pid);
- if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
+ if (unlikely(oldpid != task_pid(current))) {
/* The thread running this VCPU changed. */
struct pid *newpid;