summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
authorMichal Marek <mmarek@suse.cz>2010-10-28 00:15:57 +0200
committerMichal Marek <mmarek@suse.cz>2010-10-28 00:15:57 +0200
commitb74b953b998bcc2db91b694446f3a2619ec32de6 (patch)
tree6ce24caabd730f6ae9287ed0676ec32e6ff31e9d /virt/kvm
parentscripts/namespace.pl: improve to get more correct results (diff)
parentLinux 2.6.36 (diff)
downloadlinux-b74b953b998bcc2db91b694446f3a2619ec32de6.tar.xz
linux-b74b953b998bcc2db91b694446f3a2619ec32de6.zip
Merge commit 'v2.6.36' into kbuild/misc
Update to be able to fix a recent change to scripts/basic/docproc.c (commit eda603f).
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/assigned-dev.c28
-rw-r--r--virt/kvm/coalesced_mmio.c51
-rw-r--r--virt/kvm/coalesced_mmio.h15
-rw-r--r--virt/kvm/eventfd.c26
-rw-r--r--virt/kvm/ioapic.c75
-rw-r--r--virt/kvm/ioapic.h4
-rw-r--r--virt/kvm/iommu.c153
-rw-r--r--virt/kvm/irq_comm.c16
-rw-r--r--virt/kvm/kvm_main.c553
10 files changed, 640 insertions, 284 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index daece36c0a57..7f1178f6b839 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -12,3 +12,6 @@ config HAVE_KVM_EVENTFD
config KVM_APIC_ARCHITECTURE
bool
+
+config KVM_MMIO
+ bool
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f73de631e3ee..7c98928b09d9 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -1,7 +1,7 @@
/*
* Kernel-based Virtual Machine - device assignment support
*
- * Copyright (C) 2006-9 Red Hat, Inc
+ * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
@@ -16,6 +16,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
+#include <linux/slab.h>
#include "irq.h"
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
@@ -57,12 +58,10 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
struct kvm_assigned_dev_kernel *assigned_dev;
- struct kvm *kvm;
int i;
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
interrupt_work);
- kvm = assigned_dev->kvm;
spin_lock_irq(&assigned_dev->assigned_dev_lock);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
@@ -315,12 +314,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
kvm_assigned_dev_intr, 0,
"kvm_assigned_msix_device",
(void *)dev);
- /* FIXME: free requested_irq's on failure */
if (r)
- return r;
+ goto err;
}
return 0;
+err:
+ for (i -= 1; i >= 0; i--)
+ free_irq(dev->host_msix_entries[i].vector, (void *)dev);
+ pci_disable_msix(dev->dev);
+ return r;
}
#endif
@@ -443,9 +446,6 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *match;
unsigned long host_irq_type, guest_irq_type;
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
if (!irqchip_in_kernel(kvm))
return r;
@@ -504,12 +504,12 @@ out:
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
- int r = 0;
+ int r = 0, idx;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
mutex_lock(&kvm->lock);
- down_read(&kvm->slots_lock);
+ idx = srcu_read_lock(&kvm->srcu);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
@@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
r = -ENOMEM;
goto out;
}
- dev = pci_get_bus_and_slot(assigned_dev->busnr,
+ dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
+ assigned_dev->busnr,
assigned_dev->devfn);
if (!dev) {
printk(KERN_INFO "%s: host device not found\n", __func__);
@@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
pci_reset_function(dev);
match->assigned_dev_id = assigned_dev->assigned_dev_id;
+ match->host_segnr = assigned_dev->segnr;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->flags = assigned_dev->flags;
@@ -573,7 +575,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
}
out:
- up_read(&kvm->slots_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
return r;
out_list_del:
@@ -585,7 +587,7 @@ out_put:
pci_dev_put(dev);
out_free:
kfree(match);
- up_read(&kvm->slots_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
return r;
}
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 04d69cd7049b..fc8487564d1f 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -2,6 +2,7 @@
* KVM coalesced MMIO
*
* Copyright (c) 2008 Bull S.A.S.
+ * Copyright 2009 Red Hat, Inc. and/or its affiliates.
*
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
@@ -10,6 +11,7 @@
#include "iodev.h"
#include <linux/kvm_host.h>
+#include <linux/slab.h>
#include <linux/kvm.h>
#include "coalesced_mmio.h"
@@ -92,41 +94,66 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
int kvm_coalesced_mmio_init(struct kvm *kvm)
{
struct kvm_coalesced_mmio_dev *dev;
+ struct page *page;
int ret;
+ ret = -ENOMEM;
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto out_err;
+ kvm->coalesced_mmio_ring = page_address(page);
+
+ ret = -ENOMEM;
dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
if (!dev)
- return -ENOMEM;
+ goto out_free_page;
spin_lock_init(&dev->lock);
kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
dev->kvm = kvm;
kvm->coalesced_mmio_dev = dev;
- ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+ mutex_unlock(&kvm->slots_lock);
if (ret < 0)
- kfree(dev);
+ goto out_free_dev;
+
+ return ret;
+out_free_dev:
+ kvm->coalesced_mmio_dev = NULL;
+ kfree(dev);
+out_free_page:
+ kvm->coalesced_mmio_ring = NULL;
+ __free_page(page);
+out_err:
return ret;
}
+void kvm_coalesced_mmio_free(struct kvm *kvm)
+{
+ if (kvm->coalesced_mmio_ring)
+ free_page((unsigned long)kvm->coalesced_mmio_ring);
+}
+
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
- struct kvm_coalesced_mmio_zone *zone)
+ struct kvm_coalesced_mmio_zone *zone)
{
struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
if (dev == NULL)
- return -EINVAL;
+ return -ENXIO;
- down_write(&kvm->slots_lock);
+ mutex_lock(&kvm->slots_lock);
if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
return -ENOBUFS;
}
dev->zone[dev->nb_zones] = *zone;
dev->nb_zones++;
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
return 0;
}
@@ -138,12 +165,12 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *z;
if (dev == NULL)
- return -EINVAL;
+ return -ENXIO;
- down_write(&kvm->slots_lock);
+ mutex_lock(&kvm->slots_lock);
i = dev->nb_zones;
- while(i) {
+ while (i) {
z = &dev->zone[i - 1];
/* unregister all zones
@@ -158,7 +185,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
i--;
}
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
return 0;
}
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 4b49f27fa31e..8a5959e3535f 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -1,3 +1,6 @@
+#ifndef __KVM_COALESCED_MMIO_H__
+#define __KVM_COALESCED_MMIO_H__
+
/*
* KVM coalesced MMIO
*
@@ -7,6 +10,8 @@
*
*/
+#ifdef CONFIG_KVM_MMIO
+
#define KVM_COALESCED_MMIO_ZONE_MAX 100
struct kvm_coalesced_mmio_dev {
@@ -18,7 +23,17 @@ struct kvm_coalesced_mmio_dev {
};
int kvm_coalesced_mmio_init(struct kvm *kvm);
+void kvm_coalesced_mmio_free(struct kvm *kvm);
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
+
+#else
+
+static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
+static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
+
+#endif
+
+#endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a9d3fc6c681c..c1f1e3c62984 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -2,6 +2,7 @@
* kvm eventfd support - use eventfd objects to signal various KVM events
*
* Copyright 2009 Novell. All Rights Reserved.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Author:
* Gregory Haskins <ghaskins@novell.com>
@@ -30,6 +31,7 @@
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include "iodev.h"
@@ -47,7 +49,6 @@ struct _irqfd {
int gsi;
struct list_head list;
poll_table pt;
- wait_queue_head_t *wqh;
wait_queue_t wait;
struct work_struct inject;
struct work_struct shutdown;
@@ -159,8 +160,6 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
-
- irqfd->wqh = wqh;
add_wait_queue(wqh, &irqfd->wait);
}
@@ -219,7 +218,6 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
events = file->f_op->poll(file, &irqfd->pt);
list_add_tail(&irqfd->list, &kvm->irqfds.items);
- spin_unlock_irq(&kvm->irqfds.lock);
/*
* Check if there was an event already pending on the eventfd
@@ -228,6 +226,8 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
if (events & POLLIN)
schedule_work(&irqfd->inject);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
/*
* do not drop the file until the irqfd is fully initialized, otherwise
* we might race against the POLLHUP
@@ -463,7 +463,7 @@ static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
- struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+ enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p;
struct eventfd_ctx *eventfd;
int ret;
@@ -508,7 +508,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
else
p->wildcard = true;
- down_write(&kvm->slots_lock);
+ mutex_lock(&kvm->slots_lock);
/* Verify that there isnt a match already */
if (ioeventfd_check_collision(kvm, p)) {
@@ -518,18 +518,18 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
- ret = __kvm_io_bus_register_dev(bus, &p->dev);
+ ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
if (ret < 0)
goto unlock_fail;
list_add_tail(&p->list, &kvm->ioeventfds);
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
return 0;
unlock_fail:
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
fail:
kfree(p);
@@ -542,7 +542,7 @@ static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
- struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+ enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p, *tmp;
struct eventfd_ctx *eventfd;
int ret = -ENOENT;
@@ -551,7 +551,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (IS_ERR(eventfd))
return PTR_ERR(eventfd);
- down_write(&kvm->slots_lock);
+ mutex_lock(&kvm->slots_lock);
list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
@@ -565,13 +565,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (!p->wildcard && p->datamatch != args->datamatch)
continue;
- __kvm_io_bus_unregister_dev(bus, &p->dev);
+ kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
ioeventfd_release(p);
ret = 0;
break;
}
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
eventfd_ctx_put(eventfd);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 38a2d20b89de..0b9df8303dcf 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2001 MandrakeSoft S.A.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* MandrakeSoft S.A.
* 43, rue d'Aboukir
@@ -33,6 +34,7 @@
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
+#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -100,6 +102,19 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
return injected;
}
+static void update_handled_vectors(struct kvm_ioapic *ioapic)
+{
+ DECLARE_BITMAP(handled_vectors, 256);
+ int i;
+
+ memset(handled_vectors, 0, sizeof(handled_vectors));
+ for (i = 0; i < IOAPIC_NUM_PINS; ++i)
+ __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
+ memcpy(ioapic->handled_vectors, handled_vectors,
+ sizeof(handled_vectors));
+ smp_wmb();
+}
+
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
@@ -134,9 +149,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
e->bits |= (u32) val;
e->fields.remote_irr = 0;
}
+ update_handled_vectors(ioapic);
mask_after = e->fields.mask;
if (mask_before != mask_after)
- kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
+ kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index);
@@ -177,12 +193,13 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
{
- u32 old_irr = ioapic->irr;
+ u32 old_irr;
u32 mask = 1 << irq;
union kvm_ioapic_redirect_entry entry;
int ret = 1;
- mutex_lock(&ioapic->lock);
+ spin_lock(&ioapic->lock);
+ old_irr = ioapic->irr;
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
level ^= entry.fields.polarity;
@@ -199,7 +216,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
}
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
}
- mutex_unlock(&ioapic->lock);
+ spin_unlock(&ioapic->lock);
return ret;
}
@@ -223,9 +240,9 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
* is dropped it will be put into irr and will be delivered
* after ack notifier returns.
*/
- mutex_unlock(&ioapic->lock);
+ spin_unlock(&ioapic->lock);
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
- mutex_lock(&ioapic->lock);
+ spin_lock(&ioapic->lock);
if (trigger_mode != IOAPIC_LEVEL_TRIG)
continue;
@@ -241,9 +258,12 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
- mutex_lock(&ioapic->lock);
+ smp_rmb();
+ if (!test_bit(vector, ioapic->handled_vectors))
+ return;
+ spin_lock(&ioapic->lock);
__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
- mutex_unlock(&ioapic->lock);
+ spin_unlock(&ioapic->lock);
}
static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -269,7 +289,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff;
- mutex_lock(&ioapic->lock);
+ spin_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
result = ioapic->ioregsel;
@@ -283,7 +303,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
result = 0;
break;
}
- mutex_unlock(&ioapic->lock);
+ spin_unlock(&ioapic->lock);
switch (len) {
case 8:
@@ -320,7 +340,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
}
addr &= 0xff;
- mutex_lock(&ioapic->lock);
+ spin_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
ioapic->ioregsel = data;
@@ -338,7 +358,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
default:
break;
}
- mutex_unlock(&ioapic->lock);
+ spin_unlock(&ioapic->lock);
return 0;
}
@@ -352,6 +372,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic->ioregsel = 0;
ioapic->irr = 0;
ioapic->id = 0;
+ update_handled_vectors(ioapic);
}
static const struct kvm_io_device_ops ioapic_mmio_ops = {
@@ -367,27 +388,42 @@ int kvm_ioapic_init(struct kvm *kvm)
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
if (!ioapic)
return -ENOMEM;
- mutex_init(&ioapic->lock);
+ spin_lock_init(&ioapic->lock);
kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
ioapic->kvm = kvm;
- ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
- if (ret < 0)
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret < 0) {
+ kvm->arch.vioapic = NULL;
kfree(ioapic);
+ }
return ret;
}
+void kvm_ioapic_destroy(struct kvm *kvm)
+{
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+ if (ioapic) {
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ kvm->arch.vioapic = NULL;
+ kfree(ioapic);
+ }
+}
+
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
if (!ioapic)
return -EINVAL;
- mutex_lock(&ioapic->lock);
+ spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
- mutex_unlock(&ioapic->lock);
+ spin_unlock(&ioapic->lock);
return 0;
}
@@ -397,8 +433,9 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
if (!ioapic)
return -EINVAL;
- mutex_lock(&ioapic->lock);
+ spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
- mutex_unlock(&ioapic->lock);
+ update_handled_vectors(ioapic);
+ spin_unlock(&ioapic->lock);
return 0;
}
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 419c43b667ab..0b190c34ccc3 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -45,7 +45,8 @@ struct kvm_ioapic {
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
- struct mutex lock;
+ spinlock_t lock;
+ DECLARE_BITMAP(handled_vectors, 256);
};
#ifdef DEBUG
@@ -71,6 +72,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 15147583abd1..62a9caf0563c 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -16,6 +16,8 @@
*
* Copyright (C) 2006-2008 Intel Corporation
* Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
* Author: Allen M. Kay <allen.m.kay@intel.com>
* Author: Weidong Han <weidong.han@intel.com>
* Author: Ben-Ami Yassour <benami@il.ibm.com>
@@ -32,12 +34,30 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages);
-int kvm_iommu_map_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
+static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, unsigned long size)
+{
+ gfn_t end_gfn;
+ pfn_t pfn;
+
+ pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
+ end_gfn = gfn + (size >> PAGE_SHIFT);
+ gfn += 1;
+
+ if (is_error_pfn(pfn))
+ return pfn;
+
+ while (gfn < end_gfn)
+ gfn_to_pfn_memslot(kvm, slot, gfn++);
+
+ return pfn;
+}
+
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- gfn_t gfn = base_gfn;
+ gfn_t gfn, end_gfn;
pfn_t pfn;
- int i, r = 0;
+ int r = 0;
struct iommu_domain *domain = kvm->arch.iommu_domain;
int flags;
@@ -45,44 +65,79 @@ int kvm_iommu_map_pages(struct kvm *kvm,
if (!domain)
return 0;
+ gfn = slot->base_gfn;
+ end_gfn = gfn + slot->npages;
+
flags = IOMMU_READ | IOMMU_WRITE;
if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
flags |= IOMMU_CACHE;
- for (i = 0; i < npages; i++) {
- /* check if already mapped */
- if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
+
+ while (gfn < end_gfn) {
+ unsigned long page_size;
+
+ /* Check if already mapped */
+ if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
+ gfn += 1;
continue;
+ }
+
+ /* Get the page size we could use to map */
+ page_size = kvm_host_page_size(kvm, gfn);
+
+ /* Make sure the page_size does not exceed the memslot */
+ while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
+ page_size >>= 1;
+
+ /* Make sure gfn is aligned to the page size we want to map */
+ while ((gfn << PAGE_SHIFT) & (page_size - 1))
+ page_size >>= 1;
+
+ /*
+ * Pin all pages we are about to map in memory. This is
+ * important because we unmap and unpin in 4kb steps later.
+ */
+ pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+ if (is_error_pfn(pfn)) {
+ gfn += 1;
+ continue;
+ }
- pfn = gfn_to_pfn(kvm, gfn);
- r = iommu_map_range(domain,
- gfn_to_gpa(gfn),
- pfn_to_hpa(pfn),
- PAGE_SIZE, flags);
+ /* Map into IO address space */
+ r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
+ get_order(page_size), flags);
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
- "iommu failed to map pfn=%lx\n", pfn);
+ "iommu failed to map pfn=%llx\n", pfn);
goto unmap_pages;
}
- gfn++;
+
+ gfn += page_size >> PAGE_SHIFT;
+
+
}
+
return 0;
unmap_pages:
- kvm_iommu_put_pages(kvm, base_gfn, i);
+ kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
return r;
}
static int kvm_iommu_map_memslots(struct kvm *kvm)
{
- int i, r = 0;
+ int i, idx, r = 0;
+ struct kvm_memslots *slots;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
- for (i = 0; i < kvm->nmemslots; i++) {
- r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
- kvm->memslots[i].npages);
+ for (i = 0; i < slots->nmemslots; i++) {
+ r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
if (r)
break;
}
+ srcu_read_unlock(&kvm->srcu, idx);
return r;
}
@@ -104,7 +159,8 @@ int kvm_assign_device(struct kvm *kvm,
r = iommu_attach_device(domain, &pdev->dev);
if (r) {
- printk(KERN_ERR "assign device %x:%x.%x failed",
+ printk(KERN_ERR "assign device %x:%x:%x.%x failed",
+ pci_domain_nr(pdev->bus),
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn));
@@ -125,7 +181,8 @@ int kvm_assign_device(struct kvm *kvm,
goto out_unmap;
}
- printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+ printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
+ assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
@@ -152,7 +209,8 @@ int kvm_deassign_device(struct kvm *kvm,
iommu_detach_device(domain, &pdev->dev);
- printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+ printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
+ assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
@@ -184,37 +242,62 @@ out_unmap:
return r;
}
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+ unsigned long i;
+
+ for (i = 0; i < npages; ++i)
+ kvm_release_pfn_clean(pfn + i);
+}
+
static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages)
{
- gfn_t gfn = base_gfn;
+ struct iommu_domain *domain;
+ gfn_t end_gfn, gfn;
pfn_t pfn;
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- unsigned long i;
u64 phys;
+ domain = kvm->arch.iommu_domain;
+ end_gfn = base_gfn + npages;
+ gfn = base_gfn;
+
/* check if iommu exists and in use */
if (!domain)
return;
- for (i = 0; i < npages; i++) {
+ while (gfn < end_gfn) {
+ unsigned long unmap_pages;
+ int order;
+
+ /* Get physical address */
phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
- pfn = phys >> PAGE_SHIFT;
- kvm_release_pfn_clean(pfn);
- gfn++;
- }
+ pfn = phys >> PAGE_SHIFT;
- iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
+ /* Unmap address from IO address space */
+ order = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
+ unmap_pages = 1ULL << order;
+
+ /* Unpin all pages we just unmapped to not leak any memory */
+ kvm_unpin_pages(kvm, pfn, unmap_pages);
+
+ gfn += unmap_pages;
+ }
}
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
- int i;
+ int i, idx;
+ struct kvm_memslots *slots;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
- for (i = 0; i < kvm->nmemslots; i++) {
- kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
- kvm->memslots[i].npages);
+ for (i = 0; i < slots->nmemslots; i++) {
+ kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
+ slots->memslots[i].npages);
}
+ srcu_read_unlock(&kvm->srcu, idx);
return 0;
}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9fd5b3ebc517..369e38010ad5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -17,9 +17,11 @@
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
*/
#include <linux/kvm_host.h>
+#include <linux/slab.h>
#include <trace/events/kvm.h>
#include <asm/msidef.h>
@@ -98,7 +100,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (r < 0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq);
- } else {
+ } else if (kvm_lapic_enabled(vcpu)) {
if (!lowest)
lowest = vcpu;
else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
@@ -277,15 +279,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
synchronize_rcu();
}
-void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+ bool mask)
{
struct kvm_irq_mask_notifier *kimn;
struct hlist_node *n;
+ int gsi;
rcu_read_lock();
- hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
- if (kimn->irq == irq)
- kimn->func(kimn, mask);
+ gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ if (gsi != -1)
+ hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
+ if (kimn->irq == gsi)
+ kimn->func(kimn, mask);
rcu_read_unlock();
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a944be392d6e..5186e728c53e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5,6 +5,7 @@
* machines without emulation or binary translation.
*
* Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -22,7 +23,6 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
-#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
@@ -44,6 +44,9 @@
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
+#include <linux/srcu.h>
+#include <linux/hugetlb.h>
+#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -51,9 +54,7 @@
#include <asm/pgtable.h>
#include <asm-generic/bitops/le.h>
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
-#endif
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
@@ -86,10 +87,18 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
static int hardware_enable_all(void);
static void hardware_disable_all(void);
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
static bool kvm_rebooting;
static bool largepages_enabled = true;
+static struct page *hwpoison_page;
+static pfn_t hwpoison_pfn;
+
+static struct page *fault_page;
+static pfn_t fault_pfn;
+
inline int kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn)) {
@@ -136,10 +145,10 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
- spin_lock(&kvm->requests_lock);
+ raw_spin_lock(&kvm->requests_lock);
me = smp_processor_id();
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (test_and_set_bit(req, &vcpu->requests))
+ if (kvm_make_check_request(req, vcpu))
continue;
cpu = vcpu->cpu;
if (cpus != NULL && cpu != -1 && cpu != me)
@@ -151,7 +160,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
smp_call_function_many(cpus, ack_flush, NULL, 1);
else
called = false;
- spin_unlock(&kvm->requests_lock);
+ raw_spin_unlock(&kvm->requests_lock);
free_cpumask_var(cpus);
return called;
}
@@ -215,7 +224,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
unsigned long address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- int need_tlb_flush;
+ int need_tlb_flush, idx;
/*
* When ->invalidate_page runs, the linux pte has been zapped
@@ -235,10 +244,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
* pte after kvm_unmap_hva returned, without noticing the page
* is going to be freed.
*/
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
need_tlb_flush = kvm_unmap_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -252,11 +263,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
pte_t pte)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int idx;
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
kvm_set_spte_hva(kvm, address, pte);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
}
static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -265,8 +279,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
unsigned long end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- int need_tlb_flush = 0;
+ int need_tlb_flush = 0, idx;
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
/*
* The count increase must become visible at unlock time as no
@@ -277,6 +292,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
for (; start < end; start += PAGE_SIZE)
need_tlb_flush |= kvm_unmap_hva(kvm, start);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -314,11 +330,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
unsigned long address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- int young;
+ int young, idx;
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
young = kvm_age_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
if (young)
kvm_flush_remote_tlbs(kvm);
@@ -330,7 +348,11 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
kvm_arch_flush_shadow(kvm);
+ srcu_read_unlock(&kvm->srcu, idx);
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
@@ -341,15 +363,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
};
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+ kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+ return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+}
+
+#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+ return 0;
+}
+
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
static struct kvm *kvm_create_vm(void)
{
- int r = 0;
+ int r = 0, i;
struct kvm *kvm = kvm_arch_create_vm();
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- struct page *page;
-#endif
if (IS_ERR(kvm))
goto out;
@@ -363,55 +396,48 @@ static struct kvm *kvm_create_vm(void)
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page) {
- r = -ENOMEM;
+ r = -ENOMEM;
+ kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ if (!kvm->memslots)
goto out_err;
- }
- kvm->coalesced_mmio_ring =
- (struct kvm_coalesced_mmio_ring *)page_address(page);
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
- {
- kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
- r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
- if (r) {
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- put_page(page);
-#endif
+ if (init_srcu_struct(&kvm->srcu))
+ goto out_err;
+ for (i = 0; i < KVM_NR_BUSES; i++) {
+ kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+ GFP_KERNEL);
+ if (!kvm->buses[i]) {
+ cleanup_srcu_struct(&kvm->srcu);
goto out_err;
}
}
-#endif
+
+ r = kvm_init_mmu_notifier(kvm);
+ if (r) {
+ cleanup_srcu_struct(&kvm->srcu);
+ goto out_err;
+ }
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
spin_lock_init(&kvm->mmu_lock);
- spin_lock_init(&kvm->requests_lock);
- kvm_io_bus_init(&kvm->pio_bus);
+ raw_spin_lock_init(&kvm->requests_lock);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
- kvm_io_bus_init(&kvm->mmio_bus);
- init_rwsem(&kvm->slots_lock);
+ mutex_init(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- kvm_coalesced_mmio_init(kvm);
-#endif
out:
return kvm;
-#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
- (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
out_err:
hardware_disable_all();
-#endif
out_err_nodisable:
+ for (i = 0; i < KVM_NR_BUSES; i++)
+ kfree(kvm->buses[i]);
+ kfree(kvm->memslots);
kfree(kvm);
return ERR_PTR(r);
}
@@ -446,13 +472,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
void kvm_free_physmem(struct kvm *kvm)
{
int i;
+ struct kvm_memslots *slots = kvm->memslots;
- for (i = 0; i < kvm->nmemslots; ++i)
- kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+ for (i = 0; i < slots->nmemslots; ++i)
+ kvm_free_physmem_slot(&slots->memslots[i], NULL);
+
+ kfree(kvm->memslots);
}
static void kvm_destroy_vm(struct kvm *kvm)
{
+ int i;
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
@@ -460,12 +490,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- kvm_io_bus_destroy(&kvm->pio_bus);
- kvm_io_bus_destroy(&kvm->mmio_bus);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- if (kvm->coalesced_mmio_ring != NULL)
- free_page((unsigned long)kvm->coalesced_mmio_ring);
-#endif
+ for (i = 0; i < KVM_NR_BUSES; i++)
+ kvm_io_bus_destroy(kvm->buses[i]);
+ kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
@@ -512,12 +539,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
- int r;
+ int r, flush_shadow = 0;
gfn_t base_gfn;
unsigned long npages;
unsigned long i;
struct kvm_memory_slot *memslot;
struct kvm_memory_slot old, new;
+ struct kvm_memslots *slots, *old_memslots;
r = -EINVAL;
/* General sanity checks */
@@ -532,15 +560,20 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
- memslot = &kvm->memslots[mem->slot];
+ memslot = &kvm->memslots->memslots[mem->slot];
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
+ r = -EINVAL;
+ if (npages > KVM_MEM_MAX_NR_PAGES)
+ goto out;
+
if (!npages)
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
new = old = *memslot;
+ new.id = mem->slot;
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem->flags;
@@ -553,7 +586,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Check for overlaps */
r = -EEXIST;
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *s = &kvm->memslots[i];
+ struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
if (s == memslot || !s->npages)
continue;
@@ -571,7 +604,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Allocate if a slot is being created */
#ifndef CONFIG_S390
if (npages && !new.rmap) {
- new.rmap = vmalloc(npages * sizeof(struct page *));
+ new.rmap = vmalloc(npages * sizeof(*new.rmap));
if (!new.rmap)
goto out_free;
@@ -579,15 +612,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
memset(new.rmap, 0, npages * sizeof(*new.rmap));
new.user_alloc = user_alloc;
- /*
- * hva_to_rmmap() serialzies with the mmu_lock and to be
- * safe it has to ignore memslots with !user_alloc &&
- * !userspace_addr.
- */
- if (user_alloc)
- new.userspace_addr = mem->userspace_addr;
- else
- new.userspace_addr = 0;
+ new.userspace_addr = mem->userspace_addr;
}
if (!npages)
goto skip_lpage;
@@ -604,9 +629,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (new.lpage_info[i])
continue;
- lpages = 1 + (base_gfn + npages - 1) /
- KVM_PAGES_PER_HPAGE(level);
- lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
+ lpages = 1 + ((base_gfn + npages - 1)
+ >> KVM_HPAGE_GFN_SHIFT(level));
+ lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
@@ -616,9 +641,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
memset(new.lpage_info[i], 0,
lpages * sizeof(*new.lpage_info[i]));
- if (base_gfn % KVM_PAGES_PER_HPAGE(level))
+ if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
new.lpage_info[i][0].write_count = 1;
- if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
+ if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
new.lpage_info[i][lpages - 1].write_count = 1;
ugfn = new.userspace_addr >> PAGE_SHIFT;
/*
@@ -636,14 +661,15 @@ skip_lpage:
/* Allocate page dirty bitmap if needed */
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
- unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
+ unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
new.dirty_bitmap = vmalloc(dirty_bytes);
if (!new.dirty_bitmap)
goto out_free;
memset(new.dirty_bitmap, 0, dirty_bytes);
+ /* destroy any largepage mappings for dirty tracking */
if (old.npages)
- kvm_arch_flush_shadow(kvm);
+ flush_shadow = 1;
}
#else /* not defined CONFIG_S390 */
new.user_alloc = user_alloc;
@@ -651,36 +677,72 @@ skip_lpage:
new.userspace_addr = mem->userspace_addr;
#endif /* not defined CONFIG_S390 */
- if (!npages)
+ if (!npages) {
+ r = -ENOMEM;
+ slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ if (!slots)
+ goto out_free;
+ memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+ if (mem->slot >= slots->nmemslots)
+ slots->nmemslots = mem->slot + 1;
+ slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+ old_memslots = kvm->memslots;
+ rcu_assign_pointer(kvm->memslots, slots);
+ synchronize_srcu_expedited(&kvm->srcu);
+ /* From this point no new shadow pages pointing to a deleted
+ * memslot will be created.
+ *
+ * validation of sp->gfn happens in:
+ * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+ * - kvm_is_visible_gfn (mmu_check_roots)
+ */
kvm_arch_flush_shadow(kvm);
+ kfree(old_memslots);
+ }
- spin_lock(&kvm->mmu_lock);
- if (mem->slot >= kvm->nmemslots)
- kvm->nmemslots = mem->slot + 1;
-
- *memslot = new;
- spin_unlock(&kvm->mmu_lock);
-
- r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
- if (r) {
- spin_lock(&kvm->mmu_lock);
- *memslot = old;
- spin_unlock(&kvm->mmu_lock);
+ r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+ if (r)
goto out_free;
- }
- kvm_free_physmem_slot(&old, npages ? &new : NULL);
- /* Slot deletion case: we have to update the current slot */
- spin_lock(&kvm->mmu_lock);
- if (!npages)
- *memslot = old;
- spin_unlock(&kvm->mmu_lock);
#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
- r = kvm_iommu_map_pages(kvm, base_gfn, npages);
- if (r)
- goto out;
+ if (npages) {
+ r = kvm_iommu_map_pages(kvm, &new);
+ if (r)
+ goto out_free;
+ }
#endif
+
+ r = -ENOMEM;
+ slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ if (!slots)
+ goto out_free;
+ memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+ if (mem->slot >= slots->nmemslots)
+ slots->nmemslots = mem->slot + 1;
+
+ /* actual memory is freed via old in kvm_free_physmem_slot below */
+ if (!npages) {
+ new.rmap = NULL;
+ new.dirty_bitmap = NULL;
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+ new.lpage_info[i] = NULL;
+ }
+
+ slots->memslots[mem->slot] = new;
+ old_memslots = kvm->memslots;
+ rcu_assign_pointer(kvm->memslots, slots);
+ synchronize_srcu_expedited(&kvm->srcu);
+
+ kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+
+ kvm_free_physmem_slot(&old, &new);
+ kfree(old_memslots);
+
+ if (flush_shadow)
+ kvm_arch_flush_shadow(kvm);
+
return 0;
out_free:
@@ -697,9 +759,9 @@ int kvm_set_memory_region(struct kvm *kvm,
{
int r;
- down_write(&kvm->slots_lock);
+ mutex_lock(&kvm->slots_lock);
r = __kvm_set_memory_region(kvm, mem, user_alloc);
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -719,19 +781,19 @@ int kvm_get_dirty_log(struct kvm *kvm,
{
struct kvm_memory_slot *memslot;
int r, i;
- int n;
+ unsigned long n;
unsigned long any = 0;
r = -EINVAL;
if (log->slot >= KVM_MEMORY_SLOTS)
goto out;
- memslot = &kvm->memslots[log->slot];
+ memslot = &kvm->memslots->memslots[log->slot];
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
- n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+ n = kvm_dirty_bitmap_bytes(memslot);
for (i = 0; !any && i < n/sizeof(long); ++i)
any = memslot->dirty_bitmap[i];
@@ -756,16 +818,28 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
int is_error_page(struct page *page)
{
- return page == bad_page;
+ return page == bad_page || page == hwpoison_page || page == fault_page;
}
EXPORT_SYMBOL_GPL(is_error_page);
int is_error_pfn(pfn_t pfn)
{
- return pfn == bad_pfn;
+ return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
}
EXPORT_SYMBOL_GPL(is_error_pfn);
+int is_hwpoison_pfn(pfn_t pfn)
+{
+ return pfn == hwpoison_pfn;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
+
+int is_fault_pfn(pfn_t pfn)
+{
+ return pfn == fault_pfn;
+}
+EXPORT_SYMBOL_GPL(is_fault_pfn);
+
static inline unsigned long bad_hva(void)
{
return PAGE_OFFSET;
@@ -777,12 +851,13 @@ int kvm_is_error_hva(unsigned long addr)
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
int i;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
- for (i = 0; i < kvm->nmemslots; ++i) {
- struct kvm_memory_slot *memslot = &kvm->memslots[i];
+ for (i = 0; i < slots->nmemslots; ++i) {
+ struct kvm_memory_slot *memslot = &slots->memslots[i];
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
@@ -790,21 +865,18 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
}
return NULL;
}
-EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
-
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
-{
- gfn = unalias_gfn(kvm, gfn);
- return gfn_to_memslot_unaliased(kvm, gfn);
-}
+EXPORT_SYMBOL_GPL(gfn_to_memslot);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
int i;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
- gfn = unalias_gfn(kvm, gfn);
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *memslot = &kvm->memslots[i];
+ struct kvm_memory_slot *memslot = &slots->memslots[i];
+
+ if (memslot->flags & KVM_MEMSLOT_INVALID)
+ continue;
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
@@ -814,46 +886,90 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+ struct vm_area_struct *vma;
+ unsigned long addr, size;
+
+ size = PAGE_SIZE;
+
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr))
+ return PAGE_SIZE;
+
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, addr);
+ if (!vma)
+ goto out;
+
+ size = vma_kernel_pagesize(vma);
+
+out:
+ up_read(&current->mm->mmap_sem);
+
+ return size;
+}
+
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+ int i;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *memslot = NULL;
+
+ for (i = 0; i < slots->nmemslots; ++i) {
+ memslot = &slots->memslots[i];
+
+ if (gfn >= memslot->base_gfn
+ && gfn < memslot->base_gfn + memslot->npages)
+ break;
+ }
+
+ return memslot - slots->memslots;
+}
+
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot;
- gfn = unalias_gfn(kvm, gfn);
- slot = gfn_to_memslot_unaliased(kvm, gfn);
- if (!slot)
+ slot = gfn_to_memslot(kvm, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return bad_hva();
- return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+ return gfn_to_hva_memslot(slot, gfn);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
{
struct page *page[1];
- unsigned long addr;
int npages;
pfn_t pfn;
might_sleep();
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr)) {
- get_page(bad_page);
- return page_to_pfn(bad_page);
- }
-
npages = get_user_pages_fast(addr, 1, 1, page);
if (unlikely(npages != 1)) {
struct vm_area_struct *vma;
down_read(&current->mm->mmap_sem);
+ if (is_hwpoison_address(addr)) {
+ up_read(&current->mm->mmap_sem);
+ get_page(hwpoison_page);
+ return page_to_pfn(hwpoison_page);
+ }
+
vma = find_vma(current->mm, addr);
if (vma == NULL || addr < vma->vm_start ||
!(vma->vm_flags & VM_PFNMAP)) {
up_read(&current->mm->mmap_sem);
- get_page(bad_page);
- return page_to_pfn(bad_page);
+ get_page(fault_page);
+ return page_to_pfn(fault_page);
}
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
@@ -865,8 +981,27 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
return pfn;
}
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+ unsigned long addr;
+
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr)) {
+ get_page(bad_page);
+ return page_to_pfn(bad_page);
+ }
+
+ return hva_to_pfn(kvm, addr);
+}
EXPORT_SYMBOL_GPL(gfn_to_pfn);
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+ return hva_to_pfn(kvm, addr);
+}
+
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
@@ -1069,14 +1204,11 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *memslot;
- gfn = unalias_gfn(kvm, gfn);
- memslot = gfn_to_memslot_unaliased(kvm, gfn);
+ memslot = gfn_to_memslot(kvm, gfn);
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
- /* avoid RMW */
- if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
- generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
+ generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
}
}
@@ -1091,7 +1223,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
if (kvm_arch_vcpu_runnable(vcpu)) {
- set_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_make_request(KVM_REQ_UNHALT, vcpu);
break;
}
if (kvm_cpu_has_pending_timer(vcpu))
@@ -1262,6 +1394,18 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (vcpu->kvm->mm != current->mm)
return -EIO;
+
+#if defined(CONFIG_S390) || defined(CONFIG_PPC)
+ /*
+ * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
+ * so vcpu_load() would break it.
+ */
+ if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+ return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
+#endif
+
+
+ vcpu_load(vcpu);
switch (ioctl) {
case KVM_RUN:
r = -EINVAL;
@@ -1404,7 +1548,7 @@ out_free2:
goto out;
p = &sigset;
}
- r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+ r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
break;
}
case KVM_GET_FPU: {
@@ -1439,6 +1583,7 @@ out_free2:
r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
}
out:
+ vcpu_put(vcpu);
kfree(fpu);
kfree(kvm_sregs);
return r;
@@ -1489,7 +1634,6 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
- r = -ENXIO;
r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
if (r)
goto out;
@@ -1501,7 +1645,6 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
- r = -ENXIO;
r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
if (r)
goto out;
@@ -1635,12 +1778,19 @@ static struct file_operations kvm_vm_fops = {
static int kvm_dev_ioctl_create_vm(void)
{
- int fd;
+ int fd, r;
struct kvm *kvm;
kvm = kvm_create_vm();
if (IS_ERR(kvm))
return PTR_ERR(kvm);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+ r = kvm_coalesced_mmio_init(kvm);
+ if (r < 0) {
+ kvm_put_kvm(kvm);
+ return r;
+ }
+#endif
fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
if (fd < 0)
kvm_put_kvm(kvm);
@@ -1808,15 +1958,10 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
cpu);
hardware_disable(NULL);
break;
- case CPU_UP_CANCELED:
- printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
- cpu);
- smp_call_function_single(cpu, hardware_disable, NULL, 1);
- break;
- case CPU_ONLINE:
+ case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
- smp_call_function_single(cpu, hardware_enable, NULL, 1);
+ hardware_enable(NULL);
break;
}
return NOTIFY_OK;
@@ -1825,10 +1970,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
asmlinkage void kvm_handle_fault_on_reboot(void)
{
- if (kvm_rebooting)
+ if (kvm_rebooting) {
/* spin while reset goes on */
+ local_irq_enable();
while (true)
;
+ }
/* Fault while not rebooting. We want the trace. */
BUG();
}
@@ -1854,12 +2001,7 @@ static struct notifier_block kvm_reboot_notifier = {
.priority = 0,
};
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
- memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
int i;
@@ -1868,13 +2010,17 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
kvm_iodevice_destructor(pos);
}
+ kfree(bus);
}
/* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
int i;
+ struct kvm_io_bus *bus;
+
+ bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
return 0;
@@ -1882,64 +2028,76 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
}
/* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, void *val)
{
int i;
+ struct kvm_io_bus *bus;
+
+ bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
return 0;
return -EOPNOTSUPP;
}
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int ret;
-
- down_write(&kvm->slots_lock);
- ret = __kvm_io_bus_register_dev(bus, dev);
- up_write(&kvm->slots_lock);
-
- return ret;
-}
+ struct kvm_io_bus *new_bus, *bus;
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
-{
+ bus = kvm->buses[bus_idx];
if (bus->dev_count > NR_IOBUS_DEVS-1)
return -ENOSPC;
- bus->devs[bus->dev_count++] = dev;
+ new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+ if (!new_bus)
+ return -ENOMEM;
+ memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+ new_bus->devs[new_bus->dev_count++] = dev;
+ rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+ synchronize_srcu_expedited(&kvm->srcu);
+ kfree(bus);
return 0;
}
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
- struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- down_write(&kvm->slots_lock);
- __kvm_io_bus_unregister_dev(bus, dev);
- up_write(&kvm->slots_lock);
-}
+ int i, r;
+ struct kvm_io_bus *new_bus, *bus;
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
-{
- int i;
+ new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+ if (!new_bus)
+ return -ENOMEM;
- for (i = 0; i < bus->dev_count; i++)
- if (bus->devs[i] == dev) {
- bus->devs[i] = bus->devs[--bus->dev_count];
+ bus = kvm->buses[bus_idx];
+ memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+ r = -ENOENT;
+ for (i = 0; i < new_bus->dev_count; i++)
+ if (new_bus->devs[i] == dev) {
+ r = 0;
+ new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
break;
}
+
+ if (r) {
+ kfree(new_bus);
+ return r;
+ }
+
+ rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+ synchronize_srcu_expedited(&kvm->srcu);
+ kfree(bus);
+ return r;
}
static struct notifier_block kvm_cpu_notifier = {
.notifier_call = kvm_cpu_hotplug,
- .priority = 20, /* must be > scheduler priority */
};
static int vm_stat_get(void *_offset, u64 *val)
@@ -2050,7 +2208,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
kvm_arch_vcpu_put(vcpu);
}
-int kvm_init(void *opaque, unsigned int vcpu_size,
+int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module)
{
int r;
@@ -2069,6 +2227,24 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
bad_pfn = page_to_pfn(bad_page);
+ hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+ if (hwpoison_page == NULL) {
+ r = -ENOMEM;
+ goto out_free_0;
+ }
+
+ hwpoison_pfn = page_to_pfn(hwpoison_page);
+
+ fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+ if (fault_page == NULL) {
+ r = -ENOMEM;
+ goto out_free_0;
+ }
+
+ fault_pfn = page_to_pfn(fault_page);
+
if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
goto out_free_0;
@@ -2100,8 +2276,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
goto out_free_4;
/* A kmem cache lets us meet the alignment requirements of fx_save. */
- kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
- __alignof__(struct kvm_vcpu),
+ if (!vcpu_align)
+ vcpu_align = __alignof__(struct kvm_vcpu);
+ kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
0, NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
@@ -2140,6 +2317,10 @@ out_free_1:
out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
+ if (fault_page)
+ __free_page(fault_page);
+ if (hwpoison_page)
+ __free_page(hwpoison_page);
__free_page(bad_page);
out:
kvm_arch_exit();
@@ -2150,7 +2331,6 @@ EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{
- tracepoint_synchronize_unregister();
kvm_exit_debug();
misc_deregister(&kvm_dev);
kmem_cache_destroy(kvm_vcpu_cache);
@@ -2162,6 +2342,7 @@ void kvm_exit(void)
kvm_arch_hardware_unsetup();
kvm_arch_exit();
free_cpumask_var(cpus_hardware_enabled);
+ __free_page(hwpoison_page);
__free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);