diff options
Diffstat (limited to 'virt/kvm')
-rw-r--r-- | virt/kvm/assigned-dev.c | 8 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 6 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 30 | ||||
-rw-r--r-- | virt/kvm/ioapic.h | 2 | ||||
-rw-r--r-- | virt/kvm/iommu.c | 117 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 63 |
6 files changed, 150 insertions, 76 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 02ff2b19dbe2..4d10b1e047f4 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -316,12 +316,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, kvm_assigned_dev_intr, 0, "kvm_assigned_msix_device", (void *)dev); - /* FIXME: free requested_irq's on failure */ if (r) - return r; + goto err; } return 0; +err: + for (i -= 1; i >= 0; i--) + free_irq(dev->host_msix_entries[i].vector, (void *)dev); + pci_disable_msix(dev->dev); + return r; } #endif diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 36e258029649..53850177163f 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -120,8 +120,10 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) return ret; out_free_dev: + kvm->coalesced_mmio_dev = NULL; kfree(dev); out_free_page: + kvm->coalesced_mmio_ring = NULL; __free_page(page); out_err: return ret; @@ -139,7 +141,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; if (dev == NULL) - return -EINVAL; + return -ENXIO; mutex_lock(&kvm->slots_lock); if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { @@ -162,7 +164,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *z; if (dev == NULL) - return -EINVAL; + return -ENXIO; mutex_lock(&kvm->slots_lock); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 03a5eb22da2b..7c79c1d76d0c 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -197,7 +197,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) union kvm_ioapic_redirect_entry entry; int ret = 1; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; level ^= entry.fields.polarity; @@ -214,7 +214,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return ret; } @@ -238,9 +238,9 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, * is dropped it will be put into irr and will be delivered * after ack notifier returns. */ - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); if (trigger_mode != IOAPIC_LEVEL_TRIG) continue; @@ -259,9 +259,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) smp_rmb(); if (!test_bit(vector, ioapic->handled_vectors)) return; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); } static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) @@ -287,7 +287,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, ASSERT(!(addr & 0xf)); /* check alignment */ addr &= 0xff; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: result = ioapic->ioregsel; @@ -301,7 +301,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, result = 0; break; } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); switch (len) { case 8: @@ -338,7 +338,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, } addr &= 0xff; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: ioapic->ioregsel = data; @@ -356,7 +356,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, default: break; } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } @@ -386,7 +386,7 @@ int kvm_ioapic_init(struct kvm *kvm) ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); if (!ioapic) return -ENOMEM; - mutex_init(&ioapic->lock); + spin_lock_init(&ioapic->lock); kvm->arch.vioapic = ioapic; kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); @@ -419,9 +419,9 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) if (!ioapic) return -EINVAL; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } @@ -431,9 +431,9 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) if (!ioapic) return -EINVAL; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 8a751b78a430..0b190c34ccc3 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -45,7 +45,7 @@ struct kvm_ioapic { struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); - struct mutex lock; + spinlock_t lock; DECLARE_BITMAP(handled_vectors, 256); }; diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 80fd3ad3b2de..d2f06be63354 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -32,12 +32,30 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); +static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, unsigned long size) +{ + gfn_t end_gfn; + pfn_t pfn; + + pfn = gfn_to_pfn_memslot(kvm, slot, gfn); + end_gfn = gfn + (size >> PAGE_SHIFT); + gfn += 1; + + if (is_error_pfn(pfn)) + return pfn; + + while (gfn < end_gfn) + gfn_to_pfn_memslot(kvm, slot, gfn++); + + return pfn; +} + int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { - gfn_t gfn = slot->base_gfn; - unsigned long npages = slot->npages; + gfn_t gfn, end_gfn; pfn_t pfn; - int i, r = 0; + int r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; int flags; @@ -45,31 +63,62 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) if (!domain) return 0; + gfn = slot->base_gfn; + end_gfn = gfn + slot->npages; + flags = IOMMU_READ | IOMMU_WRITE; if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) flags |= IOMMU_CACHE; - for (i = 0; i < npages; i++) { - /* check if already mapped */ - if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) + + while (gfn < end_gfn) { + unsigned long page_size; + + /* Check if already mapped */ + if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) { + gfn += 1; + continue; + } + + /* Get the page size we could use to map */ + page_size = kvm_host_page_size(kvm, gfn); + + /* Make sure the page_size does not exceed the memslot */ + while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn) + page_size >>= 1; + + /* Make sure gfn is aligned to the page size we want to map */ + while ((gfn << PAGE_SHIFT) & (page_size - 1)) + page_size >>= 1; + + /* + * Pin all pages we are about to map in memory. This is + * important because we unmap and unpin in 4kb steps later. + */ + pfn = kvm_pin_pages(kvm, slot, gfn, page_size); + if (is_error_pfn(pfn)) { + gfn += 1; continue; + } - pfn = gfn_to_pfn_memslot(kvm, slot, gfn); - r = iommu_map_range(domain, - gfn_to_gpa(gfn), - pfn_to_hpa(pfn), - PAGE_SIZE, flags); + /* Map into IO address space */ + r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), + get_order(page_size), flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%lx\n", pfn); goto unmap_pages; } - gfn++; + + gfn += page_size >> PAGE_SHIFT; + + } + return 0; unmap_pages: - kvm_iommu_put_pages(kvm, slot->base_gfn, i); + kvm_iommu_put_pages(kvm, slot->base_gfn, gfn); return r; } @@ -78,7 +127,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) int i, r = 0; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; i++) { r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); @@ -189,27 +238,47 @@ out_unmap: return r; } +static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) +{ + unsigned long i; + + for (i = 0; i < npages; ++i) + kvm_release_pfn_clean(pfn + i); +} + static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages) { - gfn_t gfn = base_gfn; + struct iommu_domain *domain; + gfn_t end_gfn, gfn; pfn_t pfn; - struct iommu_domain *domain = kvm->arch.iommu_domain; - unsigned long i; u64 phys; + domain = kvm->arch.iommu_domain; + end_gfn = base_gfn + npages; + gfn = base_gfn; + /* check if iommu exists and in use */ if (!domain) return; - for (i = 0; i < npages; i++) { + while (gfn < end_gfn) { + unsigned long unmap_pages; + int order; + + /* Get physical address */ phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); - pfn = phys >> PAGE_SHIFT; - kvm_release_pfn_clean(pfn); - gfn++; - } + pfn = phys >> PAGE_SHIFT; + + /* Unmap address from IO address space */ + order = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); + unmap_pages = 1ULL << order; - iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages); + /* Unpin all pages we just unmapped to not leak any memory */ + kvm_unpin_pages(kvm, pfn, unmap_pages); + + gfn += unmap_pages; + } } static int kvm_iommu_unmap_memslots(struct kvm *kvm) @@ -217,7 +286,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) int i; struct kvm_memslots *slots; - slots = rcu_dereference(kvm->memslots); + slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; i++) { kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c82ae2492634..f032806a212f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -422,9 +422,6 @@ static struct kvm *kvm_create_vm(void) spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET - kvm_coalesced_mmio_init(kvm); -#endif out: return kvm; @@ -560,6 +557,10 @@ int __kvm_set_memory_region(struct kvm *kvm, base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; + r = -EINVAL; + if (npages > KVM_MEM_MAX_NR_PAGES) + goto out; + if (!npages) mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; @@ -833,7 +834,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memslots *slots = kvm_memslots(kvm); for (i = 0; i < slots->nmemslots; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; @@ -855,7 +856,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memslots *slots = kvm_memslots(kvm); gfn = unalias_gfn_instantiation(kvm, gfn); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { @@ -899,7 +900,7 @@ out: int memslot_id(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *memslot = NULL; gfn = unalias_gfn(kvm, gfn); @@ -914,6 +915,11 @@ int memslot_id(struct kvm *kvm, gfn_t gfn) return memslot - slots->memslots; } +static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) +{ + return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; +} + unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; @@ -922,7 +928,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) slot = gfn_to_memslot_unaliased(kvm, gfn); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return bad_hva(); - return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); + return gfn_to_hva_memslot(slot, gfn); } EXPORT_SYMBOL_GPL(gfn_to_hva); @@ -972,11 +978,6 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_pfn); -static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) -{ - return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); -} - pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) { @@ -1190,13 +1191,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot_unaliased(kvm, gfn); if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; - unsigned long *p = memslot->dirty_bitmap + - rel_gfn / BITS_PER_LONG; - int offset = rel_gfn % BITS_PER_LONG; - /* avoid RMW */ - if (!generic_test_le_bit(offset, p)) - generic___set_le_bit(offset, p); + generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); } } @@ -1609,7 +1605,6 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&zone, argp, sizeof zone)) goto out; - r = -ENXIO; r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); if (r) goto out; @@ -1621,7 +1616,6 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&zone, argp, sizeof zone)) goto out; - r = -ENXIO; r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); if (r) goto out; @@ -1755,12 +1749,19 @@ static struct file_operations kvm_vm_fops = { static int kvm_dev_ioctl_create_vm(void) { - int fd; + int fd, r; struct kvm *kvm; kvm = kvm_create_vm(); if (IS_ERR(kvm)) return PTR_ERR(kvm); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + r = kvm_coalesced_mmio_init(kvm); + if (r < 0) { + kvm_put_kvm(kvm); + return r; + } +#endif fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (fd < 0) kvm_put_kvm(kvm); @@ -1928,11 +1929,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, cpu); hardware_disable(NULL); break; - case CPU_UP_CANCELED: - printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", - cpu); - smp_call_function_single(cpu, hardware_disable, NULL, 1); - break; case CPU_ONLINE: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); @@ -1991,7 +1987,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { int i; - struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); + struct kvm_io_bus *bus; + + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); for (i = 0; i < bus->dev_count; i++) if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) return 0; @@ -2003,8 +2001,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { int i; - struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); + struct kvm_io_bus *bus; + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); for (i = 0; i < bus->dev_count; i++) if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) return 0; @@ -2179,7 +2178,7 @@ static void kvm_sched_out(struct preempt_notifier *pn, kvm_arch_vcpu_put(vcpu); } -int kvm_init(void *opaque, unsigned int vcpu_size, +int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module) { int r; @@ -2229,8 +2228,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out_free_4; /* A kmem cache lets us meet the alignment requirements of fx_save. */ - kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, - __alignof__(struct kvm_vcpu), + if (!vcpu_align) + vcpu_align = __alignof__(struct kvm_vcpu); + kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, 0, NULL); if (!kvm_vcpu_cache) { r = -ENOMEM; @@ -2279,7 +2279,6 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { - tracepoint_synchronize_unregister(); kvm_exit_debug(); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); |