diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/arm.c | 48 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 20 | ||||
-rw-r--r-- | virt/kvm/arm/psci.c | 1 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 645 |
4 files changed, 442 insertions, 272 deletions
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index d65a0faa46d8..bfdba1caf59d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1183,55 +1183,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return r; } -/** - * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot - * @kvm: kvm instance - * @log: slot id and address to which we copy the log - * - * Steps 1-4 below provide general overview of dirty page logging. See - * kvm_get_dirty_log_protect() function description for additional details. - * - * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we - * always flush the TLB (step 4) even if previous step failed and the dirty - * bitmap may be corrupt. Regardless of previous outcome the KVM logging API - * does not preclude user space subsequent dirty log read. Flushing TLB ensures - * writes will be marked dirty for next log read. - * - * 1. Take a snapshot of the bit and clear it if needed. - * 2. Write protect the corresponding page. - * 3. Copy the snapshot to the userspace. - * 4. Flush TLB's if needed. - */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - r = kvm_get_dirty_log_protect(kvm, log, &flush); - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; } -int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - r = kvm_clear_dirty_log_protect(kvm, log, &flush); - - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; + kvm_flush_remote_tlbs(kvm); } static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 19c961ac4e3c..e3b9ee268823 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1534,8 +1534,13 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) { struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); - phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; - phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + phys_addr_t start, end; + + if (WARN_ON_ONCE(!memslot)) + return; + + start = memslot->base_gfn << PAGE_SHIFT; + end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); stage2_wp_range(kvm, start, end); @@ -2251,7 +2256,7 @@ out: void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { @@ -2349,17 +2354,10 @@ out: return ret; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { } -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { } diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 17e2bdd4b76f..14a162e295a9 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -12,7 +12,6 @@ #include <asm/cputype.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_host.h> #include <kvm/arm_psci.h> #include <kvm/arm_hypercalls.h> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70f03ce0e5c1..f744bc603c53 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -149,8 +149,6 @@ static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); __visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); -static bool largepages_enabled = true; - #define KVM_EVENT_CREATE_VM 0 #define KVM_EVENT_DESTROY_VM 1 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); @@ -566,7 +564,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) return NULL; for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - slots->id_to_index[i] = slots->memslots[i].id = i; + slots->id_to_index[i] = -1; return slots; } @@ -580,18 +578,14 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) memslot->dirty_bitmap = NULL; } -/* - * Free any memory in @free but not in @dont. - */ -static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - if (!dont || free->dirty_bitmap != dont->dirty_bitmap) - kvm_destroy_dirty_bitmap(free); + kvm_destroy_dirty_bitmap(slot); - kvm_arch_free_memslot(kvm, free, dont); + kvm_arch_free_memslot(kvm, slot); - free->npages = 0; + slot->flags = 0; + slot->npages = 0; } static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) @@ -602,7 +596,7 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) return; kvm_for_each_memslot(memslot, slots) - kvm_free_memslot(kvm, memslot, NULL); + kvm_free_memslot(kvm, memslot); kvfree(slots); } @@ -860,9 +854,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) /* * Allocation size is twice as large as the actual dirty bitmap size. - * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed. + * See kvm_vm_ioctl_get_dirty_log() why this is needed. */ -static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) +static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); @@ -874,63 +868,165 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) } /* - * Insert memslot and re-sort memslots based on their GFN, - * so binary search could be used to lookup GFN. - * Sorting algorithm takes advantage of having initially - * sorted array and known changed memslot position. + * Delete a memslot by decrementing the number of used slots and shifting all + * other entries in the array forward one spot. */ -static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new, - enum kvm_mr_change change) +static inline void kvm_memslot_delete(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot) { - int id = new->id; - int i = slots->id_to_index[id]; struct kvm_memory_slot *mslots = slots->memslots; + int i; - WARN_ON(mslots[i].id != id); - switch (change) { - case KVM_MR_CREATE: - slots->used_slots++; - WARN_ON(mslots[i].npages || !new->npages); - break; - case KVM_MR_DELETE: - slots->used_slots--; - WARN_ON(new->npages || !mslots[i].npages); - break; - default: - break; - } + if (WARN_ON(slots->id_to_index[memslot->id] == -1)) + return; - while (i < KVM_MEM_SLOTS_NUM - 1 && - new->base_gfn <= mslots[i + 1].base_gfn) { - if (!mslots[i + 1].npages) - break; + slots->used_slots--; + + if (atomic_read(&slots->lru_slot) >= slots->used_slots) + atomic_set(&slots->lru_slot, 0); + + for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) { mslots[i] = mslots[i + 1]; slots->id_to_index[mslots[i].id] = i; - i++; } + mslots[i] = *memslot; + slots->id_to_index[memslot->id] = -1; +} + +/* + * "Insert" a new memslot by incrementing the number of used slots. Returns + * the new slot's initial index into the memslots array. + */ +static inline int kvm_memslot_insert_back(struct kvm_memslots *slots) +{ + return slots->used_slots++; +} + +/* + * Move a changed memslot backwards in the array by shifting existing slots + * with a higher GFN toward the front of the array. Note, the changed memslot + * itself is not preserved in the array, i.e. not swapped at this time, only + * its new index into the array is tracked. Returns the changed memslot's + * current index into the memslots array. + */ +static inline int kvm_memslot_move_backward(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot) +{ + struct kvm_memory_slot *mslots = slots->memslots; + int i; + + if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) || + WARN_ON_ONCE(!slots->used_slots)) + return -1; /* - * The ">=" is needed when creating a slot with base_gfn == 0, - * so that it moves before all those with base_gfn == npages == 0. - * - * On the other hand, if new->npages is zero, the above loop has - * already left i pointing to the beginning of the empty part of - * mslots, and the ">=" would move the hole backwards in this - * case---which is wrong. So skip the loop when deleting a slot. + * Move the target memslot backward in the array by shifting existing + * memslots with a higher GFN (than the target memslot) towards the + * front of the array. */ - if (new->npages) { - while (i > 0 && - new->base_gfn >= mslots[i - 1].base_gfn) { - mslots[i] = mslots[i - 1]; - slots->id_to_index[mslots[i].id] = i; - i--; - } - } else - WARN_ON_ONCE(i != slots->used_slots); + for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) { + if (memslot->base_gfn > mslots[i + 1].base_gfn) + break; + + WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn); - mslots[i] = *new; - slots->id_to_index[mslots[i].id] = i; + /* Shift the next memslot forward one and update its index. */ + mslots[i] = mslots[i + 1]; + slots->id_to_index[mslots[i].id] = i; + } + return i; +} + +/* + * Move a changed memslot forwards in the array by shifting existing slots with + * a lower GFN toward the back of the array. Note, the changed memslot itself + * is not preserved in the array, i.e. not swapped at this time, only its new + * index into the array is tracked. Returns the changed memslot's final index + * into the memslots array. + */ +static inline int kvm_memslot_move_forward(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot, + int start) +{ + struct kvm_memory_slot *mslots = slots->memslots; + int i; + + for (i = start; i > 0; i--) { + if (memslot->base_gfn < mslots[i - 1].base_gfn) + break; + + WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn); + + /* Shift the next memslot back one and update its index. */ + mslots[i] = mslots[i - 1]; + slots->id_to_index[mslots[i].id] = i; + } + return i; +} + +/* + * Re-sort memslots based on their GFN to account for an added, deleted, or + * moved memslot. Sorting memslots by GFN allows using a binary search during + * memslot lookup. + * + * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! I.e. the entry + * at memslots[0] has the highest GFN. + * + * The sorting algorithm takes advantage of having initially sorted memslots + * and knowing the position of the changed memslot. Sorting is also optimized + * by not swapping the updated memslot and instead only shifting other memslots + * and tracking the new index for the update memslot. Only once its final + * index is known is the updated memslot copied into its position in the array. + * + * - When deleting a memslot, the deleted memslot simply needs to be moved to + * the end of the array. + * + * - When creating a memslot, the algorithm "inserts" the new memslot at the + * end of the array and then it forward to its correct location. + * + * - When moving a memslot, the algorithm first moves the updated memslot + * backward to handle the scenario where the memslot's GFN was changed to a + * lower value. update_memslots() then falls through and runs the same flow + * as creating a memslot to move the memslot forward to handle the scenario + * where its GFN was changed to a higher value. + * + * Note, slots are sorted from highest->lowest instead of lowest->highest for + * historical reasons. Originally, invalid memslots where denoted by having + * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots + * to the end of the array. The current algorithm uses dedicated logic to + * delete a memslot and thus does not rely on invalid memslots having GFN=0. + * + * The other historical motiviation for highest->lowest was to improve the + * performance of memslot lookup. KVM originally used a linear search starting + * at memslots[0]. On x86, the largest memslot usually has one of the highest, + * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a + * single memslot above the 4gb boundary. As the largest memslot is also the + * most likely to be referenced, sorting it to the front of the array was + * advantageous. The current binary search starts from the middle of the array + * and uses an LRU pointer to improve performance for all memslots and GFNs. + */ +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot, + enum kvm_mr_change change) +{ + int i; + + if (change == KVM_MR_DELETE) { + kvm_memslot_delete(slots, memslot); + } else { + if (change == KVM_MR_CREATE) + i = kvm_memslot_insert_back(slots); + else + i = kvm_memslot_move_backward(slots, memslot); + i = kvm_memslot_move_forward(slots, memslot, i); + + /* + * Copy the memslot to its new position in memslots and update + * its index accordingly. + */ + slots->memslots[i] = *memslot; + slots->id_to_index[memslot->id] = i; + } } static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) @@ -984,6 +1080,112 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, } /* + * Note, at a minimum, the current number of used slots must be allocated, even + * when deleting a memslot, as we need a complete duplicate of the memslots for + * use when invalidating a memslot prior to deleting/moving the memslot. + */ +static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old, + enum kvm_mr_change change) +{ + struct kvm_memslots *slots; + size_t old_size, new_size; + + old_size = sizeof(struct kvm_memslots) + + (sizeof(struct kvm_memory_slot) * old->used_slots); + + if (change == KVM_MR_CREATE) + new_size = old_size + sizeof(struct kvm_memory_slot); + else + new_size = old_size; + + slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT); + if (likely(slots)) + memcpy(slots, old, old_size); + + return slots; +} + +static int kvm_set_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, + struct kvm_memory_slot *new, int as_id, + enum kvm_mr_change change) +{ + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + int r; + + slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change); + if (!slots) + return -ENOMEM; + + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { + /* + * Note, the INVALID flag needs to be in the appropriate entry + * in the freshly allocated memslots, not in @old or @new. + */ + slot = id_to_memslot(slots, old->id); + slot->flags |= KVM_MEMSLOT_INVALID; + + /* + * We can re-use the old memslots, the only difference from the + * newly installed memslots is the invalid flag, which will get + * dropped by update_memslots anyway. We'll also revert to the + * old memslots if preparing the new memory region fails. + */ + slots = install_new_memslots(kvm, as_id, slots); + + /* From this point no new shadow pages pointing to a deleted, + * or moved, memslot will be created. + * + * validation of sp->gfn happens in: + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_root) + */ + kvm_arch_flush_shadow_memslot(kvm, slot); + } + + r = kvm_arch_prepare_memory_region(kvm, new, mem, change); + if (r) + goto out_slots; + + update_memslots(slots, new, change); + slots = install_new_memslots(kvm, as_id, slots); + + kvm_arch_commit_memory_region(kvm, mem, old, new, change); + + kvfree(slots); + return 0; + +out_slots: + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) + slots = install_new_memslots(kvm, as_id, slots); + kvfree(slots); + return r; +} + +static int kvm_delete_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, int as_id) +{ + struct kvm_memory_slot new; + int r; + + if (!old->npages) + return -EINVAL; + + memset(&new, 0, sizeof(new)); + new.id = old->id; + + r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE); + if (r) + return r; + + kvm_free_memslot(kvm, old); + return 0; +} + +/* * Allocate some memory and give it an address in the guest physical address * space. * @@ -994,162 +1196,118 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, int __kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem) { - int r; - gfn_t base_gfn; - unsigned long npages; - struct kvm_memory_slot *slot; struct kvm_memory_slot old, new; - struct kvm_memslots *slots = NULL, *old_memslots; - int as_id, id; + struct kvm_memory_slot *tmp; enum kvm_mr_change change; + int as_id, id; + int r; r = check_memory_region_flags(mem); if (r) - goto out; + return r; - r = -EINVAL; as_id = mem->slot >> 16; id = (u16)mem->slot; /* General sanity checks */ if (mem->memory_size & (PAGE_SIZE - 1)) - goto out; + return -EINVAL; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) - goto out; + return -EINVAL; /* We can read the guest memory with __xxx_user() later on. */ if ((id < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok((void __user *)(unsigned long)mem->userspace_addr, mem->memory_size))) - goto out; + return -EINVAL; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) - goto out; + return -EINVAL; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) - goto out; - - slot = id_to_memslot(__kvm_memslots(kvm, as_id), id); - base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; - npages = mem->memory_size >> PAGE_SHIFT; + return -EINVAL; - if (npages > KVM_MEM_MAX_NR_PAGES) - goto out; + /* + * Make a full copy of the old memslot, the pointer will become stale + * when the memslots are re-sorted by update_memslots(), and the old + * memslot needs to be referenced after calling update_memslots(), e.g. + * to free its resources and for arch specific behavior. + */ + tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id); + if (tmp) { + old = *tmp; + tmp = NULL; + } else { + memset(&old, 0, sizeof(old)); + old.id = id; + } - new = old = *slot; + if (!mem->memory_size) + return kvm_delete_memslot(kvm, mem, &old, as_id); new.id = id; - new.base_gfn = base_gfn; - new.npages = npages; + new.base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; + new.npages = mem->memory_size >> PAGE_SHIFT; new.flags = mem->flags; + new.userspace_addr = mem->userspace_addr; - if (npages) { - if (!old.npages) - change = KVM_MR_CREATE; - else { /* Modify an existing slot. */ - if ((mem->userspace_addr != old.userspace_addr) || - (npages != old.npages) || - ((new.flags ^ old.flags) & KVM_MEM_READONLY)) - goto out; + if (new.npages > KVM_MEM_MAX_NR_PAGES) + return -EINVAL; - if (base_gfn != old.base_gfn) - change = KVM_MR_MOVE; - else if (new.flags != old.flags) - change = KVM_MR_FLAGS_ONLY; - else { /* Nothing to change. */ - r = 0; - goto out; - } - } - } else { - if (!old.npages) - goto out; + if (!old.npages) { + change = KVM_MR_CREATE; + new.dirty_bitmap = NULL; + memset(&new.arch, 0, sizeof(new.arch)); + } else { /* Modify an existing slot. */ + if ((new.userspace_addr != old.userspace_addr) || + (new.npages != old.npages) || + ((new.flags ^ old.flags) & KVM_MEM_READONLY)) + return -EINVAL; + + if (new.base_gfn != old.base_gfn) + change = KVM_MR_MOVE; + else if (new.flags != old.flags) + change = KVM_MR_FLAGS_ONLY; + else /* Nothing to change. */ + return 0; - change = KVM_MR_DELETE; - new.base_gfn = 0; - new.flags = 0; + /* Copy dirty_bitmap and arch from the current memslot. */ + new.dirty_bitmap = old.dirty_bitmap; + memcpy(&new.arch, &old.arch, sizeof(new.arch)); } if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { /* Check for overlaps */ - r = -EEXIST; - kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { - if (slot->id == id) + kvm_for_each_memslot(tmp, __kvm_memslots(kvm, as_id)) { + if (tmp->id == id) continue; - if (!((base_gfn + npages <= slot->base_gfn) || - (base_gfn >= slot->base_gfn + slot->npages))) - goto out; + if (!((new.base_gfn + new.npages <= tmp->base_gfn) || + (new.base_gfn >= tmp->base_gfn + tmp->npages))) + return -EEXIST; } } - /* Free page dirty bitmap if unneeded */ + /* Allocate/free page dirty bitmap as needed */ if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) new.dirty_bitmap = NULL; + else if (!new.dirty_bitmap) { + r = kvm_alloc_dirty_bitmap(&new); + if (r) + return r; - r = -ENOMEM; - if (change == KVM_MR_CREATE) { - new.userspace_addr = mem->userspace_addr; - - if (kvm_arch_create_memslot(kvm, &new, npages)) - goto out_free; - } - - /* Allocate page dirty bitmap if needed */ - if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { - if (kvm_create_dirty_bitmap(&new) < 0) - goto out_free; - } - - slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); - if (!slots) - goto out_free; - memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); - - if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - slot = id_to_memslot(slots, id); - slot->flags |= KVM_MEMSLOT_INVALID; - - old_memslots = install_new_memslots(kvm, as_id, slots); - - /* From this point no new shadow pages pointing to a deleted, - * or moved, memslot will be created. - * - * validation of sp->gfn happens in: - * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_root) - */ - kvm_arch_flush_shadow_memslot(kvm, slot); - - /* - * We can re-use the old_memslots from above, the only difference - * from the currently installed memslots is the invalid flag. This - * will get overwritten by update_memslots anyway. - */ - slots = old_memslots; + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + bitmap_set(new.dirty_bitmap, 0, new.npages); } - r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); + r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change); if (r) - goto out_slots; - - /* actual memory is freed via old in kvm_free_memslot below */ - if (change == KVM_MR_DELETE) { - new.dirty_bitmap = NULL; - memset(&new.arch, 0, sizeof(new.arch)); - } - - update_memslots(slots, &new, change); - old_memslots = install_new_memslots(kvm, as_id, slots); - - kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); + goto out_bitmap; - kvm_free_memslot(kvm, &old, &new); - kvfree(old_memslots); + if (old.dirty_bitmap && !new.dirty_bitmap) + kvm_destroy_dirty_bitmap(&old); return 0; -out_slots: - kvfree(slots); -out_free: - kvm_free_memslot(kvm, &new, &old); -out: +out_bitmap: + if (new.dirty_bitmap && !old.dirty_bitmap) + kvm_destroy_dirty_bitmap(&new); return r; } EXPORT_SYMBOL_GPL(__kvm_set_memory_region); @@ -1175,31 +1333,43 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, return kvm_set_memory_region(kvm, mem); } -int kvm_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log, int *is_dirty) +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +/** + * kvm_get_dirty_log - get a snapshot of dirty pages + * @kvm: pointer to kvm instance + * @log: slot id and address to which we copy the log + * @is_dirty: set to '1' if any dirty pages were found + * @memslot: set to the associated memslot, always valid on success + */ +int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, + int *is_dirty, struct kvm_memory_slot **memslot) { struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; int i, as_id, id; unsigned long n; unsigned long any = 0; + *memslot = NULL; + *is_dirty = 0; + as_id = log->slot >> 16; id = (u16)log->slot; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) return -EINVAL; slots = __kvm_memslots(kvm, as_id); - memslot = id_to_memslot(slots, id); - if (!memslot->dirty_bitmap) + *memslot = id_to_memslot(slots, id); + if (!(*memslot) || !(*memslot)->dirty_bitmap) return -ENOENT; - n = kvm_dirty_bitmap_bytes(memslot); + kvm_arch_sync_dirty_log(kvm, *memslot); + + n = kvm_dirty_bitmap_bytes(*memslot); for (i = 0; !any && i < n/sizeof(long); ++i) - any = memslot->dirty_bitmap[i]; + any = (*memslot)->dirty_bitmap[i]; - if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) + if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) return -EFAULT; if (any) @@ -1208,13 +1378,12 @@ int kvm_get_dirty_log(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_get_dirty_log); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +#else /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ /** * kvm_get_dirty_log_protect - get a snapshot of dirty pages * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address to which we copy the log - * @flush: true if TLB flush is needed by caller * * We need to keep it in mind that VCPU threads can write to the bitmap * concurrently. So, to avoid losing track of dirty pages we keep the @@ -1231,8 +1400,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); * exiting to userspace will be logged for the next call. * */ -int kvm_get_dirty_log_protect(struct kvm *kvm, - struct kvm_dirty_log *log, bool *flush) +static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -1240,6 +1408,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, unsigned long n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; + bool flush; as_id = log->slot >> 16; id = (u16)log->slot; @@ -1248,13 +1417,15 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); + if (!memslot || !memslot->dirty_bitmap) + return -ENOENT; dirty_bitmap = memslot->dirty_bitmap; - if (!dirty_bitmap) - return -ENOENT; + + kvm_arch_sync_dirty_log(kvm, memslot); n = kvm_dirty_bitmap_bytes(memslot); - *flush = false; + flush = false; if (kvm->manual_dirty_log_protect) { /* * Unlike kvm_get_dirty_log, we always return false in *flush, @@ -1277,7 +1448,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, if (!dirty_bitmap[i]) continue; - *flush = true; + flush = true; mask = xchg(&dirty_bitmap[i], 0); dirty_bitmap_buffer[i] = mask; @@ -1288,21 +1459,55 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, spin_unlock(&kvm->mmu_lock); } + if (flush) + kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); + + +/** + * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log + * + * Steps 1-4 below provide general overview of dirty page logging. See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. + * + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Copy the snapshot to the userspace. + * 4. Flush TLB's if needed. + */ +static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + + mutex_lock(&kvm->slots_lock); + + r = kvm_get_dirty_log_protect(kvm, log); + + mutex_unlock(&kvm->slots_lock); + return r; +} /** * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address from which to fetch the bitmap of dirty pages - * @flush: true if TLB flush is needed by caller */ -int kvm_clear_dirty_log_protect(struct kvm *kvm, - struct kvm_clear_dirty_log *log, bool *flush) +static int kvm_clear_dirty_log_protect(struct kvm *kvm, + struct kvm_clear_dirty_log *log) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -1311,6 +1516,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, unsigned long i, n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; + bool flush; as_id = log->slot >> 16; id = (u16)log->slot; @@ -1322,10 +1528,10 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); + if (!memslot || !memslot->dirty_bitmap) + return -ENOENT; dirty_bitmap = memslot->dirty_bitmap; - if (!dirty_bitmap) - return -ENOENT; n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; @@ -1334,7 +1540,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) return -EINVAL; - *flush = false; + kvm_arch_sync_dirty_log(kvm, memslot); + + flush = false; dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) return -EFAULT; @@ -1357,28 +1565,32 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, * a problem if userspace sets them in log->dirty_bitmap. */ if (mask) { - *flush = true; + flush = true; kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask); } } spin_unlock(&kvm->mmu_lock); + if (flush) + kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + return 0; } -EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect); -#endif -bool kvm_largepages_enabled(void) +static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, + struct kvm_clear_dirty_log *log) { - return largepages_enabled; -} + int r; -void kvm_disable_largepages(void) -{ - largepages_enabled = false; + mutex_lock(&kvm->slots_lock); + + r = kvm_clear_dirty_log_protect(kvm, log); + + mutex_unlock(&kvm->slots_lock); + return r; } -EXPORT_SYMBOL_GPL(kvm_disable_largepages); +#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { @@ -1754,12 +1966,6 @@ kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); -kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) -{ - return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn); -} -EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); - kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn) { return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); @@ -3310,9 +3516,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: case KVM_CAP_ENABLE_CAP_VM: -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: -#endif return 1; #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: @@ -3320,6 +3523,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_COALESCED_PIO: return 1; #endif +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: + return KVM_DIRTY_LOG_MANUAL_CAPS; +#endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; @@ -3347,11 +3554,17 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, { switch (cap->cap) { #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: - if (cap->flags || (cap->args[0] & ~1)) + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: { + u64 allowed_options = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE; + + if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) + allowed_options = KVM_DIRTY_LOG_MANUAL_CAPS; + + if (cap->flags || (cap->args[0] & ~allowed_options)) return -EINVAL; kvm->manual_dirty_log_protect = cap->args[0]; return 0; + } #endif default: return kvm_vm_ioctl_enable_cap(kvm, cap); |