path: root/virt/kvm/kvm_main.c
author     Linus Torvalds <torvalds@linux-foundation.org>  2018-12-26 20:46:28 +0100
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-12-26 20:46:28 +0100
commit     42b00f122cfbfed79fc29b0b3610f3abbb1e3864 (patch)
tree       c110a18c03e9ada45b6f3593843f0a06b36773dc /virt/kvm/kvm_main.c
parent     Merge tag 'for-linus-4.21-rc1-tag' of git://git.kernel.org/pub/scm/linux/kern... (diff)
parent     KVM: x86: Add CPUID support for new instruction WBNOINVD (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:

 "ARM:
   - selftests improvements
   - large PUD support for HugeTLB
   - single-stepping fixes
   - improved tracing
   - various timer and vGIC fixes

  x86:
   - Processor Tracing virtualization
   - STIBP support
   - some correctness fixes
   - refactorings and splitting of vmx.c
   - use the Hyper-V range TLB flush hypercall
   - reduce order of vcpu struct
   - WBNOINVD support
   - do not use -ftrace for __noclone functions
   - nested guest support for PAUSE filtering on AMD
   - more Hyper-V enlightenments (direct mode for synthetic timers)

  PPC:
   - nested VFIO

  s390:
   - bugfixes only this time"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (171 commits)
  KVM: x86: Add CPUID support for new instruction WBNOINVD
  kvm: selftests: ucall: fix exit mmio address guessing
  Revert "compiler-gcc: disable -ftracer for __noclone functions"
  KVM: VMX: Move VM-Enter + VM-Exit handling to non-inline sub-routines
  KVM: VMX: Explicitly reference RCX as the vmx_vcpu pointer in asm blobs
  KVM: x86: Use jmp to invoke kvm_spurious_fault() from .fixup
  MAINTAINERS: Add arch/x86/kvm sub-directories to existing KVM/x86 entry
  KVM/x86: Use SVM assembly instruction mnemonics instead of .byte streams
  KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range()
  KVM/MMU: Flush tlb directly in kvm_set_pte_rmapp()
  KVM/MMU: Move tlb flush in kvm_set_pte_rmapp() to kvm_mmu_notifier_change_pte()
  KVM: Make kvm_set_spte_hva() return int
  KVM: Replace old tlb flush function with new one to flush a specified range.
  KVM/MMU: Add tlb flush with range helper function
  KVM/VMX: Add hv tlb range flush support
  x86/hyper-v: Add HvFlushGuestAddressList hypercall support
  KVM: Add tlb_remote_flush_with_range callback in kvm_x86_ops
  KVM: x86: Disable Intel PT when VMXON in L1 guest
  KVM: x86: Set intercept for Intel PT MSRs read/write
  KVM: x86: Implement Intel PT MSRs read/write emulation
  ...
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c  208
1 file changed, 168 insertions(+), 40 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2679e476b6c3..cf7cc0554094 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -354,7 +354,10 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
- kvm_set_spte_hva(kvm, address, pte);
+
+ if (kvm_set_spte_hva(kvm, address, pte))
+ kvm_flush_remote_tlbs(kvm);
+
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
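With this hunk the generic MMU notifier path flushes remote TLBs only when the architecture's kvm_set_spte_hva() reports that it touched an existing mapping ("KVM: Make kvm_set_spte_hva() return int" in the shortlog). A minimal sketch of the per-architecture side is shown below; it is illustrative only and not part of the diff, and arch_update_spte() is a placeholder, not a real kernel function.

/*
 * Hypothetical per-architecture hook following the new contract: return a
 * nonzero value when the update may have left stale translations behind,
 * so that the generic code above performs the remote TLB flush.
 */
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	/* arch_update_spte() stands in for the architecture's SPTE update. */
	bool need_flush = arch_update_spte(kvm, hva, pte);

	return need_flush ? 1 : 0;
}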
@@ -1133,7 +1136,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
/**
* kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
- * are dirty write protect them for next write.
+ * and reenable dirty page tracking for the corresponding pages.
* @kvm: pointer to kvm instance
* @log: slot id and address to which we copy the log
* @is_dirty: flag set if any page is dirty
@@ -1154,7 +1157,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
*
*/
int kvm_get_dirty_log_protect(struct kvm *kvm,
- struct kvm_dirty_log *log, bool *is_dirty)
+ struct kvm_dirty_log *log, bool *flush)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
@@ -1176,37 +1179,114 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
return -ENOENT;
n = kvm_dirty_bitmap_bytes(memslot);
+ *flush = false;
+ if (kvm->manual_dirty_log_protect) {
+ /*
+ * Unlike kvm_get_dirty_log, we always return false in *flush,
+ * because no flush is needed until KVM_CLEAR_DIRTY_LOG. There
+ * is some code duplication between this function and
+ * kvm_get_dirty_log, but hopefully all architecture
+ * transition to kvm_get_dirty_log_protect and kvm_get_dirty_log
+ * can be eliminated.
+ */
+ dirty_bitmap_buffer = dirty_bitmap;
+ } else {
+ dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
+ memset(dirty_bitmap_buffer, 0, n);
+
+ spin_lock(&kvm->mmu_lock);
+ for (i = 0; i < n / sizeof(long); i++) {
+ unsigned long mask;
+ gfn_t offset;
+
+ if (!dirty_bitmap[i])
+ continue;
+
+ *flush = true;
+ mask = xchg(&dirty_bitmap[i], 0);
+ dirty_bitmap_buffer[i] = mask;
+
+ if (mask) {
+ offset = i * BITS_PER_LONG;
+ kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+ offset, mask);
+ }
+ }
+ spin_unlock(&kvm->mmu_lock);
+ }
+
+ if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+ return -EFAULT;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+/**
+ * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
+ * and reenable dirty page tracking for the corresponding pages.
+ * @kvm: pointer to kvm instance
+ * @log: slot id and address from which to fetch the bitmap of dirty pages
+ */
+int kvm_clear_dirty_log_protect(struct kvm *kvm,
+ struct kvm_clear_dirty_log *log, bool *flush)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int as_id, id, n;
+ gfn_t offset;
+ unsigned long i;
+ unsigned long *dirty_bitmap;
+ unsigned long *dirty_bitmap_buffer;
+
+ as_id = log->slot >> 16;
+ id = (u16)log->slot;
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+ return -EINVAL;
+
+ if ((log->first_page & 63) || (log->num_pages & 63))
+ return -EINVAL;
+
+ slots = __kvm_memslots(kvm, as_id);
+ memslot = id_to_memslot(slots, id);
+
+ dirty_bitmap = memslot->dirty_bitmap;
+ if (!dirty_bitmap)
+ return -ENOENT;
+
+ n = kvm_dirty_bitmap_bytes(memslot);
+ *flush = false;
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
- memset(dirty_bitmap_buffer, 0, n);
+ if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
+ return -EFAULT;
spin_lock(&kvm->mmu_lock);
- *is_dirty = false;
- for (i = 0; i < n / sizeof(long); i++) {
- unsigned long mask;
- gfn_t offset;
-
- if (!dirty_bitmap[i])
+ for (offset = log->first_page,
+ i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--;
+ i++, offset += BITS_PER_LONG) {
+ unsigned long mask = *dirty_bitmap_buffer++;
+ atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i];
+ if (!mask)
continue;
- *is_dirty = true;
-
- mask = xchg(&dirty_bitmap[i], 0);
- dirty_bitmap_buffer[i] = mask;
+ mask &= atomic_long_fetch_andnot(mask, p);
+ /*
+ * mask contains the bits that really have been cleared. This
+ * never includes any bits beyond the length of the memslot (if
+ * the length is not aligned to 64 pages), therefore it is not
+ * a problem if userspace sets them in log->dirty_bitmap.
+ */
if (mask) {
- offset = i * BITS_PER_LONG;
+ *flush = true;
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
offset, mask);
}
}
-
spin_unlock(&kvm->mmu_lock);
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
- return -EFAULT;
+
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect);
#endif
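For illustration, a minimal userspace harvest-and-clear cycle built on the new KVM_CLEAR_DIRTY_LOG path could look like the sketch below. It is not part of the diff: vm_fd, the slot number, the bitmap allocation and the copy-out step are assumptions, and error handling is reduced to early returns.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * Illustrative harvest-and-clear cycle for one memslot with manual dirty-log
 * reprotection enabled.  slot_pages is the memslot size in pages and is
 * assumed to be a multiple of 64, as required by this version of the ioctl.
 */
static void harvest_dirty_pages(int vm_fd, unsigned int slot,
				unsigned long *bitmap, __u32 slot_pages)
{
	struct kvm_dirty_log get = {
		.slot = slot,
		.dirty_bitmap = bitmap,
	};
	struct kvm_clear_dirty_log clear = {
		.slot = slot,
		.first_page = 0,		/* must be a multiple of 64 */
		.num_pages = slot_pages,	/* must be a multiple of 64 */
		.dirty_bitmap = bitmap,
	};

	/* Snapshot the bitmap; with manual protect enabled this no longer
	 * write-protects the pages or forces a TLB flush by itself. */
	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &get) < 0)
		return;

	/* ... copy out the pages whose bits are set in 'bitmap' ... */

	/* Re-enable dirty tracking only for the pages just harvested. */
	ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
}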
bool kvm_largepages_enabled(void)
@@ -1928,32 +2008,33 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
gfn_t nr_pages_needed = end_gfn - start_gfn + 1;
gfn_t nr_pages_avail;
+ int r = start_gfn <= end_gfn ? 0 : -EINVAL;
ghc->gpa = gpa;
ghc->generation = slots->generation;
ghc->len = len;
- ghc->memslot = __gfn_to_memslot(slots, start_gfn);
- ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
- if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
+ ghc->hva = KVM_HVA_ERR_BAD;
+
+ /*
+ * If the requested region crosses two memslots, we still
+ * verify that the entire region is valid here.
+ */
+ while (!r && start_gfn <= end_gfn) {
+ ghc->memslot = __gfn_to_memslot(slots, start_gfn);
+ ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
+ &nr_pages_avail);
+ if (kvm_is_error_hva(ghc->hva))
+ r = -EFAULT;
+ start_gfn += nr_pages_avail;
+ }
+
+ /* Use the slow path for cross page reads and writes. */
+ if (!r && nr_pages_needed == 1)
ghc->hva += offset;
- } else {
- /*
- * If the requested region crosses two memslots, we still
- * verify that the entire region is valid here.
- */
- while (start_gfn <= end_gfn) {
- nr_pages_avail = 0;
- ghc->memslot = __gfn_to_memslot(slots, start_gfn);
- ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
- &nr_pages_avail);
- if (kvm_is_error_hva(ghc->hva))
- return -EFAULT;
- start_gfn += nr_pages_avail;
- }
- /* Use the slow path for cross page reads and writes. */
+ else
ghc->memslot = NULL;
- }
- return 0;
+
+ return r;
}
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
@@ -1965,7 +2046,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, int offset, unsigned long len)
+ void *data, unsigned int offset,
+ unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
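The two hunks above rework __kvm_gfn_to_hva_cache_init() so that the whole region is validated up front and the cached fast path is used only for single-page regions, and they widen the offset parameter of kvm_write_guest_offset_cached(). A typical in-kernel caller pattern is sketched below; it is illustrative only, and 'struct my_state' and publish_my_state() are assumptions, not code from the diff.

/* Illustrative caller of the gfn_to_hva cache API touched above. */
struct my_state {
	u64 flags;
	u64 timestamp;
};

static int publish_my_state(struct kvm *kvm, struct gfn_to_hva_cache *cache,
			    gpa_t gpa, struct my_state *state)
{
	/* Validate the whole region and cache the gpa->hva translation. */
	int r = kvm_gfn_to_hva_cache_init(kvm, cache, gpa, sizeof(*state));

	if (r)
		return r;

	/*
	 * Fast path if the region fits in one page; otherwise the cache keeps
	 * memslot == NULL and the write falls back to the slow path.
	 */
	return kvm_write_guest_offset_cached(kvm, cache, state, 0,
					     sizeof(*state));
}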
@@ -2948,6 +3030,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#endif
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
+ case KVM_CAP_ENABLE_CAP_VM:
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+#endif
return 1;
#ifdef CONFIG_KVM_MMIO
case KVM_CAP_COALESCED_MMIO:
@@ -2971,6 +3057,28 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
return kvm_vm_ioctl_check_extension(kvm, arg);
}
+int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ return -EINVAL;
+}
+
+static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ switch (cap->cap) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+ if (cap->flags || (cap->args[0] & ~1))
+ return -EINVAL;
+ kvm->manual_dirty_log_protect = cap->args[0];
+ return 0;
+#endif
+ default:
+ return kvm_vm_ioctl_enable_cap(kvm, cap);
+ }
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
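The hunk above wires KVM_CAP_MANUAL_DIRTY_LOG_PROTECT into the generic VM-level KVM_ENABLE_CAP handler and advertises it (together with KVM_CAP_ENABLE_CAP_VM) via KVM_CHECK_EXTENSION. A short userspace probe-and-enable sequence is sketched below; vm_fd is an assumption and error handling is reduced to return codes.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Illustrative: switch a VM to manual dirty-log reprotection if available. */
static int enable_manual_dirty_log(int vm_fd)
{
	struct kvm_enable_cap cap;

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT) <= 0)
		return -1;	/* kernel too old, keep the classic behaviour */

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT;
	cap.args[0] = 1;	/* only bit 0 is accepted by this version */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}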
@@ -2984,6 +3092,15 @@ static long kvm_vm_ioctl(struct file *filp,
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);
break;
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ goto out;
+ r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap);
+ break;
+ }
case KVM_SET_USER_MEMORY_REGION: {
struct kvm_userspace_memory_region kvm_userspace_mem;
@@ -3004,6 +3121,17 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
break;
}
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CLEAR_DIRTY_LOG: {
+ struct kvm_clear_dirty_log log;
+
+ r = -EFAULT;
+ if (copy_from_user(&log, argp, sizeof(log)))
+ goto out;
+ r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+ break;
+ }
+#endif
#ifdef CONFIG_KVM_MMIO
case KVM_REGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;