summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/kvm_dirty_ring.h103
-rw-r--r--include/linux/kvm_host.h13
-rw-r--r--include/trace/events/kvm.h63
-rw-r--r--include/uapi/linux/kvm.h53
4 files changed, 232 insertions, 0 deletions
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
new file mode 100644
index 000000000000..120e5e90fa1d
--- /dev/null
+++ b/include/linux/kvm_dirty_ring.h
@@ -0,0 +1,103 @@
+#ifndef KVM_DIRTY_RING_H
+#define KVM_DIRTY_RING_H
+
+#include <linux/kvm.h>
+
+/**
+ * kvm_dirty_ring: KVM internal dirty ring structure
+ *
+ * @dirty_index: free running counter that points to the next slot in
+ * dirty_ring->dirty_gfns, where a new dirty page should go
+ * @reset_index: free running counter that points to the next dirty page
+ * in dirty_ring->dirty_gfns for which dirty trap needs to
+ * be reenabled
+ * @size: size of the compact list, dirty_ring->dirty_gfns
+ * @soft_limit: when the number of dirty pages in the list reaches this
+ * limit, vcpu that owns this ring should exit to userspace
+ * to allow userspace to harvest all the dirty pages
+ * @dirty_gfns: the array to keep the dirty gfns
+ * @index: index of this dirty ring
+ */
+struct kvm_dirty_ring {
+ u32 dirty_index;
+ u32 reset_index;
+ u32 size;
+ u32 soft_limit;
+ struct kvm_dirty_gfn *dirty_gfns;
+ int index;
+};
+
+#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+/*
+ * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should
+ * not be included as well, so define these nop functions for the arch.
+ */
+static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+ return 0;
+}
+
+static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
+ int index, u32 size)
+{
+ return 0;
+}
+
+static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+ return NULL;
+}
+
+static inline int kvm_dirty_ring_reset(struct kvm *kvm,
+ struct kvm_dirty_ring *ring)
+{
+ return 0;
+}
+
+static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
+ u32 slot, u64 offset)
+{
+}
+
+static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring,
+ u32 offset)
+{
+ return NULL;
+}
+
+static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+}
+
+static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+ return true;
+}
+
+#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+u32 kvm_dirty_ring_get_rsvd_entries(void);
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
+
+/*
+ * called with kvm->slots_lock held, returns the number of
+ * processed pages.
+ */
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
+
+/*
+ * returns =0: successfully pushed
+ * <0: unable to push, need to wait
+ */
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
+
+/* for use in vm_operations_struct */
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
+
+#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+#endif /* KVM_DIRTY_RING_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ca7c1459a8e3..864b156391c8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,7 @@
#include <linux/kvm_types.h>
#include <asm/kvm_host.h>
+#include <linux/kvm_dirty_ring.h>
#ifndef KVM_MAX_VCPU_ID
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -319,6 +320,7 @@ struct kvm_vcpu {
bool preempted;
bool ready;
struct kvm_vcpu_arch arch;
+ struct kvm_dirty_ring dirty_ring;
};
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -505,6 +507,7 @@ struct kvm {
struct srcu_struct irq_srcu;
pid_t userspace_pid;
unsigned int max_halt_poll_ns;
+ u32 dirty_ring_size;
};
#define kvm_err(fmt, ...) \
@@ -1477,4 +1480,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
+/*
+ * This defines how many reserved entries we want to keep before we
+ * kick the vcpu to the userspace to avoid dirty ring full. This
+ * value can be tuned to higher if e.g. PML is enabled on the host.
+ */
+#define KVM_DIRTY_RING_RSVD_ENTRIES 64
+
+/* Max number of entries allowed for each kvm dirty ring */
+#define KVM_DIRTY_RING_MAX_ENTRIES 65536
+
#endif
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 26cfb0fa8e7e..49d7d0fe29f6 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -399,6 +399,69 @@ TRACE_EVENT(kvm_halt_poll_ns,
#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
+TRACE_EVENT(kvm_dirty_ring_push,
+ TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset),
+ TP_ARGS(ring, slot, offset),
+
+ TP_STRUCT__entry(
+ __field(int, index)
+ __field(u32, dirty_index)
+ __field(u32, reset_index)
+ __field(u32, slot)
+ __field(u64, offset)
+ ),
+
+ TP_fast_assign(
+ __entry->index = ring->index;
+ __entry->dirty_index = ring->dirty_index;
+ __entry->reset_index = ring->reset_index;
+ __entry->slot = slot;
+ __entry->offset = offset;
+ ),
+
+ TP_printk("ring %d: dirty 0x%x reset 0x%x "
+ "slot %u offset 0x%llx (used %u)",
+ __entry->index, __entry->dirty_index,
+ __entry->reset_index, __entry->slot, __entry->offset,
+ __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_reset,
+ TP_PROTO(struct kvm_dirty_ring *ring),
+ TP_ARGS(ring),
+
+ TP_STRUCT__entry(
+ __field(int, index)
+ __field(u32, dirty_index)
+ __field(u32, reset_index)
+ ),
+
+ TP_fast_assign(
+ __entry->index = ring->index;
+ __entry->dirty_index = ring->dirty_index;
+ __entry->reset_index = ring->reset_index;
+ ),
+
+ TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)",
+ __entry->index, __entry->dirty_index, __entry->reset_index,
+ __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ ),
+
+ TP_printk("vcpu %d", __entry->vcpu_id)
+);
+
#endif /* _TRACE_KVM_MAIN_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 204afbe1240e..886802b8ffba 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -250,6 +250,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_ARM_NISV 28
#define KVM_EXIT_X86_RDMSR 29
#define KVM_EXIT_X86_WRMSR 30
+#define KVM_EXIT_DIRTY_RING_FULL 31
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -1054,6 +1055,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_X86_MSR_FILTER 189
#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
#define KVM_CAP_SYS_HYPERV_CPUID 191
+#define KVM_CAP_DIRTY_LOG_RING 192
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1558,6 +1560,9 @@ struct kvm_pv_cmd {
/* Available with KVM_CAP_X86_MSR_FILTER */
#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter)
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
@@ -1711,4 +1716,52 @@ struct kvm_hyperv_eventfd {
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
+/*
+ * Arch needs to define the macro after implementing the dirty ring
+ * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the
+ * starting page offset of the dirty ring structures.
+ */
+#ifndef KVM_DIRTY_LOG_PAGE_OFFSET
+#define KVM_DIRTY_LOG_PAGE_OFFSET 0
+#endif
+
+/*
+ * KVM dirty GFN flags, defined as:
+ *
+ * |---------------+---------------+--------------|
+ * | bit 1 (reset) | bit 0 (dirty) | Status |
+ * |---------------+---------------+--------------|
+ * | 0 | 0 | Invalid GFN |
+ * | 0 | 1 | Dirty GFN |
+ * | 1 | X | GFN to reset |
+ * |---------------+---------------+--------------|
+ *
+ * Lifecycle of a dirty GFN goes like:
+ *
+ * dirtied harvested reset
+ * 00 -----------> 01 -------------> 1X -------+
+ * ^ |
+ * | |
+ * +------------------------------------------+
+ *
+ * The userspace program is only responsible for the 01->1X state
+ * conversion after harvesting an entry. Also, it must not skip any
+ * dirty bits, so that dirty bits are always harvested in sequence.
+ */
+#define KVM_DIRTY_GFN_F_DIRTY BIT(0)
+#define KVM_DIRTY_GFN_F_RESET BIT(1)
+#define KVM_DIRTY_GFN_F_MASK 0x3
+
+/*
+ * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of
+ * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The
+ * size of the gfn buffer is decided by the first argument when
+ * enabling KVM_CAP_DIRTY_LOG_RING.
+ */
+struct kvm_dirty_gfn {
+ __u32 flags;
+ __u32 slot;
+ __u64 offset;
+};
+
#endif /* __LINUX_KVM_H */