vhost: log dirty page correctly

The vhost dirty page logging API is designed to sync through GPA, but we try to log GIOVA when the device IOTLB is enabled. This is wrong and may lead to missing data after migration.

To solve this issue, when logging with the device IOTLB enabled, we will:

1) Reuse the device IOTLB translation result of the GIOVA->HVA mapping to get the HVA: for a writable descriptor, get the HVA through the iovec; for a used ring update, translate its GIOVA to an HVA.
2) Traverse the GPA->HVA mapping to get the possible GPA and log through GPA. Note that this reverse mapping is not guaranteed to be unique, so we should log each possible GPA in this case.

This fixes the failure of scp to the guest during migration. In -next, we will probably support passing GIOVA->GPA instead of GIOVA->HVA.

Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Cc: Jintack Lim <jintack@cs.columbia.edu>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
author: Jason Wang <jasowang@redhat.com> 2019-01-16 09:54:42 +0100
committer: David S. Miller <davem@davemloft.net> 2019-01-18 06:43:24 +0100
commit: cc5e710759470bc7f3c61d11fd54586f15fdbdf4 (patch)
tree: c636103033e16eb5d9218b7c7cdbd98a89c4077c /drivers/vhost/vhost.c
parent: Documentation: timestamping: correct path to net_tstamp.h (diff)
download: linux-cc5e710759470bc7f3c61d11fd54586f15fdbdf4.tar.xz
linux-cc5e710759470bc7f3c61d11fd54586f15fdbdf4.zip
1 files changed, 83 insertions, 14 deletions
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9f7942cbcbb2..babbb32b9bf0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1733,13 +1733,87 @@ static int log_write(void __user *log_base,
 	return r;
 }
 
+/*
+ * Log a write to the host virtual address range [hva, hva + len) in the
+ * dirty log at vq->log_base.
+ *
+ * Every entry on vq->umem->umem_list whose userspace_addr range overlaps
+ * the HVA range is logged: the overlapping part is rebased onto the
+ * entry's ->start and handed to log_write().
+ *
+ * Returns 0 on success, a negative value propagated from log_write(), or
+ * -EFAULT when no entry overlaps the part of the range still to be logged.
+ */
+static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
+{
+	struct vhost_umem *umem = vq->umem;
+	struct vhost_umem_node *u;
+	u64 start, end, l, min;
+	int r;
+	bool hit;
+
+	while (len) {
+		min = len;
+		/* Reset on every pass: a match found in an earlier pass must
+		 * not hide a remainder that no entry covers.
+		 */
+		hit = false;
+		/* More than one GPA can be mapped to a single HVA, so
+		 * iterate over all umem entries here to be safe.
+		 */
+		list_for_each_entry(u, &umem->umem_list, link) {
+			/* Skip entries that do not overlap the range. */
+			if (u->userspace_addr > hva - 1 + len ||
+			    u->userspace_addr - 1 + u->size < hva)
+				continue;
+			/* Clamp to the overlap; both bounds are inclusive. */
+			start = max(u->userspace_addr, hva);
+			end = min(u->userspace_addr - 1 + u->size,
+				  hva - 1 + len);
+			l = end - start + 1;
+			r = log_write(vq->log_base,
+				      u->start + start - u->userspace_addr,
+				      l);
+			if (r < 0)
+				return r;
+			hit = true;
+			min = min(l, min);
+		}
+
+		if (!hit)
+			return -EFAULT;
+
+		/* Advance by the shortest overlap logged in this pass. */
+		len -= min;
+		hva += min;
+	}
+
+	return 0;
+}
+
+/*
+ * Log a write of len bytes at byte offset used_offset into the used ring.
+ *
+ * Without a device IOTLB the write is logged directly at
+ * vq->log_addr + used_offset and the result of log_write() is returned.
+ * With one, the used ring address is first translated by translate_desc()
+ * and every resulting iovec segment is logged through log_write_hva().
+ *
+ * Returns 0 on success, or the error reported by translate_desc() or
+ * log_write_hva().
+ */
+static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
+{
+	struct iovec iov[64];
+	int i, nr, ret;
+
+	if (!vq->iotlb)
+		return log_write(vq->log_base, vq->log_addr + used_offset, len);
+
+	/* A non-negative result is the number of iovec entries filled in,
+	 * so only a negative value is a failure.
+	 */
+	nr = translate_desc(vq, (uintptr_t)vq->used + used_offset,
+			    len, iov, 64, VHOST_ACCESS_WO);
+	if (nr < 0)
+		return nr;
+
+	/* Keep the segment count apart from the per-segment result so that
+	 * every translated segment gets logged.
+	 */
+	for (i = 0; i < nr; i++) {
+		ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
+				    iov[i].iov_len);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
-		    unsigned int log_num, u64 len)
+		    unsigned int log_num, u64 len, struct iovec *iov, int count)
 {
 	int i, r;
 
 	/* Make sure data written is seen before log. */
 	smp_wmb();
+
+	if (vq->iotlb) {
+		for (i = 0; i < count; i++) {
+			r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
+					  iov[i].iov_len);
+			if (r < 0)
+				return r;
+		}
+		return 0;
+	}
+
 	for (i = 0; i < log_num; ++i) {
 		u64 l = min(log[i].len, len);
 		r = log_write(vq->log_base, log[i].addr, l);
@@ -1769,9 +1843,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
 		smp_wmb();
 		/* Log used flag write. */
 		used = &vq->used->flags;
-		log_write(vq->log_base, vq->log_addr +
-			  (used - (void __user *)vq->used),
-			  sizeof vq->used->flags);
+		log_used(vq, (used - (void __user *)vq->used),
+			 sizeof vq->used->flags);
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
@@ -1789,9 +1862,8 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
 		smp_wmb();
 		/* Log avail event write */
 		used = vhost_avail_event(vq);
-		log_write(vq->log_base, vq->log_addr +
-			  (used - (void __user *)vq->used),
-			  sizeof *vhost_avail_event(vq));
+		log_used(vq, (used - (void __user *)vq->used),
+			 sizeof *vhost_avail_event(vq));
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
@@ -2191,10 +2263,8 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 		/* Make sure data is seen before log. */
 		smp_wmb();
 		/* Log used ring entry write. */
-		log_write(vq->log_base,
-			  vq->log_addr +
-			   ((void __user *)used - (void __user *)vq->used),
-			  count * sizeof *used);
+		log_used(vq, ((void __user *)used - (void __user *)vq->used),
+			 count * sizeof *used);
 	}
 	old = vq->last_used_idx;
 	new = (vq->last_used_idx += count);
@@ -2236,9 +2306,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 		/* Make sure used idx is seen before log. */
 		smp_wmb();
 		/* Log used index update. */
-		log_write(vq->log_base,
-			  vq->log_addr + offsetof(struct vring_used, idx),
-			  sizeof vq->used->idx);
+		log_used(vq, offsetof(struct vring_used, idx),
+			 sizeof vq->used->idx);
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
author	Jason Wang <jasowang@redhat.com>	2019-01-16 09:54:42 +0100
committer	David S. Miller <davem@davemloft.net>	2019-01-18 06:43:24 +0100
commit	cc5e710759470bc7f3c61d11fd54586f15fdbdf4 (patch)
tree	c636103033e16eb5d9218b7c7cdbd98a89c4077c /drivers/vhost/vhost.c
parent	Documentation: timestamping: correct path to net_tstamp.h (diff)
download	linux-cc5e710759470bc7f3c61d11fd54586f15fdbdf4.tar.xz linux-cc5e710759470bc7f3c61d11fd54586f15fdbdf4.zip