gpu: host1x: Optimize CDMA push buffer memory usage

The host1x CDMA push buffer is terminated by a special opcode (RESTART) that tells the CDMA to wrap around to the beginning of the push buffer. To accommodate the RESTART opcode, an extra 4 bytes are allocated on top of the 512 * 8 = 4096 bytes needed for the 512 slots (1 slot = 2 words) that are used for other commands passed to CDMA. This requires that two memory pages are allocated, but most of the second page (4092 bytes) is never used. Decrease the number of slots to 511 so that the RESTART opcode fits within the page. Adjust the push buffer wraparound code to take into account push buffer sizes that are not a power of two. Signed-off-by: Thierry Reding <treding@nvidia.com>
author: Thierry Reding <treding@nvidia.com> 2019-02-01 14:28:30 +0100
committer: Thierry Reding <treding@nvidia.com> 2019-02-07 18:28:59 +0100
commit: e1f338c0f8a9aacf351e42e5cfd0639fc73dc5b9 (patch)
tree: 5f68c4e9a35909349f67f38ac8e9a9ed82fc6ff6 /drivers/gpu/host1x
parent: gpu: host1x: Use correct semantics for HOST1X_CHANNEL_DMAEND (diff)
download: linux-e1f338c0f8a9aacf351e42e5cfd0639fc73dc5b9.tar.xz
linux-e1f338c0f8a9aacf351e42e5cfd0639fc73dc5b9.zip
1 files changed, 25 insertions, 4 deletions
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 64099cc1964b..07df85b92ebf 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -41,7 +41,17 @@
  * means that the push buffer is full, not empty.
  */
 
-#define HOST1X_PUSHBUFFER_SLOTS	512
+/*
+ * Typically the commands written into the push buffer are a pair of words. We
+ * use slots to represent each of these pairs and to simplify things. Note the
+ * strange number of slots allocated here. 512 slots will fit exactly within a
+ * single memory page. We also need one additional word at the end of the push
+ * buffer for the RESTART opcode that will instruct the CDMA to jump back to
+ * the beginning of the push buffer. With 512 slots, this means that we'll use
+ * 2 memory pages and waste 4092 bytes of the second page that will never be
+ * used.
+ */
+#define HOST1X_PUSHBUFFER_SLOTS	511
 
 /*
  * Clean up push buffer resources
@@ -143,7 +153,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 	WARN_ON(pb->pos == pb->fence);
 	*(p++) = op1;
 	*(p++) = op2;
-	pb->pos = (pb->pos + 8) & (pb->size - 1);
+	pb->pos += 8;
+
+	if (pb->pos >= pb->size)
+		pb->pos -= pb->size;
 }
 
 /*
@@ -153,7 +166,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
 {
 	/* Advance the next write position */
-	pb->fence = (pb->fence + slots * 8) & (pb->size - 1);
+	pb->fence += slots * 8;
+
+	if (pb->fence >= pb->size)
+		pb->fence -= pb->size;
 }
 
 /*
@@ -161,7 +177,12 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
  */
 static u32 host1x_pushbuffer_space(struct push_buffer *pb)
 {
-	return ((pb->fence - pb->pos) & (pb->size - 1)) / 8;
+	unsigned int fence = pb->fence;
+
+	if (pb->fence < pb->pos)
+		fence += pb->size;
+
+	return (fence - pb->pos) / 8;
 }
 
 /*
author	Thierry Reding <treding@nvidia.com>	2019-02-01 14:28:30 +0100
committer	Thierry Reding <treding@nvidia.com>	2019-02-07 18:28:59 +0100
commit	e1f338c0f8a9aacf351e42e5cfd0639fc73dc5b9 (patch)
tree	5f68c4e9a35909349f67f38ac8e9a9ed82fc6ff6 /drivers/gpu/host1x
parent	gpu: host1x: Use correct semantics for HOST1X_CHANNEL_DMAEND (diff)
download	linux-e1f338c0f8a9aacf351e42e5cfd0639fc73dc5b9.tar.xz linux-e1f338c0f8a9aacf351e42e5cfd0639fc73dc5b9.zip