summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/msm/adreno
diff options
context:
space:
mode:
authorRob Clark <robdclark@chromium.org>2021-04-28 21:36:49 +0200
committerRob Clark <robdclark@chromium.org>2021-08-07 20:48:37 +0200
commit0710a740dc211ea4f94f5d2b887d5efafd524193 (patch)
tree576b0f36275defaf51007d2bdda31e4b40832831 /drivers/gpu/drm/msm/adreno
parentdrm/msm: Implement mmap as GEM object function (diff)
downloadlinux-0710a740dc211ea4f94f5d2b887d5efafd524193.tar.xz
linux-0710a740dc211ea4f94f5d2b887d5efafd524193.zip
drm/msm: Periodically update RPTR shadow
On a5xx and a6xx devices that are using CP_WHERE_AM_I to update a ringbuffer read-ptr shadow value, periodically emit a CP_WHERE_AM_I every 32 commands, so that a later submit waiting for ringbuffer space to become available sees partial progress, rather than not seeing rptr advance at all until the GPU gets to the end of the submit that it is currently chewing on. Signed-off-by: Rob Clark <robdclark@chromium.org> Acked-by: Jordan Crouse <jordan@cosmicpenguin.net> Link: https://lore.kernel.org/r/20210428193654.1498482-3-robdclark@gmail.com Signed-off-by: Rob Clark <robdclark@chromium.org>
Diffstat (limited to 'drivers/gpu/drm/msm/adreno')
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c29
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c27
2 files changed, 45 insertions, 11 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 0a93ed1d6b06..5e2750eb3810 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -18,6 +18,18 @@ static void a5xx_dump(struct msm_gpu *gpu);
#define GPU_PAS_ID 13
+static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+ if (a5xx_gpu->has_whereami) {
+ OUT_PKT7(ring, CP_WHERE_AM_I, 2);
+ OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
+ OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+ }
+}
+
void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
bool sync)
{
@@ -30,11 +42,8 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
* Most flush operations need to issue a WHERE_AM_I opcode to sync up
* the rptr shadow
*/
- if (a5xx_gpu->has_whereami && sync) {
- OUT_PKT7(ring, CP_WHERE_AM_I, 2);
- OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
- OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
- }
+ if (sync)
+ update_shadow_rptr(gpu, ring);
spin_lock_irqsave(&ring->preempt_lock, flags);
@@ -168,6 +177,16 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
ibs++;
break;
}
+
+ /*
+ * Periodically update shadow-wptr if needed, so that we
+ * can see partial progress of submits with large # of
+ * cmds.. otherwise we could needlessly stall waiting for
+ * ringbuffer state, simply due to looking at a shadow
+ * rptr value that has not been updated
+ */
+ if ((ibs % 32) == 0)
+ update_shadow_rptr(gpu, ring);
}
/*
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 6ddd9010cc4b..c19d2ae59de8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -52,21 +52,25 @@ static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
return true;
}
-static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
- uint32_t wptr;
- unsigned long flags;
/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
- struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-
OUT_PKT7(ring, CP_WHERE_AM_I, 2);
OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
}
+}
+
+static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ uint32_t wptr;
+ unsigned long flags;
+
+ update_shadow_rptr(gpu, ring);
spin_lock_irqsave(&ring->preempt_lock, flags);
@@ -145,7 +149,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = submit->ring;
- unsigned int i;
+ unsigned int i, ibs = 0;
a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
@@ -181,8 +185,19 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
OUT_RING(ring, submit->cmd[i].size);
+ ibs++;
break;
}
+
+ /*
+ * Periodically update shadow-wptr if needed, so that we
+ * can see partial progress of submits with large # of
+ * cmds.. otherwise we could needlessly stall waiting for
+ * ringbuffer state, simply due to looking at a shadow
+ * rptr value that has not been updated
+ */
+ if ((ibs % 32) == 0)
+ update_shadow_rptr(gpu, ring);
}
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),