summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Rob Clark <robdclark@chromium.org> 2023-11-17 16:24:28 +0100
committer: Rob Clark <robdclark@chromium.org> 2023-11-21 02:15:19 +0100
commit: 12578c075f89d6bd1b8af21751fbc2e1f78d2ce0 (patch)
tree: 3e16ed17f0531ad0577ae5d52bbbca98e7469446
parent: drm/msm: Reduce fallout of fence signaling vs reclaim hangs (diff)
download: linux-12578c075f89d6bd1b8af21751fbc2e1f78d2ce0.tar.xz
download: linux-12578c075f89d6bd1b8af21751fbc2e1f78d2ce0.zip
drm/msm/gpu: Skip retired submits in recover worker
If we somehow raced with the submit retiring, either while waiting for the worker to have a chance to run or while acquiring the gpu lock, then the recover worker should just bail.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/568034/
-rw-r--r-- drivers/gpu/drm/msm/msm_gpu.c 41
1 files changed, 22 insertions, 19 deletions
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 2b7c9db3ded3..095390774f22 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -365,29 +365,31 @@ static void recover_worker(struct kthread_work *work)
DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
- if (submit) {
- /* Increment the fault counts */
- submit->queue->faults++;
- if (submit->aspace)
- submit->aspace->faults++;
- get_comm_cmdline(submit, &comm, &cmd);
+ /*
+ * If the submit retired while we were waiting for the worker to run,
+ * or waiting to acquire the gpu lock, then nothing more to do.
+ */
+ if (!submit)
+ goto out_unlock;
- if (comm && cmd) {
- DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
- gpu->name, comm, cmd);
+ /* Increment the fault counts */
+ submit->queue->faults++;
+ if (submit->aspace)
+ submit->aspace->faults++;
- msm_rd_dump_submit(priv->hangrd, submit,
- "offending task: %s (%s)", comm, cmd);
- } else {
- msm_rd_dump_submit(priv->hangrd, submit, NULL);
- }
+ get_comm_cmdline(submit, &comm, &cmd);
+
+ if (comm && cmd) {
+ DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
+ gpu->name, comm, cmd);
+
+ msm_rd_dump_submit(priv->hangrd, submit,
+ "offending task: %s (%s)", comm, cmd);
} else {
- /*
- * We couldn't attribute this fault to any particular context,
- * so increment the global fault count instead.
- */
- gpu->global_faults++;
+ DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name);
+
+ msm_rd_dump_submit(priv->hangrd, submit, NULL);
}
/* Record the crash state */
@@ -440,6 +442,7 @@ static void recover_worker(struct kthread_work *work)
pm_runtime_put(&gpu->pdev->dev);
+out_unlock:
mutex_unlock(&gpu->lock);
msm_gpu_retire(gpu);