diff options
author | Rob Clark <robdclark@chromium.org> | 2023-08-10 23:31:41 +0200 |
---|---|---|
committer | Rob Clark <robdclark@chromium.org> | 2023-08-15 19:09:30 +0200 |
commit | abe2023b4cea192ab266b351fd38dc9dbd846df0 (patch) | |
tree | def4da9e366c668fac0d602fa6c0088ae88c78fe | |
parent | drm/msm/a690: Switch to a660_gmu.bin (diff) | |
download | linux-abe2023b4cea192ab266b351fd38dc9dbd846df0.tar.xz linux-abe2023b4cea192ab266b351fd38dc9dbd846df0.zip |
drm/msm/gpu: Push gpu lock down past runpm
Avoid holding gpu lock when calling runpm, to avoid this lockdep splat:
======================================================
WARNING: possible circular locking dependency detected
6.4.3-debug+ #14 Not tainted
------------------------------------------------------
ring0/373 is trying to acquire lock:
ffffffead86efb98 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock+0x70/0x98
but task is already holding lock:
ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #4 (&gpu->lock){+.+.}-{3:3}:
__mutex_lock+0xc8/0x388
mutex_lock_nested+0x2c/0x38
msm_job_run+0x7c/0x128 [msm]
drm_sched_main+0x264/0x354 [gpu_sched]
kthread+0xf0/0x100
ret_from_fork+0x10/0x20
-> #3 (dma_fence_map){++++}-{0:0}:
__dma_fence_might_wait+0x74/0xc0
dma_resv_lockdep+0x1f0/0x2e8
do_one_initcall+0xb4/0x214
kernel_init_freeable+0x338/0x33c
kernel_init+0x30/0x134
ret_from_fork+0x10/0x20
-> #2 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}:
fs_reclaim_acquire+0x7c/0x9c
slab_pre_alloc_hook.constprop.0+0x40/0x250
__kmem_cache_alloc_node+0x60/0x18c
kmalloc_node_trace+0x40/0x84
alloc_worker+0x2c/0x64
init_rescuer+0x34/0xe0
workqueue_init+0x168/0x1fc
kernel_init_freeable+0x15c/0x33c
kernel_init+0x30/0x134
ret_from_fork+0x10/0x20
-> #1 (fs_reclaim){+.+.}-{0:0}:
__fs_reclaim_acquire+0x3c/0x48
fs_reclaim_acquire+0x50/0x9c
slab_pre_alloc_hook.constprop.0+0x40/0x250
__kmem_cache_alloc_node+0x60/0x18c
kmalloc_trace+0x44/0x88
clk_rcg2_dfs_determine_rate+0x60/0x214
clk_core_determine_round_nolock+0xb8/0xf0
clk_core_round_rate_nolock+0x84/0x118
clk_core_round_rate_nolock+0xd8/0x118
clk_round_rate+0x6c/0xd0
geni_se_clk_tbl_get+0x78/0xc0
geni_se_clk_freq_match+0x44/0xe4
get_spi_clk_cfg+0x50/0xf4
geni_spi_set_clock_and_bw+0x54/0x104
spi_geni_prepare_message+0x130/0x174
__spi_pump_transfer_message+0x200/0x4d8
__spi_sync+0x13c/0x23c
spi_sync_locked+0x18/0x24
do_cros_ec_pkt_xfer_spi+0x124/0x3f0
cros_ec_xfer_high_pri_work+0x28/0x3c
kthread_worker_fn+0x14c/0x27c
kthread+0xf0/0x100
ret_from_fork+0x10/0x20
-> #0 (prepare_lock){+.+.}-{3:3}:
__lock_acquire+0xdf8/0x109c
lock_acquire+0x234/0x284
__mutex_lock+0xc8/0x388
mutex_lock_nested+0x2c/0x38
clk_prepare_lock+0x70/0x98
clk_prepare+0x24/0x50
clk_bulk_prepare+0x50/0x9c
a6xx_gmu_resume+0x94/0x800 [msm]
a6xx_gmu_pm_resume+0x38/0x158 [msm]
adreno_runtime_resume+0x2c/0x38 [msm]
pm_generic_runtime_resume+0x30/0x44
__rpm_callback+0x4c/0x134
rpm_callback+0x78/0x7c
rpm_resume+0x3a4/0x46c
__pm_runtime_resume+0x78/0xbc
pm_runtime_get_sync.isra.0+0x14/0x20 [msm]
msm_gpu_submit+0x4c/0x12c [msm]
msm_job_run+0x88/0x128 [msm]
drm_sched_main+0x264/0x354 [gpu_sched]
kthread+0xf0/0x100
ret_from_fork+0x10/0x20
other info that might help us debug this:
Chain exists of:
prepare_lock --> dma_fence_map --> &gpu->lock
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&gpu->lock);
lock(dma_fence_map);
lock(&gpu->lock);
lock(prepare_lock);
*** DEADLOCK ***
2 locks held by ring0/373:
#0: ffffffead875ae50 (dma_fence_map){++++}-{0:0}, at: drm_sched_main+0x54/0x354 [gpu_sched]
#1: ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm]
stack backtrace:
CPU: 2 PID: 373 Comm: ring0 Not tainted 6.4.3-debug+ #14
Hardware name: Google Villager (rev1+) with LTE (DT)
Call trace:
dump_backtrace+0xb4/0xf0
show_stack+0x20/0x30
dump_stack_lvl+0x60/0x84
dump_stack+0x18/0x24
print_circular_bug+0x1cc/0x234
check_noncircular+0x78/0xac
__lock_acquire+0xdf8/0x109c
lock_acquire+0x234/0x284
__mutex_lock+0xc8/0x388
mutex_lock_nested+0x2c/0x38
clk_prepare_lock+0x70/0x98
clk_prepare+0x24/0x50
clk_bulk_prepare+0x50/0x9c
a6xx_gmu_resume+0x94/0x800 [msm]
a6xx_gmu_pm_resume+0x38/0x158 [msm]
adreno_runtime_resume+0x2c/0x38 [msm]
pm_generic_runtime_resume+0x30/0x44
__rpm_callback+0x4c/0x134
rpm_callback+0x78/0x7c
rpm_resume+0x3a4/0x46c
__pm_runtime_resume+0x78/0xbc
pm_runtime_get_sync.isra.0+0x14/0x20 [msm]
msm_gpu_submit+0x4c/0x12c [msm]
msm_job_run+0x88/0x128 [msm]
drm_sched_main+0x264/0x354 [gpu_sched]
kthread+0xf0/0x100
ret_from_fork+0x10/0x20
Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/552298/
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_ringbuffer.c | 7 |
2 files changed, 8 insertions, 10 deletions
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 5c10b559a595..7f64c6667300 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -749,13 +749,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned long flags; - WARN_ON(!mutex_is_locked(&gpu->lock)); - pm_runtime_get_sync(&gpu->pdev->dev); - msm_gpu_hw_init(gpu); + mutex_lock(&gpu->lock); - submit->seqno = submit->hw_fence->seqno; + msm_gpu_hw_init(gpu); update_sw_cntrs(gpu); @@ -781,8 +779,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) gpu->funcs->submit(gpu, submit); gpu->cur_ctx_seqno = submit->queue->ctx->seqno; - pm_runtime_put(&gpu->pdev->dev); hangcheck_timer_reset(gpu); + + mutex_unlock(&gpu->lock); + + pm_runtime_put(&gpu->pdev->dev); } /* diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 7f5e0a961bba..40c0bc35a44c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -21,6 +21,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) msm_fence_init(submit->hw_fence, fctx); + submit->seqno = submit->hw_fence->seqno; + mutex_lock(&priv->lru.lock); for (i = 0; i < submit->nr_bos; i++) { @@ -32,13 +34,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) mutex_unlock(&priv->lru.lock); - /* TODO move submit path over to using a per-ring lock.. */ - mutex_lock(&gpu->lock); - msm_gpu_submit(gpu, submit); - mutex_unlock(&gpu->lock); - return dma_fence_get(submit->hw_fence); } |