author     Dave Airlie <airlied@redhat.com>    2024-06-11 06:01:55 +0200
committer  Dave Airlie <airlied@redhat.com>    2024-06-11 06:01:55 +0200
commit     1ddaaa244021aba8496536a6627b4ad2bc0f936a (patch)
tree       2b37ec6170094757daaa0c7445670eebf3b996d9 /drivers/gpu/drm/amd/display/dc/dml
parent     Merge tag 'drm-xe-next-2024-06-06' of https://gitlab.freedesktop.org/drm/xe/k... (diff)
parent     drm/amdgpu: add RAS is_rma flag (diff)
Merge tag 'amd-drm-next-6.11-2024-06-07' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.11-2024-06-07:
amdgpu:
- DCN 4.0.x support
- DCN 3.5 updates
- GC 12.0 support
- DP MST fixes
- Cursor fixes
- MES11 updates
- MMHUB 4.1 support
- DML2 Updates
- DCN 3.1.5 fixes
- IPS fixes
- Various code cleanups
- GMC 12.0 support
- SDMA 7.0 support
- SMU 13 updates
- SR-IOV fixes
- VCN 5.x fixes
- MES12 support
- SMU 14.x updates
- Devcoredump improvements
- Fixes for HDP flush on platforms with >4k pages
- GC 9.4.3 fixes
- RAS ACA updates
- Silence UBSAN flex array warnings
- MMHUB 3.3 updates
amdkfd:
- Contiguous VRAM allocations
- GC 12.0 support
- SDMA 7.0 support
- SR-IOV fixes
radeon:
- Backlight workaround for iMac
- Silence UBSAN flex array warnings
UAPI:
- GFX12 modifier and DCC support
Proposed Mesa changes:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510
- KFD GFX ALU exceptions
Proposed ROCdebugger changes:
https://github.com/ROCm/ROCdbgapi/commit/08c760622b6601abf906f75abbc5e21d9fd425df
https://github.com/ROCm/ROCgdb/commit/944fe1c1414a68700414e86e32273b6bfa62ba6f
- KFD Contiguous VRAM allocation flag
Proposed ROCr/HIP changes:
https://github.com/ROCm/ROCT-Thunk-Interface/commit/f7b4a269914a3ab4f1e2453c2879adb97b5cc9e5
https://github.com/ROCm/ROCR-Runtime/pull/214/commits/26e8530d05a775872cb06dde6693db72be0c454a
https://github.com/ROCm/clr/commit/1d48f2a1ab38b632919c4b7274899b3faf4279ff
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240607195900.902537-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml')
17 files changed, 351 insertions, 42 deletions
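
A recurring defensive pattern in the DML changes below: indices derived from a clock table, and the number of derived DPM states, are clamped or bounds-checked before they touch fixed-size arrays (the clock table holds MAX_NUM_DPM_LVL entries, the SoC clock-limits array DC__VOLTAGE_STATES, per the comments in the hunks). A minimal standalone sketch of that pattern, with illustrative names rather than the driver's types:

  #include <assert.h>

  /* Illustrative size; the driver's clock table holds MAX_NUM_DPM_LVL (8)
   * entries and the SoC clock limits DC__VOLTAGE_STATES (40). */
  #define TABLE_LEVELS 8

  /* Pick the vmid/vnom/vmax indices from a clock table that may hold fewer
   * than three levels; every derived index is clamped to 0 so a short table
   * can never yield a negative index (compare the dcn_calcs.c change below). */
  static void pick_voltage_indices(int num_levels,
  				 int *vmid_idx, int *vnom_idx, int *vmax_idx)
  {
  	*vmid_idx = num_levels > 2 ? num_levels - 3 : 0;
  	*vnom_idx = num_levels > 1 ? num_levels - 2 : 0;
  	*vmax_idx = num_levels > 0 ? num_levels - 1 : 0;
  }

  /* Refuse to publish more derived states than the source table can hold,
   * the way the dcn3x update_bw_bounding_box helpers below now bail out. */
  static int publish_states(unsigned int *dst, const unsigned int *src,
  			  unsigned int num_states)
  {
  	if (num_states > TABLE_LEVELS) {
  		assert(0);	/* mirrors ASSERT(0) in the driver */
  		return -1;
  	}
  	for (unsigned int i = 0; i < num_states; i++)
  		dst[i] = src[i];
  	return (int)num_states;
  }
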
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index a94b6d546cd1..3c0222aa4df1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -108,6 +108,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn401/dcn401_fpu.o := $(dml_ccflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn401/dcn401_fpu.o := $(dml_rcflags) + ifdef CONFIG_DRM_AMD_DC_FP DML += display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o DML += dcn10/dcn10_fpu.o @@ -128,6 +131,7 @@ DML += dcn303/dcn303_fpu.o DML += dcn314/dcn314_fpu.o DML += dcn35/dcn35_fpu.o DML += dcn351/dcn351_fpu.o +DML += dcn401/dcn401_fpu.o DML += dsc/rc_calc_fpu.o DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index 0c4a8fe8e5ca..f1cde1e4265f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -1453,10 +1453,9 @@ void dcn_bw_update_from_pplib_fclks( ASSERT(fclks->num_levels); vmin0p65_idx = 0; - vmid0p72_idx = fclks->num_levels - - (fclks->num_levels > 2 ? 3 : (fclks->num_levels > 1 ? 2 : 1)); - vnom0p8_idx = fclks->num_levels - (fclks->num_levels > 1 ? 2 : 1); - vmax0p9_idx = fclks->num_levels - 1; + vmid0p72_idx = fclks->num_levels > 2 ? fclks->num_levels - 3 : 0; + vnom0p8_idx = fclks->num_levels > 1 ? fclks->num_levels - 2 : 0; + vmax0p9_idx = fclks->num_levels > 0 ? 
fclks->num_levels - 1 : 0; dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 32 * (fclks->data[vmin0p65_idx].clocks_in_khz / 1000.0) / 1000.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h index 63219ecd8478..1bf6b12f5663 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h @@ -29,4 +29,4 @@ void dcn10_resource_construct_fp(struct dc *dc); -#endif /* __DCN20_FPU_H__ */ +#endif /* __DCN10_FPU_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index 81f7b90849ce..aac0a0ae2966 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -387,13 +387,17 @@ void dcn30_fpu_calculate_wm_and_dlg( double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; unsigned int dummy_latency_index = 0; + struct dc_stream_status *stream_status = NULL; dc_assert_fp_enabled(); context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; - for (i = 0; i < context->stream_count; i++) { + for (i = 0; i < context->stream_count; i++) { + stream_status = NULL; if (context->streams[i]) - context->streams[i]->fpo_in_use = false; + stream_status = dc_state_get_stream_status(context, context->streams[i]); + if (stream_status) + stream_status->fpo_in_use = false; } if (!pstate_en) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 6ce90678b33c..0c0b2d67c9cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -320,7 +320,7 @@ static void calculate_wm_set_for_vlevel(int vlevel, } -void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool); @@ -409,7 +409,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info) dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; } -void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, +void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h index 774b0fdfc80b..3e103e23dc6f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h @@ -26,15 +26,14 @@ #ifndef __DCN301_FPU_H__ #define __DCN301_FPU_H__ -void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info); +void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); void dcn301_fpu_set_wm_ranges(int i, struct pp_smu_wm_range_sets *ranges, struct _vcs_dpi_soc_bounding_box_st *loaded_bb); -void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info); - -void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, +void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc, struct dc_state 
*context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c index e2bcd205aa93..8da97a96b1ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c @@ -304,6 +304,16 @@ void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } + /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL]. + * MAX_NUM_DPM_LVL is 8. + * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES]. + * DC__VOLTAGE_STATES is 40. + */ + if (num_states > MAX_NUM_DPM_LVL) { + ASSERT(0); + return; + } + dcn3_02_soc.num_states = num_states; for (i = 0; i < dcn3_02_soc.num_states; i++) { dcn3_02_soc.clock_limits[i].state = i; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c index 3f02bb806d42..e968870a4b81 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c @@ -310,6 +310,16 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } + /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL]. + * MAX_NUM_DPM_LVL is 8. + * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES]. + * DC__VOLTAGE_STATES is 40. + */ + if (num_states > MAX_NUM_DPM_LVL) { + ASSERT(0); + return; + } + dcn3_03_soc.num_states = num_states; for (i = 0; i < dcn3_03_soc.num_states; i++) { dcn3_03_soc.clock_limits[i].state = i; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 94317b2e4a85..17a21bcbde17 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -647,9 +647,9 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } - if (clk_table->num_entries) { + + if (clk_table->num_entries) dcn3_1_soc.num_states = clk_table->num_entries; - } memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits)); @@ -762,23 +762,11 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param break; } } - // Ported from DCN315 - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_16_soc.num_states - 1; - } s[i].state = i; /* Clocks dependent on voltage level. 
*/ s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - s[i].dcfclk_mhz < - dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - s[i].dcfclk_mhz = - dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * @@ -799,9 +787,9 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } - if (clk_table->num_entries) { + + if (clk_table->num_entries) dcn3_16_soc.num_states = clk_table->num_entries; - } memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits)); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 3242957d00c5..f52b9e3d2bee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -24,10 +24,7 @@ * */ -#define UNIT_TEST 0 -#if !UNIT_TEST #include "dc.h" -#endif #include "../display_mode_lib.h" #include "display_mode_vba_314.h" #include "../dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index f6fe0a64beac..194422dd979d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2309,6 +2309,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, bool need_fclk_lat_as_dummy = false; bool is_subvp_p_drr = false; struct dc_stream_state *fpo_candidate_stream = NULL; + struct dc_stream_status *stream_status = NULL; dc_assert_fp_enabled(); @@ -2343,8 +2344,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; for (i = 0; i < context->stream_count; i++) { + stream_status = NULL; if (context->streams[i]) - context->streams[i]->fpo_in_use = false; + stream_status = dc_state_get_stream_status(context, context->streams[i]); + if (stream_status) + stream_status->fpo_in_use = false; } if (!pstate_en || (!dc->debug.disable_fpo_optimizations && @@ -2352,7 +2356,9 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */ fpo_candidate_stream = dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); if (fpo_candidate_stream) { - fpo_candidate_stream->fpo_in_use = true; + stream_status = dc_state_get_stream_status(context, fpo_candidate_stream); + if (stream_status) + stream_status->fpo_in_use = true; context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = true; } @@ -2389,8 +2395,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, */ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; for (i = 0; i < context->stream_count; i++) { + stream_status = NULL; if (context->streams[i]) - context->streams[i]->fpo_in_use = false; + stream_status = dc_state_get_stream_status(context, context->streams[i]); + if (stream_status) + stream_status->fpo_in_use = false; } context->bw_ctx.dml.soc.fclk_change_latency_us = 
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); @@ -3232,6 +3241,16 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } + /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL]. + * MAX_NUM_DPM_LVL is 8. + * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES]. + * DC__VOLTAGE_STATES is 40. + */ + if (num_states > MAX_NUM_DPM_LVL) { + ASSERT(0); + return; + } + dcn3_2_soc.num_states = num_states; for (i = 0; i < dcn3_2_soc.num_states; i++) { dcn3_2_soc.clock_limits[i].state = i; @@ -3521,15 +3540,16 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co * * @dc: current dc state * @context: new dc state + * @fpo_candidate_stream: candidate stream to be chosen for FPO * @vactive_margin_req_us: The vactive marign required for a vactive pipe to be considered "found" * * Return: True if VACTIVE display is found, false otherwise */ -bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req_us) +bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, struct dc_stream_state *fpo_candidate_stream, uint32_t vactive_margin_req_us) { unsigned int i, pipe_idx; const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - bool vactive_found = false; + bool vactive_found = true; unsigned int blank_us = 0; for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { @@ -3538,11 +3558,20 @@ bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint if (!pipe->stream) continue; + /* Don't need to check for vactive margin on the FPO candidate stream */ + if (fpo_candidate_stream && pipe->stream == fpo_candidate_stream) { + pipe_idx++; + continue; + } + + /* Every plane (apart from the ones driven by the FPO pipes) needs to have active margin + * in order for us to have found a valid "vactive" config for FPO + Vactive + */ blank_us = ((pipe->stream->timing.v_total - pipe->stream->timing.v_addressable) * pipe->stream->timing.h_total / (double)(pipe->stream->timing.pix_clk_100hz * 100)) * 1000000; - if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] >= vactive_margin_req_us && - !(pipe->stream->vrr_active_variable || pipe->stream->vrr_active_fixed) && blank_us < dc->debug.fpo_vactive_max_blank_us) { - vactive_found = true; + if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] < vactive_margin_req_us || + pipe->stream->vrr_active_variable || pipe->stream->vrr_active_fixed || blank_us >= dc->debug.fpo_vactive_max_blank_us) { + vactive_found = false; break; } pipe_idx++; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index d25c3f730a59..276e90e4e0ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -71,7 +71,7 @@ void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream); -bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req); +bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, struct dc_stream_state 
*fpo_candidate_stream, uint32_t vactive_margin_req); void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index ff4d795c7966..4297402bdab3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -803,6 +803,16 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } + /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL]. + * MAX_NUM_DPM_LVL is 8. + * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES]. + * DC__VOLTAGE_STATES is 40. + */ + if (num_states > MAX_NUM_DPM_LVL) { + ASSERT(0); + return; + } + dcn3_21_soc.num_states = num_states; for (i = 0; i < dcn3_21_soc.num_states; i++) { dcn3_21_soc.clock_limits[i].state = i; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c new file mode 100644 index 000000000000..4fbecb5ff349 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dcn401_fpu.h" +#include "dcn401/dcn401_resource.h" +// We need this includes for WATERMARKS_* defines +#include "clk_mgr/dcn401/dcn401_smu14_driver_if.h" +#include "link.h" + +#define DC_LOGGER_INIT(logger) + +void dcn401_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) +{ + /* defaults */ + double pstate_latency_us = clk_mgr->ctx->dc->dml.soc.dram_clock_change_latency_us; + double fclk_change_latency_us = clk_mgr->ctx->dc->dml.soc.fclk_change_latency_us; + double sr_exit_time_us = clk_mgr->ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = clk_mgr->ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + uint16_t setb_min_uclk_mhz = min_uclk_mhz; + uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; + + dc_assert_fp_enabled(); + + /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ + if (dcfclk_mhz_for_the_second_state) + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; + else + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + + if (clk_mgr->bw_params->clk_table.entries[2].memclk_mhz) + setb_min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[2].memclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 
min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ + clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + clk_mgr->bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; + clk_mgr->bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[1].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + clk_mgr->bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[2].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + clk_mgr->bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[3].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + } + /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ + /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */ + clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = 
clk_mgr->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} + +/* + * dcn401_update_bw_bounding_box + * + * This would override some dcn4_01 ip_or_soc initial parameters hardcoded from + * spreadsheet with actual values as per dGPU SKU: + * - with passed few options from dc->config + * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might + * need to get it from PM FW) + * - with passed latency values (passed in ns units) in dc-> bb override for + * debugging purposes + * - with passed latencies from VBIOS (in 100_ns units) if available for + * certain dGPU SKU + * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU + * of the same ASIC) + * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM + * FW for different clocks (which might differ for certain dGPU SKU of the + * same ASIC) + */ +void dcn401_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + /* Override from passed dc->bb_overrides if available*/ + if (dc->bb_overrides.sr_exit_time_ns) + dc->dml2_options.bbox_overrides.sr_exit_latency_us = + dc->bb_overrides.sr_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns) + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + + if (dc->bb_overrides.urgent_latency_ns) + dc->dml2_options.bbox_overrides.urgent_latency_us = + dc->bb_overrides.urgent_latency_ns / 1000.0; + + if (dc->bb_overrides.dram_clock_change_latency_ns) + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + + if (dc->bb_overrides.fclk_clock_change_latency_ns) + dc->dml2_options.bbox_overrides.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = + bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = + bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dc->dml2_options.bbox_overrides.sr_exit_latency_us = + bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) { + dc->dml2_options.bbox_overrides.dram_num_chan = + dc->ctx->dc_bios->vram_info.num_chans; + + } + + 
if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dc->dml2_options.bbox_overrides.dram_chanel_width_bytes = + dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + + dc->dml2_options.bbox_overrides.disp_pll_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml2_options.bbox_overrides.xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000.0; + dc->dml2_options.bbox_overrides.dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + dc->dml2_options.bbox_overrides.dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000.0; + + if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) { + unsigned int i = 0; + + dc->dml2_options.bbox_overrides.clks_table.num_states = dc->clk_mgr->bw_params->clk_table.num_entries; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dppclk_levels; + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) { + if 
(dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) { + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz; + } + } + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h new file mode 100644 index 000000000000..329f1788843c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DCN401_FPU_H__ +#define __DCN401_FPU_H__ + +#include "clk_mgr.h" + +void dcn401_build_wm_range_table_fpu(struct clk_mgr *clk_mgr); + +void dcn401_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index fb17f8868cb4..410e4b671228 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -632,6 +632,7 @@ struct _vcs_dpi_display_dlg_regs_st { unsigned int ref_freq_to_pix_freq; unsigned int vratio_prefetch; unsigned int vratio_prefetch_c; + unsigned int refcyc_per_tdlut_group; unsigned int refcyc_per_pte_group_vblank_l; unsigned int refcyc_per_pte_group_vblank_c; unsigned int refcyc_per_meta_chunk_vblank_l; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 9a3ded311195..85453bbb4f9b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -1099,8 +1099,13 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib) // Total Available Pipes Support Check for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - total_pipes += mode_lib->vba.DPPPerPlane[k]; pipe_idx = get_pipe_idx(mode_lib, k); + if (pipe_idx == -1) { + ASSERT(0); + continue; // skip inactive planes + } + total_pipes += mode_lib->vba.DPPPerPlane[k]; + if (mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz > 0.0) mode_lib->vba.DPPCLK[k] = mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz; else |
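
The new dcn401_update_bw_bounding_box_fpu() above applies two simple conventions: latency overrides from dc->bb_overrides arrive in nanoseconds and are converted to the microsecond values DML2 consumes only when they are actually set, and only populated clock-table levels are copied into the override table. A condensed sketch of both patterns, using illustrative types and field names rather than the dc/dml2 structures:

  /* Illustrative stand-ins for dc->bb_overrides and the dml2 bounding-box
   * overrides; these are not the driver's structures. */
  struct latency_overrides_ns {
  	long sr_exit_time_ns;
  	long sr_enter_plus_exit_time_ns;
  	long dram_clock_change_latency_ns;
  };

  struct bbox_overrides_us {
  	double sr_exit_latency_us;
  	double sr_enter_plus_exit_latency_us;
  	double dram_clock_change_latency_us;
  };

  /* Debug overrides are applied only when set (non-zero) and converted
   * from nanoseconds to the microseconds DML2 consumes. */
  static void apply_latency_overrides(const struct latency_overrides_ns *in,
  				    struct bbox_overrides_us *out)
  {
  	if (in->sr_exit_time_ns)
  		out->sr_exit_latency_us = in->sr_exit_time_ns / 1000.0;
  	if (in->sr_enter_plus_exit_time_ns)
  		out->sr_enter_plus_exit_latency_us =
  			in->sr_enter_plus_exit_time_ns / 1000.0;
  	if (in->dram_clock_change_latency_ns)
  		out->dram_clock_change_latency_us =
  			in->dram_clock_change_latency_ns / 1000.0;
  }

  #define MAX_CLK_LEVELS 8	/* illustrative */

  /* Only clock levels the clock manager actually reported (non-zero) are
   * copied into the override table, as the per-clock loops above do. */
  static void copy_dcfclk_levels(const unsigned int *src_mhz, unsigned int n,
  			       unsigned int *dst_mhz)
  {
  	for (unsigned int i = 0; i < n && i < MAX_CLK_LEVELS; i++)
  		if (src_mhz[i])
  			dst_mhz[i] = src_mhz[i];
  }
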