diff options
Diffstat (limited to 'drivers/gpu')
162 files changed, 2267 insertions, 1233 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 1168351267fd..bfdadc3667e0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -95,6 +95,7 @@ config DRM_KMS_FB_HELPER config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" + depends on STACKTRACE_SUPPORT select STACKDEPOT depends on DRM_KMS_HELPER depends on DEBUG_KERNEL diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig index d968c2471412..0d12ebf66174 100644 --- a/drivers/gpu/drm/amd/acp/Kconfig +++ b/drivers/gpu/drm/amd/acp/Kconfig @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: MIT menu "ACP (Audio CoProcessor) Configuration" config DRM_AMD_ACP diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 2e98c016cb47..9375e7f12420 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: MIT config DRM_AMDGPU_SI bool "Enable amdgpu support for SI parts" depends on DRM_AMDGPU diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d35b5b66229..888209eb8cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } +/* Estimate page table size needed to represent a given memory size + * + * With 4KB pages, we need one 8 byte PTE for each 4KB of memory + * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB + * of memory (factor 256K, >> 18). ROCm user mode tries to optimize + * for 2MB pages for TLB efficiency. However, small allocations and + * fragmented system memory still need some 4KB pages. 
We choose a + * compromise that should work in most cases without reserving too + * much memory for page tables unnecessarily (factor 16K, >> 14). + */ +#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) + static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; - uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 2cdaf3b2a721..6614d8a6f4c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -604,11 +604,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) continue; } - for (i = 0; i < num_entities; i++) { - mutex_lock(&ctx->adev->lock_reset); + for (i = 0; i < num_entities; i++) drm_sched_entity_fini(&ctx->entities[0][i].entity); - mutex_unlock(&ctx->adev->lock_reset); - } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2770cba56a6b..44be3a45b25e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1487,8 +1487,8 @@ out: return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (psp->autoload_supported && ucode->ucode_id == - AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? 
+ AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 7de16c0c2f20..2a8e04895595 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -27,7 +27,8 @@ #include <linux/bits.h> #include "smu_v11_0_i2c.h" -#define EEPROM_I2C_TARGET_ADDR 0xA0 +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 +#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -83,7 +84,7 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, { int ret = 0; struct i2c_msg msg = { - .addr = EEPROM_I2C_TARGET_ADDR, + .addr = 0, .flags = 0, .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, .buf = buff, @@ -93,6 +94,8 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, *(uint16_t *)buff = EEPROM_HDR_START; __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); + msg.addr = control->i2c_address; + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); if (ret < 1) DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); @@ -203,7 +206,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct i2c_msg msg = { - .addr = EEPROM_I2C_TARGET_ADDR, + .addr = 0, .flags = I2C_M_RD, .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, .buf = buff, @@ -213,10 +216,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) switch (adev->asic_type) { case CHIP_VEGA20: + control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20; ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); break; case CHIP_ARCTURUS: + 
control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS; ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor); break; @@ -229,6 +234,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) return ret; } + msg.addr = control->i2c_address; + /* Read/Create table header from EEPROM address 0 */ ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); if (ret < 1) { @@ -408,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, * Update bits 16,17 of EEPROM address in I2C address by setting them * to bits 1,2 of Device address byte */ - msg->addr = EEPROM_I2C_TARGET_ADDR | - ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); + msg->addr = control->i2c_address | + ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); msg->flags = write ? 0 : I2C_M_RD; msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; msg->buf = buff; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 622269957c1b..ca78f812d436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -50,6 +50,7 @@ struct amdgpu_ras_eeprom_control { struct mutex tbl_mutex; bool bus_locked; uint32_t tbl_byte_sum; + uint16_t i2c_address; // 8-bit represented address }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index c8793e6cc3c5..6373bfb47d55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) */ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; u32 dws; int r; /* allocate clear state block */ adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 
&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) return r; } - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 7a43993544c1..1befdee9f0f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1346,10 +1346,13 @@ static int cik_asic_reset(struct amdgpu_device *adev) { int r; - if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); r = smu7_asic_baco_reset(adev); - else + } else { r = cik_asic_pci_config_reset(adev); + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 16fbd2bc8ad1..4043ebcea5de 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -268,23 +268,29 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, { u32 tmp; - /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { - tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); - tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; - WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); - } else { - tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); - tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - tmp |= DF_V3_6_MGCG_DISABLE; - WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); - } + if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) { + /* Put DF on 
broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, true); + + if (enable) { + tmp = RREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_DISABLE; + WREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } - /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + /* Exit broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, false); + } } static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ca5f0e7ea1ac..ba9e53a1abc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -117,10 +117,13 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -162,10 +165,13 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -690,59 +696,61 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; - version_major = le16_to_cpu(rlc_hdr->header.header_version_major); - version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - if (version_major == 2 && version_minor == 1) - adev->gfx.rlc.is_rlc_v2_1 = true; - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = 
le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = + if (!amdgpu_sriov_vf(adev)) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = + adev->gfx.rlc.clear_state_descriptor_offset = le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = + adev->gfx.rlc.avail_scratch_ram_locations = le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = + adev->gfx.rlc.reg_restore_list_size = le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = + adev->gfx.rlc.reg_list_format_start = le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = + adev->gfx.rlc.reg_list_format_separate_start = le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = + adev->gfx.rlc.starting_offsets_start = le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = + adev->gfx.rlc.reg_list_format_size_bytes = le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = + adev->gfx.rlc.reg_list_size_bytes = le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = + 
adev->gfx.rlc.register_list_format = kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; - goto out; - } + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v10_0_init_rlc_ext_microcode(adev); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v10_0_init_rlc_ext_microcode(adev); + } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -993,39 +1001,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = 
amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1787,25 +1762,7 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { - int r; - - if (adev->in_gpu_reset) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (r) - return r; - - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, - (void **)&adev->gfx.rlc.cs_ptr); - if (!r) { - adev->gfx.rlc.funcs->get_csb_buffer(adev, - adev->gfx.rlc.cs_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - } - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - if (r) - return r; - } + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, @@ -1817,22 +1774,6 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) return 0; } -static int gfx_v10_0_init_pg(struct amdgpu_device *adev) -{ - int i; - int r; - - r = gfx_v10_0_init_csb(adev); - if (r) - return r; - - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); - - /* TODO: init power gating */ - return 0; -} - void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); @@ -1925,21 +1866,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) { int r; - if (amdgpu_sriov_vf(adev)) - 
return 0; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); - if (r) - return r; - r = gfx_v10_0_init_pg(adev); + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) return r; - /* enable RLC SRM */ - gfx_v10_0_rlc_enable_srm(adev); + gfx_v10_0_init_csb(adev); + if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ + gfx_v10_0_rlc_enable_srm(adev); } else { adev->gfx.rlc.funcs->stop(adev); @@ -1961,9 +1897,7 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } - r = gfx_v10_0_init_pg(adev); - if (r) - return r; + gfx_v10_0_init_csb(adev); adev->gfx.rlc.funcs->start(adev); @@ -2825,7 +2759,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) /* Init gfx ring 0 for pipe 0 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); - mutex_unlock(&adev->srbm_mutex); + /* Set ring buffer size */ ring = &adev->gfx.gfx_ring[0]; rb_bufsz = order_base_2(ring->ring_size / 8); @@ -2863,11 +2797,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Init gfx ring 1 for pipe 1 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); - mutex_unlock(&adev->srbm_mutex); ring = &adev->gfx.gfx_ring[1]; rb_bufsz = order_base_2(ring->ring_size / 8); tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); @@ -2897,6 +2831,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); /* Switch to pipe 0 */ mutex_lock(&adev->srbm_mutex); @@ -3775,10 +3710,6 @@ static int gfx_v10_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gfx_v10_0_csb_vram_pin(adev); - if (r) - return r; - if (!amdgpu_emu_mode) gfx_v10_0_init_golden_registers(adev); @@ 
-3861,12 +3792,11 @@ static int gfx_v10_0_hw_fini(void *handle) if (amdgpu_gfx_disable_kcq(adev)) DRM_ERROR("KCQ disable failed\n"); if (amdgpu_sriov_vf(adev)) { - pr_debug("For SRIOV client, shouldn't do anything.\n"); + gfx_v10_0_cp_gfx_enable(adev, false); return 0; } gfx_v10_0_cp_enable(adev, false); gfx_v10_0_enable_gui_idle_interrupt(adev, false); - gfx_v10_0_csb_vram_unpin(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 791ba398f007..d92e92e5d50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4554,6 +4554,8 @@ static int gfx_v7_0_hw_init(void *handle) gfx_v7_0_constants_init(adev); + /* init CSB */ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* init rlc */ r = adev->gfx.rlc.funcs->resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ffbde9136372..52a647d7022d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { 
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -3917,6 +3884,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v8_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32(mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -4837,10 +4805,6 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); - r = gfx_v8_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4958,8 +4922,6 @@ static int gfx_v8_0_hw_fini(void *handle) pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); - gfx_v8_0_csb_vram_unpin(adev); - return 0; } @@ -6184,7 +6146,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; - /* EVENT_WRITE_EOP - flush caches, send int */ + /* Workaround for cache flush problems. First send a dummy EOP + * event down the pipe with seq one below. 
+ */ + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | + DATA_SEL(1) | INT_SEL(0)); + amdgpu_ring_write(ring, lower_32_bits(seq - 1)); + amdgpu_ring_write(ring, upper_32_bits(seq - 1)); + + /* Then send the real EOP event down the pipe: + * EVENT_WRITE_EOP - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | @@ -6926,7 +6904,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 5 + /* COND_EXEC */ 7 + /* PIPELINE_SYNC */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ - 8 + /* FENCE for VM_FLUSH */ + 12 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, the first COND_EXEC jump to the place just @@ -6938,7 +6916,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ - 8 + 8 + /* FENCE x2 */ + 12 + 12 + /* FENCE x2 */ 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index faf2ffce5837..66328ffa395a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1695,39 +1695,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) -{ - int r; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, - AMDGPU_GEM_DOMAIN_VRAM); - if (!r) - adev->gfx.rlc.clear_state_gpu_addr = - 
amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - return r; -} - -static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (!adev->gfx.rlc.clear_state_obj) - return; - - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - } -} - static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -2415,6 +2382,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); @@ -3706,10 +3674,6 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); - r = gfx_v9_0_csb_vram_pin(adev); - if (r) - return r; - r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3791,8 +3755,6 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); adev->gfx.rlc.funcs->stop(adev); - gfx_v9_0_csb_vram_unpin(adev); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 5e9ab8eb214a..c0ab71df0d90 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL); u32 max_region = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + u32 max_num_physical_nodes = 0; + u32 max_physical_node_id = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + max_num_physical_nodes = 4; + max_physical_node_id = 3; + break; + case CHIP_ARCTURUS: + max_num_physical_nodes = 8; + 
max_physical_node_id = 7; + break; + default: + return -EINVAL; + } /* PF_MAX_REGION=0 means xgmi is disabled */ if (max_region) { adev->gmc.xgmi.num_physical_nodes = max_region + 1; - if (adev->gmc.xgmi.num_physical_nodes > 4) + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) return -EINVAL; adev->gmc.xgmi.physical_node_id = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); - if (adev->gmc.xgmi.physical_node_id > 3) + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) return -EINVAL; adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 321f8a997be8..f5725336a5f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -219,6 +219,21 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, return req; } +/** + * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore + * + * @adev: amdgpu_device pointer + * @vmhub: vmhub type + * + */ +static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, + uint32_t vmhub) +{ + return ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!amdgpu_sriov_vf(adev))); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -229,6 +244,7 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int vmhub, uint32_t flush_type) { + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type); /* Use register 17 for GART */ @@ -244,8 +260,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) { + if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); @@ -278,8 +293,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, } /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release @@ -326,7 +340,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || - adev->in_gpu_reset) { + adev->in_gpu_reset || + ring->sched.ready == false) { gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); mutex_unlock(&adev->mman.gtt_window_lock); return; @@ -368,6 +383,7 @@ error_alloc: static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -380,8 +396,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_sem + eng, 0x1, 0x1); @@ -397,8 +412,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3c355fb5d2b4..a5b68b5e452f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -416,6 +416,24 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, return req; } +/** + * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore + * + * @adev: amdgpu_device pointer + * @vmhub: vmhub type + * + */ +static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, + uint32_t vmhub) +{ + return ((vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) && + (!amdgpu_sriov_vf(adev)) && + (!(adev->asic_type == CHIP_RAVEN && + adev->rev_id < 0x8 && + adev->pdev->device == 0x15d8))); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -435,6 +453,7 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); const unsigned eng = 17; u32 j, tmp; struct amdgpu_vmhub *hub; @@ -468,8 +487,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) { + if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acuqire */ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); @@ -499,8 +517,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ - if (vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release @@ -518,6 +535,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); struct amdgpu_device *adev = ring->adev; struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); @@ -531,8 +549,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, */ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_sem + eng, 0x1, 0x1); @@ -548,8 +565,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ - if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || - ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 78e5cdc0c058..f1b171e30774 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -783,10 +783,13 @@ static int vi_asic_reset(struct amdgpu_device *adev) { int r; - if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) + if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); r = smu7_asic_baco_reset(adev); - else + } else { r = vi_asic_pci_config_reset(adev); + } return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index a1a35d4d594b..b3672d10ea54 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -1,11 +1,11 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: MIT # # Heterogenous system architecture configuration # config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && (X86_64 || ARM64) + depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64) imply AMD_IOMMU_V2 if X86_64 select MMU_NOTIFIER help diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 313183b80032..ae161fe86ebb 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: MIT menu "Display Engine Configuration" depends on DRM && DRM_AMDGPU diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 55a520a63712..778f186b3a05 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -342,7 +342,8 @@ 
bool dm_pp_get_clock_levels_by_type( if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) { if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle, dc_to_pp_clock_type(clk_type), &pp_clks)) { - /* Error in pplib. Provide default values. */ + /* Error in pplib. Provide default values. */ + get_default_clock_levels(clk_type, dc_clks); return true; } } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) { diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 7873abea4112..5c3fcaa47410 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1625,6 +1625,7 @@ static enum bp_result construct_integrated_info( /* Don't need to check major revision as they are all 1 */ switch (revision.minor) { case 11: + case 12: result = get_integrated_info_v11(bp, info); break; default: diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 790a2d211bd6..35c55e54eac0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -471,12 +471,28 @@ static void rn_notify_wm_ranges(struct clk_mgr *clk_mgr_base) } +static bool rn_are_clock_states_equal(struct dc_clocks *a, + struct dc_clocks *b) +{ + if (a->dispclk_khz != b->dispclk_khz) + return false; + else if (a->dppclk_khz != b->dppclk_khz) + return false; + else if (a->dcfclk_khz != b->dcfclk_khz) + return false; + else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) + return false; + + return true; +} + + static struct clk_mgr_funcs dcn21_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = rn_update_clocks, .init_clocks = rn_init_clocks, .enable_pme_wa = rn_enable_pme_wa, - /* .dump_clk_registers = rn_dump_clk_registers, */ + .are_clock_states_equal = 
rn_are_clock_states_equal, .notify_wm_ranges = rn_notify_wm_ranges }; @@ -518,36 +534,83 @@ struct clk_bw_params rn_bw_params = { .num_entries = 4, }, - .wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 23.84, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 23.84, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 23.84, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 23.84, - .valid = true, - }, +}; + +struct wm_table ddr4_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 6.09, + .sr_enter_plus_exit_time_us = 7.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, }, } }; +struct wm_table lpddr4_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 23.84, + .sr_exit_time_us = 12.5, + .sr_enter_plus_exit_time_us = 17.0, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 23.84, + .sr_exit_time_us = 12.5, + .sr_enter_plus_exit_time_us = 17.0, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 23.84, + .sr_exit_time_us = 12.5, + .sr_enter_plus_exit_time_us = 17.0, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + 
.pstate_latency_us = 23.84, + .sr_exit_time_us = 12.5, + .sr_enter_plus_exit_time_us = 17.0, + .valid = true, + }, + } +}; + + static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { int i; @@ -561,7 +624,7 @@ static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsi return 0; } -static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct hw_asic_id *asic_id) +static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info) { int i, j = 0; @@ -593,8 +656,8 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol); } - bw_params->vram_type = asic_id->vram_type; - bw_params->num_channels = asic_id->vram_width / DDR4_DRAM_WIDTH; + bw_params->vram_type = bios_info->memory_type; + bw_params->num_channels = bios_info->ma_channel_number; for (i = 0; i < WM_SET_COUNT; i++) { bw_params->wm_table.entries[i].wm_inst = i; @@ -669,15 +732,24 @@ void rn_clk_mgr_construct( ASSERT(clk_mgr->base.dprefclk_khz == 600000); clk_mgr->base.dprefclk_khz = 600000; } + + if (ctx->dc_bios->integrated_info->memory_type == LpDdr4MemType) { + rn_bw_params.wm_table = lpddr4_wm_table; + } else { + rn_bw_params.wm_table = ddr4_wm_table; + } } dce_clock_read_ss_info(clk_mgr); + clk_mgr->base.bw_params = &rn_bw_params; if (pp_smu && pp_smu->rn_funcs.get_dpm_clock_table) { pp_smu->rn_funcs.get_dpm_clock_table(&pp_smu->rn_funcs.pp_smu, &clock_table); - rn_clk_mgr_helper_populate_bw_params(clk_mgr->base.bw_params, &clock_table, &ctx->asic_id); + if (ctx->dc_bios && ctx->dc_bios->integrated_info) { + rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info); + } } if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment) && 
clk_mgr->smu_ver >= 0x00371500) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 12ba6fdf89b7..62d8289abb4e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -372,7 +372,7 @@ bool dc_link_is_dp_sink_present(struct dc_link *link) if (GPIO_RESULT_OK != dal_ddc_open( ddc, GPIO_MODE_INPUT, GPIO_DDC_CONFIG_TYPE_MODE_I2C)) { - dal_gpio_destroy_ddc(&ddc); + dal_ddc_close(ddc); return present; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 7f904d55c1bc..81789191d4ec 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -586,7 +586,7 @@ bool dal_ddc_service_query_ddc_data( bool dal_ddc_submit_aux_command(struct ddc_service *ddc, struct aux_payload *payload) { - uint8_t retrieved = 0; + uint32_t retrieved = 0; bool ret = 0; if (!ddc) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 0f59b68aa4c2..504055fc70e8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3522,7 +3522,14 @@ void dp_set_fec_enable(struct dc_link *link, bool enable) if (link_enc->funcs->fec_set_enable && link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) { if (link->fec_state == dc_link_fec_ready && enable) { - msleep(1); + /* Accord to DP spec, FEC enable sequence can first + * be transmitted anytime after 1000 LL codes have + * been transmitted on the link after link training + * completion. Using 1 lane RBR should have the maximum + * time for transmitting 1000 LL codes which is 6.173 us. + * So use 7 microseconds delay instead. 
+ */ + udelay(7); link_enc->funcs->fec_set_enable(link_enc, true); link->fec_state = dc_link_fec_enabled; } else if (link->fec_state == dc_link_fec_enabled && !enable) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index e472608faf33..793c0cec407f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -583,6 +583,8 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, uint8_t reply; bool payload_reply = true; enum aux_channel_operation_result operation_result; + bool retry_on_defer = false; + int aux_ack_retries = 0, aux_defer_retries = 0, aux_i2c_defer_retries = 0, @@ -613,8 +615,10 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, break; case AUX_TRANSACTION_REPLY_AUX_DEFER: - case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK: case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER: + retry_on_defer = true; + /* fall through */ + case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK: if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) { goto fail; } else { @@ -647,15 +651,24 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, break; case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: - if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES) - goto fail; - else { - /* - * DP 1.4, 2.8.2: AUX Transaction Response/Reply Timeouts - * According to the DP spec there should be 3 retries total - * with a 400us wait inbetween each. Hardware already waits - * for 550us therefore no wait is required here. - */ + // Check whether a DEFER had occurred before the timeout. + // If so, treat timeout as a DEFER. 
+ if (retry_on_defer) { + if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) + goto fail; + else if (payload->defer_delay > 0) + msleep(payload->defer_delay); + } else { + if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES) + goto fail; + else { + /* + * DP 1.4, 2.8.2: AUX Transaction Response/Reply Timeouts + * According to the DP spec there should be 3 retries total + * with a 400us wait inbetween each. Hardware already waits + * for 550us therefore no wait is required here. + */ + } } break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 63f3bddba7da..10b47986526b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: MIT # # Makefile for DCN. diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 921a36668ced..ac8c18fadefc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1037,6 +1037,25 @@ void dcn20_pipe_control_lock( if (pipe->plane_state != NULL) flip_immediate = pipe->plane_state->flip_immediate; + if (flip_immediate && lock) { + const int TIMEOUT_FOR_FLIP_PENDING = 100000; + int i; + + for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { + if (!pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp)) + break; + udelay(1); + } + + if (pipe->bottom_pipe != NULL) { + for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { + if (!pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp)) + break; + udelay(1); + } + } + } + /* In flip immediate and pipe splitting case, we need to use GSL * for synchronization. Only do setup on locking and on flip type change. 
*/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index bbd1c98564be..23ff2f1c75b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -157,6 +157,74 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = { .xfc_fill_constant_bytes = 0, }; +struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = { + .odm_capable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .num_dsc = 5, + .rob_buffer_size_kbytes = 168, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .max_page_table_levels = 4, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, + .line_buffer_fixed_bpp = 0, + .dcc_supported = true, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 12, + .writeback_max_vscl_taps = 12, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 5, + .max_num_dpp = 5, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 8, + .max_vscl_ratio = 8, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps 
= 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 32, // + .dppclk_delay_subtotal = 77, // + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 8, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 87, // + .dcfclk_cstate_latency = 10, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + .xfc_supported = true, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .ptoi_supported = 0 +}; + struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { /* Defaults that get patched on driver load from firmware. */ .clock_limits = { @@ -854,6 +922,10 @@ static const struct resource_caps res_cap_nv14 = { .num_pll = 5, .num_dwb = 1, .num_ddc = 5, + .num_vmid = 16, +#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT + .num_dsc = 5, +#endif }; static const struct dc_debug_options debug_defaults_drv = { @@ -1466,13 +1538,20 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state static void acquire_dsc(struct resource_context *res_ctx, const struct resource_pool *pool, - struct display_stream_compressor **dsc) + struct display_stream_compressor **dsc, + int pipe_idx) { int i; ASSERT(*dsc == NULL); *dsc = NULL; + if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { + *dsc = pool->dscs[pipe_idx]; + res_ctx->is_dsc_acquired[pipe_idx] = true; + return; + } + /* Find first free DSC */ for (i = 0; i < pool->res_cap->num_dsc; i++) if (!res_ctx->is_dsc_acquired[i]) { @@ -1515,7 +1594,7 @@ static enum dc_status add_dsc_to_stream_resource(struct dc *dc, if (pipe_ctx->stream != dc_stream) continue; - acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc); + acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc, i); /* The number of DSCs can be less than the number of pipes */ if (!pipe_ctx->stream_res.dsc) { @@ -1715,7 +1794,7 @@ bool dcn20_split_stream_for_odm( next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; #ifdef 
CONFIG_DRM_AMD_DC_DSC_SUPPORT if (next_odm_pipe->stream->timing.flags.DSC == 1) { - acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc); + acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx); ASSERT(next_odm_pipe->stream_res.dsc); if (next_odm_pipe->stream_res.dsc == NULL) return false; @@ -3212,6 +3291,10 @@ static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params( uint32_t hw_internal_rev) { + /* NV14 */ + if (ASICREV_IS_NAVI14_M(hw_internal_rev)) + return &dcn2_0_nv14_ip; + /* NV12 and NV10 */ return &dcn2_0_ip; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c index 4b3401616434..fcb3877b4fcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c @@ -492,15 +492,23 @@ void enc2_stream_encoder_dp_unblank( DP_VID_N_MUL, n_multiply); } - /* set DIG_START to 0x1 to reset FIFO */ + /* make sure stream is disabled before resetting steer fifo */ + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false); + REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000); + /* set DIG_START to 0x1 to reset FIFO */ REG_UPDATE(DIG_FE_CNTL, DIG_START, 1); + udelay(1); /* write 0 to take the FIFO out of reset */ REG_UPDATE(DIG_FE_CNTL, DIG_START, 0); - /* switch DP encoder to CRTC data */ + /* switch DP encoder to CRTC data, but reset it the fifo first. It may happen + * that it overflows during mode transition, and sometimes doesn't recover. 
+ */ + REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1); + udelay(10); REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 14113ccf498d..5b8c17564bc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: MIT # # Makefile for DCN21. diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 459bd9a5caed..b29b2c99a564 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -23,6 +23,8 @@ * */ +#include <linux/slab.h> + #include "dm_services.h" #include "dc.h" @@ -257,7 +259,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .vmm_page_size_bytes = 4096, .dram_clock_change_latency_us = 23.84, .return_bus_width_bytes = 64, - .dispclk_dppclk_vco_speed_mhz = 3550, + .dispclk_dppclk_vco_speed_mhz = 3600, .xfc_bus_transport_time_us = 4, .xfc_xbuf_latency_tolerance_us = 4, .use_urgent_burst_bw = 1, @@ -1000,6 +1002,8 @@ static void calculate_wm_set_for_vlevel( pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz; dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us; + dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us; + dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us; wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000; wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000; @@ -1017,14 +1021,21 @@ static void calculate_wm_set_for_vlevel( static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb) { + int i; + kernel_fpu_begin(); if (dc->bb_overrides.sr_exit_time_ns) { - bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + for (i = 0; i < 
WM_SET_COUNT; i++) { + dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us = + dc->bb_overrides.sr_exit_time_ns / 1000.0; + } } if (dc->bb_overrides.sr_enter_plus_exit_time_ns) { - bb->sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + for (i = 0; i < WM_SET_COUNT; i++) { + dc->clk_mgr->bw_params->wm_table.entries[i].sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } } if (dc->bb_overrides.urgent_latency_ns) { @@ -1032,9 +1043,12 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s } if (dc->bb_overrides.dram_clock_change_latency_ns) { - bb->dram_clock_change_latency_us = + for (i = 0; i < WM_SET_COUNT; i++) { + dc->clk_mgr->bw_params->wm_table.entries[i].pstate_latency_us = dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } } + kernel_fpu_end(); } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index 970737217e53..641ffb7cfaed 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: MIT # # Makefile for the 'dsc' sub-component of DAL. 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 4e18e77dcf42..026e6a2a2c44 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -69,6 +69,8 @@ struct wm_range_table_entry { unsigned int wm_inst; unsigned int wm_type; double pstate_latency_us; + double sr_exit_time_us; + double sr_enter_plus_exit_time_us; bool valid; }; diff --git a/drivers/gpu/drm/amd/display/include/i2caux_interface.h b/drivers/gpu/drm/amd/display/include/i2caux_interface.h index bb012cb1a9f5..c7fbb9c3ad6b 100644 --- a/drivers/gpu/drm/amd/display/include/i2caux_interface.h +++ b/drivers/gpu/drm/amd/display/include/i2caux_interface.h @@ -42,7 +42,7 @@ struct aux_payload { bool write; bool mot; uint32_t address; - uint8_t length; + uint32_t length; uint8_t *data; /* * used to return the reply type of the transaction diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 16e69bbc69aa..5437b50e9f90 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -37,8 +37,8 @@ #define STATIC_SCREEN_RAMP_DELTA_REFRESH_RATE_PER_FRAME ((1000 / 60) * 65) /* Number of elements in the render times cache array */ #define RENDER_TIMES_MAX_COUNT 10 -/* Threshold to exit/exit BTR (to avoid frequent enter-exits at the lower limit) */ -#define BTR_MAX_MARGIN 2500 +/* Threshold to exit BTR (to avoid frequent enter-exits at the lower limit) */ +#define BTR_EXIT_MARGIN 2000 /* Threshold to change BTR multiplier (to avoid frequent changes) */ #define BTR_DRIFT_MARGIN 2000 /*Threshold to exit fixed refresh rate*/ @@ -254,22 +254,24 @@ static void apply_below_the_range(struct core_freesync *core_freesync, unsigned int delta_from_mid_point_in_us_1 = 0xFFFFFFFF; unsigned int delta_from_mid_point_in_us_2 = 0xFFFFFFFF; unsigned int 
frames_to_insert = 0; + unsigned int min_frame_duration_in_ns = 0; + unsigned int max_render_time_in_us = in_out_vrr->max_duration_in_us; unsigned int delta_from_mid_point_delta_in_us; - unsigned int max_render_time_in_us = - in_out_vrr->max_duration_in_us - in_out_vrr->btr.margin_in_us; + + min_frame_duration_in_ns = ((unsigned int) (div64_u64( + (1000000000ULL * 1000000), + in_out_vrr->max_refresh_in_uhz))); /* Program BTR */ - if ((last_render_time_in_us + in_out_vrr->btr.margin_in_us / 2) < max_render_time_in_us) { + if (last_render_time_in_us + BTR_EXIT_MARGIN < max_render_time_in_us) { /* Exit Below the Range */ if (in_out_vrr->btr.btr_active) { in_out_vrr->btr.frame_counter = 0; in_out_vrr->btr.btr_active = false; } - } else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) { + } else if (last_render_time_in_us > max_render_time_in_us) { /* Enter Below the Range */ - if (!in_out_vrr->btr.btr_active) { - in_out_vrr->btr.btr_active = true; - } + in_out_vrr->btr.btr_active = true; } /* BTR set to "not active" so disengage */ @@ -325,9 +327,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync, /* Choose number of frames to insert based on how close it * can get to the mid point of the variable range. 
*/ - if ((frame_time_in_us / mid_point_frames_ceil) > in_out_vrr->min_duration_in_us && - (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2 || - mid_point_frames_floor < 2)) { + if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) { frames_to_insert = mid_point_frames_ceil; delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_2 - delta_from_mid_point_in_us_1; @@ -343,7 +343,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync, if (in_out_vrr->btr.frames_to_insert != 0 && delta_from_mid_point_delta_in_us < BTR_DRIFT_MARGIN) { if (((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) < - max_render_time_in_us) && + in_out_vrr->max_duration_in_us) && ((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) > in_out_vrr->min_duration_in_us)) frames_to_insert = in_out_vrr->btr.frames_to_insert; @@ -796,11 +796,6 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, refresh_range = in_out_vrr->max_refresh_in_uhz - in_out_vrr->min_refresh_in_uhz; - in_out_vrr->btr.margin_in_us = in_out_vrr->max_duration_in_us - - 2 * in_out_vrr->min_duration_in_us; - if (in_out_vrr->btr.margin_in_us > BTR_MAX_MARGIN) - in_out_vrr->btr.margin_in_us = BTR_MAX_MARGIN; - in_out_vrr->supported = true; } @@ -816,7 +811,6 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; in_out_vrr->btr.frame_counter = 0; - in_out_vrr->btr.mid_point_in_us = (in_out_vrr->min_duration_in_us + in_out_vrr->max_duration_in_us) / 2; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index dbe7835aabcf..dc187844d10b 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -92,7 +92,6 @@ struct mod_vrr_params_btr { uint32_t inserted_duration_in_us; uint32_t frames_to_insert; uint32_t 
frame_counter; - uint32_t margin_in_us; }; struct mod_vrr_params_fixed_refresh { diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 40b546c75fc2..5ff7ccedfbed 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -2548,3 +2548,12 @@ uint32_t smu_get_pptable_power_limit(struct smu_context *smu) return ret; } + +int smu_send_smc_msg(struct smu_context *smu, + enum smu_message_type msg) +{ + int ret; + + ret = smu_send_smc_msg_with_param(smu, msg, 0); + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 58c7c4a3053e..cc71a1078a7a 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1313,12 +1313,17 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, "VR", "COMPUTE", "CUSTOM"}; + static const char *title[] = { + "PROFILE_INDEX(NAME)"}; uint32_t i, size = 0; int16_t workload_type = 0; if (!smu->pm_enabled || !buf) return -EINVAL; + size += sprintf(buf + size, "%16s\n", + title[0]); + for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) { /* * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT @@ -2130,7 +2135,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 031e0c22fcc7..ac9758305ab3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -497,8 +497,8 @@ 
struct pptable_funcs { int (*notify_memory_pool_location)(struct smu_context *smu); int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu); int (*system_features_control)(struct smu_context *smu, bool en); - int (*send_smc_msg)(struct smu_context *smu, uint16_t msg); - int (*send_smc_msg_with_param)(struct smu_context *smu, uint16_t msg, uint32_t param); + int (*send_smc_msg_with_param)(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int (*read_smc_arg)(struct smu_context *smu, uint32_t *arg); int (*init_display_count)(struct smu_context *smu, uint32_t count); int (*set_allowed_mask)(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index 606149085683..719844257713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -177,10 +177,9 @@ int smu_v11_0_notify_memory_pool_location(struct smu_context *smu); int smu_v11_0_system_features_control(struct smu_context *smu, bool en); -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg); - int -smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v11_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param); int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg); diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h index 9b9f5df0911c..9d81d789c713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h @@ -44,10 +44,9 @@ int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg); int smu_v12_0_wait_for_response(struct smu_context *smu); -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg); - int -smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v12_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t 
param); int smu_v12_0_check_fw_status(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index aaec884d63ed..4a14fd1f9fd5 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -2055,7 +2055,6 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 04daf7e9fe05..977bdd962e98 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -697,7 +697,6 @@ static const struct pptable_funcs renoir_ppt_funcs = { .check_fw_version = smu_v12_0_check_fw_version, .powergate_sdma = smu_v12_0_powergate_sdma, .powergate_vcn = smu_v12_0_powergate_vcn, - .send_smc_msg = smu_v12_0_send_msg, .send_smc_msg_with_param = smu_v12_0_send_msg_with_param, .read_smc_arg = smu_v12_0_read_arg, .set_gfx_cgpg = smu_v12_0_set_gfx_cgpg, diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index 8bcda7871309..8872f8b2d502 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -75,8 +75,8 @@ #define smu_set_default_od_settings(smu, initialize) \ ((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0) -#define smu_send_smc_msg(smu, msg) \ - ((smu)->ppt_funcs->send_smc_msg? 
(smu)->ppt_funcs->send_smc_msg((smu), (msg)) : 0) +int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg); + #define smu_send_smc_msg_with_param(smu, msg, param) \ ((smu)->ppt_funcs->send_smc_msg_with_param? (smu)->ppt_funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0) #define smu_read_smc_arg(smu, arg) \ diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index fc9679ea2368..e4268a627eff 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -90,36 +90,11 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO; } -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0, index = 0; - - index = smu_msg_get_index(smu, msg); - if (index < 0) - return index; - - smu_v11_0_wait_for_response(smu); - - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - - smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index); - - ret = smu_v11_0_wait_for_response(smu); - - if (ret) - pr_err("failed send message: %10s (%d) response %#x\n", - smu_get_message_name(smu, msg), index, ret); - - return ret; - -} - int -smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v11_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param) { - struct amdgpu_device *adev = smu->adev; int ret = 0, index = 0; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c index 139dd737eaa5..094cfc46adac 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c @@ -77,33 +77,9 @@ int smu_v12_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 
0 : -EIO; } -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0, index = 0; - - index = smu_msg_get_index(smu, msg); - if (index < 0) - return index; - - smu_v12_0_wait_for_response(smu); - - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - - smu_v12_0_send_msg_without_waiting(smu, (uint16_t)index); - - ret = smu_v12_0_wait_for_response(smu); - - if (ret) - pr_err("Failed to send message 0x%x, response 0x%x\n", index, - ret); - - return ret; - -} - int -smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, +smu_v12_0_send_msg_with_param(struct smu_context *smu, + enum smu_message_type msg, uint32_t param) { struct amdgpu_device *adev = smu->adev; diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 0b4892833808..60b9ff097142 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -3231,7 +3231,6 @@ static const struct pptable_funcs vega20_ppt_funcs = { .set_tool_table_location = smu_v11_0_set_tool_table_location, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, .read_smc_arg = smu_v11_0_read_arg, .init_display_count = smu_v11_0_init_display_count, diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ae5809a1f19a..273dd80fabf3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3176,9 +3176,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) drm_dp_mst_topology_put_port(port); } - for (i = 0; i < mgr->max_payloads; i++) { - if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL) + for (i = 0; i < mgr->max_payloads; /* do nothing */) { + if (mgr->payloads[i].payload_state != 
DP_PAYLOAD_DELETE_LOCAL) { + i++; continue; + } DRM_DEBUG_KMS("removing payload %d\n", i); for (j = i; j < mgr->max_payloads - 1; j++) { diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 892ce636ef72..6ee04803c362 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -561,7 +561,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, struct drm_property_blob *blob; int ret; - if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob)) + if (!length || length > INT_MAX - sizeof(struct drm_property_blob)) return ERR_PTR(-EINVAL); blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 7ae087b0504d..88b6fcaa20be 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1313,6 +1313,7 @@ static int gsc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; + component_del(dev, &gsc_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 1799537a3228..c280b6ae38eb 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL config DRM_I915_PREEMPT_TIMEOUT int "Preempt timeout (ms, jiffy granularity)" - default 100 # milliseconds + default 640 # milliseconds help How long to wait (in milliseconds) for a preemption event to occur when submitting a new context via execlists. 
If the current context diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0caef2592a7e..ed8c7ce62119 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk) static u8 ehl_calc_voltage_level(int cdclk) { - if (cdclk > 312000) + if (cdclk > 326400) + return 3; + else if (cdclk > 312000) return 2; else if (cdclk > 180000) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 0d6e494b4508..2a27fb5d7dc6 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans { u32 dkl_de_emphasis_control; }; -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { /* VS pre-emp Non-trans mV Pre-emph dB */ { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ @@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ }; +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { + /* HDMI Preset VS Pre-emph */ + { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ + { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ + { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ + { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ + { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ + { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ + { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ + { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ + { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ + { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por 
icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); default_entry = n_entries - 1; } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) @@ -2371,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) icl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, encoder->type, @@ -2823,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); - ddi_translations = tgl_dkl_phy_ddi_translations; + if (encoder->type == INTEL_OUTPUT_HDMI) { + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; + } else { + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + ddi_translations = tgl_dkl_phy_dp_ddi_trans; + } if (level >= n_entries) level = n_entries - 1; @@ -3967,6 +3986,7 @@ static void intel_enable_ddi(struct intel_encoder *encoder, if (conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) intel_hdcp_enable(to_intel_connector(conn_state->connector), + crtc_state->cpu_transcoder, (u8)conn_state->hdcp_content_type); } @@ -4070,7 +4090,9 @@ static void intel_ddi_update_pipe(struct intel_encoder *encoder, if (conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED || content_protection_type_changed) - intel_hdcp_enable(connector, (u8)conn_state->hdcp_content_type); + intel_hdcp_enable(connector, + crtc_state->cpu_transcoder, + (u8)conn_state->hdcp_content_type); } static void diff --git 
a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index ce1b64f4dd44..12ba74788cce 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3688,6 +3688,151 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }; +static const struct i915_power_well_desc ehl_power_wells[] = { + { + .name = "always-on", + .always_on = true, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .always_on = true, + .domains = 0, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_1, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_1, + .hsw.has_fuses = true, + }, + }, + { + .name = "DC off", + .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_DC_OFF, + }, + { + .name = "power well 2", + .domains = ICL_PW_2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_2, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_2, + .hsw.has_fuses = true, + }, + }, + { + .name = "power well 3", + .domains = ICL_PW_3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_3, + .hsw.irq_pipe_mask = BIT(PIPE_B), + .hsw.has_vga = true, + .hsw.has_fuses = true, + }, + }, + { + .name = "DDI A IO", + .domains = ICL_DDI_IO_A_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_A, + }, + }, + { + .name = "DDI B IO", + .domains = ICL_DDI_IO_B_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_B, + }, + }, + { + .name = "DDI C IO", + .domains = ICL_DDI_IO_C_POWER_DOMAINS, + .ops = 
&hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_C, + }, + }, + { + .name = "DDI D IO", + .domains = ICL_DDI_IO_D_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_D, + }, + }, + { + .name = "AUX A", + .domains = ICL_AUX_A_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + }, + }, + { + .name = "AUX B", + .domains = ICL_AUX_B_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + }, + }, + { + .name = "AUX C", + .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_C, + }, + }, + { + .name = "AUX D", + .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_D, + }, + }, + { + .name = "power well 4", + .domains = ICL_PW_4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_4, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_C), + }, + }, +}; + static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "always-on", @@ -3832,7 +3977,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX A", .domains = TGL_AUX_A_IO_POWER_DOMAINS, - .ops = &icl_combo_phy_aux_power_well_ops, + .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3842,7 +3987,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX B", .domains = TGL_AUX_B_IO_POWER_DOMAINS, - .ops = 
&icl_combo_phy_aux_power_well_ops, + .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -3852,7 +3997,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { { .name = "AUX C", .domains = TGL_AUX_C_IO_POWER_DOMAINS, - .ops = &icl_combo_phy_aux_power_well_ops, + .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { .hsw.regs = &icl_aux_power_well_regs, @@ -4162,6 +4307,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) */ if (IS_GEN(dev_priv, 12)) { err = set_power_wells(power_domains, tgl_power_wells); + } else if (IS_ELKHARTLAKE(dev_priv)) { + err = set_power_wells(power_domains, ehl_power_wells); } else if (IS_GEN(dev_priv, 11)) { err = set_power_wells(power_domains, icl_power_wells); } else if (IS_CANNONLAKE(dev_priv)) { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c61ac0c3acb5..b05b2191b919 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2414,9 +2414,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_psr_compute_config(intel_dp, pipe_config); - intel_hdcp_transcoder_config(intel_connector, - pipe_config->cpu_transcoder); - return 0; } @@ -5476,15 +5473,13 @@ static bool bxt_digital_port_connected(struct intel_encoder *encoder) return I915_READ(GEN8_DE_PORT_ISR) & bit; } -static bool icl_combo_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *intel_dig_port) +static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv, + enum phy phy) { - enum port port = intel_dig_port->base.port; - - if (HAS_PCH_MCC(dev_priv) && port == PORT_C) + if (HAS_PCH_MCC(dev_priv) && phy == PHY_C) return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); - return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port); + return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(phy); } static bool icl_digital_port_connected(struct intel_encoder *encoder) @@ -5494,7 +5489,7 
@@ static bool icl_digital_port_connected(struct intel_encoder *encoder) enum phy phy = intel_port_to_phy(dev_priv, encoder->port); if (intel_phy_is_combo(dev_priv, phy)) - return icl_combo_port_connected(dev_priv, dig_port); + return intel_combo_phy_connected(dev_priv, phy); else if (intel_phy_is_tc(dev_priv, phy)) return intel_tc_port_connected(dig_port); else diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 3111ecaeabd0..20616639b8ab 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1284,7 +1284,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) return 0; /* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */ - if (IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) return 0; if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index f1f41ca8402b..a448815d8fc2 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1821,23 +1821,6 @@ enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) } } -void intel_hdcp_transcoder_config(struct intel_connector *connector, - enum transcoder cpu_transcoder) -{ - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_hdcp *hdcp = &connector->hdcp; - - if (!hdcp->shim) - return; - - if (INTEL_GEN(dev_priv) >= 12) { - mutex_lock(&hdcp->mutex); - hdcp->cpu_transcoder = cpu_transcoder; - hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder); - mutex_unlock(&hdcp->mutex); - } -} - static inline int initialize_hdcp_port_data(struct intel_connector *connector, const struct intel_hdcp_shim *shim) { @@ -1959,8 +1942,10 @@ int intel_hdcp_init(struct intel_connector *connector, return 0; } -int intel_hdcp_enable(struct intel_connector *connector, u8 content_type) +int 
intel_hdcp_enable(struct intel_connector *connector, + enum transcoder cpu_transcoder, u8 content_type) { + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS; int ret = -EINVAL; @@ -1972,6 +1957,11 @@ int intel_hdcp_enable(struct intel_connector *connector, u8 content_type) WARN_ON(hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED); hdcp->content_type = content_type; + if (INTEL_GEN(dev_priv) >= 12) { + hdcp->cpu_transcoder = cpu_transcoder; + hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder); + } + /* * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup * is capable of HDCP2.2, it is preferred to use HDCP2.2. diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index 41c1053d9e38..f3c3272e712a 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -21,11 +21,10 @@ enum transcoder; void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); -void intel_hdcp_transcoder_config(struct intel_connector *connector, - enum transcoder cpu_transcoder); int intel_hdcp_init(struct intel_connector *connector, const struct intel_hdcp_shim *hdcp_shim); -int intel_hdcp_enable(struct intel_connector *connector, u8 content_type); +int intel_hdcp_enable(struct intel_connector *connector, + enum transcoder cpu_transcoder, u8 content_type); int intel_hdcp_disable(struct intel_connector *connector); bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port); bool intel_hdcp_capable(struct intel_connector *connector); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f6f5312205c4..f56fffc474fa 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ 
-2489,9 +2489,6 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, return -EINVAL; } - intel_hdcp_transcoder_config(intel_hdmi->attached_connector, - pipe_config->cpu_transcoder); - return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index e553ca8d98eb..42385277c684 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -368,7 +368,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (!ce->timeline) return NULL; - rcu_read_lock(); + mutex_lock(&ce->timeline->mutex); list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { if (i915_request_completed(rq)) break; @@ -378,7 +378,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (engine) break; } - rcu_read_unlock(); + mutex_unlock(&ce->timeline->mutex); return engine; } @@ -2167,8 +2167,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, ext_data.fpriv = file->driver_priv; if (client_is_banned(ext_data.fpriv)) { DRM_DEBUG("client %s[%d] banned from creating ctx\n", - current->comm, - pid_nr(get_task_pid(current, PIDTYPE_PID))); + current->comm, task_pid_nr(current)); return -EIO; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f0998f1225af..bc3a67226163 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2694,6 +2694,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_submit(&eb); err_request: add_to_client(eb.request, file); + i915_request_get(eb.request); i915_request_add(eb.request); if (fences) @@ -2709,6 +2710,7 @@ err_request: fput(out_fence->file); } } + i915_request_put(eb.request); err_batch_unpin: if (eb.batch_flags & I915_DISPATCH_SECURE) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 
ee9d2bcd2c13..ef7bc41ffffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce, GEM_BUG_ON(rq->hw_context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - err = mutex_lock_interruptible_nested(&tl->mutex, - SINGLE_DEPTH_NESTING); - if (err) - return err; + /* + * Ideally, we just want to insert our foreign fence as + * a barrier into the remote context, such that this operation + * occurs after all current operations in that context, and + * all future operations must occur after this. + * + * Currently, the timeline->last_request tracking is guarded + * by its mutex and so we must obtain that to atomically + * insert our barrier. However, since we already hold our + * timeline->mutex, we must be careful against potential + * inversion if we are the kernel_context as the remote context + * will itself poke at the kernel_context when it needs to + * unpin. Ergo, if already locked, we drop both locks and + * try again (through the magic of userspace repeating EAGAIN). + */ + if (!mutex_trylock(&tl->mutex)) + return -EAGAIN; /* Queue this switch after current activity by this context.
*/ err = i915_active_fence_set(&tl->last_request, rq); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index bc3b72bfa9e3..01765a7ec18f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - GEM_BUG_ON(execlists->active - execlists->inflight > - execlists_num_ports(execlists)); - return READ_ONCE(*execlists->active); + return *READ_ONCE(execlists->active); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5ca3ec911e50..813bd3a610d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,13 +28,13 @@ #include "i915_drv.h" -#include "gt/intel_gt.h" - +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_engine_user.h" -#include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_reset.h" #include "intel_ring.h" @@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); @@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); + intel_engine_fini_retire(engine); intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 874d82677179..c1dd0cd3efc7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c 
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void +__queue_and_release_pm(struct i915_request *rq, + struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + GEM_TRACE("%s\n", engine->name); + + /* + * We have to serialise all potential retirement paths with our + * submission, as we don't want to underflow either the + * engine->wakeref.counter or our timeline->active_count. + * + * Equally, we cannot allow a new submission to start until + * after we finish queueing, nor could we allow that submitter + * to retire us before we are ready! + */ + spin_lock(&timelines->lock); + + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + /* Hand the request over to HW and so engine_retire() */ + __i915_request_queue(rq, NULL); + + /* Let new submissions commence (and maybe retire this timeline) */ + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); +} + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; unsigned long flags; bool result = true; @@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. 
+ * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (see intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an underflow of the engine->wakeref. */ - flags = __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(i915_request_timeline(rq)); - /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); @@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - /* Release our exclusive hold on the engine */ - __intel_wakeref_defer_park(&engine->wakeref); - __i915_request_queue(rq, NULL); + /* Expose ourselves to the world */ + __queue_and_release_pm(rq, ce->timeline, engine); result = false; out_unlock: - __timeline_mark_unlock(engine->kernel_context, flags); + __timeline_mark_unlock(ce, flags); return result; } @@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->gt); + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ + intel_gt_pm_put_async(engine->gt); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 739c50fefcef..24e20344dc22 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++
b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine) intel_wakeref_put(&engine->wakeref); } +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) +{ + intel_wakeref_put_async(&engine->wakeref); +} + +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) +{ + intel_wakeref_unlock_wait(&engine->wakeref); +} + void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 758f0e8ec672..17f1f1441efc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -451,6 +451,14 @@ struct intel_engine_cs { struct intel_engine_execlists execlists; + /* + * Keep track of completed timelines on this engine for early + * retirement with the goal of quickly enabling powersaving as + * soon as the engine is idle. 
+ */ + struct intel_timeline *retire; + struct work_struct retire_work; + /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 6187cdd06646..a459a42ad5c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf) static const struct intel_wakeref_ops wf_ops = { .get = __gt_unpark, .put = __gt_park, - .flags = INTEL_WAKEREF_PUT_ASYNC, }; void intel_gt_pm_init_early(struct intel_gt *gt) @@ -272,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) static suspend_state_t pm_suspend_target(void) { -#if IS_ENABLED(CONFIG_PM_SLEEP) +#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) return pm_suspend_target_state; #else return PM_SUSPEND_TO_IDLE; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b3e17399be9b..990efc27a4e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt) intel_wakeref_put(&gt->wakeref); } +static inline void intel_gt_pm_put_async(struct intel_gt *gt) +{ + intel_wakeref_put_async(&gt->wakeref); +} + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(&gt->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 353809ac2754..3dc13ecf41bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/workqueue.h> + #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_gt.h" @@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt) 
intel_engine_flush_submission(engine); } +static void engine_retire(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), retire_work); + struct intel_timeline *tl = xchg(&engine->retire, NULL); + + do { + struct intel_timeline *next = xchg(&tl->retire, NULL); + + /* + * Our goal here is to retire _idle_ timelines as soon as + * possible (as they are idle, we do not expect userspace + * to be cleaning up anytime soon). + * + * If the timeline is currently locked, either it is being + * retired elsewhere or about to be! + */ + if (mutex_trylock(&tl->mutex)) { + retire_requests(tl); + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + + GEM_BUG_ON(!next); + tl = ptr_mask_bits(next, 1); + } while (tl); +} + +static bool add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + struct intel_timeline *first; + + /* + * We open-code a llist here to include the additional tag [BIT(0)] + * so that we know when the timeline is already on a + * retirement queue: either this engine or another. + * + * However, we rely on that a timeline can only be active on a single + * engine at any one time and that add_retire() is called before the + * engine releases the timeline and transferred to another to retire. 
+ */ + + if (READ_ONCE(tl->retire)) /* already queued */ + return false; + + intel_timeline_get(tl); + first = READ_ONCE(engine->retire); + do + tl->retire = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->retire, &first, tl)); + + return !first; +} + +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + if (add_retire(engine, tl)) + schedule_work(&engine->retire_work); +} + +void intel_engine_init_retire(struct intel_engine_cs *engine) +{ + INIT_WORK(&engine->retire_work, engine_retire); +} + +void intel_engine_fini_retire(struct intel_engine_cs *engine) +{ + flush_work(&engine->retire_work); + GEM_BUG_ON(engine->retire); +} + long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) { struct intel_gt_timelines *timelines = &gt->timelines; @@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ spin_unlock_irqrestore(&timelines->lock, flags); if (timeout > 0) { @@ -74,7 +149,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (!--tl->active_count) + if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); else active_count += !!rcu_access_pointer(tl->last_request.fence); @@ -83,7 +158,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); + GEM_BUG_ON(atomic_read(&tl->active_count)); list_add(&tl->link, &free); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index bd31cbce47e0..d626fb115386 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -7,7 +7,9 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +struct intel_engine_cs; struct intel_gt; +struct intel_timeline; long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) @@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt) intel_gt_retire_requests_timeout(gt, 0); } +void intel_engine_init_retire(struct intel_engine_cs *engine); +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl); +void intel_engine_fini_retire(struct intel_engine_cs *engine); + int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_init_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0ac3b26674ad..75dd0e0367b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -142,6 +142,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -844,12 +845,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) } } -static void unwind_wa_tail(struct i915_request *rq) -{ - rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); - assert_ring_tail_valid(rq->ring, rq->tail); -} - static struct i915_request * __unwind_incomplete_requests(struct intel_engine_cs *engine) { @@ -862,12 +857,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - if (i915_request_completed(rq)) continue; /* XXX */ __i915_request_unsubmit(rq); - unwind_wa_tail(rq); /* * Push the request back into the queue for later resubmission. 
@@ -1115,9 +1108,17 @@ __execlists_schedule_out(struct i915_request *rq, * refrain from doing non-trivial work here. */ + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. + */ + if (list_is_last(&rq->link, &ce->timeline->requests) && + i915_request_completed(rq)) + intel_engine_add_retire(engine, ce->timeline); + intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - intel_gt_pm_put(engine->gt); + intel_gt_pm_put_async(engine->gt); /* * If this is part of a virtual engine, its next request may @@ -1152,13 +1153,29 @@ execlists_schedule_out(struct i915_request *rq) i915_request_put(rq); } -static u64 execlists_update_context(const struct i915_request *rq) +static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->hw_context; - u64 desc; + u64 desc = ce->lrc_desc; + u32 tail; - ce->lrc_reg_state[CTX_RING_TAIL] = - intel_ring_set_tail(rq->ring, rq->tail); + /* + * WaIdleLiteRestore:bdw,skl + * + * We should never submit the context with the same RING_TAIL twice + * just in case we submit an empty ring, which confuses the HW. + * + * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of + * the normal request to be able to always advance the RING_TAIL on + * subsequent resubmissions (for lite restore). Should that fail us, + * and we try and submit the same tail again, force the context + * reload. + */ + tail = intel_ring_set_tail(rq->ring, rq->tail); + if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail)) + desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc_reg_state[CTX_RING_TAIL] = tail; + rq->tail = rq->wa_tail; /* * Make sure the context image is complete before we submit it to HW. 
@@ -1177,13 +1194,11 @@ static u64 execlists_update_context(const struct i915_request *rq) */ mb(); - desc = ce->lrc_desc; - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; - /* Wa_1607138340:tgl */ if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1694,16 +1709,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) return; } - - /* - * WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_fini_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; } } @@ -1937,16 +1942,17 @@ skip_submit: static void cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct i915_request * const *port, *rq; + struct i915_request * const *port; - for (port = execlists->pending; (rq = *port); port++) - execlists_schedule_out(rq); + for (port = execlists->pending; *port; port++) + execlists_schedule_out(*port); memset(execlists->pending, 0, sizeof(execlists->pending)); - for (port = execlists->active; (rq = *port); port++) - execlists_schedule_out(rq); - execlists->active = - memset(execlists->inflight, 0, sizeof(execlists->inflight)); + /* Mark the end of active before we overwrite *active */ + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + execlists_schedule_out(*port); + WRITE_ONCE(execlists->active, + memset(execlists->inflight, 0, sizeof(execlists->inflight))); } static inline void @@ -2099,23 +2105,27 @@ static void process_csb(struct intel_engine_cs *engine) else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + struct i915_request * const *old = execlists->active; + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + set_timeslice(engine); + if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); 
/* cancel old inflight, prepare for switch */ - trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) - execlists_schedule_out(*execlists->active++); + trace_ports(execlists, "preempted", old); + while (*old) + execlists_schedule_out(*old++); /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - execlists->active = - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); - - set_timeslice(engine); + WRITE_ONCE(execlists->active, + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending))); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -4106,17 +4116,18 @@ static void virtual_context_destroy(struct kref *kref) for (n = 0; n < ve->num_siblings; n++) { struct intel_engine_cs *sibling = ve->siblings[n]; struct rb_node *node = &ve->nodes[sibling->id].rb; + unsigned long flags; if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->active.lock); + spin_lock_irqsave(&sibling->active.lock, flags); /* Detachment is lazily performed in the execlists tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irqrestore(&sibling->active.lock, flags); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f03e000051c1..c97423a76642 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index ece20504d240..374b28f13ca0 
100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring) i915_vma_make_unshrinkable(vma); - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + ring->vaddr = addr; return 0; err_ring: @@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring) if (!atomic_dec_and_test(&ring->pin_count)) return; - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->emit); - i915_vma_unset_ggtt_write(vma); if (i915_vma_is_map_and_fenceable(vma)) i915_vma_unpin_iomap(vma); else i915_gem_object_unpin_map(vma->obj); - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); i915_vma_make_purgeable(vma); + i915_vma_unpin(vma); } static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 14ad10acd548..649798c184fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(timeline->retire); if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); @@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* + * Pretend we are serialised by the timeline->mutex. + * + * While generally true, there are a few exceptions to the rule + * for the engine->kernel_context being used to manage power + * transitions. As the engine_park may be called from under any + * timeline, it uses the power mutex as a global serialisation + * lock to prevent any other request entering its timeline. 
+ * + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. + * + * However, intel_gt_retire_request() does not know which engine + * it is retiring along and so cannot partake in the engine-pm + * barrier, and there we use the tl->active_count as a means to + * pin the timeline in the active_list while the locks are dropped. + * Ergo, as that is outside of the engine-pm barrier, we need to + * use atomic to manipulate tl->active_count. + */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); - if (tl->active_count++) + + if (atomic_add_unless(&tl->active_count, 1, 0)) return; - GEM_BUG_ON(!tl->active_count); /* overflow? */ spin_lock_irqsave(&timelines->lock, flags); - list_add(&tl->link, &timelines->active_list); + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); spin_unlock_irqrestore(&timelines->lock, flags); } @@ -356,14 +375,16 @@ void intel_timeline_exit(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* See intel_timeline_enter() */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!tl->active_count); - if (--tl->active_count) + GEM_BUG_ON(!atomic_read(&tl->active_count)); + if (atomic_add_unless(&tl->active_count, -1, 1)) return; spin_lock_irqsave(&timelines->lock, flags); - list_del(&tl->link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); spin_unlock_irqrestore(&timelines->lock, flags); /* diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 98d9ee166379..aaf15cbe1ce1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -42,7 +42,7 @@ struct intel_timeline { * from the intel_context caller plus internal atomicity. 
*/ atomic_t pin_count; - unsigned int active_count; + atomic_t active_count; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; @@ -66,6 +66,9 @@ struct intel_timeline { */ struct i915_active_fence last_request; + /** A chain of completed timelines ready for early retirement. */ + struct intel_timeline *retire; + /** * We track the most recent seqno that we wait on in every context so * that we only have to emit a new await and dependency on a more diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 20b9c83f43ad..cbf6b0735272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg) pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", engine->name, p->name); else - intel_engine_pm_put(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); + intel_engine_pm_put_async(engine); p->critical_section_end(); - /* engine wakeref is sync (instant) */ + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { pr_err("%s is still awake after flushing pm\n", engine->name); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 6a3ac8cde95d..21a176cd8acc 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1599,9 +1599,9 @@ static int cmd_handler_mi_op_2f(struct parser_exec_state *s) if (!(cmd_val(s, 0) & (1 << 22))) return ret; - /* check if QWORD */ - if (DWORD_FIELD(0, 20, 19) == 1) - valid_len += 8; + /* check inline data */ + if (cmd_val(s, 0) & BIT(18)) + valid_len = CMD_LEN(9); ret = gvt_check_valid_cmd_length(cmd_length(s), valid_len); if (ret) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index e451298d11c3..2477a1e5a166 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -36,13 +36,32 @@ #define 
GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12)) +static int vgpu_pin_dma_address(struct intel_vgpu *vgpu, + unsigned long size, + dma_addr_t dma_addr) +{ + int ret = 0; + + if (intel_gvt_hypervisor_dma_pin_guest_page(vgpu, dma_addr)) + ret = -EINVAL; + + return ret; +} + +static void vgpu_unpin_dma_address(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) +{ + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr); +} + static int vgpu_gem_get_pages( struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct intel_vgpu *vgpu; struct sg_table *st; struct scatterlist *sg; - int i, ret; + int i, j, ret; gen8_pte_t __iomem *gtt_entries; struct intel_vgpu_fb_info *fb_info; u32 page_num; @@ -51,6 +70,10 @@ static int vgpu_gem_get_pages( if (WARN_ON(!fb_info)) return -ENODEV; + vgpu = fb_info->obj->vgpu; + if (WARN_ON(!vgpu)) + return -ENODEV; + st = kmalloc(sizeof(*st), GFP_KERNEL); if (unlikely(!st)) return -ENOMEM; @@ -64,21 +87,53 @@ static int vgpu_gem_get_pages( gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + (fb_info->start >> PAGE_SHIFT); for_each_sg(st->sgl, sg, page_num, i) { + dma_addr_t dma_addr = + GEN8_DECODE_PTE(readq(&gtt_entries[i])); + if (vgpu_pin_dma_address(vgpu, PAGE_SIZE, dma_addr)) { + ret = -EINVAL; + goto out; + } + sg->offset = 0; sg->length = PAGE_SIZE; - sg_dma_address(sg) = - GEN8_DECODE_PTE(readq(&gtt_entries[i])); sg_dma_len(sg) = PAGE_SIZE; + sg_dma_address(sg) = dma_addr; } __i915_gem_object_set_pages(obj, st, PAGE_SIZE); +out: + if (ret) { + dma_addr_t dma_addr; + + for_each_sg(st->sgl, sg, i, j) { + dma_addr = sg_dma_address(sg); + if (dma_addr) + vgpu_unpin_dma_address(vgpu, dma_addr); + } + sg_free_table(st); + kfree(st); + } + + return ret; - return 0; } static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { + struct scatterlist *sg; + + if (obj->base.dma_buf) { + struct intel_vgpu_fb_info *fb_info = obj->gvt_info; + struct intel_vgpu_dmabuf_obj 
*obj = fb_info->obj; + struct intel_vgpu *vgpu = obj->vgpu; + int i; + + for_each_sg(pages->sgl, sg, fb_info->size, i) + vgpu_unpin_dma_address(vgpu, + sg_dma_address(sg)); + } + sg_free_table(pages); kfree(pages); } @@ -163,6 +218,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, roundup(info->size, PAGE_SIZE)); i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class); + i915_gem_object_set_readonly(obj); obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bd12af349123..1043e6d564df 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -341,6 +341,10 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id); engine_mask |= BIT(VCS1); } + if (data & GEN9_GRDOM_GUC) { + gvt_dbg_mmio("vgpu%d: request GUC Reset\n", vgpu->id); + vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET; + } engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask; } @@ -460,6 +464,7 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, static i915_reg_t force_nonpriv_white_list[] = { GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec) GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248) + PS_INVOCATION_COUNT,//_MMIO(0x2348) GEN8_CS_CHICKEN1,//_MMIO(0x2580) _MMIO(0x2690), _MMIO(0x2694), @@ -508,7 +513,7 @@ static inline bool in_whitelist(unsigned int reg) static int force_nonpriv_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - u32 reg_nonpriv = *(u32 *)p_data; + u32 reg_nonpriv = (*(u32 *)p_data) & REG_GENMASK(25, 2); int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); u32 ring_base; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -528,7 +533,7 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu, bytes); } else gvt_err("vgpu(%d) 
Invalid FORCE_NONPRIV write %x at offset %x\n", - vgpu->id, reg_nonpriv, offset); + vgpu->id, *(u32 *)p_data, offset); return 0; } @@ -1635,6 +1640,16 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu, return 0; } +static int guc_status_read(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, + unsigned int bytes) +{ + /* keep MIA_IN_RESET before clearing */ + read_vreg(vgpu, offset, p_data, bytes); + vgpu_vreg(vgpu, offset) &= ~GS_MIA_IN_RESET; + return 0; +} + static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2671,6 +2686,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); + MMIO_DH(GUC_STATUS, D_ALL, guc_status_read, NULL); + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 4862fb12778e..b19a3b1ea4c1 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -62,6 +62,8 @@ struct intel_gvt_mpt { unsigned long size, dma_addr_t *dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*dma_pin_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 04a5a0d90823..3259a1fa69e1 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1916,6 +1916,28 @@ err_unlock: return ret; } +static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr) +{ + struct kvmgt_guest_info *info; + struct gvt_dma *entry; + int ret = 0; + + if (!handle_valid(handle)) + return -ENODEV; + + info = (struct kvmgt_guest_info *)handle; + + 
mutex_lock(&info->vgpu->vdev.cache_lock); + entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + if (entry) + kref_get(&entry->ref); + else + ret = -ENOMEM; + mutex_unlock(&info->vgpu->vdev.cache_lock); + + return ret; +} + static void __gvt_dma_release(struct kref *ref) { struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); @@ -2027,6 +2049,7 @@ static struct intel_gvt_mpt kvmgt_mpt = { .gfn_to_mfn = kvmgt_gfn_to_pfn, .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, + .dma_pin_guest_page = kvmgt_dma_pin_guest_page, .set_opregion = kvmgt_set_opregion, .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 0f9440128123..9ad224df9c68 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -255,6 +255,21 @@ static inline void intel_gvt_hypervisor_dma_unmap_guest_page( } /** + * intel_gvt_hypervisor_dma_pin_guest_page - pin guest dma buf + * @vgpu: a vGPU + * @dma_addr: guest dma addr + * + * Returns: + * 0 on success, negative error code if failed. 
+ */ +static inline int +intel_gvt_hypervisor_dma_pin_guest_page(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) +{ + return intel_gvt_host.mpt->dma_pin_guest_page(vgpu->handle, dma_addr); +} + +/** * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * @vgpu: a vGPU * @gfn: guest PFN diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index d5a6e4e3d0fd..85bd9bf4f6ee 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -212,9 +212,9 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) */ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) { - mutex_lock(&vgpu->gvt->lock); + mutex_lock(&vgpu->vgpu_lock); vgpu->active = true; - mutex_unlock(&vgpu->gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } /** diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 3c424cb90702..a19e7d89bc8a 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref) * populated by i915_request_add_active_barriers() to point to the * request that will eventually release them. 
*/ - spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING); llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { struct active_node *node = barrier_from_ll(pos); struct intel_engine_cs *engine = barrier_to_engine(node); struct rb_node **p, *parent; + spin_lock_irqsave_nested(&ref->tree_lock, flags, + SINGLE_DEPTH_NESTING); parent = NULL; p = &ref->tree.rb_node; while (*p) { @@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref) } rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + spin_unlock_irqrestore(&ref->tree_lock, flags); GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } - spin_unlock_irqrestore(&ref->tree_lock, flags); } void i915_request_add_active_barriers(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b9eb6b3149b7..d034fa413164 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -45,6 +45,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -1053,6 +1054,18 @@ out: return err; } +static int __intel_context_flush_retire(struct intel_context *ce) +{ + struct intel_timeline *tl; + + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + intel_context_timeline_unlock(tl); + return 0; +} + static int __intel_engines_record_defaults(struct intel_gt *gt) { struct i915_request *requests[I915_NUM_ENGINES] = {}; @@ -1121,13 +1134,20 @@ err_rq: if (!rq) continue; - /* We want to be able to unbind the state from the GGTT */ - GEM_BUG_ON(intel_context_is_pinned(rq->hw_context)); - + GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, + &rq->hw_context->flags)); state = rq->hw_context->state; if (!state) continue; + /* 
Serialise with retirement on another CPU */ + err = __intel_context_flush_retire(rq->hw_context); + if (err) + goto out; + + /* We want to be able to unbind the state from the GGTT */ + GEM_BUG_ON(intel_context_is_pinned(rq->hw_context)); + /* * As we will hold a reference to the logical state, it will * not be torn down with the context, and importantly the diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 65d7c2e599de..2ae14bc14931 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, u32 *reg_state = ce->lrc_reg_state; int i; - if (IS_GEN(stream->perf->i915, 12)) { - u32 format = stream->oa_buffer.format; + reg_state[ctx_oactxctrl + 1] = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; - reg_state[ctx_oactxctrl + 1] = - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); - } else { - reg_state[ctx_oactxctrl + 1] = - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - } - - for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); @@ -2224,34 +2216,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } -static int gen12_emit_oar_config(struct intel_context *ce, bool enable) +static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) { - struct i915_request *rq; - u32 *cs; - int err = 0; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto out; - } - - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); - *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, - enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); - *cs++ = MI_NOOP; + int err; + struct intel_context *ce = stream->pinned_ctx; + u32 format = stream->oa_buffer.format; + struct flex regs_context[] = { + { + GEN8_OACTXCONTROL, + stream->perf->ctx_oactxctrl_offset + 1, + enable ? GEN8_OA_COUNTER_RESUME : 0, + }, + }; + /* Offsets in regs_lri are not used since this configuration is only + * applied using LRI. Initialize the correct offsets for posterity. + */ +#define GEN12_OAR_OACONTROL_OFFSET 0x5B0 + struct flex regs_lri[] = { + { + GEN12_OAR_OACONTROL, + GEN12_OAR_OACONTROL_OFFSET + 1, + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | + (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) + }, + { + RING_CONTEXT_CONTROL(ce->engine->mmio_base), + CTX_CONTEXT_CONTROL, + _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, + enable ? 
+ GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : + 0) + }, + }; - intel_ring_advance(rq, cs); + /* Modify the context image of pinned context with regs_context*/ + err = intel_context_lock_pinned(ce); + if (err) + return err; -out: - i915_request_add(rq); + err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); + intel_context_unlock_pinned(ce); + if (err) + return err; - return err; + /* Apply regs_lri using LRI with pinned context */ + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); } /* @@ -2277,53 +2286,16 @@ out: * per-context OA state. * * Note: it's only the RCS/Render context that has any OA state. + * Note: the first flex register passed must always be R_PWR_CLK_STATE */ -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int oa_configure_all_contexts(struct i915_perf_stream *stream, + struct flex *regs, + size_t num_regs) { struct drm_i915_private *i915 = stream->perf->i915; - /* The MMIO offsets for Flex EU registers aren't contiguous */ - const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) - struct flex regs[] = { - { - GEN8_R_PWR_CLK_STATE, - CTX_R_PWR_CLK_STATE, - }, - { - IS_GEN(i915, 12) ? - GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, - stream->perf->ctx_oactxctrl_offset + 1, - }, - { EU_PERF_CNTL0, ctx_flexeuN(0) }, - { EU_PERF_CNTL1, ctx_flexeuN(1) }, - { EU_PERF_CNTL2, ctx_flexeuN(2) }, - { EU_PERF_CNTL3, ctx_flexeuN(3) }, - { EU_PERF_CNTL4, ctx_flexeuN(4) }, - { EU_PERF_CNTL5, ctx_flexeuN(5) }, - { EU_PERF_CNTL6, ctx_flexeuN(6) }, - }; -#undef ctx_flexeuN struct intel_engine_cs *engine; struct i915_gem_context *ctx, *cn; - size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); - int i, err; - - if (IS_GEN(i915, 12)) { - u32 format = stream->oa_buffer.format; - - regs[1].value = - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (oa_config ? 
GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); - } else { - regs[1].value = - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - } - - for (i = 2; !!ctx_flexeu0 && i < array_size; i++) - regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); + int err; lockdep_assert_held(&stream->perf->lock); @@ -2353,7 +2325,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, array_size); + err = gen8_configure_context(ctx, regs, num_regs); if (err) { i915_gem_context_put(ctx); return err; @@ -2378,7 +2350,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, array_size); + err = gen8_modify_self(ce, regs, num_regs); if (err) return err; } @@ -2386,6 +2358,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, return 0; } +static int gen12_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) +{ + struct flex regs[] = { + { + GEN8_R_PWR_CLK_STATE, + CTX_R_PWR_CLK_STATE, + }, + }; + + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); +} + +static int lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) +{ + /* The MMIO offsets for Flex EU registers aren't contiguous */ + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) + struct flex regs[] = { + { + GEN8_R_PWR_CLK_STATE, + CTX_R_PWR_CLK_STATE, + }, + { + GEN8_OACTXCONTROL, + stream->perf->ctx_oactxctrl_offset + 1, + }, + { EU_PERF_CNTL0, ctx_flexeuN(0) }, + { EU_PERF_CNTL1, ctx_flexeuN(1) }, + { EU_PERF_CNTL2, ctx_flexeuN(2) }, + { EU_PERF_CNTL3, ctx_flexeuN(3) }, + { EU_PERF_CNTL4, ctx_flexeuN(4) }, + { EU_PERF_CNTL5, ctx_flexeuN(5) }, + { 
EU_PERF_CNTL6, ctx_flexeuN(6) }, + }; +#undef ctx_flexeuN + int i; + + regs[1].value = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + + for (i = 2; i < ARRAY_SIZE(regs); i++) + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); + + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); +} + static int gen8_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -2464,7 +2486,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = lrc_configure_all_contexts(stream, oa_config); + ret = gen12_configure_all_contexts(stream, oa_config); if (ret) return ret; @@ -2474,8 +2496,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) * requested this. */ if (stream->ctx) { - ret = gen12_emit_oar_config(stream->pinned_ctx, - oa_config != NULL); + ret = gen12_configure_oar_context(stream, true); if (ret) return ret; } @@ -2509,11 +2530,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); + gen12_configure_all_contexts(stream, NULL); /* disable the context save/restore or OAR counters */ if (stream->ctx) - gen12_emit_oar_config(stream->pinned_ctx, false); + gen12_configure_oar_context(stream, false); /* Make sure we disable noa to save power. 
*/ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2713,7 +2734,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - if (!(props->sample_flags & SAMPLE_OA_REPORT)) { + if (!(props->sample_flags & SAMPLE_OA_REPORT) && + (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) { DRM_DEBUG("Only OA report sampling supported\n"); return -EINVAL; } @@ -2745,7 +2767,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, format_size = perf->oa_formats[props->oa_format].size; - stream->sample_flags |= SAMPLE_OA_REPORT; + stream->sample_flags = props->sample_flags; stream->sample_size += format_size; stream->oa_buffer.format_size = format_size; @@ -2854,7 +2876,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce, return; stream = engine->i915->perf.exclusive_stream; - if (stream) + /* + * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller + * is already doing that, so nothing to be done for gen12 here. + */ + if (stream && INTEL_GEN(stream->perf->i915) < 12) gen8_update_reg_state_unlocked(ce, stream); } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 0d40dccd1409..2814218c5ba1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt) val = 0; if (intel_gt_pm_get_if_awake(gt)) { val = __get_rc6(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } spin_lock_irqsave(&pmu->lock, flags); @@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) skip: spin_unlock_irqrestore(&engine->uncore->lock, flags); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); } } @@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (intel_gt_pm_get_if_awake(gt)) { val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); val = intel_get_cagf(rps, val); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } 
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index c27cfef9281c..ef25ce6e395e 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915, struct drm_i915_engine_info __user *info_ptr; struct drm_i915_query_engine_info query; struct drm_i915_engine_info info = { }; + unsigned int num_uabi_engines = 0; struct intel_engine_cs *engine; int len, ret; if (query_item->flags) return -EINVAL; + for_each_uabi_engine(engine, i915) + num_uabi_engines++; + len = sizeof(struct drm_i915_query_engine_info) + - RUNTIME_INFO(i915)->num_engines * - sizeof(struct drm_i915_engine_info); + num_uabi_engines * sizeof(struct drm_i915_engine_info); ret = copy_query_item(&query, sizeof(query), len, query_item); if (ret != 0) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 73079b503724..4fd3d76db346 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9405,11 +9405,9 @@ enum skl_power_gate { #define _ICL_AUX_REG_IDX(pw_idx) ((pw_idx) - ICL_PW_CTL_IDX_AUX_A) #define _ICL_AUX_ANAOVRD1_A 0x162398 #define _ICL_AUX_ANAOVRD1_B 0x6C398 -#define _TGL_AUX_ANAOVRD1_C 0x160398 #define ICL_AUX_ANAOVRD1(pw_idx) _MMIO(_PICK(_ICL_AUX_REG_IDX(pw_idx), \ _ICL_AUX_ANAOVRD1_A, \ - _ICL_AUX_ANAOVRD1_B, \ - _TGL_AUX_ANAOVRD1_C)) + _ICL_AUX_ANAOVRD1_B)) #define ICL_AUX_ANAOVRD1_LDO_BYPASS (1 << 7) #define ICL_AUX_ANAOVRD1_ENABLE (1 << 0) @@ -11994,7 +11992,7 @@ enum skl_power_gate { /* This register controls the Display State Buffer (DSB) engines. 
*/ #define _DSBSL_INSTANCE_BASE 0x70B00 #define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \ - (pipe) * 0x1000 + (id) * 100) + (pipe) * 0x1000 + (id) * 0x100) #define DSB_HEAD(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x0) #define DSB_TAIL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x4) #define DSB_CTRL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x8) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index bbd71af00a91..765bec89fc0d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq) } static int -__i915_request_await_execution(struct i915_request *rq, - struct i915_request *signal, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal), - gfp_t gfp) +__await_execution(struct i915_request *rq, + struct i915_request *signal, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal), + gfp_t gfp) { struct execute_cb *cb; @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq, } spin_unlock_irq(&signal->lock); + /* Copy across semaphore status as we need the same behaviour */ + rq->sched.flags |= signal->sched.flags; return 0; } @@ -811,31 +813,21 @@ already_busywaiting(struct i915_request *rq) } static int -emit_semaphore_wait(struct i915_request *to, - struct i915_request *from, - gfp_t gfp) +__emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + u32 seqno) { const int has_token = INTEL_GEN(to->i915) >= 12; u32 hwsp_offset; - int len; + int len, err; u32 *cs; GEM_BUG_ON(INTEL_GEN(to->i915) < 8); - /* Just emit the first semaphore we see as request space is limited. */ - if (already_busywaiting(to) & from->engine->mask) - goto await_fence; - - if (i915_request_await_start(to, from) < 0) - goto await_fence; - - /* Only submit our spinner after the signaler is running! 
*/ - if (__i915_request_await_execution(to, from, NULL, gfp)) - goto await_fence; - /* We need to pin the signaler's HWSP until we are finished reading. */ - if (intel_timeline_read_hwsp(from, to, &hwsp_offset)) - goto await_fence; + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); + if (err) + return err; len = 4; if (has_token) @@ -858,7 +850,7 @@ emit_semaphore_wait(struct i915_request *to, MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD) + has_token; - *cs++ = from->fence.seqno; + *cs++ = seqno; *cs++ = hwsp_offset; *cs++ = 0; if (has_token) { @@ -867,6 +859,28 @@ emit_semaphore_wait(struct i915_request *to, } intel_ring_advance(to, cs); + return 0; +} + +static int +emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + gfp_t gfp) +{ + /* Just emit the first semaphore we see as request space is limited. */ + if (already_busywaiting(to) & from->engine->mask) + goto await_fence; + + if (i915_request_await_start(to, from) < 0) + goto await_fence; + + /* Only submit our spinner after the signaler is running! 
*/ + if (__await_execution(to, from, NULL, gfp)) + goto await_fence; + + if (__emit_semaphore_wait(to, from, from->fence.seqno)) + goto await_fence; + to->sched.semaphores |= from->engine->mask; to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; return 0; @@ -980,6 +994,57 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) return 0; } +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, + struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, + fence->context, + fence->seqno - 1); +} + +static int intel_timeline_sync_set_start(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); +} + +static int +__i915_request_await_execution(struct i915_request *to, + struct i915_request *from, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) +{ + int err; + + /* Submit both requests at the same time */ + err = __await_execution(to, from, hook, I915_FENCE_GFP); + if (err) + return err; + + /* Squash repeated depenendices to the same timelines */ + if (intel_timeline_sync_has_start(i915_request_timeline(to), + &from->fence)) + return 0; + + /* Ensure both start together [after all semaphores in signal] */ + if (intel_engine_has_semaphores(to->engine)) + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + else + err = i915_request_await_start(to, from); + if (err < 0) + return err; + + /* Couple the dependency tree for PI on this exposed to->fence */ + if (to->engine->schedule) { + err = i915_sched_node_add_dependency(&to->sched, &from->sched); + if (err < 0) + return err; + } + + return intel_timeline_sync_set_start(i915_request_timeline(to), + &from->fence); +} + int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence, @@ -1013,8 +1078,7 @@ i915_request_await_execution(struct i915_request *rq, if (dma_fence_is_i915(fence)) ret = __i915_request_await_execution(rq, 
to_request(fence), - hook, - I915_FENCE_GFP); + hook); else ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, I915_FENCE_TIMEOUT, diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 010d67f48ad9..247a9671bca5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -474,7 +474,6 @@ void i915_sched_node_fini(struct i915_sched_node *node) * so we may be called out-of-order. */ list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { - GEM_BUG_ON(!node_signaled(dep->signaler)); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->wait_link); diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c index 07552cd544f2..8538ee7a521d 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.c +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c @@ -78,12 +78,11 @@ static const struct dma_fence_ops fence_ops = { void dma_fence_work_init(struct dma_fence_work *f, const struct dma_fence_work_ops *ops) { + f->ops = ops; spin_lock_init(&f->lock); dma_fence_init(&f->dma, &fence_ops, &f->lock, 0, 0); i915_sw_fence_init(&f->chain, fence_notify); INIT_WORK(&f->work, fence_work); - - f->ops = ops; } int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 809bff955b5a..75ae6f495161 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4291,8 +4291,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state, &crtc_state->wm.skl.optimal.planes[plane_id]; if (plane_id == PLANE_CURSOR) { - if (WARN_ON(wm->wm[level].min_ddb_alloc > - total[PLANE_CURSOR])) { + if (wm->wm[level].min_ddb_alloc > total[PLANE_CURSOR]) { + WARN_ON(wm->wm[level].min_ddb_alloc != U16_MAX); blocks = U32_MAX; break; } diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 
868cc78048d0..59aa1b6f1827 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf) static void ____intel_wakeref_put_last(struct intel_wakeref *wf) { - if (!atomic_dec_and_test(&wf->count)) + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); + if (unlikely(!atomic_dec_and_test(&wf->count))) goto unlock; /* ops->put() must reschedule its own release on error/deferral */ @@ -67,13 +68,12 @@ unlock: mutex_unlock(&wf->mutex); } -void __intel_wakeref_put_last(struct intel_wakeref *wf) +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) { INTEL_WAKEREF_BUG_ON(work_pending(&wf->work)); /* Assume we are not in process context and so cannot sleep. */ - if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC || - !mutex_trylock(&wf->mutex)) { + if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { schedule_work(&wf->work); return; } @@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf, int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) { - return wait_var_event_killable(&wf->wakeref, - !intel_wakeref_is_active(wf)); + int err; + + might_sleep(); + + err = wait_var_event_killable(&wf->wakeref, + !intel_wakeref_is_active(wf)); + if (err) + return err; + + intel_wakeref_unlock_wait(wf); + return 0; } static void wakeref_auto_timeout(struct timer_list *t) diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 5f0c972a80fb..da6e8fd506e6 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -9,6 +9,7 @@ #include <linux/atomic.h> #include <linux/bits.h> +#include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/stackdepot.h> @@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t; struct intel_wakeref_ops { int (*get)(struct intel_wakeref *wf); int (*put)(struct intel_wakeref *wf); - - 
unsigned long flags; -#define INTEL_WAKEREF_PUT_ASYNC BIT(0) }; struct intel_wakeref { @@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf, } while (0) int __intel_wakeref_get_first(struct intel_wakeref *wf); -void __intel_wakeref_put_last(struct intel_wakeref *wf); +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags); /** * intel_wakeref_get: Acquire the wakeref @@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) } /** - * intel_wakeref_put: Release the wakeref - * @i915: the drm_i915_private device + * intel_wakeref_put_flags: Release the wakeref * @wf: the wakeref - * @fn: callback for releasing the wakeref, called only on final release. + * @flags: control flags * * Release our hold on the wakeref. When there are no more users, * the runtime pm wakeref will be released after the @fn callback is called @@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) * code otherwise. */ static inline void -intel_wakeref_put(struct intel_wakeref *wf) +__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags) +#define INTEL_WAKEREF_PUT_ASYNC BIT(0) { INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); if (unlikely(!atomic_add_unless(&wf->count, -1, 1))) - __intel_wakeref_put_last(wf); + __intel_wakeref_put_last(wf, flags); +} + +static inline void +intel_wakeref_put(struct intel_wakeref *wf) +{ + might_sleep(); + __intel_wakeref_put(wf, 0); +} + +static inline void +intel_wakeref_put_async(struct intel_wakeref *wf) +{ + __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC); } /** @@ -152,6 +163,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf) } /** + * intel_wakeref_unlock_wait: Wait until the active callback is complete + * @wf: the wakeref + * + * Waits for the active callback (under the @wf->mutex or another CPU) is + * complete. 
+ */ +static inline void +intel_wakeref_unlock_wait(struct intel_wakeref *wf) +{ + mutex_lock(&wf->mutex); + mutex_unlock(&wf->mutex); + flush_work(&wf->work); +} + +/** * intel_wakeref_is_active: Query whether the wakeref is currently held * @wf: the wakeref * @@ -170,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf) static inline void __intel_wakeref_defer_park(struct intel_wakeref *wf) { + lockdep_assert_held(&wf->mutex); INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count)); atomic_set_release(&wf->count, 1); } diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c index d6214d3c8b33..ef4c630afe3f 100644 --- a/drivers/gpu/drm/mcde/mcde_dsi.c +++ b/drivers/gpu/drm/mcde/mcde_dsi.c @@ -935,11 +935,13 @@ static int mcde_dsi_bind(struct device *dev, struct device *master, for_each_available_child_of_node(dev->of_node, child) { panel = of_drm_find_panel(child); if (IS_ERR(panel)) { - dev_err(dev, "failed to find panel try bridge (%lu)\n", + dev_err(dev, "failed to find panel try bridge (%ld)\n", PTR_ERR(panel)); + panel = NULL; + bridge = of_drm_find_bridge(child); if (IS_ERR(bridge)) { - dev_err(dev, "failed to find bridge (%lu)\n", + dev_err(dev, "failed to find bridge (%ld)\n", PTR_ERR(bridge)); return PTR_ERR(bridge); } diff --git a/drivers/gpu/drm/meson/meson_venc_cvbs.c b/drivers/gpu/drm/meson/meson_venc_cvbs.c index 9ab27aecfcf3..1bd6b6d15ffb 100644 --- a/drivers/gpu/drm/meson/meson_venc_cvbs.c +++ b/drivers/gpu/drm/meson/meson_venc_cvbs.c @@ -64,6 +64,25 @@ struct meson_cvbs_mode meson_cvbs_modes[MESON_CVBS_MODES_COUNT] = { }, }; +static const struct meson_cvbs_mode * +meson_cvbs_get_mode(const struct drm_display_mode *req_mode) +{ + int i; + + for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) { + struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i]; + + if (drm_mode_match(req_mode, &meson_mode->mode, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) + return meson_mode; 
+ } + + return NULL; +} + /* Connector */ static void meson_cvbs_connector_destroy(struct drm_connector *connector) @@ -136,14 +155,8 @@ static int meson_venc_cvbs_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - int i; - - for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) { - struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i]; - - if (drm_mode_equal(&crtc_state->mode, &meson_mode->mode)) - return 0; - } + if (meson_cvbs_get_mode(&crtc_state->mode)) + return 0; return -EINVAL; } @@ -191,24 +204,17 @@ static void meson_venc_cvbs_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { + const struct meson_cvbs_mode *meson_mode = meson_cvbs_get_mode(mode); struct meson_venc_cvbs *meson_venc_cvbs = encoder_to_meson_venc_cvbs(encoder); struct meson_drm *priv = meson_venc_cvbs->priv; - int i; - for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) { - struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i]; + if (meson_mode) { + meson_venci_cvbs_mode_set(priv, meson_mode->enci); - if (drm_mode_equal(mode, &meson_mode->mode)) { - meson_venci_cvbs_mode_set(priv, - meson_mode->enci); - - /* Setup 27MHz vclk2 for ENCI and VDAC */ - meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS, - MESON_VCLK_CVBS, MESON_VCLK_CVBS, - MESON_VCLK_CVBS, true); - break; - } + /* Setup 27MHz vclk2 for ENCI and VDAC */ + meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS, MESON_VCLK_CVBS, + MESON_VCLK_CVBS, MESON_VCLK_CVBS, true); } } diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c index 397f8b0a9af8..b113876c2428 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.c +++ b/drivers/gpu/drm/mgag200/mgag200_drv.c @@ -30,7 +30,8 @@ module_param_named(modeset, mgag200_modeset, int, 0400); static struct drm_driver driver; static const struct pci_device_id pciidlist[] = { - { PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
G200_SE_A }, + { PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0, + G200_SE_A | MGAG200_FLAG_HW_BUG_NO_STARTADD}, { PCI_VENDOR_ID_MATROX, 0x524, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_B }, { PCI_VENDOR_ID_MATROX, 0x530, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_EV }, { PCI_VENDOR_ID_MATROX, 0x532, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_WB }, @@ -60,6 +61,35 @@ static void mga_pci_remove(struct pci_dev *pdev) DEFINE_DRM_GEM_FOPS(mgag200_driver_fops); +static bool mgag200_pin_bo_at_0(const struct mga_device *mdev) +{ + return mdev->flags & MGAG200_FLAG_HW_BUG_NO_STARTADD; +} + +int mgag200_driver_dumb_create(struct drm_file *file, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct mga_device *mdev = dev->dev_private; + unsigned long pg_align; + + if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized")) + return -EINVAL; + + pg_align = 0ul; + + /* + * Aligning scanout buffers to the size of the video ram forces + * placement at offset 0. Works around a bug where HW does not + * respect 'startadd' field. 
+ */ + if (mgag200_pin_bo_at_0(mdev)) + pg_align = PFN_UP(mdev->mc.vram_size); + + return drm_gem_vram_fill_create_dumb(file, dev, &dev->vram_mm->bdev, + pg_align, false, args); +} + static struct drm_driver driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET, .load = mgag200_driver_load, @@ -71,7 +101,10 @@ static struct drm_driver driver = { .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, - DRM_GEM_VRAM_DRIVER + .debugfs_init = drm_vram_mm_debugfs_init, + .dumb_create = mgag200_driver_dumb_create, + .dumb_map_offset = drm_gem_vram_driver_dumb_mmap_offset, + .gem_prime_mmap = drm_gem_prime_mmap, }; static struct pci_driver mgag200_pci_driver = { diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 0ea9a525e57d..aa32aad222c2 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -150,6 +150,12 @@ enum mga_type { G200_EW3, }; +/* HW does not handle 'startadd' field correct. 
*/ +#define MGAG200_FLAG_HW_BUG_NO_STARTADD (1ul << 8) + +#define MGAG200_TYPE_MASK (0x000000ff) +#define MGAG200_FLAG_MASK (0x00ffff00) + #define IS_G200_SE(mdev) (mdev->type == G200_SE_A || mdev->type == G200_SE_B) struct mga_device { @@ -181,6 +187,18 @@ struct mga_device { u32 unique_rev_id; }; +static inline enum mga_type +mgag200_type_from_driver_data(kernel_ulong_t driver_data) +{ + return (enum mga_type)(driver_data & MGAG200_TYPE_MASK); +} + +static inline unsigned long +mgag200_flags_from_driver_data(kernel_ulong_t driver_data) +{ + return driver_data & MGAG200_FLAG_MASK; +} + /* mgag200_mode.c */ int mgag200_modeset_init(struct mga_device *mdev); void mgag200_modeset_fini(struct mga_device *mdev); diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c index 5f74aabcd3df..e1bc5b0aa774 100644 --- a/drivers/gpu/drm/mgag200/mgag200_main.c +++ b/drivers/gpu/drm/mgag200/mgag200_main.c @@ -94,7 +94,8 @@ static int mgag200_device_init(struct drm_device *dev, struct mga_device *mdev = dev->dev_private; int ret, option; - mdev->type = flags; + mdev->flags = mgag200_flags_from_driver_data(flags); + mdev->type = mgag200_type_from_driver_data(flags); /* Hardcode the number of CRTCs to 1 */ mdev->num_crtc = 1; diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index e9160ce39cbb..6deaa7d01654 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -7,6 +7,7 @@ config DRM_MSM depends on OF && COMMON_CLK depends on MMU depends on INTERCONNECT || !INTERCONNECT + depends on QCOM_OCMEM || QCOM_OCMEM=n select QCOM_MDT_LOADER if ARCH_QCOM select REGULATOR select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 5f7e98028eaf..7ad14937fcdf 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -6,10 +6,6 @@ * Copyright (c) 2014 The Linux Foundation. All rights reserved. 
*/ -#ifdef CONFIG_MSM_OCMEM -# include <mach/ocmem.h> -#endif - #include "a3xx_gpu.h" #define A3XX_INT0_MASK \ @@ -195,9 +191,9 @@ static int a3xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000); /* Set the OCMEM base address for A330, etc */ - if (a3xx_gpu->ocmem_hdl) { + if (a3xx_gpu->ocmem.hdl) { gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, - (unsigned int)(a3xx_gpu->ocmem_base >> 14)); + (unsigned int)(a3xx_gpu->ocmem.base >> 14)); } /* Turn on performance counters: */ @@ -318,10 +314,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) adreno_gpu_cleanup(adreno_gpu); -#ifdef CONFIG_MSM_OCMEM - if (a3xx_gpu->ocmem_base) - ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl); -#endif + adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem); kfree(a3xx_gpu); } @@ -494,17 +487,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) /* if needed, allocate gmem: */ if (adreno_is_a330(adreno_gpu)) { -#ifdef CONFIG_MSM_OCMEM - /* TODO this is different/missing upstream: */ - struct ocmem_buf *ocmem_hdl = - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); - - a3xx_gpu->ocmem_hdl = ocmem_hdl; - a3xx_gpu->ocmem_base = ocmem_hdl->addr; - adreno_gpu->gmem = ocmem_hdl->len; - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, - a3xx_gpu->ocmem_base); -#endif + ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev, + adreno_gpu, &a3xx_gpu->ocmem); + if (ret) + goto fail; } if (!gpu->aspace) { diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h index 5dc33e5ea53b..c555fb13e0d7 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h @@ -19,8 +19,7 @@ struct a3xx_gpu { struct adreno_gpu base; /* if OCMEM is used for GMEM: */ - uint32_t ocmem_base; - void *ocmem_hdl; + struct adreno_ocmem ocmem; }; #define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 
ab2b752566d8..b01388a9e89e 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -2,9 +2,6 @@ /* Copyright (c) 2014 The Linux Foundation. All rights reserved. */ #include "a4xx_gpu.h" -#ifdef CONFIG_MSM_OCMEM -# include <soc/qcom/ocmem.h> -#endif #define A4XX_INT0_MASK \ (A4XX_INT0_RBBM_AHB_ERROR | \ @@ -188,7 +185,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) (1 << 30) | 0xFFFF); gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR, - (unsigned int)(a4xx_gpu->ocmem_base >> 14)); + (unsigned int)(a4xx_gpu->ocmem.base >> 14)); /* Turn on performance counters: */ gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); @@ -318,10 +315,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) adreno_gpu_cleanup(adreno_gpu); -#ifdef CONFIG_MSM_OCMEM - if (a4xx_gpu->ocmem_base) - ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl); -#endif + adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem); kfree(a4xx_gpu); } @@ -578,17 +572,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) /* if needed, allocate gmem: */ if (adreno_is_a4xx(adreno_gpu)) { -#ifdef CONFIG_MSM_OCMEM - /* TODO this is different/missing upstream: */ - struct ocmem_buf *ocmem_hdl = - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); - - a4xx_gpu->ocmem_hdl = ocmem_hdl; - a4xx_gpu->ocmem_base = ocmem_hdl->addr; - adreno_gpu->gmem = ocmem_hdl->len; - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, - a4xx_gpu->ocmem_base); -#endif + ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, + &a4xx_gpu->ocmem); + if (ret) + goto fail; } if (!gpu->aspace) { diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h index d506311ee240..a01448cba2ea 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h @@ -16,8 +16,7 @@ struct a4xx_gpu { struct adreno_gpu base; /* if OCMEM is used for GMEM: */ - uint32_t ocmem_base; - void *ocmem_hdl; + struct adreno_ocmem ocmem; }; #define to_a4xx_gpu(x) container_of(x, struct 
a4xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e9c55d1d6c04..b02e2042547f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -353,6 +353,9 @@ static int a5xx_me_init(struct msm_gpu *gpu) * 2D mode 3 draw */ OUT_RING(ring, 0x0000000B); + } else if (adreno_is_a510(adreno_gpu)) { + /* Workaround for token and syncs */ + OUT_RING(ring, 0x00000001); } else { /* No workarounds enabled */ OUT_RING(ring, 0x00000000); @@ -568,15 +571,24 @@ static int a5xx_hw_init(struct msm_gpu *gpu) 0x00100000 + adreno_gpu->gmem - 1); gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000); - gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); - if (adreno_is_a530(adreno_gpu)) - gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); - if (adreno_is_a540(adreno_gpu)) - gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); - gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); - gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); - - gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22)); + if (adreno_is_a510(adreno_gpu)) { + gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20); + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, + (0x200 << 11 | 0x200 << 22)); + } else { + gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); + if (adreno_is_a530(adreno_gpu)) + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); + if (adreno_is_a540(adreno_gpu)) + gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); + gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); + gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, + (0x400 << 11 | 0x300 << 22)); + } if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI) gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); @@ -589,6 +601,19 
@@ static int a5xx_hw_init(struct msm_gpu *gpu) /* Enable ME/PFP split notification */ gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); + /* + * In A5x, CCU can send context_done event of a particular context to + * UCHE which ultimately reaches CP even when there is valid + * transaction of that context inside CCU. This can let CP to program + * config registers, which will make the "valid transaction" inside + * CCU to be interpreted differently. This can cause gpu fault. This + * bug is fixed in latest A510 revision. To enable this bug fix - + * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1 + * (disable). For older A510 version this bit is unused. + */ + if (adreno_is_a510(adreno_gpu)) + gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0); + /* Enable HWCG */ a5xx_set_hwcg(gpu, true); @@ -635,7 +660,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) /* UCHE */ gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16)); - if (adreno_is_a530(adreno_gpu)) + if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu)) gpu_write(gpu, REG_A5XX_CP_PROTECT(17), ADRENO_PROTECT_RW(0x10000, 0x8000)); @@ -679,7 +704,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) a5xx_preempt_hw_init(gpu); - a5xx_gpmu_ucode_init(gpu); + if (!adreno_is_a510(adreno_gpu)) + a5xx_gpmu_ucode_init(gpu); ret = a5xx_ucode_init(gpu); if (ret) @@ -712,7 +738,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) } /* - * Try to load a zap shader into the secure world. If successful + * If the chip that we are using does support loading one, then + * try to load a zap shader into the secure world. If successful * we can use the CP to switch out of secure mode. If not then we * have no resource but to try to switch ourselves out manually. 
If we * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will @@ -1066,6 +1093,7 @@ static void a5xx_dump(struct msm_gpu *gpu) static int a5xx_pm_resume(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int ret; /* Turn on the core power */ @@ -1073,6 +1101,15 @@ static int a5xx_pm_resume(struct msm_gpu *gpu) if (ret) return ret; + if (adreno_is_a510(adreno_gpu)) { + /* Halt the sp_input_clk at HM level */ + gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055); + a5xx_set_hwcg(gpu, true); + /* Turn on sp_input_clk at HM level */ + gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0); + return 0; + } + /* Turn the RBCCU domain first to limit the chances of voltage droop */ gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); @@ -1101,9 +1138,17 @@ static int a5xx_pm_resume(struct msm_gpu *gpu) static int a5xx_pm_suspend(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + u32 mask = 0xf; + + /* A510 has 3 XIN ports in VBIF */ + if (adreno_is_a510(adreno_gpu)) + mask = 0x7; + /* Clear the VBIF pipe before shutting down */ - gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF); - spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF); + gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask); + spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & + mask) == mask); gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0); @@ -1289,7 +1334,7 @@ static void a5xx_gpu_state_destroy(struct kref *kref) kfree(a5xx_state); } -int a5xx_gpu_state_put(struct msm_gpu_state *state) +static int a5xx_gpu_state_put(struct msm_gpu_state *state) { if (IS_ERR_OR_NULL(state)) return 1; @@ -1299,8 +1344,8 @@ int a5xx_gpu_state_put(struct msm_gpu_state *state) #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) -void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, - struct drm_printer *p) +static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, + struct drm_printer *p) { int i, j; 
u32 pos = 0; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index a3a06db675ba..321a8061fd32 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -297,6 +297,10 @@ int a5xx_power_init(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); int ret; + /* Not all A5xx chips have a GPMU */ + if (adreno_is_a510(adreno_gpu)) + return 0; + /* Set up the limits management */ if (adreno_is_a530(adreno_gpu)) a530_lm_setup(gpu); @@ -326,6 +330,9 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) unsigned int *data, *ptr, *cmds; unsigned int cmds_size; + if (adreno_is_a510(adreno_gpu)) + return; + if (a5xx_gpu->gpmu_bo) return; diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 0888e0df660d..fbbdf86504f5 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -115,6 +115,21 @@ static const struct adreno_info gpulist[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .init = a4xx_gpu_init, }, { + .rev = ADRENO_REV(5, 1, 0, ANY_ID), + .revn = 510, + .name = "A510", + .fw = { + [ADRENO_FW_PM4] = "a530_pm4.fw", + [ADRENO_FW_PFP] = "a530_pfp.fw", + }, + .gmem = SZ_256K, + /* + * Increase inactive period to 250 to avoid bouncing + * the GDSC which appears to make it grumpy + */ + .inactive_period = 250, + .init = a5xx_gpu_init, + }, { .rev = ADRENO_REV(5, 3, 0, 2), .revn = 530, .name = "A530", diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 048c8be426f3..0783e4b5486a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -14,6 +14,7 @@ #include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/soc/qcom/mdt_loader.h> +#include <soc/qcom/ocmem.h> #include "adreno_gpu.h" #include "msm_gem.h" #include "msm_mmu.h" @@ -893,6 +894,45 @@ static int 
adreno_get_pwrlevels(struct device *dev, return 0; } +int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu, + struct adreno_ocmem *adreno_ocmem) +{ + struct ocmem_buf *ocmem_hdl; + struct ocmem *ocmem; + + ocmem = of_get_ocmem(dev); + if (IS_ERR(ocmem)) { + if (PTR_ERR(ocmem) == -ENODEV) { + /* + * Return success since either the ocmem property was + * not specified in device tree, or ocmem support is + * not compiled into the kernel. + */ + return 0; + } + + return PTR_ERR(ocmem); + } + + ocmem_hdl = ocmem_allocate(ocmem, OCMEM_GRAPHICS, adreno_gpu->gmem); + if (IS_ERR(ocmem_hdl)) + return PTR_ERR(ocmem_hdl); + + adreno_ocmem->ocmem = ocmem; + adreno_ocmem->base = ocmem_hdl->addr; + adreno_ocmem->hdl = ocmem_hdl; + adreno_gpu->gmem = ocmem_hdl->len; + + return 0; +} + +void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem) +{ + if (adreno_ocmem && adreno_ocmem->base) + ocmem_free(adreno_ocmem->ocmem, OCMEM_GRAPHICS, + adreno_ocmem->hdl); +} + int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs, int nr_rings) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index c7441fb8313e..e71a7570ef72 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -126,6 +126,12 @@ struct adreno_gpu { }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) +struct adreno_ocmem { + struct ocmem *ocmem; + unsigned long base; + void *hdl; +}; + /* platform config data (ie. 
from DT, or pdata) */ struct adreno_platform_config { struct adreno_rev rev; @@ -206,6 +212,11 @@ static inline int adreno_is_a430(struct adreno_gpu *gpu) return gpu->revn == 430; } +static inline int adreno_is_a510(struct adreno_gpu *gpu) +{ + return gpu->revn == 510; +} + static inline int adreno_is_a530(struct adreno_gpu *gpu) { return gpu->revn == 530; @@ -236,6 +247,10 @@ void adreno_dump(struct msm_gpu *gpu); void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); +int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu, + struct adreno_ocmem *ocmem); +void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *ocmem); + int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, int nr_rings); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c index cdbea38b8697..f1bc6a1af7a7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c @@ -55,8 +55,7 @@ static void dpu_core_irq_callback_handler(void *arg, int irq_idx) int dpu_core_irq_idx_lookup(struct dpu_kms *dpu_kms, enum dpu_intr_type intr_type, u32 instance_idx) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.irq_idx_lookup) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.irq_idx_lookup) return -EINVAL; return dpu_kms->hw_intr->ops.irq_idx_lookup(intr_type, @@ -73,7 +72,7 @@ static int _dpu_core_irq_enable(struct dpu_kms *dpu_kms, int irq_idx) unsigned long irq_flags; int ret = 0, enable_count; - if (!dpu_kms || !dpu_kms->hw_intr || + if (!dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts || !dpu_kms->irq_obj.irq_counts) { DPU_ERROR("invalid params\n"); @@ -114,7 +113,7 @@ int dpu_core_irq_enable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) { int i, ret = 0, counts; - if (!dpu_kms || !irq_idxs || !irq_count) { 
+ if (!irq_idxs || !irq_count) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -138,7 +137,7 @@ static int _dpu_core_irq_disable(struct dpu_kms *dpu_kms, int irq_idx) { int ret = 0, enable_count; - if (!dpu_kms || !dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) { + if (!dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -169,7 +168,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) { int i, ret = 0, counts; - if (!dpu_kms || !irq_idxs || !irq_count) { + if (!irq_idxs || !irq_count) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -186,7 +185,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count) u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, int irq_idx, bool clear) { - if (!dpu_kms || !dpu_kms->hw_intr || + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.get_interrupt_status) return 0; @@ -205,7 +204,7 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int irq_idx, { unsigned long irq_flags; - if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) { + if (!dpu_kms->irq_obj.irq_cb_tbl) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -240,7 +239,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, { unsigned long irq_flags; - if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) { + if (!dpu_kms->irq_obj.irq_cb_tbl) { DPU_ERROR("invalid params\n"); return -EINVAL; } @@ -274,8 +273,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx, static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms) { - if (!dpu_kms || !dpu_kms->hw_intr || - !dpu_kms->hw_intr->ops.clear_all_irqs) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.clear_all_irqs) return; dpu_kms->hw_intr->ops.clear_all_irqs(dpu_kms->hw_intr); @@ -283,8 +281,7 @@ static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms) static void dpu_disable_all_irqs(struct dpu_kms *dpu_kms) { - if (!dpu_kms || !dpu_kms->hw_intr || - 
!dpu_kms->hw_intr->ops.disable_all_irqs) + if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.disable_all_irqs) return; dpu_kms->hw_intr->ops.disable_all_irqs(dpu_kms->hw_intr); @@ -343,18 +340,8 @@ void dpu_debugfs_core_irq_init(struct dpu_kms *dpu_kms, void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) { - struct msm_drm_private *priv; int i; - if (!dpu_kms->dev) { - DPU_ERROR("invalid drm device\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device private\n"); - return; - } - priv = dpu_kms->dev->dev_private; - pm_runtime_get_sync(&dpu_kms->pdev->dev); dpu_clear_all_irqs(dpu_kms); dpu_disable_all_irqs(dpu_kms); @@ -379,18 +366,8 @@ void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms) void dpu_core_irq_uninstall(struct dpu_kms *dpu_kms) { - struct msm_drm_private *priv; int i; - if (!dpu_kms->dev) { - DPU_ERROR("invalid drm device\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device private\n"); - return; - } - priv = dpu_kms->dev->dev_private; - pm_runtime_get_sync(&dpu_kms->pdev->dev); for (i = 0; i < dpu_kms->irq_obj.total_irqs; i++) if (atomic_read(&dpu_kms->irq_obj.enable_counts[i]) || diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 09a49b59bb5b..11f2bebe3869 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -32,18 +32,7 @@ enum dpu_perf_mode { static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) { struct msm_drm_private *priv; - - if (!crtc->dev || !crtc->dev->dev_private) { - DPU_ERROR("invalid device\n"); - return NULL; - } - priv = crtc->dev->dev_private; - if (!priv || !priv->kms) { - DPU_ERROR("invalid kms\n"); - return NULL; - } - return to_dpu_kms(priv->kms); } @@ -116,7 +105,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc *crtc, } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid 
parameters\n"); return 0; } @@ -215,7 +204,6 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms, void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc) { struct dpu_crtc *dpu_crtc; - struct dpu_crtc_state *dpu_cstate; struct dpu_kms *kms; if (!crtc) { @@ -224,13 +212,12 @@ void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc) } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid kms\n"); return; } dpu_crtc = to_dpu_crtc(crtc); - dpu_cstate = to_dpu_crtc_state(crtc->state); if (atomic_dec_return(&kms->bandwidth_ref) > 0) return; @@ -287,7 +274,6 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc, u64 clk_rate = 0; struct dpu_crtc *dpu_crtc; struct dpu_crtc_state *dpu_cstate; - struct msm_drm_private *priv; struct dpu_kms *kms; int ret; @@ -297,11 +283,10 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc, } kms = _dpu_crtc_get_kms(crtc); - if (!kms || !kms->catalog) { + if (!kms->catalog) { DPU_ERROR("invalid kms\n"); return -EINVAL; } - priv = kms->dev->dev_private; dpu_crtc = to_dpu_crtc(crtc); dpu_cstate = to_dpu_crtc_state(crtc->state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index ce59adff06aa..f197dce54576 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -266,11 +266,20 @@ enum dpu_intf_mode dpu_crtc_get_intf_mode(struct drm_crtc *crtc) { struct drm_encoder *encoder; - if (!crtc || !crtc->dev) { + if (!crtc) { DPU_ERROR("invalid crtc\n"); return INTF_MODE_NONE; } + /* + * TODO: This function is called from dpu debugfs and as part of atomic + * check. When called from debugfs, the crtc->mutex must be held to + * read crtc->state. However reading crtc->state from atomic check isn't + * allowed (unless you have a good reason, a big comment, and a deep + * understanding of how the atomic/modeset locks work (<- and this is + * probably not possible)). 
So we'll keep the WARN_ON here for now, but + * really we need to figure out a better way to track our operating mode + */ WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); /* TODO: Returns the first INTF_MODE, could there be multiple values? */ @@ -694,7 +703,7 @@ static void dpu_crtc_disable(struct drm_crtc *crtc, unsigned long flags; bool release_bandwidth = false; - if (!crtc || !crtc->dev || !crtc->dev->dev_private || !crtc->state) { + if (!crtc || !crtc->state) { DPU_ERROR("invalid crtc\n"); return; } @@ -766,7 +775,7 @@ static void dpu_crtc_enable(struct drm_crtc *crtc, struct msm_drm_private *priv; bool request_bandwidth; - if (!crtc || !crtc->dev || !crtc->dev->dev_private) { + if (!crtc) { DPU_ERROR("invalid crtc\n"); return; } @@ -1288,13 +1297,8 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, { struct drm_crtc *crtc = NULL; struct dpu_crtc *dpu_crtc = NULL; - struct msm_drm_private *priv = NULL; - struct dpu_kms *kms = NULL; int i; - priv = dev->dev_private; - kms = to_dpu_kms(priv->kms); - dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL); if (!dpu_crtc) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index d82ea994063f..f96e142c4361 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -645,11 +645,6 @@ static void _dpu_encoder_update_vsync_source(struct dpu_encoder_virt *dpu_enc, priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } - hw_mdptop = dpu_kms->hw_mdp; if (!hw_mdptop) { DPU_ERROR("invalid mdptop\n"); @@ -735,8 +730,7 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, struct msm_drm_private *priv; bool is_vid_mode = false; - if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private || - !drm_enc->crtc) { + if (!drm_enc || !drm_enc->dev || !drm_enc->crtc) { DPU_ERROR("invalid 
parameters\n"); return -EINVAL; } @@ -1092,17 +1086,13 @@ static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc) struct msm_drm_private *priv; struct dpu_kms *dpu_kms; - if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc || !drm_enc->dev) { DPU_ERROR("invalid parameters\n"); return; } priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } dpu_enc = to_dpu_encoder_virt(drm_enc); if (!dpu_enc || !dpu_enc->cur_master) { @@ -1184,7 +1174,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) struct dpu_encoder_virt *dpu_enc = NULL; struct msm_drm_private *priv; struct dpu_kms *dpu_kms; - struct drm_display_mode *mode; int i = 0; if (!drm_enc) { @@ -1193,9 +1182,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) } else if (!drm_enc->dev) { DPU_ERROR("invalid dev\n"); return; - } else if (!drm_enc->dev->dev_private) { - DPU_ERROR("invalid dev_private\n"); - return; } dpu_enc = to_dpu_encoder_virt(drm_enc); @@ -1204,8 +1190,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) mutex_lock(&dpu_enc->enc_lock); dpu_enc->enabled = false; - mode = &drm_enc->crtc->state->adjusted_mode; - priv = drm_enc->dev->dev_private; dpu_kms = to_dpu_kms(priv->kms); @@ -1734,8 +1718,7 @@ static void dpu_encoder_vsync_event_handler(struct timer_list *t) struct msm_drm_private *priv; struct msm_drm_thread *event_thread; - if (!drm_enc->dev || !drm_enc->dev->dev_private || - !drm_enc->crtc) { + if (!drm_enc->dev || !drm_enc->crtc) { DPU_ERROR("invalid parameters\n"); return; } @@ -1914,8 +1897,6 @@ static int _dpu_encoder_debugfs_status_open(struct inode *inode, static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) { struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); - struct msm_drm_private *priv; - struct dpu_kms *dpu_kms; int i; static const struct file_operations debugfs_status_fops = { @@ 
-1927,14 +1908,11 @@ static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) char name[DPU_NAME_SIZE]; - if (!drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc->dev) { DPU_ERROR("invalid encoder or kms\n"); return -EINVAL; } - priv = drm_enc->dev->dev_private; - dpu_kms = to_dpu_kms(priv->kms); - snprintf(name, DPU_NAME_SIZE, "encoder%u", drm_enc->base.id); /* create overall sub-directory for the encoder */ @@ -2042,9 +2020,8 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc, enum dpu_intf_type intf_type; struct dpu_enc_phys_init_params phys_params; - if (!dpu_enc || !dpu_kms) { - DPU_ERROR("invalid arg(s), enc %d kms %d\n", - dpu_enc != 0, dpu_kms != 0); + if (!dpu_enc) { + DPU_ERROR("invalid arg(s), enc %d\n", dpu_enc != 0); return -EINVAL; } @@ -2133,14 +2110,12 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) struct dpu_encoder_virt *dpu_enc = from_timer(dpu_enc, t, frame_done_timer); struct drm_encoder *drm_enc = &dpu_enc->base; - struct msm_drm_private *priv; u32 event; - if (!drm_enc->dev || !drm_enc->dev->dev_private) { + if (!drm_enc->dev) { DPU_ERROR("invalid parameters\n"); return; } - priv = drm_enc->dev->dev_private; if (!dpu_enc->frame_busy_mask[0] || !dpu_enc->crtc_frame_event_cb) { DRM_DEBUG_KMS("id:%u invalid timeout frame_busy_mask=%lu\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index 2923b63d95fe..047960949fbb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -124,13 +124,11 @@ static void dpu_encoder_phys_cmd_pp_rd_ptr_irq(void *arg, int irq_idx) static void dpu_encoder_phys_cmd_ctl_start_irq(void *arg, int irq_idx) { struct dpu_encoder_phys *phys_enc = arg; - struct dpu_encoder_phys_cmd *cmd_enc; if (!phys_enc || !phys_enc->hw_ctl) return; DPU_ATRACE_BEGIN("ctl_start_irq"); - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); 
atomic_add_unless(&phys_enc->pending_ctlstart_cnt, -1, 0); @@ -316,13 +314,9 @@ end: static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc, bool enable) { - struct dpu_encoder_phys_cmd *cmd_enc; - if (!phys_enc) return; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_pp->idx - PINGPONG_0, enable, atomic_read(&phys_enc->vblank_refcount)); @@ -355,7 +349,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config( struct drm_display_mode *mode; bool tc_enable = true; u32 vsync_hz; - struct msm_drm_private *priv; struct dpu_kms *dpu_kms; if (!phys_enc || !phys_enc->hw_pp) { @@ -373,11 +366,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config( } dpu_kms = phys_enc->dpu_kms; - if (!dpu_kms || !dpu_kms->dev || !dpu_kms->dev->dev_private) { - DPU_ERROR("invalid device\n"); - return; - } - priv = dpu_kms->dev->dev_private; /* * TE default: dsi byte clock calculated base on 70 fps; @@ -650,13 +638,10 @@ static int dpu_encoder_phys_cmd_wait_for_tx_complete( struct dpu_encoder_phys *phys_enc) { int rc; - struct dpu_encoder_phys_cmd *cmd_enc; if (!phys_enc) return -EINVAL; - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - rc = _dpu_encoder_phys_cmd_wait_for_idle(phys_enc); if (rc) { DRM_ERROR("failed wait_for_idle: id:%u ret:%d intf:%d\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index b9c84fb4d4a1..3123ef873cdf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -374,7 +374,7 @@ static void dpu_encoder_phys_vid_mode_set( struct drm_display_mode *mode, struct drm_display_mode *adj_mode) { - if (!phys_enc || !phys_enc->dpu_kms) { + if (!phys_enc) { DPU_ERROR("invalid encoder/kms\n"); return; } @@ -566,16 +566,13 @@ static void dpu_encoder_phys_vid_prepare_for_kickoff( static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys 
*phys_enc) { - struct msm_drm_private *priv; unsigned long lock_flags; int ret; - if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev || - !phys_enc->parent->dev->dev_private) { + if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev) { DPU_ERROR("invalid encoder/device\n"); return; } - priv = phys_enc->parent->dev->dev_private; if (!phys_enc->hw_intf || !phys_enc->hw_ctl) { DPU_ERROR("invalid hw_intf %d hw_ctl %d\n", diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 58b0485dc375..6c92f0fbeac9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -30,10 +30,6 @@ #define CREATE_TRACE_POINTS #include "dpu_trace.h" -static const char * const iommu_ports[] = { - "mdp_0", -}; - /* * To enable overall DRM driver logging * # echo 0x2 > /sys/module/drm/parameters/debug @@ -68,16 +64,14 @@ static int _dpu_danger_signal_status(struct seq_file *s, bool danger_status) { struct dpu_kms *kms = (struct dpu_kms *)s->private; - struct msm_drm_private *priv; struct dpu_danger_safe_status status; int i; - if (!kms->dev || !kms->dev->dev_private || !kms->hw_mdp) { + if (!kms->hw_mdp) { DPU_ERROR("invalid arg(s)\n"); return 0; } - priv = kms->dev->dev_private; memset(&status, 0, sizeof(struct dpu_danger_safe_status)); pm_runtime_get_sync(&kms->pdev->dev); @@ -153,13 +147,7 @@ static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data) return 0; dev = dpu_kms->dev; - if (!dev) - return 0; - priv = dev->dev_private; - if (!priv) - return 0; - base = dpu_kms->mmio + regset->offset; /* insert padding spaces, if needed */ @@ -280,7 +268,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state) { struct dpu_kms *dpu_kms; - struct msm_drm_private *priv; struct drm_device *dev; struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; @@ -292,10 +279,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms, dpu_kms = to_dpu_kms(kms); 
dev = dpu_kms->dev; - if (!dev || !dev->dev_private) - return; - priv = dev->dev_private; - /* Call prepare_commit for all affected encoders */ for_each_new_crtc_in_state(state, crtc, crtc_state, i) { drm_for_each_encoder_mask(encoder, crtc->dev, @@ -333,7 +316,6 @@ void dpu_kms_encoder_enable(struct drm_encoder *encoder) if (funcs && funcs->commit) funcs->commit(encoder); - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); drm_for_each_crtc(crtc, dev) { if (!(crtc->state->encoder_mask & drm_encoder_mask(encoder))) continue; @@ -464,16 +446,6 @@ static void _dpu_kms_drm_obj_destroy(struct dpu_kms *dpu_kms) struct msm_drm_private *priv; int i; - if (!dpu_kms) { - DPU_ERROR("invalid dpu_kms\n"); - return; - } else if (!dpu_kms->dev) { - DPU_ERROR("invalid dev\n"); - return; - } else if (!dpu_kms->dev->dev_private) { - DPU_ERROR("invalid dev_private\n"); - return; - } priv = dpu_kms->dev->dev_private; for (i = 0; i < priv->num_crtcs; i++) @@ -505,7 +477,6 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms) int primary_planes_idx = 0, cursor_planes_idx = 0, i, ret; int max_crtc_count; - dev = dpu_kms->dev; priv = dev->dev_private; catalog = dpu_kms->catalog; @@ -585,8 +556,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) int i; dev = dpu_kms->dev; - if (!dev) - return; if (dpu_kms->hw_intr) dpu_hw_intr_destroy(dpu_kms->hw_intr); @@ -725,8 +694,7 @@ static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms) mmu = dpu_kms->base.aspace->mmu; - mmu->funcs->detach(mmu, (const char **)iommu_ports, - ARRAY_SIZE(iommu_ports)); + mmu->funcs->detach(mmu); msm_gem_address_space_put(dpu_kms->base.aspace); dpu_kms->base.aspace = NULL; @@ -752,8 +720,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) return PTR_ERR(aspace); } - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { DPU_ERROR("failed to attach iommu %d\n", ret); 
msm_gem_address_space_put(aspace); @@ -803,16 +770,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) dpu_kms = to_dpu_kms(kms); dev = dpu_kms->dev; - if (!dev) { - DPU_ERROR("invalid device\n"); - return rc; - } - priv = dev->dev_private; - if (!priv) { - DPU_ERROR("invalid private data\n"); - return rc; - } atomic_set(&dpu_kms->bandwidth_ref, 0); @@ -974,7 +932,7 @@ struct msm_kms *dpu_kms_init(struct drm_device *dev) struct dpu_kms *dpu_kms; int irq; - if (!dev || !dev->dev_private) { + if (!dev) { DPU_ERROR("drm device node invalid\n"); return ERR_PTR(-EINVAL); } @@ -1064,11 +1022,6 @@ static int __maybe_unused dpu_runtime_suspend(struct device *dev) struct dss_module_power *mp = &dpu_kms->mp; ddev = dpu_kms->dev; - if (!ddev) { - DPU_ERROR("invalid drm_device\n"); - return rc; - } - rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, false); if (rc) DPU_ERROR("clock disable failed rc:%d\n", rc); @@ -1086,11 +1039,6 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev) struct dss_module_power *mp = &dpu_kms->mp; ddev = dpu_kms->dev; - if (!ddev) { - DPU_ERROR("invalid drm_device\n"); - return rc; - } - rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true); if (rc) { DPU_ERROR("clock enable failed rc:%d\n", rc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 959d03e007fa..c6169e7df19d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -139,10 +139,6 @@ struct vsync_info { #define to_dpu_kms(x) container_of(x, struct dpu_kms, base) -/* get struct msm_kms * from drm_device * */ -#define ddev_to_msm_kms(D) ((D) && (D)->dev_private ? 
\ - ((struct msm_drm_private *)((D)->dev_private))->kms : NULL) - /** * Debugfs functions - extra helper functions for debugfs support * diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c index 8d24b79fd400..991f4c8f8a12 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c @@ -154,10 +154,6 @@ void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms, u32 ot_lim; int ret, i; - if (!dpu_kms) { - DPU_ERROR("invalid arguments\n"); - return; - } mdp = dpu_kms->hw_mdp; for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { @@ -214,7 +210,7 @@ void dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms, const struct dpu_vbif_qos_tbl *qos_tbl; int i; - if (!dpu_kms || !params || !dpu_kms->hw_mdp) { + if (!params || !dpu_kms->hw_mdp) { DPU_ERROR("invalid arguments\n"); return; } diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 50711ccc8691..dda05436f716 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -157,10 +157,6 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, } } -static const char * const iommu_ports[] = { - "mdp_port0_cb0", "mdp_port1_cb0", -}; - static void mdp4_destroy(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); @@ -172,8 +168,7 @@ static void mdp4_destroy(struct msm_kms *kms) drm_gem_object_put_unlocked(mdp4_kms->blank_cursor_bo); if (aspace) { - aspace->mmu->funcs->detach(aspace->mmu, - iommu_ports, ARRAY_SIZE(iommu_ports)); + aspace->mmu->funcs->detach(aspace->mmu); msm_gem_address_space_put(aspace); } @@ -524,8 +519,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) kms->aspace = aspace; - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) goto fail; } else { diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c 
b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index f6e71ff539ca..1f48f64539a2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -14,7 +14,7 @@ struct mdp5_cfg_handler { /* mdp5_cfg must be exposed (used in mdp5.xml.h) */ const struct mdp5_cfg_hw *mdp5_cfg = NULL; -const struct mdp5_cfg_hw msm8x74v1_config = { +static const struct mdp5_cfg_hw msm8x74v1_config = { .name = "msm8x74v1", .mdp = { .count = 1, @@ -98,7 +98,7 @@ const struct mdp5_cfg_hw msm8x74v1_config = { .max_clk = 200000000, }; -const struct mdp5_cfg_hw msm8x74v2_config = { +static const struct mdp5_cfg_hw msm8x74v2_config = { .name = "msm8x74", .mdp = { .count = 1, @@ -180,7 +180,7 @@ const struct mdp5_cfg_hw msm8x74v2_config = { .max_clk = 200000000, }; -const struct mdp5_cfg_hw apq8084_config = { +static const struct mdp5_cfg_hw apq8084_config = { .name = "apq8084", .mdp = { .count = 1, @@ -275,7 +275,7 @@ const struct mdp5_cfg_hw apq8084_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8x16_config = { +static const struct mdp5_cfg_hw msm8x16_config = { .name = "msm8x16", .mdp = { .count = 1, @@ -342,7 +342,7 @@ const struct mdp5_cfg_hw msm8x16_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8x94_config = { +static const struct mdp5_cfg_hw msm8x94_config = { .name = "msm8x94", .mdp = { .count = 1, @@ -437,7 +437,7 @@ const struct mdp5_cfg_hw msm8x94_config = { .max_clk = 400000000, }; -const struct mdp5_cfg_hw msm8x96_config = { +static const struct mdp5_cfg_hw msm8x96_config = { .name = "msm8x96", .mdp = { .count = 1, @@ -545,7 +545,104 @@ const struct mdp5_cfg_hw msm8x96_config = { .max_clk = 412500000, }; -const struct mdp5_cfg_hw msm8917_config = { +const struct mdp5_cfg_hw msm8x76_config = { + .name = "msm8x76", + .mdp = { + .count = 1, + .caps = MDP_CAP_SMP | + MDP_CAP_DSC | + MDP_CAP_SRC_SPLIT | + 0, + }, + .ctl = { + .count = 3, + .base = { 0x01000, 0x01200, 0x01400 }, + .flush_hw_mask = 0xffffffff, + }, + 
.smp = { + .mmb_count = 10, + .mmb_size = 10240, + .clients = { + [SSPP_VIG0] = 1, [SSPP_VIG1] = 9, + [SSPP_DMA0] = 4, + [SSPP_RGB0] = 7, [SSPP_RGB1] = 8, + }, + }, + .pipe_vig = { + .count = 2, + .base = { 0x04000, 0x06000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SCALE | + MDP_PIPE_CAP_CSC | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_rgb = { + .count = 2, + .base = { 0x14000, 0x16000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_DECIMATION | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_dma = { + .count = 1, + .base = { 0x24000 }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + 0, + }, + .pipe_cursor = { + .count = 1, + .base = { 0x440DC }, + .caps = MDP_PIPE_CAP_HFLIP | + MDP_PIPE_CAP_VFLIP | + MDP_PIPE_CAP_SW_PIX_EXT | + MDP_PIPE_CAP_CURSOR | + 0, + }, + .lm = { + .count = 2, + .base = { 0x44000, 0x45000 }, + .instances = { + { .id = 0, .pp = 0, .dspp = 0, + .caps = MDP_LM_CAP_DISPLAY, }, + { .id = 1, .pp = -1, .dspp = -1, + .caps = MDP_LM_CAP_WB }, + }, + .nb_stages = 8, + .max_width = 2560, + .max_height = 0xFFFF, + }, + .dspp = { + .count = 1, + .base = { 0x54000 }, + + }, + .pp = { + .count = 3, + .base = { 0x70000, 0x70800, 0x72000 }, + }, + .dsc = { + .count = 2, + .base = { 0x80000, 0x80400 }, + }, + .intf = { + .base = { 0x6a000, 0x6a800, 0x6b000 }, + .connect = { + [0] = INTF_DISABLED, + [1] = INTF_DSI, + [2] = INTF_DSI, + }, + }, + .max_clk = 360000000, +}; + +static const struct mdp5_cfg_hw msm8917_config = { .name = "msm8917", .mdp = { .count = 1, @@ -630,7 +727,7 @@ const struct mdp5_cfg_hw msm8917_config = { .max_clk = 320000000, }; -const struct mdp5_cfg_hw msm8998_config = { +static const struct mdp5_cfg_hw msm8998_config = { .name = "msm8998", .mdp = { .count = 1, @@ -745,6 +842,7 @@ static const struct mdp5_cfg_handler cfg_handlers_v1[] = { { .revision = 6, .config = { .hw = &msm8x16_config } }, { .revision = 9, .config = 
{ .hw = &msm8x94_config } }, { .revision = 7, .config = { .hw = &msm8x96_config } }, + { .revision = 11, .config = { .hw = &msm8x76_config } }, { .revision = 15, .config = { .hw = &msm8917_config } }, }; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index eb0b4b7dc7cc..05cc04f729d6 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -214,7 +214,6 @@ static void blend_setup(struct drm_crtc *crtc) struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline; struct mdp5_kms *mdp5_kms = get_kms(crtc); struct drm_plane *plane; - const struct mdp5_cfg_hw *hw_cfg; struct mdp5_plane_state *pstate, *pstates[STAGE_MAX + 1] = {NULL}; const struct mdp_format *format; struct mdp5_hw_mixer *mixer = pipeline->mixer; @@ -232,8 +231,6 @@ static void blend_setup(struct drm_crtc *crtc) u32 val; #define blender(stage) ((stage) - STAGE0) - hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); - spin_lock_irqsave(&mdp5_crtc->lm_lock, flags); /* ctl could be released already when we are shutting down: */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 91cd76a2bab1..e43ecd4be10a 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -19,10 +19,6 @@ #include "msm_mmu.h" #include "mdp5_kms.h" -static const char *iommu_ports[] = { - "mdp_0", -}; - static int mdp5_hw_init(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); @@ -233,8 +229,7 @@ static void mdp5_kms_destroy(struct msm_kms *kms) mdp5_pipe_destroy(mdp5_kms->hwpipes[i]); if (aspace) { - aspace->mmu->funcs->detach(aspace->mmu, - iommu_ports, ARRAY_SIZE(iommu_ports)); + aspace->mmu->funcs->detach(aspace->mmu); msm_gem_address_space_put(aspace); } } @@ -314,6 +309,10 @@ int mdp5_disable(struct mdp5_kms *mdp5_kms) mdp5_kms->enable_count--; WARN_ON(mdp5_kms->enable_count < 0); + if (mdp5_kms->tbu_rt_clk) + 
clk_disable_unprepare(mdp5_kms->tbu_rt_clk); + if (mdp5_kms->tbu_clk) + clk_disable_unprepare(mdp5_kms->tbu_clk); clk_disable_unprepare(mdp5_kms->ahb_clk); clk_disable_unprepare(mdp5_kms->axi_clk); clk_disable_unprepare(mdp5_kms->core_clk); @@ -334,6 +333,10 @@ int mdp5_enable(struct mdp5_kms *mdp5_kms) clk_prepare_enable(mdp5_kms->core_clk); if (mdp5_kms->lut_clk) clk_prepare_enable(mdp5_kms->lut_clk); + if (mdp5_kms->tbu_clk) + clk_prepare_enable(mdp5_kms->tbu_clk); + if (mdp5_kms->tbu_rt_clk) + clk_prepare_enable(mdp5_kms->tbu_rt_clk); return 0; } @@ -466,14 +469,11 @@ static int modeset_init(struct mdp5_kms *mdp5_kms) { struct drm_device *dev = mdp5_kms->dev; struct msm_drm_private *priv = dev->dev_private; - const struct mdp5_cfg_hw *hw_cfg; unsigned int num_crtcs; int i, ret, pi = 0, ci = 0; struct drm_plane *primary[MAX_BASES] = { NULL }; struct drm_plane *cursor[MAX_BASES] = { NULL }; - hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); - /* * Construct encoders and modeset initialize connector devices * for each external display interface. @@ -737,8 +737,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) kms->aspace = aspace; - ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, - ARRAY_SIZE(iommu_ports)); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { DRM_DEV_ERROR(&pdev->dev, "failed to attach iommu: %d\n", ret); @@ -974,6 +973,8 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) /* optional clocks: */ get_clk(pdev, &mdp5_kms->lut_clk, "lut", false); + get_clk(pdev, &mdp5_kms->tbu_clk, "tbu", false); + get_clk(pdev, &mdp5_kms->tbu_rt_clk, "tbu_rt", false); /* we need to set a default rate before enabling. 
Set a safe * rate first, then figure out hw revision, and then set a diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h index d1bf4fdfc815..128866742593 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h @@ -53,6 +53,8 @@ struct mdp5_kms { struct clk *ahb_clk; struct clk *core_clk; struct clk *lut_clk; + struct clk *tbu_clk; + struct clk *tbu_rt_clk; struct clk *vsync_clk; /* diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c index b31cfb554fa2..d7fa2c49e741 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c @@ -121,7 +121,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp, struct mdp5_kms *mdp5_kms = get_kms(smp); int rev = mdp5_cfg_get_hw_rev(mdp5_kms->cfg); int i, hsub, nplanes, nlines; - u32 fmt = format->base.pixel_format; uint32_t blkcfg = 0; nplanes = info->num_planes; @@ -135,7 +134,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp, * them together, writes to SMP using a single client. 
*/ if ((rev > 0) && (format->chroma_sample > CHROMA_FULL)) { - fmt = DRM_FORMAT_NV24; nplanes = 2; /* if decimation is enabled, HW decimates less on the diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index b7b7c1a9164a..86ad3fdf207d 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -66,6 +66,26 @@ static const struct msm_dsi_config msm8916_dsi_cfg = { .num_dsi = 1, }; +static const char * const dsi_8976_bus_clk_names[] = { + "mdp_core", "iface", "bus", +}; + +static const struct msm_dsi_config msm8976_dsi_cfg = { + .io_offset = DSI_6G_REG_SHIFT, + .reg_cfg = { + .num = 3, + .regs = { + {"gdsc", -1, -1}, + {"vdda", 100000, 100}, /* 1.2 V */ + {"vddio", 100000, 100}, /* 1.8 V */ + }, + }, + .bus_clk_names = dsi_8976_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_8976_bus_clk_names), + .io_start = { 0x1a94000, 0x1a96000 }, + .num_dsi = 2, +}; + static const struct msm_dsi_config msm8994_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { @@ -147,7 +167,7 @@ static const struct msm_dsi_config sdm845_dsi_cfg = { .num_dsi = 2, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { .link_clk_enable = dsi_link_clk_enable_v2, .link_clk_disable = dsi_link_clk_disable_v2, .clk_init_ver = dsi_clk_init_v2, @@ -158,7 +178,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { .calc_clk_rate = dsi_calc_clk_rate_v2, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { +static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = NULL, @@ -169,7 +189,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = { .calc_clk_rate = dsi_calc_clk_rate_6g, }; -const static struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = { +static const struct msm_dsi_host_cfg_ops 
msm_dsi_6g_v2_host_ops = { .link_clk_enable = dsi_link_clk_enable_6g, .link_clk_disable = dsi_link_clk_disable_6g, .clk_init_ver = dsi_clk_init_6g_v2, @@ -197,6 +217,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &msm8916_dsi_cfg, &msm_dsi_6g_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_1, &msm8996_dsi_cfg, &msm_dsi_6g_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_2, + &msm8976_dsi_cfg, &msm_dsi_6g_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_0, &msm8998_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_1, diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index e2b7a7dfbe49..50a37ceb6a25 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -17,6 +17,7 @@ #define MSM_DSI_6G_VER_MINOR_V1_3 0x10030000 #define MSM_DSI_6G_VER_MINOR_V1_3_1 0x10030001 #define MSM_DSI_6G_VER_MINOR_V1_4_1 0x10040001 +#define MSM_DSI_6G_VER_MINOR_V1_4_2 0x10040002 #define MSM_DSI_6G_VER_MINOR_V2_2_0 0x20000000 #define MSM_DSI_6G_VER_MINOR_V2_2_1 0x20020001 diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 1e7b1be25bb0..458cec82ae13 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1293,14 +1293,13 @@ static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len) static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host, u8 *buf, int rx_byte, int pkt_size) { - u32 *lp, *temp, data; + u32 *temp, data; int i, j = 0, cnt; u32 read_cnt; u8 reg[16]; int repeated_bytes = 0; int buf_offset = buf - msm_host->rx_buf; - lp = (u32 *)buf; temp = (u32 *)reg; cnt = (rx_byte + 3) >> 2; if (cnt > 4) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 3522863a4984..b0cfa67d2a57 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -145,7 +145,7 @@ int 
msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, { const unsigned long bit_rate = clk_req->bitclk_rate; const unsigned long esc_rate = clk_req->escclk_rate; - s32 ui, ui_x8, lpx; + s32 ui, ui_x8; s32 tmax, tmin; s32 pcnt0 = 50; s32 pcnt1 = 50; @@ -175,7 +175,6 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); ui_x8 = ui << 3; - lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); temp = S_DIV_ROUND_UP(38 * coeff - val_ckln * ui, ui_x8); tmin = max_t(s32, temp, 0); @@ -262,7 +261,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, { const unsigned long bit_rate = clk_req->bitclk_rate; const unsigned long esc_rate = clk_req->escclk_rate; - s32 ui, ui_x8, lpx; + s32 ui, ui_x8; s32 tmax, tmin; s32 pcnt0 = 50; s32 pcnt1 = 50; @@ -284,7 +283,6 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); ui_x8 = ui << 3; - lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); temp = S_DIV_ROUND_UP(38 * coeff, ui_x8); tmin = max_t(s32, temp, 0); @@ -485,6 +483,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { #ifdef CONFIG_DRM_MSM_DSI_28NM_PHY { .compatible = "qcom,dsi-phy-28nm-hpm", .data = &dsi_phy_28nm_hpm_cfgs }, + { .compatible = "qcom,dsi-phy-28nm-hpm-fam-b", + .data = &dsi_phy_28nm_hpm_famb_cfgs }, { .compatible = "qcom,dsi-phy-28nm-lp", .data = &dsi_phy_28nm_lp_cfgs }, #endif diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c4069ce6afe6..24b294ed3059 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -40,6 +40,7 @@ struct msm_dsi_phy_cfg { }; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; extern 
const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index b3f678f6c2aa..c3c580cfd8b1 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -39,15 +39,10 @@ static void dsi_28nm_dphy_set_timing(struct msm_dsi_phy *phy, DSI_28nm_PHY_TIMING_CTRL_11_TRIG3_CMD(0)); } -static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) +static void dsi_28nm_phy_regulator_enable_dcdc(struct msm_dsi_phy *phy) { void __iomem *base = phy->reg_base; - if (!enable) { - dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); - return; - } - dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 1); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0); @@ -56,6 +51,39 @@ static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x9); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x7); dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20); + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00); +} + +static void dsi_28nm_phy_regulator_enable_ldo(struct msm_dsi_phy *phy) +{ + void __iomem *base = phy->reg_base; + + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0x7); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_3, 0); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_2, 0x1); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x1); + dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20); + + if (phy->cfg->type == MSM_DSI_PHY_28NM_LP) + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x05); + else + dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x0d); +} + 
+static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable) +{ + if (!enable) { + dsi_phy_write(phy->reg_base + + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0); + return; + } + + if (phy->regulator_ldo_mode) + dsi_28nm_phy_regulator_enable_ldo(phy); + else + dsi_28nm_phy_regulator_enable_dcdc(phy); } static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, @@ -77,8 +105,6 @@ static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, dsi_28nm_phy_regulator_ctrl(phy, true); - dsi_phy_write(base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00); - dsi_28nm_dphy_set_timing(phy, timing); dsi_phy_write(base + REG_DSI_28nm_PHY_CTRL_1, 0x00); @@ -142,6 +168,24 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs = { .num_dsi_phy = 2, }; +const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs = { + .type = MSM_DSI_PHY_28NM_HPM, + .src_pll_truthtable = { {true, true}, {false, true} }, + .reg_cfg = { + .num = 1, + .regs = { + {"vddio", 100000, 100}, + }, + }, + .ops = { + .enable = dsi_28nm_phy_enable, + .disable = dsi_28nm_phy_disable, + .init = msm_dsi_phy_init_common, + }, + .io_start = { 0x1a94400, 0x1a96400 }, + .num_dsi_phy = 2, +}; + const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = { .type = MSM_DSI_PHY_28NM_LP, .src_pll_truthtable = { {true, true}, {true, true} }, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c index 1697e61f9c2f..8a38d4b95102 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c @@ -29,8 +29,12 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy) reg = devm_regulator_get(dev, cfg->reg_names[i]); if (IS_ERR(reg)) { ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev, "failed to get phy regulator: %s (%d)\n", - cfg->reg_names[i], ret); + if (ret != -EPROBE_DEFER) { + DRM_DEV_ERROR(dev, + "failed to get phy regulator: %s (%d)\n", + cfg->reg_names[i], ret); + } + return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c 
b/drivers/gpu/drm/msm/msm_gpu.c index a052364a5d74..18f3a5c53ffb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -16,6 +16,7 @@ #include <linux/pm_opp.h> #include <linux/devfreq.h> #include <linux/devcoredump.h> +#include <linux/sched/task.h> /* * Power Management: @@ -838,7 +839,7 @@ msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, return ERR_CAST(aspace); } - ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0); + ret = aspace->mmu->funcs->attach(aspace->mmu); if (ret) { msm_gem_address_space_put(aspace); return ERR_PTR(ret); @@ -995,8 +996,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); if (!IS_ERR_OR_NULL(gpu->aspace)) { - gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, - NULL, 0); + gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu); msm_gem_address_space_put(gpu->aspace); } } diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 34f643a0c28a..34980d8eb7ad 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -21,14 +21,12 @@ struct msm_gpummu { #define GPUMMU_PAGE_SIZE SZ_4K #define TABLE_SIZE (sizeof(uint32_t) * GPUMMU_VA_RANGE / GPUMMU_PAGE_SIZE) -static int msm_gpummu_attach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static int msm_gpummu_attach(struct msm_mmu *mmu) { return 0; } -static void msm_gpummu_detach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static void msm_gpummu_detach(struct msm_mmu *mmu) { } diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 8c95c31e2b12..ad58cfe5998e 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -23,16 +23,14 @@ static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, return 0; } -static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static int msm_iommu_attach(struct msm_mmu 
*mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); return iommu_attach_device(iommu->domain, mmu->dev); } -static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names, - int cnt) +static void msm_iommu_detach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 871d56303697..67a623f14319 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -10,8 +10,8 @@ #include <linux/iommu.h> struct msm_mmu_funcs { - int (*attach)(struct msm_mmu *mmu, const char * const *names, int cnt); - void (*detach)(struct msm_mmu *mmu, const char * const *names, int cnt); + int (*attach)(struct msm_mmu *mmu); + void (*detach)(struct msm_mmu *mmu); int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt, unsigned len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, unsigned len); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index c7832a951039..af7ceb246c7c 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -298,7 +298,7 @@ void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) static void snapshot_buf(struct msm_rd_state *rd, struct msm_gem_submit *submit, int idx, - uint64_t iova, uint32_t size) + uint64_t iova, uint32_t size, bool full) { struct msm_gem_object *obj = submit->bos[idx].obj; unsigned offset = 0; @@ -318,6 +318,9 @@ static void snapshot_buf(struct msm_rd_state *rd, rd_write_section(rd, RD_GPUADDR, (uint32_t[3]){ iova, size, iova >> 32 }, 12); + if (!full) + return; + /* But only dump the contents of buffers marked READ */ if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ)) return; @@ -381,18 +384,21 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); for (i = 0; i < submit->nr_bos; i++) - if (should_dump(submit, i)) - snapshot_buf(rd, submit, i, 0, 0); + snapshot_buf(rd, 
submit, i, 0, 0, should_dump(submit, i)); for (i = 0; i < submit->nr_cmds; i++) { - uint64_t iova = submit->cmd[i].iova; uint32_t szd = submit->cmd[i].size; /* in dwords */ /* snapshot cmdstream bo's (if we haven't already): */ if (!should_dump(submit, i)) { snapshot_buf(rd, submit, submit->cmd[i].idx, - submit->cmd[i].iova, szd * 4); + submit->cmd[i].iova, szd * 4, true); } + } + + for (i = 0; i < submit->nr_cmds; i++) { + uint64_t iova = submit->cmd[i].iova; + uint32_t szd = submit->cmd[i].size; /* in dwords */ switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h index 43df86c38f58..24f7700768da 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/atom.h +++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h @@ -114,6 +114,7 @@ struct nv50_head_atom { u8 nhsync:1; u8 nvsync:1; u8 depth:4; + u8 bpc; } or; /* Currently only used for MST */ diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 549486f1d937..63425e246018 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -326,9 +326,9 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder, * same size as the native one (e.g. 
different * refresh rate) */ - if (adjusted_mode->hdisplay == native_mode->hdisplay && - adjusted_mode->vdisplay == native_mode->vdisplay && - adjusted_mode->type & DRM_MODE_TYPE_DRIVER) + if (mode->hdisplay == native_mode->hdisplay && + mode->vdisplay == native_mode->vdisplay && + mode->type & DRM_MODE_TYPE_DRIVER) break; mode = native_mode; asyc->scaler.full = true; @@ -353,10 +353,20 @@ nv50_outp_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct nouveau_connector *nv_connector = - nouveau_connector(conn_state->connector); - return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, - nv_connector->native_mode); + struct drm_connector *connector = conn_state->connector; + struct nouveau_connector *nv_connector = nouveau_connector(connector); + struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); + int ret; + + ret = nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, + nv_connector->native_mode); + if (ret) + return ret; + + if (crtc_state->mode_changed || crtc_state->connectors_changed) + asyh->or.bpc = connector->display_info.bpc; + + return 0; } /****************************************************************************** @@ -770,32 +780,54 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, struct nv50_mstm *mstm = mstc->mstm; struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); int slots; + int ret; + + ret = nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, + mstc->native); + if (ret) + return ret; + + if (!crtc_state->mode_changed && !crtc_state->connectors_changed) + return 0; + + /* + * When restoring duplicated states, we need to make sure that the bw + * remains the same and avoid recalculating it, as the connector's bpc + * may have changed after the state was duplicated + */ + if (!state->duplicated) { + const int clock = crtc_state->adjusted_mode.clock; - if (crtc_state->mode_changed || crtc_state->connectors_changed) { /* - * 
When restoring duplicated states, we need to make sure that - * the bw remains the same and avoid recalculating it, as the - * connector's bpc may have changed after the state was - * duplicated + * XXX: Since we don't use HDR in userspace quite yet, limit + * the bpc to 8 to save bandwidth on the topology. In the + * future, we'll want to properly fix this by dynamically + * selecting the highest possible bpc that would fit in the + * topology */ - if (!state->duplicated) { - const int bpp = connector->display_info.bpc * 3; - const int clock = crtc_state->adjusted_mode.clock; + asyh->or.bpc = min(connector->display_info.bpc, 8U); + asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3); + } - asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, bpp); - } + slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, mstc->port, + asyh->dp.pbn); + if (slots < 0) + return slots; - slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, - mstc->port, - asyh->dp.pbn); - if (slots < 0) - return slots; + asyh->dp.tu = slots; - asyh->dp.tu = slots; - } + return 0; +} - return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, - mstc->native); +static u8 +nv50_dp_bpc_to_depth(unsigned int bpc) +{ + switch (bpc) { + case 6: return 0x2; + case 8: return 0x5; + case 10: /* fall-through */ + default: return 0x6; + } } static void @@ -808,7 +840,7 @@ nv50_msto_enable(struct drm_encoder *encoder) struct nv50_mstm *mstm = NULL; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; - u8 proto, depth; + u8 proto; bool r; drm_connector_list_iter_begin(encoder->dev, &conn_iter); @@ -837,14 +869,8 @@ nv50_msto_enable(struct drm_encoder *encoder) else proto = 0x9; - switch (mstc->connector.display_info.bpc) { - case 6: depth = 0x2; break; - case 8: depth = 0x5; break; - case 10: - default: depth = 0x6; break; - } - - mstm->outp->update(mstm->outp, head->base.index, armh, proto, depth); + mstm->outp->update(mstm->outp, head->base.index, armh, proto, + 
nv50_dp_bpc_to_depth(armh->or.bpc)); msto->head = head; msto->mstc = mstc; @@ -1498,20 +1524,14 @@ nv50_sor_enable(struct drm_encoder *encoder) lvds.lvds.script |= 0x0200; } - if (nv_connector->base.display_info.bpc == 8) + if (asyh->or.bpc == 8) lvds.lvds.script |= 0x0200; } nvif_mthd(&disp->disp->object, 0, &lvds, sizeof(lvds)); break; case DCB_OUTPUT_DP: - if (nv_connector->base.display_info.bpc == 6) - depth = 0x2; - else - if (nv_connector->base.display_info.bpc == 8) - depth = 0x5; - else - depth = 0x6; + depth = nv50_dp_bpc_to_depth(asyh->or.bpc); if (nv_encoder->link & 1) proto = 0x8; @@ -1662,7 +1682,7 @@ nv50_pior_enable(struct drm_encoder *encoder) nv50_outp_acquire(nv_encoder); nv_connector = nouveau_encoder_connector_get(nv_encoder); - switch (nv_connector->base.display_info.bpc) { + switch (asyh->or.bpc) { case 10: asyh->or.depth = 0x6; break; case 8: asyh->or.depth = 0x5; break; case 6: asyh->or.depth = 0x2; break; diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index 71c23bf1fe25..c9692df2b76c 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -81,18 +81,17 @@ nv50_head_atomic_check_dither(struct nv50_head_atom *armh, struct nv50_head_atom *asyh, struct nouveau_conn_atom *asyc) { - struct drm_connector *connector = asyc->state.connector; u32 mode = 0x00; if (asyc->dither.mode == DITHERING_MODE_AUTO) { - if (asyh->base.depth > connector->display_info.bpc * 3) + if (asyh->base.depth > asyh->or.bpc * 3) mode = DITHERING_MODE_DYNAMIC2X2; } else { mode = asyc->dither.mode; } if (asyc->dither.depth == DITHERING_DEPTH_AUTO) { - if (connector->display_info.bpc >= 8) + if (asyh->or.bpc >= 8) mode |= DITHERING_DEPTH_8BPC; } else { mode |= asyc->dither.depth; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 5b413588b823..9a9a7f5003d3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -245,14 +245,22 @@ nouveau_conn_atomic_duplicate_state(struct drm_connector *connector) void nouveau_conn_reset(struct drm_connector *connector) { + struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_conn_atom *asyc; - if (WARN_ON(!(asyc = kzalloc(sizeof(*asyc), GFP_KERNEL)))) - return; + if (drm_drv_uses_atomic_modeset(connector->dev)) { + if (WARN_ON(!(asyc = kzalloc(sizeof(*asyc), GFP_KERNEL)))) + return; + + if (connector->state) + nouveau_conn_atomic_destroy_state(connector, + connector->state); + + __drm_atomic_helper_connector_reset(connector, &asyc->state); + } else { + asyc = &nv_connector->properties_state; + } - if (connector->state) - nouveau_conn_atomic_destroy_state(connector, connector->state); - __drm_atomic_helper_connector_reset(connector, &asyc->state); asyc->dither.mode = DITHERING_MODE_AUTO; asyc->dither.depth = DITHERING_DEPTH_AUTO; asyc->scaler.mode = DRM_MODE_SCALE_NONE; @@ -276,8 +284,14 @@ void nouveau_conn_attach_properties(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct nouveau_conn_atom *armc = nouveau_conn_atom(connector->state); struct nouveau_display *disp = nouveau_display(dev); + struct nouveau_connector *nv_connector = nouveau_connector(connector); + struct nouveau_conn_atom *armc; + + if (drm_drv_uses_atomic_modeset(connector->dev)) + armc = nouveau_conn_atom(connector->state); + else + armc = &nv_connector->properties_state; /* Init DVI-I specific properties. 
*/ if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) @@ -748,9 +762,9 @@ static int nouveau_connector_set_property(struct drm_connector *connector, struct drm_property *property, uint64_t value) { - struct nouveau_conn_atom *asyc = nouveau_conn_atom(connector->state); struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_encoder *nv_encoder = nv_connector->detected_encoder; + struct nouveau_conn_atom *asyc = &nv_connector->properties_state; struct drm_encoder *encoder = to_drm_encoder(nv_encoder); int ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index f43a8d63aef8..de84fb4708c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -29,6 +29,7 @@ #include <nvif/notify.h> +#include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_dp_helper.h> @@ -44,6 +45,60 @@ struct dcb_output; struct nouveau_backlight; #endif +#define nouveau_conn_atom(p) \ + container_of((p), struct nouveau_conn_atom, state) + +struct nouveau_conn_atom { + struct drm_connector_state state; + + struct { + /* The enum values specifically defined here match nv50/gf119 + * hw values, and the code relies on this. 
+ */ + enum { + DITHERING_MODE_OFF = 0x00, + DITHERING_MODE_ON = 0x01, + DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, + DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, + DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, + DITHERING_MODE_AUTO + } mode; + enum { + DITHERING_DEPTH_6BPC = 0x00, + DITHERING_DEPTH_8BPC = 0x02, + DITHERING_DEPTH_AUTO + } depth; + } dither; + + struct { + int mode; /* DRM_MODE_SCALE_* */ + struct { + enum { + UNDERSCAN_OFF, + UNDERSCAN_ON, + UNDERSCAN_AUTO, + } mode; + u32 hborder; + u32 vborder; + } underscan; + bool full; + } scaler; + + struct { + int color_vibrance; + int vibrant_hue; + } procamp; + + union { + struct { + bool dither:1; + bool scaler:1; + bool procamp:1; + }; + u8 mask; + } set; +}; + struct nouveau_connector { struct drm_connector base; enum dcb_connector_type type; @@ -63,6 +118,12 @@ struct nouveau_connector { #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT struct nouveau_backlight *backlight; #endif + /* + * Our connector property code expects a nouveau_conn_atom struct + * even on pre-nv50 where we do not support atomic. This embedded + * version gets used in the non atomic modeset case. + */ + struct nouveau_conn_atom properties_state; }; static inline struct nouveau_connector *nouveau_connector( @@ -121,61 +182,6 @@ extern int nouveau_ignorelid; extern int nouveau_duallink; extern int nouveau_hdmimhz; -#include <drm/drm_crtc.h> -#define nouveau_conn_atom(p) \ - container_of((p), struct nouveau_conn_atom, state) - -struct nouveau_conn_atom { - struct drm_connector_state state; - - struct { - /* The enum values specifically defined here match nv50/gf119 - * hw values, and the code relies on this. 
- */ - enum { - DITHERING_MODE_OFF = 0x00, - DITHERING_MODE_ON = 0x01, - DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, - DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, - DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, - DITHERING_MODE_AUTO - } mode; - enum { - DITHERING_DEPTH_6BPC = 0x00, - DITHERING_DEPTH_8BPC = 0x02, - DITHERING_DEPTH_AUTO - } depth; - } dither; - - struct { - int mode; /* DRM_MODE_SCALE_* */ - struct { - enum { - UNDERSCAN_OFF, - UNDERSCAN_ON, - UNDERSCAN_AUTO, - } mode; - u32 hborder; - u32 vborder; - } underscan; - bool full; - } scaler; - - struct { - int color_vibrance; - int vibrant_hue; - } procamp; - - union { - struct { - bool dither:1; - bool scaler:1; - bool procamp:1; - }; - u8 mask; - } set; -}; - void nouveau_conn_attach_properties(struct drm_connector *); void nouveau_conn_reset(struct drm_connector *); struct drm_connector_state * diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index e518d93ca6df..d08ae95ecc0a 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -843,9 +843,13 @@ fail: */ static void omap_gem_unpin_locked(struct drm_gem_object *obj) { + struct omap_drm_private *priv = obj->dev->dev_private; struct omap_gem_object *omap_obj = to_omap_bo(obj); int ret; + if (omap_gem_is_contiguous(omap_obj) || !priv->has_dmm) + return; + if (refcount_dec_and_test(&omap_obj->dma_addr_cnt)) { ret = tiler_unpin(omap_obj->block); if (ret) { diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index 4c4e8a30a1ac..536ba93b0f46 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -18,15 +18,18 @@ static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev); static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { - struct panfrost_device *pfdev = dev_get_drvdata(dev); + struct dev_pm_opp *opp; 
int err; + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) + return PTR_ERR(opp); + dev_pm_opp_put(opp); + err = dev_pm_opp_set_rate(dev, *freq); if (err) return err; - *freq = clk_get_rate(pfdev->clock); - return 0; } @@ -60,20 +63,10 @@ static int panfrost_devfreq_get_dev_status(struct device *dev, return 0; } -static int panfrost_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) -{ - struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev)); - - *freq = clk_get_rate(pfdev->clock); - - return 0; -} - static struct devfreq_dev_profile panfrost_devfreq_profile = { .polling_ms = 50, /* ~3 frames */ .target = panfrost_devfreq_target, .get_dev_status = panfrost_devfreq_get_dev_status, - .get_cur_freq = panfrost_devfreq_get_cur_freq, }; int panfrost_devfreq_init(struct panfrost_device *pfdev) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 9458dc6c750c..f61364f7c471 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -303,14 +303,17 @@ static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data, } /* Don't allow mmapping of heap objects as pages are not pinned. 
*/ - if (to_panfrost_bo(gem_obj)->is_heap) - return -EINVAL; + if (to_panfrost_bo(gem_obj)->is_heap) { + ret = -EINVAL; + goto out; + } ret = drm_gem_create_mmap_offset(gem_obj); if (ret == 0) args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); - drm_gem_object_put_unlocked(gem_obj); +out: + drm_gem_object_put_unlocked(gem_obj); return ret; } @@ -347,20 +350,19 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, return -ENOENT; } + mutex_lock(&pfdev->shrinker_lock); args->retained = drm_gem_shmem_madvise(gem_obj, args->madv); if (args->retained) { struct panfrost_gem_object *bo = to_panfrost_bo(gem_obj); - mutex_lock(&pfdev->shrinker_lock); - if (args->madv == PANFROST_MADV_DONTNEED) - list_add_tail(&bo->base.madv_list, &pfdev->shrinker_list); + list_add_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); - - mutex_unlock(&pfdev->shrinker_lock); } + mutex_unlock(&pfdev->shrinker_lock); drm_gem_object_put_unlocked(gem_obj); return 0; @@ -443,7 +445,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file) { struct panfrost_file_priv *panfrost_priv = file->driver_priv; - panfrost_perfcnt_close(panfrost_priv); + panfrost_perfcnt_close(file); panfrost_job_close(panfrost_priv); panfrost_mmu_pgtable_free(panfrost_priv); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index deca0c30bbd4..fd766b1395fb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -19,6 +19,16 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj) struct panfrost_gem_object *bo = to_panfrost_bo(obj); struct panfrost_device *pfdev = obj->dev->dev_private; + /* + * Make sure the BO is no longer inserted in the shrinker list before + * taking care of the destruction itself. 
If we don't do that we have a + * race condition between this function and what's done in + * panfrost_gem_shrinker_scan(). + */ + mutex_lock(&pfdev->shrinker_lock); + list_del_init(&bo->base.madv_list); + mutex_unlock(&pfdev->shrinker_lock); + if (bo->sgts) { int i; int n_sgt = bo->base.base.size / SZ_2M; @@ -33,15 +43,10 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj) kfree(bo->sgts); } - mutex_lock(&pfdev->shrinker_lock); - if (!list_empty(&bo->base.madv_list)) - list_del(&bo->base.madv_list); - mutex_unlock(&pfdev->shrinker_lock); - drm_gem_shmem_free_object(obj); } -static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv) +int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv) { int ret; size_t size = obj->size; @@ -80,7 +85,7 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p return ret; } -static void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv) +void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv) { struct panfrost_gem_object *bo = to_panfrost_bo(obj); struct panfrost_file_priv *priv = file_priv->driver_priv; diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 50920819cc16..4b17e7308764 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -45,6 +45,10 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv, u32 flags, uint32_t *handle); +int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); +void panfrost_gem_close(struct drm_gem_object *obj, + struct drm_file *file_priv); + void panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 2dba192bf198..2c04e858c50a 100644 --- 
a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -67,9 +67,10 @@ static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) } static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, - struct panfrost_file_priv *user, + struct drm_file *file_priv, unsigned int counterset) { + struct panfrost_file_priv *user = file_priv->driver_priv; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; struct drm_gem_shmem_object *bo; u32 cfg; @@ -91,14 +92,14 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, perfcnt->bo = to_panfrost_bo(&bo->base); /* Map the perfcnt buf in the address space attached to file_priv. */ - ret = panfrost_mmu_map(perfcnt->bo); + ret = panfrost_gem_open(&perfcnt->bo->base.base, file_priv); if (ret) goto err_put_bo; perfcnt->buf = drm_gem_shmem_vmap(&bo->base); if (IS_ERR(perfcnt->buf)) { ret = PTR_ERR(perfcnt->buf); - goto err_put_bo; + goto err_close_bo; } /* @@ -157,14 +158,17 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, err_vunmap: drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf); +err_close_bo: + panfrost_gem_close(&perfcnt->bo->base.base, file_priv); err_put_bo: drm_gem_object_put_unlocked(&bo->base); return ret; } static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, - struct panfrost_file_priv *user) + struct drm_file *file_priv) { + struct panfrost_file_priv *user = file_priv->driver_priv; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; if (user != perfcnt->user) @@ -180,6 +184,7 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, perfcnt->user = NULL; drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf); perfcnt->buf = NULL; + panfrost_gem_close(&perfcnt->bo->base.base, file_priv); drm_gem_object_put_unlocked(&perfcnt->bo->base.base); perfcnt->bo = NULL; pm_runtime_mark_last_busy(pfdev->dev); @@ -191,7 +196,6 @@ static int 
panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct panfrost_file_priv *pfile = file_priv->driver_priv; struct panfrost_device *pfdev = dev->dev_private; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; struct drm_panfrost_perfcnt_enable *req = data; @@ -207,10 +211,10 @@ int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, mutex_lock(&perfcnt->lock); if (req->enable) - ret = panfrost_perfcnt_enable_locked(pfdev, pfile, + ret = panfrost_perfcnt_enable_locked(pfdev, file_priv, req->counterset); else - ret = panfrost_perfcnt_disable_locked(pfdev, pfile); + ret = panfrost_perfcnt_disable_locked(pfdev, file_priv); mutex_unlock(&perfcnt->lock); return ret; @@ -248,15 +252,16 @@ out: return ret; } -void panfrost_perfcnt_close(struct panfrost_file_priv *pfile) +void panfrost_perfcnt_close(struct drm_file *file_priv) { + struct panfrost_file_priv *pfile = file_priv->driver_priv; struct panfrost_device *pfdev = pfile->pfdev; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; pm_runtime_get_sync(pfdev->dev); mutex_lock(&perfcnt->lock); if (perfcnt->user == pfile) - panfrost_perfcnt_disable_locked(pfdev, pfile); + panfrost_perfcnt_disable_locked(pfdev, file_priv); mutex_unlock(&perfcnt->lock); pm_runtime_mark_last_busy(pfdev->dev); pm_runtime_put_autosuspend(pfdev->dev); diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h index 13b8fdaa1b43..8bbcf5f5fb33 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h @@ -9,7 +9,7 @@ void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev); void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev); int panfrost_perfcnt_init(struct panfrost_device *pfdev); void panfrost_perfcnt_fini(struct panfrost_device *pfdev); -void panfrost_perfcnt_close(struct panfrost_file_priv *pfile); 
+void panfrost_perfcnt_close(struct drm_file *file_priv); int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, struct drm_file *file_priv); int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 7089dfc8c2a9..110fb38004b1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1826,8 +1826,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) track->textures[i].tex_coord_type = 2; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index 840401413c58..f5f2ffea5ab2 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) track->textures[i].lookup_disable = true; diff --git 
a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 5b1f9ff97576..714af052fbef 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -837,16 +837,15 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane, static void tegra_cursor_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct tegra_bo *bo = tegra_fb_get_plane(plane->state->fb, 0); + struct tegra_plane_state *state = to_tegra_plane_state(plane->state); struct tegra_dc *dc = to_tegra_dc(plane->state->crtc); - struct drm_plane_state *state = plane->state; u32 value = CURSOR_CLIP_DISPLAY; /* rien ne va plus */ if (!plane->state->crtc || !plane->state->fb) return; - switch (state->crtc_w) { + switch (plane->state->crtc_w) { case 32: value |= CURSOR_SIZE_32x32; break; @@ -864,16 +863,16 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, break; default: - WARN(1, "cursor size %ux%u not supported\n", state->crtc_w, - state->crtc_h); + WARN(1, "cursor size %ux%u not supported\n", + plane->state->crtc_w, plane->state->crtc_h); return; } - value |= (bo->iova >> 10) & 0x3fffff; + value |= (state->iova[0] >> 10) & 0x3fffff; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - value = (bo->iova >> 32) & 0x3; + value = (state->iova[0] >> 32) & 0x3; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); #endif @@ -892,7 +891,8 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL); /* position the cursor */ - value = (state->crtc_y & 0x3fff) << 16 | (state->crtc_x & 0x3fff); + value = (plane->state->crtc_y & 0x3fff) << 16 | + (plane->state->crtc_x & 0x3fff); tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); } @@ -2017,7 +2017,7 @@ static int tegra_dc_init(struct host1x_client *client) dev_warn(dc->dev, "failed to allocate syncpoint\n"); err = host1x_client_iommu_attach(client); - if (err < 0) { + if (err < 0 && err != 
-ENODEV) { dev_err(client->dev, "failed to attach to domain: %d\n", err); return err; } diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 56e5e7a5c108..f455ce71e85d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -920,10 +920,8 @@ int host1x_client_iommu_attach(struct host1x_client *client) if (tegra->domain) { group = iommu_group_get(client->dev); - if (!group) { - dev_err(client->dev, "failed to get IOMMU group\n"); + if (!group) return -ENODEV; - } if (domain != tegra->domain) { err = iommu_attach_group(tegra->domain, group); @@ -1243,6 +1241,9 @@ static int host1x_drm_remove(struct host1x_device *dev) drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); + err = host1x_device_exit(dev); if (err < 0) dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err); diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 746dae32c484..bc15b430156d 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -27,6 +27,29 @@ static void tegra_bo_put(struct host1x_bo *bo) drm_gem_object_put_unlocked(&obj->gem); } +/* XXX move this into lib/scatterlist.c? 
*/ +static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg, + unsigned int nents, gfp_t gfp_mask) +{ + struct scatterlist *dst; + unsigned int i; + int err; + + err = sg_alloc_table(sgt, nents, gfp_mask); + if (err < 0) + return err; + + dst = sgt->sgl; + + for (i = 0; i < nents; i++) { + sg_set_page(dst, sg_page(sg), sg->length, 0); + dst = sg_next(dst); + sg = sg_next(sg); + } + + return 0; +} + static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, dma_addr_t *phys) { @@ -52,11 +75,31 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, return ERR_PTR(-ENOMEM); if (obj->pages) { + /* + * If the buffer object was allocated from the explicit IOMMU + * API code paths, construct an SG table from the pages. + */ err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages, 0, obj->gem.size, GFP_KERNEL); if (err < 0) goto free; + } else if (obj->sgt) { + /* + * If the buffer object already has an SG table but no pages + * were allocated for it, it means the buffer was imported and + * the SG table needs to be copied to avoid overwriting any + * other potential users of the original SG table. + */ + err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl, obj->sgt->nents, + GFP_KERNEL); + if (err < 0) + goto free; } else { + /* + * If the buffer object had no pages allocated and if it was + * not imported, it had to be allocated with the DMA API, so + * the DMA API helper can be used. 
+ */ err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova, obj->gem.size); if (err < 0) @@ -397,13 +440,6 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm, err = tegra_bo_iommu_map(tegra, bo); if (err < 0) goto detach; - } else { - if (bo->sgt->nents > 1) { - err = -EINVAL; - goto detach; - } - - bo->iova = sg_dma_address(bo->sgt->sgl); } bo->gem.import_attach = attach; diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 2b4082d0bc9e..47d985ac7cd7 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -605,11 +605,8 @@ static struct tegra_display_hub_state * tegra_display_hub_get_state(struct tegra_display_hub *hub, struct drm_atomic_state *state) { - struct drm_device *drm = dev_get_drvdata(hub->client.parent); struct drm_private_state *priv; - WARN_ON(!drm_modeset_is_locked(&drm->mode_config.connection_mutex)); - priv = drm_atomic_get_private_obj_state(state, &hub->base); if (IS_ERR(priv)) return ERR_CAST(priv); diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 163b590be224..cadcdd9ea427 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -129,6 +129,17 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) goto unpin; } + /* + * The display controller needs contiguous memory, so + * fail if the buffer is discontiguous and we fail to + * map its SG table to a single contiguous chunk of + * I/O virtual memory. 
+ */ + if (err > 1) { + err = -EINVAL; + goto unpin; + } + state->iova[i] = sg_dma_address(sgt->sgl); state->sgt[i] = sgt; } else { diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 615cb319fa8b..a68d3b36b972 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3912,8 +3912,7 @@ static int tegra_sor_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int tegra_sor_suspend(struct device *dev) +static int tegra_sor_runtime_suspend(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; @@ -3935,7 +3934,7 @@ static int tegra_sor_suspend(struct device *dev) return 0; } -static int tegra_sor_resume(struct device *dev) +static int tegra_sor_runtime_resume(struct device *dev) { struct tegra_sor *sor = dev_get_drvdata(dev); int err; @@ -3967,10 +3966,39 @@ static int tegra_sor_resume(struct device *dev) return 0; } -#endif + +static int tegra_sor_suspend(struct device *dev) +{ + struct tegra_sor *sor = dev_get_drvdata(dev); + int err; + + if (sor->hdmi_supply) { + err = regulator_disable(sor->hdmi_supply); + if (err < 0) + return err; + } + + return 0; +} + +static int tegra_sor_resume(struct device *dev) +{ + struct tegra_sor *sor = dev_get_drvdata(dev); + int err; + + if (sor->hdmi_supply) { + err = regulator_enable(sor->hdmi_supply); + if (err < 0) + return err; + } + + return 0; +} static const struct dev_pm_ops tegra_sor_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_sor_suspend, tegra_sor_resume, NULL) + SET_RUNTIME_PM_OPS(tegra_sor_runtime_suspend, tegra_sor_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_sor_suspend, tegra_sor_resume) }; struct platform_driver tegra_sor_driver = { diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 9444ba183990..3526c2892ddb 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -167,7 +167,7 @@ static int vic_init(struct host1x_client *client) int err; err = 
host1x_client_iommu_attach(client); - if (err < 0) { + if (err < 0 && err != -ENODEV) { dev_err(vic->dev, "failed to attach to domain: %d\n", err); return err; } @@ -386,13 +386,14 @@ static const struct vic_config vic_t194_config = { .supports_sid = true, }; -static const struct of_device_id vic_match[] = { +static const struct of_device_id tegra_vic_of_match[] = { { .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config }, { .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config }, { .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config }, { .compatible = "nvidia,tegra194-vic", .data = &vic_t194_config }, { }, }; +MODULE_DEVICE_TABLE(of, tegra_vic_of_match); static int vic_probe(struct platform_device *pdev) { @@ -516,7 +517,7 @@ static const struct dev_pm_ops vic_pm_ops = { struct platform_driver tegra_vic_driver = { .driver = { .name = "tegra-vic", - .of_match_table = vic_match, + .of_match_table = tegra_vic_of_match, .pm = &vic_pm_ops }, .probe = vic_probe, |