From 2858c00d2823c83acce2a1175dbabb2cebee8678 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 1 Aug 2013 17:34:07 +0200 Subject: drm/radeon: fix halting UVD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing the clock/power or resetting the VCPU can cause hangs if that happens in the middle of a register write. Stall the memory and register bus before putting the VCPU into reset. Keep it in reset when unloading the module or suspending. Signed-off-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6dacec4e2090..524db70aaf6e 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6194,7 +6194,7 @@ int cik_suspend(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); cik_cp_enable(rdev, false); cik_sdma_enable(rdev, false); - r600_uvd_rbc_stop(rdev); + r600_uvd_stop(rdev); radeon_uvd_suspend(rdev); cik_irq_suspend(rdev); radeon_wb_disable(rdev); @@ -6358,6 +6358,7 @@ void cik_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); + r600_uvd_stop(rdev); radeon_uvd_fini(rdev); cik_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); -- cgit v1.2.3 From 6fab3febf6d949b0a12b1e4e73db38e4a177a79e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 4 Aug 2013 12:13:17 -0400 Subject: drm/radeon: always program the MC on startup For r6xx+ asics. This mirrors the behavior of pre-r6xx asics. We need to program the MC even if something else in startup() fails. Failure to do so results in an unusable GPU. Based on a fix from: Mark Kettenis Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 3 ++- drivers/gpu/drm/radeon/evergreen.c | 3 ++- drivers/gpu/drm/radeon/ni.c | 3 ++- drivers/gpu/drm/radeon/r600.c | 3 ++- drivers/gpu/drm/radeon/rv770.c | 3 ++- drivers/gpu/drm/radeon/si.c | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 524db70aaf6e..5b587876c7f5 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5954,6 +5954,8 @@ static int cik_startup(struct radeon_device *rdev) struct radeon_ring *ring; int r; + cik_mc_program(rdev); + if (rdev->flags & RADEON_IS_IGP) { if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { @@ -5985,7 +5987,6 @@ static int cik_startup(struct radeon_device *rdev) if (r) return r; - cik_mc_program(rdev); r = cik_pcie_gart_enable(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 5b98e573d60a..d5b49e33315e 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5106,6 +5106,8 @@ static int evergreen_startup(struct radeon_device *rdev) /* enable aspm */ evergreen_program_aspm(rdev); + evergreen_mc_program(rdev); + if (ASIC_IS_DCE5(rdev)) { if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) { r = ni_init_microcode(rdev); @@ -5133,7 +5135,6 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; - evergreen_mc_program(rdev); if (rdev->flags & RADEON_IS_AGP) { evergreen_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 16e91b08bf57..12cebe46e34f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2079,6 +2079,8 @@ static int cayman_startup(struct radeon_device *rdev) /* enable aspm */ evergreen_program_aspm(rdev); + evergreen_mc_program(rdev); + if (rdev->flags & RADEON_IS_IGP) { if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { r = ni_init_microcode(rdev); @@ -2107,7 +2109,6 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; - evergreen_mc_program(rdev); r = cayman_pcie_gart_enable(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 0a9553abec54..af848045670c 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3330,6 +3330,8 @@ static int r600_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ r600_pcie_gen2_enable(rdev); + r600_mc_program(rdev); + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { r = r600_init_microcode(rdev); if (r) { @@ -3342,7 +3344,6 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; - r600_mc_program(rdev); if (rdev->flags & RADEON_IS_AGP) { r600_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index f1010131bac0..3cc08a4d99d9 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1829,6 +1829,8 @@ static int rv770_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ rv770_pcie_gen2_enable(rdev); + rv770_mc_program(rdev); + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { r = r600_init_microcode(rdev); if (r) { @@ -1841,7 +1843,6 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; - rv770_mc_program(rdev); if (rdev->flags & RADEON_IS_AGP) { rv770_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 242c1ac83e23..6a2dca4acfc1 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6418,6 +6418,8 @@ static int si_startup(struct radeon_device *rdev) /* enable aspm */ si_program_aspm(rdev); + si_mc_program(rdev); + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || !rdev->rlc_fw || !rdev->mc_fw) { r = si_init_microcode(rdev); @@ -6437,7 +6439,6 @@ static int si_startup(struct radeon_device *rdev) if (r) return r; - si_mc_program(rdev); r = si_pcie_gart_enable(rdev); if (r) return r; -- cgit v1.2.3 From 4ad9c1c774c2af152283f510062094e768876f55 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 5 Aug 2013 14:10:55 +0200 Subject: drm/radeon: only save UVD bo when we have open handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise just reinitialize from scratch on resume, and so make it more likely to succeed. Signed-off-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 2 +- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 46 +++++++++++++++++++++++++---------- drivers/gpu/drm/radeon/rv770.c | 2 +- 5 files changed, 37 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 5b587876c7f5..58136f20c060 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6980,7 +6980,7 @@ int cik_uvd_resume(struct radeon_device *rdev) /* programm the VCPU memory controller bits 0-27 */ addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; WREG32(UVD_VCPU_CACHE_OFFSET0, addr); WREG32(UVD_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 2f08219c39b6..76dbe8e9b5c8 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1468,7 +1468,6 @@ struct radeon_uvd { void *cpu_addr; uint64_t gpu_addr; void *saved_bo; - unsigned fw_size; atomic_t handles[RADEON_MAX_UVD_HANDLES]; struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; struct delayed_work idle_work; @@ -2066,6 +2065,7 @@ struct radeon_device { const struct firmware *mec_fw; /* CIK MEC firmware */ const struct firmware *sdma_fw; /* CIK SDMA firmware */ const struct firmware *smc_fw; /* SMC firmware */ + const struct firmware *uvd_fw; /* UVD firmware */ struct r600_blit r600_blit; struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 7ddb0efe2408..ddb8f8e04eb5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -782,7 +782,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } else { /* put fence directly behind firmware */ - index = ALIGN(rdev->uvd.fw_size, 8); + index = ALIGN(rdev->uvd_fw->size, 8); rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index; rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index; } diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 414fd145d20e..ca0d7358ed33 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -56,7 +56,6 @@ static void radeon_uvd_idle_work_handler(struct work_struct *work); int radeon_uvd_init(struct radeon_device *rdev) { - const struct firmware *fw; unsigned long bo_size; const char *fw_name; int i, r; @@ -105,14 +104,14 @@ int radeon_uvd_init(struct radeon_device *rdev) return -EINVAL; } - r = request_firmware(&fw, fw_name, rdev->dev); + r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); if (r) { dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", fw_name); return r; } - bo_size = RADEON_GPU_PAGE_ALIGN(fw->size + 8) + + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); @@ -145,12 +144,6 @@ int radeon_uvd_init(struct radeon_device *rdev) radeon_bo_unreserve(rdev->uvd.vcpu_bo); - rdev->uvd.fw_size = fw->size; - memset(rdev->uvd.cpu_addr, 0, bo_size); - memcpy(rdev->uvd.cpu_addr, fw->data, fw->size); - - release_firmware(fw); - for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { atomic_set(&rdev->uvd.handles[i], 0); rdev->uvd.filp[i] = NULL; @@ -174,33 +167,60 @@ void radeon_uvd_fini(struct radeon_device *rdev) } radeon_bo_unref(&rdev->uvd.vcpu_bo); + + release_firmware(rdev->uvd_fw); } int radeon_uvd_suspend(struct radeon_device *rdev) { unsigned size; + void *ptr; + int i; if (rdev->uvd.vcpu_bo == NULL) return 0; + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) + if (atomic_read(&rdev->uvd.handles[i])) + break; + + if (i == RADEON_MAX_UVD_HANDLES) + return 0; + size = radeon_bo_size(rdev->uvd.vcpu_bo); + size -= rdev->uvd_fw->size; + + ptr = rdev->uvd.cpu_addr; + ptr += rdev->uvd_fw->size; + rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); - memcpy(rdev->uvd.saved_bo, rdev->uvd.cpu_addr, size); + memcpy(rdev->uvd.saved_bo, ptr, size); return 0; } int radeon_uvd_resume(struct radeon_device *rdev) { + unsigned size; + void *ptr; + if (rdev->uvd.vcpu_bo == NULL) return -EINVAL; + memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); + + size = radeon_bo_size(rdev->uvd.vcpu_bo); + size -= rdev->uvd_fw->size; + + ptr = rdev->uvd.cpu_addr; + ptr += rdev->uvd_fw->size; + if (rdev->uvd.saved_bo != NULL) { - unsigned size = radeon_bo_size(rdev->uvd.vcpu_bo); - memcpy(rdev->uvd.cpu_addr, rdev->uvd.saved_bo, size); + memcpy(ptr, rdev->uvd.saved_bo, size); kfree(rdev->uvd.saved_bo); rdev->uvd.saved_bo = NULL; - } + } else + memset(ptr, 0, size); return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 3cc08a4d99d9..bcc68ec204ad 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -813,7 +813,7 @@ int rv770_uvd_resume(struct radeon_device *rdev) /* programm the VCPU memory controller bits 0-27 */ addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; WREG32(UVD_VCPU_CACHE_OFFSET0, addr); WREG32(UVD_VCPU_CACHE_SIZE0, size); -- cgit v1.2.3 From f61d5b46771a352fad1ac7f99008ef52a7ffcb72 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 6 Aug 2013 12:40:16 -0400 Subject: drm/radeon/cik: use a mutex to properly lock srbm instanced registers We need proper locking in the driver when accessing instanced registers on CIK. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 10 ++++++++++ drivers/gpu/drm/radeon/radeon.h | 2 ++ drivers/gpu/drm/radeon/radeon_device.c | 1 + 3 files changed, 13 insertions(+) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 58136f20c060..8928bd109c16 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -2587,9 +2587,11 @@ u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, if (rdev->wb.enabled) { rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); } else { + mutex_lock(&rdev->srbm_mutex); cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); rptr = RREG32(CP_HQD_PQ_RPTR); cik_srbm_select(rdev, 0, 0, 0, 0); + mutex_unlock(&rdev->srbm_mutex); } rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; @@ -2604,9 +2606,11 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, if (rdev->wb.enabled) { wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]); } else { + mutex_lock(&rdev->srbm_mutex); cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); wptr = RREG32(CP_HQD_PQ_WPTR); cik_srbm_select(rdev, 0, 0, 0, 0); + mutex_unlock(&rdev->srbm_mutex); } wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; @@ -2897,6 +2901,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) WREG32(CP_CPF_DEBUG, tmp); /* init the pipes */ + mutex_lock(&rdev->srbm_mutex); for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { int me = (i < 4) ? 1 : 2; int pipe = (i < 4) ? i : (i - 4); @@ -2919,6 +2924,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) WREG32(CP_HPD_EOP_CONTROL, tmp); } cik_srbm_select(rdev, 0, 0, 0, 0); + mutex_unlock(&rdev->srbm_mutex); /* init the queues. Just two for now. */ for (i = 0; i < 2; i++) { @@ -2972,6 +2978,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) mqd->static_thread_mgmt23[0] = 0xffffffff; mqd->static_thread_mgmt23[1] = 0xffffffff; + mutex_lock(&rdev->srbm_mutex); cik_srbm_select(rdev, rdev->ring[idx].me, rdev->ring[idx].pipe, rdev->ring[idx].queue, 0); @@ -3099,6 +3106,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); cik_srbm_select(rdev, 0, 0, 0, 0); + mutex_unlock(&rdev->srbm_mutex); radeon_bo_kunmap(rdev->ring[idx].mqd_obj); radeon_bo_unreserve(rdev->ring[idx].mqd_obj); @@ -4320,6 +4328,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&rdev->srbm_mutex); for (i = 0; i < 16; i++) { cik_srbm_select(rdev, 0, 0, 0, i); /* CP and shaders */ @@ -4335,6 +4344,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) /* XXX SDMA RLC - todo */ } cik_srbm_select(rdev, 0, 0, 0, 0); + mutex_unlock(&rdev->srbm_mutex); cik_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 76dbe8e9b5c8..274b8e1b889f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2095,6 +2095,8 @@ struct radeon_device { /* ACPI interface */ struct radeon_atif atif; struct radeon_atcs atcs; + /* srbm instance registers */ + struct mutex srbm_mutex; }; int radeon_device_init(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 84dd2dcbcf69..63398ae1dbf5 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1163,6 +1163,7 @@ int radeon_device_init(struct radeon_device *rdev, mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); mutex_init(&rdev->gpu_clock_mutex); + mutex_init(&rdev->srbm_mutex); init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); -- cgit v1.2.3 From 58ea2deab36ecf0b416d3486442cc6df693dcc79 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 24 Jan 2013 10:03:39 -0500 Subject: drm/radeon/kms: fix up dce8 display watermark calc for dpm Calculate the low and high watermarks based on the low and high clocks for the current power state. The dynamic pm hw will select the appropriate watermark based on the internal dpm state. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 96 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 8928bd109c16..42b143e75d42 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6826,7 +6826,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev, u32 lb_size, u32 num_heads) { struct drm_display_mode *mode = &radeon_crtc->base.mode; - struct dce8_wm_params wm; + struct dce8_wm_params wm_low, wm_high; u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; @@ -6836,35 +6836,82 @@ static void dce8_program_watermarks(struct radeon_device *rdev, pixel_period = 1000000 / (u32)mode->clock; line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); - wm.yclk = rdev->pm.current_mclk * 10; - wm.sclk = rdev->pm.current_sclk * 10; - wm.disp_clk = mode->clock; - wm.src_width = mode->crtc_hdisplay; - wm.active_time = mode->crtc_hdisplay * pixel_period; - wm.blank_time = line_time - wm.active_time; - wm.interlaced = false; + /* watermark for high clocks */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && + rdev->pm.dpm_enabled) { + wm_high.yclk = + radeon_dpm_get_mclk(rdev, false) * 10; + wm_high.sclk = + radeon_dpm_get_sclk(rdev, false) * 10; + } else { + wm_high.yclk = rdev->pm.current_mclk * 10; + wm_high.sclk = rdev->pm.current_sclk * 10; + } + + wm_high.disp_clk = mode->clock; + wm_high.src_width = mode->crtc_hdisplay; + wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.blank_time = line_time - wm_high.active_time; + wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) - wm.interlaced = true; - wm.vsc = radeon_crtc->vsc; - wm.vtaps = 1; + wm_high.interlaced = true; + wm_high.vsc = radeon_crtc->vsc; + wm_high.vtaps = 1; if (radeon_crtc->rmx_type != RMX_OFF) - wm.vtaps = 2; - wm.bytes_per_pixel = 4; /* XXX: get this from fb config */ - wm.lb_size = lb_size; - wm.dram_channels = cik_get_number_of_dram_channels(rdev); - wm.num_heads = num_heads; + wm_high.vtaps = 2; + wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm_high.lb_size = lb_size; + wm_high.dram_channels = cik_get_number_of_dram_channels(rdev); + wm_high.num_heads = num_heads; /* set for high clocks */ - latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535); + latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535); + + /* possibly force display priority to high */ + /* should really do this at mode validation time... */ + if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) || + !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) || + !dce8_check_latency_hiding(&wm_high) || + (rdev->disp_priority == 2)) { + DRM_DEBUG_KMS("force priority to high\n"); + } + + /* watermark for low clocks */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && + rdev->pm.dpm_enabled) { + wm_low.yclk = + radeon_dpm_get_mclk(rdev, true) * 10; + wm_low.sclk = + radeon_dpm_get_sclk(rdev, true) * 10; + } else { + wm_low.yclk = rdev->pm.current_mclk * 10; + wm_low.sclk = rdev->pm.current_sclk * 10; + } + + wm_low.disp_clk = mode->clock; + wm_low.src_width = mode->crtc_hdisplay; + wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.blank_time = line_time - wm_low.active_time; + wm_low.interlaced = false; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + wm_low.interlaced = true; + wm_low.vsc = radeon_crtc->vsc; + wm_low.vtaps = 1; + if (radeon_crtc->rmx_type != RMX_OFF) + wm_low.vtaps = 2; + wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm_low.lb_size = lb_size; + wm_low.dram_channels = cik_get_number_of_dram_channels(rdev); + wm_low.num_heads = num_heads; + /* set for low clocks */ - /* wm.yclk = low clk; wm.sclk = low clk */ - latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535); + latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ - if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) || - !dce8_average_bandwidth_vs_available_bandwidth(&wm) || - !dce8_check_latency_hiding(&wm) || + if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) || + !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) || + !dce8_check_latency_hiding(&wm_low) || (rdev->disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } @@ -6889,6 +6936,11 @@ static void dce8_program_watermarks(struct radeon_device *rdev, LATENCY_HIGH_WATERMARK(line_time))); /* restore original selection */ WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask); + + /* save values for DPM */ + radeon_crtc->line_time = line_time; + radeon_crtc->wm_high = latency_watermark_a; + radeon_crtc->wm_low = latency_watermark_b; } /** -- cgit v1.2.3 From 8c68e3938863460b6c224a3871d5d1ac8bb4b09f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Jun 2013 15:38:37 -0400 Subject: drm/radeon: switch CIK to use radeon_ucode.h Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 17 +---------------- drivers/gpu/drm/radeon/radeon_ucode.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 42b143e75d42..2ceb9003206c 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -30,22 +30,7 @@ #include "cikd.h" #include "atom.h" #include "cik_blit_shaders.h" - -/* GFX */ -#define CIK_PFP_UCODE_SIZE 2144 -#define CIK_ME_UCODE_SIZE 2144 -#define CIK_CE_UCODE_SIZE 2144 -/* compute */ -#define CIK_MEC_UCODE_SIZE 4192 -/* interrupts */ -#define BONAIRE_RLC_UCODE_SIZE 2048 -#define KB_RLC_UCODE_SIZE 2560 -#define KV_RLC_UCODE_SIZE 2560 -/* gddr controller */ -#define CIK_MC_UCODE_SIZE 7866 -/* sdma */ -#define CIK_SDMA_UCODE_SIZE 1050 -#define CIK_SDMA_UCODE_VERSION 64 +#include "radeon_ucode.h" MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); diff --git a/drivers/gpu/drm/radeon/radeon_ucode.h b/drivers/gpu/drm/radeon/radeon_ucode.h index d8b05f7bcf1a..fad27c051bbf 100644 --- a/drivers/gpu/drm/radeon/radeon_ucode.h +++ b/drivers/gpu/drm/radeon/radeon_ucode.h @@ -35,6 +35,12 @@ #define SI_PFP_UCODE_SIZE 2144 #define SI_PM4_UCODE_SIZE 2144 #define SI_CE_UCODE_SIZE 2144 +#define CIK_PFP_UCODE_SIZE 2144 +#define CIK_ME_UCODE_SIZE 2144 +#define CIK_CE_UCODE_SIZE 2144 + +/* MEC */ +#define CIK_MEC_UCODE_SIZE 4192 /* RLC */ #define R600_RLC_UCODE_SIZE 768 @@ -43,12 +49,20 @@ #define CAYMAN_RLC_UCODE_SIZE 1024 #define ARUBA_RLC_UCODE_SIZE 1536 #define SI_RLC_UCODE_SIZE 2048 +#define BONAIRE_RLC_UCODE_SIZE 2048 +#define KB_RLC_UCODE_SIZE 2560 +#define KV_RLC_UCODE_SIZE 2560 /* MC */ #define BTC_MC_UCODE_SIZE 6024 #define CAYMAN_MC_UCODE_SIZE 6037 #define SI_MC_UCODE_SIZE 7769 #define OLAND_MC_UCODE_SIZE 7863 +#define CIK_MC_UCODE_SIZE 7866 + +/* SDMA */ +#define CIK_SDMA_UCODE_SIZE 1050 +#define CIK_SDMA_UCODE_VERSION 64 /* SMC */ #define RV770_SMC_UCODE_START 0x0100 -- cgit v1.2.3 From 8a7cd27679d0451c7cf072af70acce51d15c446d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 6 Aug 2013 11:29:39 -0400 Subject: drm/radeon/cik: add support for pcie gen1/2/3 switching Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 161 ++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/cikd.h | 57 +++++++++++++++ 2 files changed, 218 insertions(+) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 2ceb9003206c..a30fb32c2301 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -61,6 +61,7 @@ extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *m extern void si_rlc_fini(struct radeon_device *rdev); extern int si_rlc_init(struct radeon_device *rdev); static void cik_rlc_stop(struct radeon_device *rdev); +static void cik_pcie_gen3_enable(struct radeon_device *rdev); /* * Indirect registers accessor @@ -5949,6 +5950,9 @@ static int cik_startup(struct radeon_device *rdev) struct radeon_ring *ring; int r; + /* enable pcie gen2/3 link */ + cik_pcie_gen3_enable(rdev); + cik_mc_program(rdev); if (rdev->flags & RADEON_IS_IGP) { @@ -7051,3 +7055,160 @@ int cik_uvd_resume(struct radeon_device *rdev) return 0; } + +static void cik_pcie_gen3_enable(struct radeon_device *rdev) +{ + struct pci_dev *root = rdev->pdev->bus->self; + int bridge_pos, gpu_pos; + u32 speed_cntl, mask, current_data_rate; + int ret, i; + u16 tmp16; + + if (radeon_pcie_gen2 == 0) + return; + + if (rdev->flags & RADEON_IS_IGP) + return; + + if (!(rdev->flags & RADEON_IS_PCIE)) + return; + + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret != 0) + return; + + if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80))) + return; + + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); + current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> + LC_CURRENT_DATA_RATE_SHIFT; + if (mask & DRM_PCIE_SPEED_80) { + if (current_data_rate == 2) { + DRM_INFO("PCIE gen 3 link speeds already enabled\n"); + return; + } + DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n"); + } else if (mask & DRM_PCIE_SPEED_50) { + if (current_data_rate == 1) { + DRM_INFO("PCIE gen 2 link speeds already enabled\n"); + return; + } + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); + } + + bridge_pos = pci_pcie_cap(root); + if (!bridge_pos) + return; + + gpu_pos = pci_pcie_cap(rdev->pdev); + if (!gpu_pos) + return; + + if (mask & DRM_PCIE_SPEED_80) { + /* re-try equalization if gen3 is not already enabled */ + if (current_data_rate != 2) { + u16 bridge_cfg, gpu_cfg; + u16 bridge_cfg2, gpu_cfg2; + u32 max_lw, current_lw, tmp; + + pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); + pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + + tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; + pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + + tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; + pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + + tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1); + max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; + current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT; + + if (current_lw < max_lw) { + tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + if (tmp & LC_RENEGOTIATION_SUPPORT) { + tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS); + tmp |= (max_lw << LC_LINK_WIDTH_SHIFT); + tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW; + WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp); + } + } + + for (i = 0; i < 10; i++) { + /* check status */ + pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); + if (tmp16 & PCI_EXP_DEVSTA_TRPND) + break; + + pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); + pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + + pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); + pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); + + tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); + tmp |= LC_SET_QUIESCE; + WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + + tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); + tmp |= LC_REDO_EQ; + WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + + mdelay(100); + + /* linkctl */ + pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL_HAWD; + tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); + pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + + pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL_HAWD; + tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); + pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + + /* linkctl2 */ + pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~((1 << 4) | (7 << 9)); + tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9))); + pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16); + + pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~((1 << 4) | (7 << 9)); + tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9))); + pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + + tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); + tmp &= ~LC_SET_QUIESCE; + WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + } + } + } + + /* set the link speed */ + speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE; + speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; + WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + + pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~0xf; + if (mask & DRM_PCIE_SPEED_80) + tmp16 |= 3; /* gen3 */ + else if (mask & DRM_PCIE_SPEED_50) + tmp16 |= 2; /* gen2 */ + else + tmp16 |= 1; /* gen1 */ + pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); + speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; + WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + + for (i = 0; i < rdev->usec_timeout; i++) { + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); + if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0) + break; + udelay(1); + } +} diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 7e9275eaef80..a1f376e6dd31 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -35,6 +35,63 @@ #define CG_CLKPIN_CNTL 0xC05001A0 # define XTALIN_DIVIDE (1 << 1) +/* PCIE registers idx/data 0x38/0x3c */ +#define PCIE_LC_STATUS1 0x1400028 /* PCIE */ +# define LC_REVERSE_RCVR (1 << 0) +# define LC_REVERSE_XMIT (1 << 1) +# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2) +# define LC_OPERATING_LINK_WIDTH_SHIFT 2 +# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5) +# define LC_DETECTED_LINK_WIDTH_SHIFT 5 + +#define PCIE_LC_LINK_WIDTH_CNTL 0x100100A2 /* PCIE */ +# define LC_LINK_WIDTH_SHIFT 0 +# define LC_LINK_WIDTH_MASK 0x7 +# define LC_LINK_WIDTH_X0 0 +# define LC_LINK_WIDTH_X1 1 +# define LC_LINK_WIDTH_X2 2 +# define LC_LINK_WIDTH_X4 3 +# define LC_LINK_WIDTH_X8 4 +# define LC_LINK_WIDTH_X16 6 +# define LC_LINK_WIDTH_RD_SHIFT 4 +# define LC_LINK_WIDTH_RD_MASK 0x70 +# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7) +# define LC_RECONFIG_NOW (1 << 8) +# define LC_RENEGOTIATION_SUPPORT (1 << 9) +# define LC_RENEGOTIATE_EN (1 << 10) +# define LC_SHORT_RECONFIG_EN (1 << 11) +# define LC_UPCONFIGURE_SUPPORT (1 << 12) +# define LC_UPCONFIGURE_DIS (1 << 13) +# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21) +# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21) +# define LC_DYN_LANES_PWR_STATE_SHIFT 21 + +#define PCIE_LC_SPEED_CNTL 0x100100A4 /* PCIE */ +# define LC_GEN2_EN_STRAP (1 << 0) +# define LC_GEN3_EN_STRAP (1 << 1) +# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2) +# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3) +# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3 +# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5) +# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6) +# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7) +# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8) +# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9) +# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10) +# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10 +# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */ +# define LC_CURRENT_DATA_RATE_SHIFT 13 +# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16) +# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18) +# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19) +# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20) +# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21) + +#define PCIE_LC_CNTL4 0x100100B6 /* PCIE */ +# define LC_REDO_EQ (1 << 5) +# define LC_SET_QUIESCE (1 << 13) + +/* direct registers */ #define PCIE_INDEX 0x38 #define PCIE_DATA 0x3C -- cgit v1.2.3 From 7235711a43b6839f5759327d003fa334c4a703f2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 4 Apr 2013 13:58:09 -0400 Subject: drm/radeon: add support for ASPM on CIK asics Enables PCIE ASPM (Active State Power Management) on CIK asics. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 151 ++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/cikd.h | 72 +++++++++++++++++++- 2 files changed, 222 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a30fb32c2301..0da9d6715ef5 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -62,6 +62,7 @@ extern void si_rlc_fini(struct radeon_device *rdev); extern int si_rlc_init(struct radeon_device *rdev); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); +static void cik_program_aspm(struct radeon_device *rdev); /* * Indirect registers accessor @@ -5952,6 +5953,8 @@ static int cik_startup(struct radeon_device *rdev) /* enable pcie gen2/3 link */ cik_pcie_gen3_enable(rdev); + /* enable aspm */ + cik_program_aspm(rdev); cik_mc_program(rdev); @@ -7212,3 +7215,151 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev) udelay(1); } } + +static void cik_program_aspm(struct radeon_device *rdev) +{ + u32 data, orig; + bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; + bool disable_clkreq = false; + + if (radeon_aspm == 0) + return; + + /* XXX double check IGPs */ + if (rdev->flags & RADEON_IS_IGP) + return; + + if (!(rdev->flags & RADEON_IS_PCIE)) + return; + + orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); + data &= ~LC_XMIT_N_FTS_MASK; + data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data); + + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3); + data |= LC_GO_TO_RECOVERY; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL3, data); + + orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL); + data |= P_IGNORE_EDB_ERR; + if (orig != data) + WREG32_PCIE_PORT(PCIE_P_CNTL, data); + + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); + data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK); + data |= LC_PMI_TO_L1_DIS; + if (!disable_l0s) + data |= LC_L0S_INACTIVITY(7); + + if (!disable_l1) { + data |= LC_L1_INACTIVITY(7); + data &= ~LC_PMI_TO_L1_DIS; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + + if (!disable_plloff_in_l1) { + bool clk_req_support; + + orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0); + data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + if (orig != data) + WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data); + + orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1); + data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + if (orig != data) + WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data); + + orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0); + data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + if (orig != data) + WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data); + + orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1); + data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + if (orig != data) + WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data); + + orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + data &= ~LC_DYN_LANES_PWR_STATE_MASK; + data |= LC_DYN_LANES_PWR_STATE(3); + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); + + if (!disable_clkreq) { + struct pci_dev *root = rdev->pdev->bus->self; + u32 lnkcap; + + clk_req_support = false; + pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap); + if (lnkcap & PCI_EXP_LNKCAP_CLKPM) + clk_req_support = true; + } else { + clk_req_support = false; + } + + if (clk_req_support) { + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2); + data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL2, data); + + orig = data = RREG32_SMC(THM_CLK_CNTL); + data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK); + data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1); + if (orig != data) + WREG32_SMC(THM_CLK_CNTL, data); + + orig = data = RREG32_SMC(MISC_CLK_CTRL); + data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK); + data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1); + if (orig != data) + WREG32_SMC(MISC_CLK_CTRL, data); + + orig = data = RREG32_SMC(CG_CLKPIN_CNTL); + data &= ~BCLK_AS_XCLK; + if (orig != data) + WREG32_SMC(CG_CLKPIN_CNTL, data); + + orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2); + data &= ~FORCE_BIF_REFCLK_EN; + if (orig != data) + WREG32_SMC(CG_CLKPIN_CNTL_2, data); + + orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL); + data &= ~MPLL_CLKOUT_SEL_MASK; + data |= MPLL_CLKOUT_SEL(4); + if (orig != data) + WREG32_SMC(MPLL_BYPASSCLK_SEL, data); + } + } + } else { + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + } + + orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); + data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN; + if (orig != data) + WREG32_PCIE_PORT(PCIE_CNTL2, data); + + if (!disable_l0s) { + data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); + if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) { + data = RREG32_PCIE_PORT(PCIE_LC_STATUS1); + if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) { + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); + data &= ~LC_L0S_INACTIVITY_MASK; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + } + } + } +} diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index a1f376e6dd31..d50593fd9cd5 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -32,10 +32,53 @@ #define GENERAL_PWRMGT 0xC0200000 # define GPU_COUNTER_CLK (1 << 15) +#define MPLL_BYPASSCLK_SEL 0xC050019C +# define MPLL_CLKOUT_SEL(x) ((x) << 8) +# define MPLL_CLKOUT_SEL_MASK 0xFF00 #define CG_CLKPIN_CNTL 0xC05001A0 # define XTALIN_DIVIDE (1 << 1) +# define BCLK_AS_XCLK (1 << 2) +#define CG_CLKPIN_CNTL_2 0xC05001A4 +# define FORCE_BIF_REFCLK_EN (1 << 3) +# define MUX_TCLK_TO_XCLK (1 << 8) +#define THM_CLK_CNTL 0xC05001A8 +# define CMON_CLK_SEL(x) ((x) << 0) +# define CMON_CLK_SEL_MASK 0xFF +# define TMON_CLK_SEL(x) ((x) << 8) +# define TMON_CLK_SEL_MASK 0xFF00 +#define MISC_CLK_CTRL 0xC05001AC +# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0) +# define DEEP_SLEEP_CLK_SEL_MASK 0xFF +# define ZCLK_SEL(x) ((x) << 8) +# define ZCLK_SEL_MASK 0xFF00 /* PCIE registers idx/data 0x38/0x3c */ +#define PB0_PIF_PWRDOWN_0 0x1100012 /* PCIE */ +# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7) +# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7) +# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7 +# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10) +# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10) +# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10 +# define PLL_RAMP_UP_TIME_0(x) ((x) << 24) +# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24) +# define PLL_RAMP_UP_TIME_0_SHIFT 24 +#define PB0_PIF_PWRDOWN_1 0x1100013 /* PCIE */ +# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7) +# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7) +# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7 +# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10) +# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10) +# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10 +# define PLL_RAMP_UP_TIME_1(x) ((x) << 24) +# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24) +# define PLL_RAMP_UP_TIME_1_SHIFT 24 + +#define PCIE_CNTL2 0x1001001c /* PCIE */ +# define SLV_MEM_LS_EN (1 << 16) +# define MST_MEM_LS_EN (1 << 18) +# define REPLAY_MEM_LS_EN (1 << 19) + #define PCIE_LC_STATUS1 0x1400028 /* PCIE */ # define LC_REVERSE_RCVR (1 << 0) # define LC_REVERSE_XMIT (1 << 1) @@ -44,6 +87,22 @@ # define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5) # define LC_DETECTED_LINK_WIDTH_SHIFT 5 +#define PCIE_P_CNTL 0x1400040 /* PCIE */ +# define P_IGNORE_EDB_ERR (1 << 6) + +#define PB1_PIF_PWRDOWN_0 0x2100012 /* PCIE */ +#define PB1_PIF_PWRDOWN_1 0x2100013 /* PCIE */ + +#define PCIE_LC_CNTL 0x100100A0 /* PCIE */ +# define LC_L0S_INACTIVITY(x) ((x) << 8) +# define LC_L0S_INACTIVITY_MASK (0xf << 8) +# define LC_L0S_INACTIVITY_SHIFT 8 +# define LC_L1_INACTIVITY(x) ((x) << 12) +# define LC_L1_INACTIVITY_MASK (0xf << 12) +# define LC_L1_INACTIVITY_SHIFT 12 +# define LC_PMI_TO_L1_DIS (1 << 16) +# define LC_ASPM_TO_L1_DIS (1 << 24) + #define PCIE_LC_LINK_WIDTH_CNTL 0x100100A2 /* PCIE */ # define LC_LINK_WIDTH_SHIFT 0 # define LC_LINK_WIDTH_MASK 0x7 @@ -65,7 +124,12 @@ # define LC_DYN_LANES_PWR_STATE(x) ((x) << 21) # define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21) # define LC_DYN_LANES_PWR_STATE_SHIFT 21 - +#define PCIE_LC_N_FTS_CNTL 0x100100a3 /* PCIE */ +# define LC_XMIT_N_FTS(x) ((x) << 0) +# define LC_XMIT_N_FTS_MASK (0xff << 0) +# define LC_XMIT_N_FTS_SHIFT 0 +# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8) +# define LC_N_FTS_MASK (0xff << 24) #define PCIE_LC_SPEED_CNTL 0x100100A4 /* PCIE */ # define LC_GEN2_EN_STRAP (1 << 0) # define LC_GEN3_EN_STRAP (1 << 1) @@ -87,6 +151,12 @@ # define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20) # define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21) +#define PCIE_LC_CNTL2 0x100100B1 /* PCIE */ +# define LC_ALLOW_PDWN_IN_L1 (1 << 17) +# define LC_ALLOW_PDWN_IN_L23 (1 << 18) + +#define PCIE_LC_CNTL3 0x100100B5 /* PCIE */ +# define LC_GO_TO_RECOVERY (1 << 30) #define PCIE_LC_CNTL4 0x100100B6 /* PCIE */ # define LC_REDO_EQ (1 << 5) # define LC_SET_QUIESCE (1 << 13) -- cgit v1.2.3 From 866d83de0c9cc36a598252282bdedc158f50dcc2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Apr 2013 17:13:29 -0400 Subject: drm/radeon/cik: restructure rlc setup Restructure rlc setup to handle clock and power gating. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 84 +++++++++++++++++++++++++++---------------- drivers/gpu/drm/radeon/cikd.h | 1 + drivers/gpu/drm/radeon/si.c | 2 +- 3 files changed, 56 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 0da9d6715ef5..e92a9721ca25 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -60,6 +60,7 @@ extern bool evergreen_is_display_hung(struct radeon_device *rdev); extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern void si_rlc_fini(struct radeon_device *rdev); extern int si_rlc_init(struct radeon_device *rdev); +extern void si_rlc_reset(struct radeon_device *rdev); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); @@ -4728,31 +4729,34 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm * variety of functions, the most important of which is * the interrupt controller. */ -/** - * cik_rlc_stop - stop the RLC ME - * - * @rdev: radeon_device pointer - * - * Halt the RLC ME (MicroEngine) (CIK). - */ -static void cik_rlc_stop(struct radeon_device *rdev) +static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable) { - int i, j, k; - u32 mask, tmp; + u32 tmp = RREG32(CP_INT_CNTL_RING0); - tmp = RREG32(CP_INT_CNTL_RING0); - tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + if (enable) + tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + else + tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); WREG32(CP_INT_CNTL_RING0, tmp); +} - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); +static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) +{ + u32 tmp; - tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; - WREG32(RLC_CGCG_CGLS_CTRL, tmp); + tmp = RREG32(RLC_LB_CNTL); + if (enable) + tmp |= LOAD_BALANCE_ENABLE; + else + tmp &= ~LOAD_BALANCE_ENABLE; + WREG32(RLC_LB_CNTL, tmp); +} - WREG32(RLC_CNTL, 0); +static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) +{ + u32 i, j, k; + u32 mask; for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { @@ -4774,6 +4778,32 @@ static void cik_rlc_stop(struct radeon_device *rdev) } } +/** + * cik_rlc_stop - stop the RLC ME + * + * @rdev: radeon_device pointer + * + * Halt the RLC ME (MicroEngine) (CIK). + */ +static void cik_rlc_stop(struct radeon_device *rdev) +{ + u32 tmp; + + cik_enable_gui_idle_interrupt(rdev, false); + + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + + tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; + WREG32(RLC_CGCG_CGLS_CTRL, tmp); + + WREG32(RLC_CNTL, 0); + + cik_wait_for_rlc_serdes(rdev); +} + /** * cik_rlc_start - start the RLC ME * @@ -4783,13 +4813,9 @@ static void cik_rlc_stop(struct radeon_device *rdev) */ static void cik_rlc_start(struct radeon_device *rdev) { - u32 tmp; - WREG32(RLC_CNTL, RLC_ENABLE); - tmp = RREG32(CP_INT_CNTL_RING0); - tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); - WREG32(CP_INT_CNTL_RING0, tmp); + cik_enable_gui_idle_interrupt(rdev, true); udelay(50); } @@ -4827,12 +4853,7 @@ static int cik_rlc_resume(struct radeon_device *rdev) cik_rlc_stop(rdev); - WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC); - RREG32(GRBM_SOFT_RESET); - udelay(50); - WREG32(GRBM_SOFT_RESET, 0); - RREG32(GRBM_SOFT_RESET); - udelay(50); + si_rlc_reset(rdev); WREG32(RLC_LB_CNTR_INIT, 0); WREG32(RLC_LB_CNTR_MAX, 0x00008000); @@ -4851,6 +4872,9 @@ static int cik_rlc_resume(struct radeon_device *rdev) WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); WREG32(RLC_GPM_UCODE_ADDR, 0); + /* XXX - find out what chips support lbpw */ + cik_enable_lbpw(rdev, false); + /* XXX */ clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr); clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr; diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index d50593fd9cd5..4742f3db4aa6 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -905,6 +905,7 @@ #define RLC_LB_CNTR_MAX 0xC348 #define RLC_LB_CNTL 0xC364 +# define LOAD_BALANCE_ENABLE (1 << 0) #define RLC_LB_CNTR_INIT 0xC36C diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index daa8d2df8ec5..610adfc86bea 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5435,7 +5435,7 @@ int si_rlc_init(struct radeon_device *rdev) return 0; } -static void si_rlc_reset(struct radeon_device *rdev) +void si_rlc_reset(struct radeon_device *rdev) { u32 tmp = RREG32(GRBM_SOFT_RESET); -- cgit v1.2.3 From 1fd11777c2f0e6b6b37432b984bf40e3c6072f23 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 17 Apr 2013 17:53:50 -0400 Subject: drm/radeon: convert SI,CIK to use sumo_rlc functions and remove duplicate si_rlc functions. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 10 +- drivers/gpu/drm/radeon/clearstate_cayman.h | 2 +- drivers/gpu/drm/radeon/clearstate_evergreen.h | 2 +- drivers/gpu/drm/radeon/evergreen.c | 46 ++++--- drivers/gpu/drm/radeon/ni.c | 6 +- drivers/gpu/drm/radeon/radeon.h | 4 +- drivers/gpu/drm/radeon/si.c | 174 ++------------------------ 7 files changed, 49 insertions(+), 195 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e92a9721ca25..8389917af9a2 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -57,9 +57,9 @@ extern void r600_ih_ring_fini(struct radeon_device *rdev); extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save); extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save); extern bool evergreen_is_display_hung(struct radeon_device *rdev); +extern void sumo_rlc_fini(struct radeon_device *rdev); +extern int sumo_rlc_init(struct radeon_device *rdev); extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); -extern void si_rlc_fini(struct radeon_device *rdev); -extern int si_rlc_init(struct radeon_device *rdev); extern void si_rlc_reset(struct radeon_device *rdev); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); @@ -6019,7 +6019,7 @@ static int cik_startup(struct radeon_device *rdev) cik_gpu_init(rdev); /* allocate rlc buffers */ - r = si_rlc_init(rdev); + r = sumo_rlc_init(rdev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -6343,7 +6343,7 @@ int cik_init(struct radeon_device *rdev) cik_cp_fini(rdev); cik_sdma_fini(rdev); cik_irq_fini(rdev); - si_rlc_fini(rdev); + sumo_rlc_fini(rdev); cik_mec_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); @@ -6379,7 +6379,7 @@ void cik_fini(struct radeon_device *rdev) cik_cp_fini(rdev); cik_sdma_fini(rdev); cik_irq_fini(rdev); - si_rlc_fini(rdev); + sumo_rlc_fini(rdev); cik_mec_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); diff --git a/drivers/gpu/drm/radeon/clearstate_cayman.h b/drivers/gpu/drm/radeon/clearstate_cayman.h index c00339440c5e..aa908c55a513 100644 --- a/drivers/gpu/drm/radeon/clearstate_cayman.h +++ b/drivers/gpu/drm/radeon/clearstate_cayman.h @@ -1073,7 +1073,7 @@ static const struct cs_extent_def SECT_CTRLCONST_defs[] = {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 }, { 0, 0, 0 } }; -struct cs_section_def cayman_cs_data[] = { +static const struct cs_section_def cayman_cs_data[] = { { SECT_CONTEXT_defs, SECT_CONTEXT }, { SECT_CLEAR_defs, SECT_CLEAR }, { SECT_CTRLCONST_defs, SECT_CTRLCONST }, diff --git a/drivers/gpu/drm/radeon/clearstate_evergreen.h b/drivers/gpu/drm/radeon/clearstate_evergreen.h index 4791d856b7fd..63a1ffbb3ced 100644 --- a/drivers/gpu/drm/radeon/clearstate_evergreen.h +++ b/drivers/gpu/drm/radeon/clearstate_evergreen.h @@ -1072,7 +1072,7 @@ static const struct cs_extent_def SECT_CTRLCONST_defs[] = {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 }, { 0, 0, 0 } }; -struct cs_section_def evergreen_cs_data[] = { +static const struct cs_section_def evergreen_cs_data[] = { { SECT_CONTEXT_defs, SECT_CONTEXT }, { SECT_CLEAR_defs, SECT_CLEAR }, { SECT_CTRLCONST_defs, SECT_CTRLCONST }, diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 617840829114..6fc876a444d4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -47,7 +47,7 @@ static const u32 crtc_offsets[6] = #include "clearstate_evergreen.h" -static u32 sumo_rlc_save_restore_register_list[] = +static const u32 sumo_rlc_save_restore_register_list[] = { 0x98fc, 0x9830, @@ -131,7 +131,6 @@ static u32 sumo_rlc_save_restore_register_list[] = 0x9150, 0x802c, }; -static u32 sumo_rlc_save_restore_register_list_size = ARRAY_SIZE(sumo_rlc_save_restore_register_list); static void evergreen_gpu_init(struct radeon_device *rdev); void evergreen_fini(struct radeon_device *rdev); @@ -3898,12 +3897,12 @@ void sumo_rlc_fini(struct radeon_device *rdev) int sumo_rlc_init(struct radeon_device *rdev) { - u32 *src_ptr; + const u32 *src_ptr; volatile u32 *dst_ptr; u32 dws, data, i, j, k, reg_num; u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index; u64 reg_list_mc_addr; - struct cs_section_def *cs_data; + const struct cs_section_def *cs_data; int r; src_ptr = rdev->rlc.reg_list; @@ -3943,22 +3942,28 @@ int sumo_rlc_init(struct radeon_device *rdev) } /* write the sr buffer */ dst_ptr = rdev->rlc.sr_ptr; - /* format: - * dw0: (reg2 << 16) | reg1 - * dw1: reg1 save space - * dw2: reg2 save space - */ - for (i = 0; i < dws; i++) { - data = src_ptr[i] >> 2; - i++; - if (i < dws) - data |= (src_ptr[i] >> 2) << 16; - j = (((i - 1) * 3) / 2); - dst_ptr[j] = data; + if (rdev->family >= CHIP_TAHITI) { + /* SI */ + for (i = 0; i < dws; i++) + dst_ptr[i] = src_ptr[i]; + } else { + /* ON/LN/TN */ + /* format: + * dw0: (reg2 << 16) | reg1 + * dw1: reg1 save space + * dw2: reg2 save space + */ + for (i = 0; i < dws; i++) { + data = src_ptr[i] >> 2; + i++; + if (i < dws) + data |= (src_ptr[i] >> 2) << 16; + j = (((i - 1) * 3) / 2); + dst_ptr[j] = data; + } + j = ((i * 3) / 2); + dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER; } - j = ((i * 3) / 2); - dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER; - radeon_bo_kunmap(rdev->rlc.save_restore_obj); radeon_bo_unreserve(rdev->rlc.save_restore_obj); } @@ -5152,7 +5157,8 @@ static int evergreen_startup(struct radeon_device *rdev) /* allocate rlc buffers */ if (rdev->flags & RADEON_IS_IGP) { rdev->rlc.reg_list = sumo_rlc_save_restore_register_list; - rdev->rlc.reg_list_size = sumo_rlc_save_restore_register_list_size; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(sumo_rlc_save_restore_register_list); rdev->rlc.cs_data = evergreen_cs_data; r = sumo_rlc_init(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index acdd6039ef14..21f2eceff2c6 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -35,7 +35,7 @@ #include "radeon_ucode.h" #include "clearstate_cayman.h" -static u32 tn_rlc_save_restore_register_list[] = +static const u32 tn_rlc_save_restore_register_list[] = { 0x98fc, 0x98f0, @@ -160,7 +160,6 @@ static u32 tn_rlc_save_restore_register_list[] = 0x9830, 0x802c, }; -static u32 tn_rlc_save_restore_register_list_size = ARRAY_SIZE(tn_rlc_save_restore_register_list); extern bool evergreen_is_display_hung(struct radeon_device *rdev); extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev); @@ -2121,7 +2120,8 @@ static int cayman_startup(struct radeon_device *rdev) /* allocate rlc buffers */ if (rdev->flags & RADEON_IS_IGP) { rdev->rlc.reg_list = tn_rlc_save_restore_register_list; - rdev->rlc.reg_list_size = tn_rlc_save_restore_register_list_size; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(tn_rlc_save_restore_register_list); rdev->rlc.cs_data = cayman_cs_data; r = sumo_rlc_init(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 034782804846..b9706e83e827 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -854,13 +854,13 @@ struct radeon_rlc { struct radeon_bo *save_restore_obj; uint64_t save_restore_gpu_addr; volatile uint32_t *sr_ptr; - u32 *reg_list; + const u32 *reg_list; u32 reg_list_size; /* for clear state */ struct radeon_bo *clear_state_obj; uint64_t clear_state_gpu_addr; volatile uint32_t *cs_ptr; - struct cs_section_def *cs_data; + const struct cs_section_def *cs_data; }; int radeon_ib_get(struct radeon_device *rdev, int ring, diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 610adfc86bea..8b8963d4a732 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -68,6 +68,8 @@ MODULE_FIRMWARE("radeon/HAINAN_smc.bin"); static void si_pcie_gen3_enable(struct radeon_device *rdev); static void si_program_aspm(struct radeon_device *rdev); +extern void sumo_rlc_fini(struct radeon_device *rdev); +extern int sumo_rlc_init(struct radeon_device *rdev); extern int r600_ih_ring_alloc(struct radeon_device *rdev); extern void r600_ih_ring_fini(struct radeon_device *rdev); extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev); @@ -5275,166 +5277,6 @@ static void si_fini_pg(struct radeon_device *rdev) /* * RLC */ -void si_rlc_fini(struct radeon_device *rdev) -{ - int r; - - /* save restore block */ - if (rdev->rlc.save_restore_obj) { - r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); - if (unlikely(r != 0)) - dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r); - radeon_bo_unpin(rdev->rlc.save_restore_obj); - radeon_bo_unreserve(rdev->rlc.save_restore_obj); - - radeon_bo_unref(&rdev->rlc.save_restore_obj); - rdev->rlc.save_restore_obj = NULL; - } - - /* clear state block */ - if (rdev->rlc.clear_state_obj) { - r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); - if (unlikely(r != 0)) - dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r); - radeon_bo_unpin(rdev->rlc.clear_state_obj); - radeon_bo_unreserve(rdev->rlc.clear_state_obj); - - radeon_bo_unref(&rdev->rlc.clear_state_obj); - rdev->rlc.clear_state_obj = NULL; - } -} - -#define RLC_CLEAR_STATE_END_MARKER 0x00000001 - -int si_rlc_init(struct radeon_device *rdev) -{ - volatile u32 *dst_ptr; - u32 dws, data, i, j, k, reg_num; - u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index; - u64 reg_list_mc_addr; - const struct cs_section_def *cs_data = si_cs_data; - int r; - - /* save restore block */ - if (rdev->rlc.save_restore_obj == NULL) { - r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, NULL, - &rdev->rlc.save_restore_obj); - if (r) { - dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); - return r; - } - } - - r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); - if (unlikely(r != 0)) { - si_rlc_fini(rdev); - return r; - } - r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->rlc.save_restore_gpu_addr); - if (r) { - radeon_bo_unreserve(rdev->rlc.save_restore_obj); - dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r); - si_rlc_fini(rdev); - return r; - } - - if (rdev->family == CHIP_VERDE) { - r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr); - if (r) { - dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r); - si_rlc_fini(rdev); - return r; - } - /* write the sr buffer */ - dst_ptr = rdev->rlc.sr_ptr; - for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) { - dst_ptr[i] = verde_rlc_save_restore_register_list[i]; - } - radeon_bo_kunmap(rdev->rlc.save_restore_obj); - } - radeon_bo_unreserve(rdev->rlc.save_restore_obj); - - /* clear state block */ - reg_list_num = 0; - dws = 0; - for (i = 0; cs_data[i].section != NULL; i++) { - for (j = 0; cs_data[i].section[j].extent != NULL; j++) { - reg_list_num++; - dws += cs_data[i].section[j].reg_count; - } - } - reg_list_blk_index = (3 * reg_list_num + 2); - dws += reg_list_blk_index; - - if (rdev->rlc.clear_state_obj == NULL) { - r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); - if (r) { - dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); - si_rlc_fini(rdev); - return r; - } - } - r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - si_rlc_fini(rdev); - return r; - } - r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->rlc.clear_state_gpu_addr); - if (r) { - - radeon_bo_unreserve(rdev->rlc.clear_state_obj); - dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r); - si_rlc_fini(rdev); - return r; - } - r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr); - if (r) { - dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r); - si_rlc_fini(rdev); - return r; - } - /* set up the cs buffer */ - dst_ptr = rdev->rlc.cs_ptr; - reg_list_hdr_blk_index = 0; - reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4); - data = upper_32_bits(reg_list_mc_addr); - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - for (i = 0; cs_data[i].section != NULL; i++) { - for (j = 0; cs_data[i].section[j].extent != NULL; j++) { - reg_num = cs_data[i].section[j].reg_count; - data = reg_list_mc_addr & 0xffffffff; - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - - data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff; - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - - data = 0x08000000 | (reg_num * 4); - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - - for (k = 0; k < reg_num; k++) { - data = cs_data[i].section[j].extent[k]; - dst_ptr[reg_list_blk_index + k] = data; - } - reg_list_mc_addr += reg_num * 4; - reg_list_blk_index += reg_num; - } - } - dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER; - - radeon_bo_kunmap(rdev->rlc.clear_state_obj); - radeon_bo_unreserve(rdev->rlc.clear_state_obj); - - return 0; -} - void si_rlc_reset(struct radeon_device *rdev) { u32 tmp = RREG32(GRBM_SOFT_RESET); @@ -6449,7 +6291,13 @@ static int si_startup(struct radeon_device *rdev) si_gpu_init(rdev); /* allocate rlc buffers */ - r = si_rlc_init(rdev); + if (rdev->family == CHIP_VERDE) { + rdev->rlc.reg_list = verde_rlc_save_restore_register_list; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list); + } + rdev->rlc.cs_data = si_cs_data; + r = sumo_rlc_init(rdev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -6735,7 +6583,7 @@ int si_init(struct radeon_device *rdev) si_cp_fini(rdev); cayman_dma_fini(rdev); si_irq_fini(rdev); - si_rlc_fini(rdev); + sumo_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_vm_manager_fini(rdev); @@ -6761,7 +6609,7 @@ void si_fini(struct radeon_device *rdev) si_cp_fini(rdev); cayman_dma_fini(rdev); si_irq_fini(rdev); - si_rlc_fini(rdev); + sumo_rlc_fini(rdev); si_fini_cg(rdev); si_fini_pg(rdev); radeon_wb_fini(rdev); -- cgit v1.2.3 From 22c775ce80ed921fe9490f3cc2ca66dcda44f572 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Jul 2013 09:41:05 -0400 Subject: drm/radeon: implement clock and power gating for CIK (v3) Only the APUs support power gating. v2: disable cgcg for now v3: workaround hw issue in mgcg Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 1475 +++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/cikd.h | 95 +- drivers/gpu/drm/radeon/clearstate_ci.h | 944 ++++++++++++++++++++ drivers/gpu/drm/radeon/evergreen.c | 57 +- drivers/gpu/drm/radeon/radeon.h | 14 + drivers/gpu/drm/radeon/radeon_asic.c | 2 + drivers/gpu/drm/radeon/si.c | 2 +- 7 files changed, 2565 insertions(+), 24 deletions(-) create mode 100644 drivers/gpu/drm/radeon/clearstate_ci.h (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 8389917af9a2..a36e98c9a875 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -31,6 +31,7 @@ #include "atom.h" #include "cik_blit_shaders.h" #include "radeon_ucode.h" +#include "clearstate_ci.h" MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); @@ -61,9 +62,12 @@ extern void sumo_rlc_fini(struct radeon_device *rdev); extern int sumo_rlc_init(struct radeon_device *rdev); extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern void si_rlc_reset(struct radeon_device *rdev); +extern void si_init_uvd_internal_cg(struct radeon_device *rdev); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); +static void cik_init_pg(struct radeon_device *rdev); +static void cik_init_cg(struct radeon_device *rdev); /* * Indirect registers accessor @@ -86,6 +90,778 @@ void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) (void)RREG32(PCIE_DATA); } +static const u32 spectre_rlc_save_restore_register_list[] = +{ + (0x0e00 << 16) | (0xc12c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc140 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc150 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc15c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc168 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc170 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc178 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc204 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8228 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x829c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x869c >> 2), + 0x00000000, + (0x0600 << 16) | (0x98f4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x98f8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc260 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x90e8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c000 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c00c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c1c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0xae00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8900 >> 2), + 0x00000000, + 0x3, + (0x0e00 << 16) | (0xc130 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc134 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc1fc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc208 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc264 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc268 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc26c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc270 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc274 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc278 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc27c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc280 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc284 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc288 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc28c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc290 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc294 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc298 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc29c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2ac >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x301d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30238 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30250 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30254 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30258 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3025c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc900 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc904 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc908 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0xae00 << 16) | (0xc90c >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc910 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc99c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9834 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f00 >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f00 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f04 >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f04 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f08 >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f08 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f0c >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f0c >> 2), + 0x00000000, + (0x0600 << 16) | (0x9b7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bf0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bcc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8b24 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30a04 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a10 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a14 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a2c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc704 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc708 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc768 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc770 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc774 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc778 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc77c >> 2), + 0x00000000, + (0x0400 << 16) | (0xc780 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc784 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc788 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc78c >> 2), + 0x00000000, + (0x0400 << 16) | (0xc798 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc79c >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7a0 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7a4 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7a8 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7ac >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7b0 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9100 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c010 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92a8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92ac >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92c4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92c8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92cc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c04 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c38 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c3c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xae00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9604 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac08 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac0c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac58 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac68 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac6c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac70 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac74 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac78 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac80 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac84 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac88 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac8c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x970c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9714 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9718 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x971c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x4e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x5e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x6e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x7e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x8e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x9e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0xae00 << 16) | (0x31068 >> 2), + 0x00000000, + (0xbe00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88bc >> 2), + 0x00000000, + (0x0400 << 16) | (0x89c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8980 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30938 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3093c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30940 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30904 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c210 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c214 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c218 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8904 >> 2), + 0x00000000, + 0x5, + (0x0e00 << 16) | (0x8c28 >> 2), + (0x0e00 << 16) | (0x8c2c >> 2), + (0x0e00 << 16) | (0x8c30 >> 2), + (0x0e00 << 16) | (0x8c34 >> 2), + (0x0e00 << 16) | (0x9600 >> 2), +}; + +static const u32 kalindi_rlc_save_restore_register_list[] = +{ + (0x0e00 << 16) | (0xc12c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc140 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc150 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc15c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc168 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc170 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc204 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8228 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x829c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x869c >> 2), + 0x00000000, + (0x0600 << 16) | (0x98f4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x98f8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc260 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x90e8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c000 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c00c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c1c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8900 >> 2), + 0x00000000, + 0x3, + (0x0e00 << 16) | (0xc130 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc134 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc1fc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc208 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc264 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc268 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc26c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc270 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc274 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc28c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc290 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc294 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc298 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2ac >> 2), + 0x00000000, + (0x0e00 << 16) | (0x301d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30238 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30250 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30254 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30258 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3025c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc99c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9834 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f00 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f04 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f08 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f0c >> 2), + 0x00000000, + (0x0600 << 16) | (0x9b7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bf0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bcc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8b24 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30a04 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a10 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a14 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a2c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc704 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc708 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc768 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc770 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc774 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc798 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc79c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9100 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c010 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c04 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c38 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c3c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xae00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9604 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac08 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac0c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac58 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac68 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac6c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac70 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac74 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac78 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac80 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac84 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac88 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac8c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x970c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9714 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9718 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x971c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x4e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x5e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x6e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x7e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88bc >> 2), + 0x00000000, + (0x0400 << 16) | (0x89c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8980 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30938 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3093c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30940 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30904 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3e1fc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c210 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c214 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c218 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8904 >> 2), + 0x00000000, + 0x5, + (0x0e00 << 16) | (0x8c28 >> 2), + (0x0e00 << 16) | (0x8c2c >> 2), + (0x0e00 << 16) | (0x8c30 >> 2), + (0x0e00 << 16) | (0x8c34 >> 2), + (0x0e00 << 16) | (0x9600 >> 2), +}; + static const u32 bonaire_golden_spm_registers[] = { 0x30800, 0xe0ffffff, 0xe0000000 @@ -4778,6 +5554,39 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) } } +static void cik_update_rlc(struct radeon_device *rdev, u32 rlc) +{ + u32 tmp; + + tmp = RREG32(RLC_CNTL); + if (tmp != rlc) + WREG32(RLC_CNTL, rlc); +} + +static u32 cik_halt_rlc(struct radeon_device *rdev) +{ + u32 data, orig; + + orig = data = RREG32(RLC_CNTL); + + if (data & RLC_ENABLE) { + u32 i; + + data &= ~RLC_ENABLE; + WREG32(RLC_CNTL, data); + + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) + break; + udelay(1); + } + + cik_wait_for_rlc_serdes(rdev); + } + + return orig; +} + /** * cik_rlc_stop - stop the RLC ME * @@ -4787,20 +5596,10 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) */ static void cik_rlc_stop(struct radeon_device *rdev) { - u32 tmp; + WREG32(RLC_CNTL, 0); cik_enable_gui_idle_interrupt(rdev, false); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - - tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; - WREG32(RLC_CGCG_CGLS_CTRL, tmp); - - WREG32(RLC_CNTL, 0); - cik_wait_for_rlc_serdes(rdev); } @@ -4831,8 +5630,7 @@ static void cik_rlc_start(struct radeon_device *rdev) */ static int cik_rlc_resume(struct radeon_device *rdev) { - u32 i, size; - u32 clear_state_info[3]; + u32 i, size, tmp; const __be32 *fw_data; if (!rdev->rlc_fw) @@ -4853,8 +5651,16 @@ static int cik_rlc_resume(struct radeon_device *rdev) cik_rlc_stop(rdev); + /* disable CG */ + tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; + WREG32(RLC_CGCG_CGLS_CTRL, tmp); + si_rlc_reset(rdev); + cik_init_pg(rdev); + + cik_init_cg(rdev); + WREG32(RLC_LB_CNTR_INIT, 0); WREG32(RLC_LB_CNTR_MAX, 0x00008000); @@ -4875,20 +5681,634 @@ static int cik_rlc_resume(struct radeon_device *rdev) /* XXX - find out what chips support lbpw */ cik_enable_lbpw(rdev, false); - /* XXX */ - clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr); - clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr; - clear_state_info[2] = 0;//cik_default_size; - WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d); - for (i = 0; i < 3; i++) - WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]); - WREG32(RLC_DRIVER_DMA_STATUS, 0); + if (rdev->family == CHIP_BONAIRE) + WREG32(RLC_DRIVER_DMA_STATUS, 0); cik_rlc_start(rdev); return 0; } +static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) +{ + u32 data, orig, tmp, tmp2; + + orig = data = RREG32(RLC_CGCG_CGLS_CTRL); + + cik_enable_gui_idle_interrupt(rdev, enable); + + if (enable) { + tmp = cik_halt_rlc(rdev); + + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); + WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); + tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; + WREG32(RLC_SERDES_WR_CTRL, tmp2); + + cik_update_rlc(rdev, tmp); + + data |= CGCG_EN | CGLS_EN; + } else { + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + + data &= ~(CGCG_EN | CGLS_EN); + } + + if (orig != data) + WREG32(RLC_CGCG_CGLS_CTRL, data); + +} + +static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) +{ + u32 data, orig, tmp = 0; + + if (enable) { + orig = data = RREG32(CP_MEM_SLP_CNTL); + data |= CP_MEM_LS_EN; + if (orig != data) + WREG32(CP_MEM_SLP_CNTL, data); + + orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data &= 0xfffffffd; + if (orig != data) + WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + + tmp = cik_halt_rlc(rdev); + + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); + WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); + data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; + WREG32(RLC_SERDES_WR_CTRL, data); + + cik_update_rlc(rdev, tmp); + + orig = data = RREG32(CGTS_SM_CTRL_REG); + data &= ~SM_MODE_MASK; + data |= SM_MODE(0x2); + data |= SM_MODE_ENABLE; + data &= ~CGTS_OVERRIDE; + data &= ~CGTS_LS_OVERRIDE; + data &= ~ON_MONITOR_ADD_MASK; + data |= ON_MONITOR_ADD_EN; + data |= ON_MONITOR_ADD(0x96); + if (orig != data) + WREG32(CGTS_SM_CTRL_REG, data); + } else { + orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data |= 0x00000002; + if (orig != data) + WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + + data = RREG32(RLC_MEM_SLP_CNTL); + if (data & RLC_MEM_LS_EN) { + data &= ~RLC_MEM_LS_EN; + WREG32(RLC_MEM_SLP_CNTL, data); + } + + data = RREG32(CP_MEM_SLP_CNTL); + if (data & CP_MEM_LS_EN) { + data &= ~CP_MEM_LS_EN; + WREG32(CP_MEM_SLP_CNTL, data); + } + + orig = data = RREG32(CGTS_SM_CTRL_REG); + data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE; + if (orig != data) + WREG32(CGTS_SM_CTRL_REG, data); + + tmp = cik_halt_rlc(rdev); + + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); + WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); + data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; + WREG32(RLC_SERDES_WR_CTRL, data); + + cik_update_rlc(rdev, tmp); + } +} + +static const u32 mc_cg_registers[] = +{ + MC_HUB_MISC_HUB_CG, + MC_HUB_MISC_SIP_CG, + MC_HUB_MISC_VM_CG, + MC_XPB_CLK_GAT, + ATC_MISC_CG, + MC_CITF_MISC_WR_CG, + MC_CITF_MISC_RD_CG, + MC_CITF_MISC_VM_CG, + VM_L2_CG, +}; + +static void cik_enable_mc_ls(struct radeon_device *rdev, + bool enable) +{ + int i; + u32 orig, data; + + for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { + orig = data = RREG32(mc_cg_registers[i]); + if (enable) + data |= MC_LS_ENABLE; + else + data &= ~MC_LS_ENABLE; + if (data != orig) + WREG32(mc_cg_registers[i], data); + } +} + +static void cik_enable_mc_mgcg(struct radeon_device *rdev, + bool enable) +{ + int i; + u32 orig, data; + + for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { + orig = data = RREG32(mc_cg_registers[i]); + if (enable) + data |= MC_CG_ENABLE; + else + data &= ~MC_CG_ENABLE; + if (data != orig) + WREG32(mc_cg_registers[i], data); + } +} + +static void cik_enable_sdma_mgcg(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; + + if (enable) { + WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); + WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); + } else { + orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET); + data |= 0xff000000; + if (data != orig) + WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data); + + orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET); + data |= 0xff000000; + if (data != orig) + WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data); + } +} + +static void cik_enable_sdma_mgls(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; + + if (enable) { + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); + data |= 0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); + + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); + data |= 0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); + } else { + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); + data &= ~0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); + + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); + data &= ~0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); + } +} + +static void cik_enable_uvd_mgcg(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; + + if (enable) { + data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); + data = 0xfff; + WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); + + orig = data = RREG32(UVD_CGC_CTRL); + data |= DCM; + if (orig != data) + WREG32(UVD_CGC_CTRL, data); + } else { + data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); + data &= ~0xfff; + WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); + + orig = data = RREG32(UVD_CGC_CTRL); + data &= ~DCM; + if (orig != data) + WREG32(UVD_CGC_CTRL, data); + } +} + +static void cik_enable_hdp_mgcg(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; + + orig = data = RREG32(HDP_HOST_PATH_CNTL); + + if (enable) + data &= ~CLOCK_GATING_DIS; + else + data |= CLOCK_GATING_DIS; + + if (orig != data) + WREG32(HDP_HOST_PATH_CNTL, data); +} + +static void cik_enable_hdp_ls(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; + + orig = data = RREG32(HDP_MEM_POWER_LS); + + if (enable) + data |= HDP_LS_ENABLE; + else + data &= ~HDP_LS_ENABLE; + + if (orig != data) + WREG32(HDP_MEM_POWER_LS, data); +} + +void cik_update_cg(struct radeon_device *rdev, + u32 block, bool enable) +{ + if (block & RADEON_CG_BLOCK_GFX) { + /* order matters! */ + if (enable) { + cik_enable_mgcg(rdev, true); + cik_enable_cgcg(rdev, true); + } else { + cik_enable_cgcg(rdev, false); + cik_enable_mgcg(rdev, false); + } + } + + if (block & RADEON_CG_BLOCK_MC) { + if (!(rdev->flags & RADEON_IS_IGP)) { + cik_enable_mc_mgcg(rdev, enable); + cik_enable_mc_ls(rdev, enable); + } + } + + if (block & RADEON_CG_BLOCK_SDMA) { + cik_enable_sdma_mgcg(rdev, enable); + cik_enable_sdma_mgls(rdev, enable); + } + + if (block & RADEON_CG_BLOCK_UVD) { + if (rdev->has_uvd) + cik_enable_uvd_mgcg(rdev, enable); + } + + if (block & RADEON_CG_BLOCK_HDP) { + cik_enable_hdp_mgcg(rdev, enable); + cik_enable_hdp_ls(rdev, enable); + } +} + +static void cik_init_cg(struct radeon_device *rdev) +{ + + cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */ + + if (rdev->has_uvd) + si_init_uvd_internal_cg(rdev); + + cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), true); +} + +static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig; + + orig = data = RREG32(RLC_PG_CNTL); + if (enable) + data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; + else + data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} + +static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig; + + orig = data = RREG32(RLC_PG_CNTL); + if (enable) + data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; + else + data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} + +static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) +{ + u32 data, orig; + + orig = data = RREG32(RLC_PG_CNTL); + if (enable) + data &= ~DISABLE_CP_PG; + else + data |= DISABLE_CP_PG; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} + +static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) +{ + u32 data, orig; + + orig = data = RREG32(RLC_PG_CNTL); + if (enable) + data &= ~DISABLE_GDS_PG; + else + data |= DISABLE_GDS_PG; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} + +#define CP_ME_TABLE_SIZE 96 +#define CP_ME_TABLE_OFFSET 2048 +#define CP_MEC_TABLE_OFFSET 4096 + +void cik_init_cp_pg_table(struct radeon_device *rdev) +{ + const __be32 *fw_data; + volatile u32 *dst_ptr; + int me, i, max_me = 4; + u32 bo_offset = 0; + u32 table_offset; + + if (rdev->family == CHIP_KAVERI) + max_me = 5; + + if (rdev->rlc.cp_table_ptr == NULL) + return; + + /* write the cp table buffer */ + dst_ptr = rdev->rlc.cp_table_ptr; + for (me = 0; me < max_me; me++) { + if (me == 0) { + fw_data = (const __be32 *)rdev->ce_fw->data; + table_offset = CP_ME_TABLE_OFFSET; + } else if (me == 1) { + fw_data = (const __be32 *)rdev->pfp_fw->data; + table_offset = CP_ME_TABLE_OFFSET; + } else if (me == 2) { + fw_data = (const __be32 *)rdev->me_fw->data; + table_offset = CP_ME_TABLE_OFFSET; + } else { + fw_data = (const __be32 *)rdev->mec_fw->data; + table_offset = CP_MEC_TABLE_OFFSET; + } + + for (i = 0; i < CP_ME_TABLE_SIZE; i ++) { + dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]); + } + bo_offset += CP_ME_TABLE_SIZE; + } +} + +static void cik_enable_gfx_cgpg(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig; + + if (enable) { + orig = data = RREG32(RLC_PG_CNTL); + data |= GFX_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); + + orig = data = RREG32(RLC_AUTO_PG_CTRL); + data |= AUTO_PG_EN; + if (orig != data) + WREG32(RLC_AUTO_PG_CTRL, data); + } else { + orig = data = RREG32(RLC_PG_CNTL); + data &= ~GFX_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); + + orig = data = RREG32(RLC_AUTO_PG_CTRL); + data &= ~AUTO_PG_EN; + if (orig != data) + WREG32(RLC_AUTO_PG_CTRL, data); + + data = RREG32(DB_RENDER_CONTROL); + } +} + +static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) +{ + u32 mask = 0, tmp, tmp1; + int i; + + cik_select_se_sh(rdev, se, sh); + tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); + tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + + tmp &= 0xffff0000; + + tmp |= tmp1; + tmp >>= 16; + + for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) { + mask <<= 1; + mask |= 1; + } + + return (~tmp) & mask; +} + +static void cik_init_ao_cu_mask(struct radeon_device *rdev) +{ + u32 i, j, k, active_cu_number = 0; + u32 mask, counter, cu_bitmap; + u32 tmp = 0; + + for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { + for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { + mask = 1; + cu_bitmap = 0; + counter = 0; + for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) { + if (cik_get_cu_active_bitmap(rdev, i, j) & mask) { + if (counter < 2) + cu_bitmap |= mask; + counter ++; + } + mask <<= 1; + } + + active_cu_number += counter; + tmp |= (cu_bitmap << (i * 16 + j * 8)); + } + } + + WREG32(RLC_PG_AO_CU_MASK, tmp); + + tmp = RREG32(RLC_MAX_PG_CU); + tmp &= ~MAX_PU_CU_MASK; + tmp |= MAX_PU_CU(active_cu_number); + WREG32(RLC_MAX_PG_CU, tmp); +} + +static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig; + + orig = data = RREG32(RLC_PG_CNTL); + if (enable) + data |= STATIC_PER_CU_PG_ENABLE; + else + data &= ~STATIC_PER_CU_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} + +static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig; + + orig = data = RREG32(RLC_PG_CNTL); + if (enable) + data |= DYN_PER_CU_PG_ENABLE; + else + data &= ~DYN_PER_CU_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} + +#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 +#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D + +static void cik_init_gfx_cgpg(struct radeon_device *rdev) +{ + u32 data, orig; + u32 i; + + if (rdev->rlc.cs_data) { + WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); + WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); + WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr); + WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); + } else { + WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); + for (i = 0; i < 3; i++) + WREG32(RLC_GPM_SCRATCH_DATA, 0); + } + if (rdev->rlc.reg_list) { + WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); + for (i = 0; i < rdev->rlc.reg_list_size; i++) + WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); + } + + orig = data = RREG32(RLC_PG_CNTL); + data |= GFX_PG_SRC; + if (orig != data) + WREG32(RLC_PG_CNTL, data); + + WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); + WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); + + data = RREG32(CP_RB_WPTR_POLL_CNTL); + data &= ~IDLE_POLL_COUNT_MASK; + data |= IDLE_POLL_COUNT(0x60); + WREG32(CP_RB_WPTR_POLL_CNTL, data); + + data = 0x10101010; + WREG32(RLC_PG_DELAY, data); + + data = RREG32(RLC_PG_DELAY_2); + data &= ~0xff; + data |= 0x3; + WREG32(RLC_PG_DELAY_2, data); + + data = RREG32(RLC_AUTO_PG_CTRL); + data &= ~GRBM_REG_SGIT_MASK; + data |= GRBM_REG_SGIT(0x700); + WREG32(RLC_AUTO_PG_CTRL, data); + +} + +static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) +{ + bool has_pg = false; + bool has_dyn_mgpg = false; + bool has_static_mgpg = false; + + /* only APUs have PG */ + if (rdev->flags & RADEON_IS_IGP) { + has_pg = true; + has_static_mgpg = true; + if (rdev->family == CHIP_KAVERI) + has_dyn_mgpg = true; + } + + if (has_pg) { + cik_enable_gfx_cgpg(rdev, enable); + if (enable) { + cik_enable_gfx_static_mgpg(rdev, has_static_mgpg); + cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg); + } else { + cik_enable_gfx_static_mgpg(rdev, false); + cik_enable_gfx_dynamic_mgpg(rdev, false); + } + } + +} + +void cik_init_pg(struct radeon_device *rdev) +{ + bool has_pg = false; + + /* only APUs have PG */ + if (rdev->flags & RADEON_IS_IGP) { + /* XXX disable this for now */ + /* has_pg = true; */ + } + + if (has_pg) { + cik_enable_sck_slowdown_on_pu(rdev, true); + cik_enable_sck_slowdown_on_pd(rdev, true); + cik_init_gfx_cgpg(rdev); + cik_enable_cp_pg(rdev, true); + cik_enable_gds_pg(rdev, true); + cik_init_ao_cu_mask(rdev); + cik_update_gfx_pg(rdev, true); + } +} + /* * Interrupts * Starting with r6xx, interrupts are handled via a ring buffer. @@ -6019,6 +7439,19 @@ static int cik_startup(struct radeon_device *rdev) cik_gpu_init(rdev); /* allocate rlc buffers */ + if (rdev->flags & RADEON_IS_IGP) { + if (rdev->family == CHIP_KAVERI) { + rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); + } else { + rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); + } + } + rdev->rlc.cs_data = ci_cs_data; + rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; r = sumo_rlc_init(rdev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 4742f3db4aa6..63955abb1e11 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -299,6 +299,10 @@ #define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C #define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x1580 +#define VM_L2_CG 0x15c0 +#define MC_CG_ENABLE (1 << 18) +#define MC_LS_ENABLE (1 << 19) + #define MC_SHARED_CHMAP 0x2004 #define NOOFCHAN_SHIFT 12 #define NOOFCHAN_MASK 0x0000f000 @@ -328,6 +332,17 @@ #define MC_SHARED_BLACKOUT_CNTL 0x20ac +#define MC_HUB_MISC_HUB_CG 0x20b8 +#define MC_HUB_MISC_VM_CG 0x20bc + +#define MC_HUB_MISC_SIP_CG 0x20c0 + +#define MC_XPB_CLK_GAT 0x2478 + +#define MC_CITF_MISC_RD_CG 0x2648 +#define MC_CITF_MISC_WR_CG 0x264c +#define MC_CITF_MISC_VM_CG 0x2650 + #define MC_ARB_RAMCFG 0x2760 #define NOOFBANK_SHIFT 0 #define NOOFBANK_MASK 0x00000003 @@ -357,6 +372,7 @@ #define MC_SEQ_IO_DEBUG_DATA 0x2a48 #define HDP_HOST_PATH_CNTL 0x2C00 +#define CLOCK_GATING_DIS (1 << 23) #define HDP_NONSURFACE_BASE 0x2C04 #define HDP_NONSURFACE_INFO 0x2C08 #define HDP_NONSURFACE_SIZE 0x2C0C @@ -364,6 +380,10 @@ #define HDP_ADDR_CONFIG 0x2F48 #define HDP_MISC_CNTL 0x2F4C #define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) +#define HDP_MEM_POWER_LS 0x2F50 +#define HDP_LS_ENABLE (1 << 0) + +#define ATC_MISC_CG 0x3350 #define IH_RB_CNTL 0x3e00 # define IH_RB_ENABLE (1 << 0) @@ -631,6 +651,9 @@ #define CP_RB0_RPTR 0x8700 #define CP_RB_WPTR_DELAY 0x8704 +#define CP_RB_WPTR_POLL_CNTL 0x8708 +#define IDLE_POLL_COUNT(x) ((x) << 16) +#define IDLE_POLL_COUNT_MASK (0xffff << 16) #define CP_MEQ_THRESHOLDS 0x8764 #define MEQ1_START(x) ((x) << 0) @@ -857,6 +880,9 @@ # define CP_RINGID1_INT_STAT (1 << 30) # define CP_RINGID0_INT_STAT (1 << 31) +#define CP_MEM_SLP_CNTL 0xC1E4 +# define CP_MEM_LS_EN (1 << 0) + #define CP_CPF_DEBUG 0xC200 #define CP_PQ_WPTR_POLL_CNTL 0xC20C @@ -902,6 +928,9 @@ #define RLC_MC_CNTL 0xC30C +#define RLC_MEM_SLP_CNTL 0xC318 +# define RLC_MEM_LS_EN (1 << 0) + #define RLC_LB_CNTR_MAX 0xC348 #define RLC_LB_CNTL 0xC364 @@ -910,7 +939,9 @@ #define RLC_LB_CNTR_INIT 0xC36C #define RLC_SAVE_AND_RESTORE_BASE 0xC374 -#define RLC_DRIVER_DMA_STATUS 0xC378 +#define RLC_DRIVER_DMA_STATUS 0xC378 /* dGPU */ +#define RLC_CP_TABLE_RESTORE 0xC378 /* APU */ +#define RLC_PG_DELAY_2 0xC37C #define RLC_GPM_UCODE_ADDR 0xC388 #define RLC_GPM_UCODE_DATA 0xC38C @@ -919,12 +950,50 @@ #define RLC_CAPTURE_GPU_CLOCK_COUNT 0xC398 #define RLC_UCODE_CNTL 0xC39C +#define RLC_GPM_STAT 0xC400 +# define RLC_GPM_BUSY (1 << 0) + +#define RLC_PG_CNTL 0xC40C +# define GFX_PG_ENABLE (1 << 0) +# define GFX_PG_SRC (1 << 1) +# define DYN_PER_CU_PG_ENABLE (1 << 2) +# define STATIC_PER_CU_PG_ENABLE (1 << 3) +# define DISABLE_GDS_PG (1 << 13) +# define DISABLE_CP_PG (1 << 15) +# define SMU_CLK_SLOWDOWN_ON_PU_ENABLE (1 << 17) +# define SMU_CLK_SLOWDOWN_ON_PD_ENABLE (1 << 18) + +#define RLC_CGTT_MGCG_OVERRIDE 0xC420 #define RLC_CGCG_CGLS_CTRL 0xC424 +# define CGCG_EN (1 << 0) +# define CGLS_EN (1 << 1) + +#define RLC_PG_DELAY 0xC434 #define RLC_LB_INIT_CU_MASK 0xC43C #define RLC_LB_PARAMS 0xC444 +#define RLC_PG_AO_CU_MASK 0xC44C + +#define RLC_MAX_PG_CU 0xC450 +# define MAX_PU_CU(x) ((x) << 0) +# define MAX_PU_CU_MASK (0xff << 0) +#define RLC_AUTO_PG_CTRL 0xC454 +# define AUTO_PG_EN (1 << 0) +# define GRBM_REG_SGIT(x) ((x) << 3) +# define GRBM_REG_SGIT_MASK (0xffff << 3) + +#define RLC_SERDES_WR_CU_MASTER_MASK 0xC474 +#define RLC_SERDES_WR_NONCU_MASTER_MASK 0xC478 +#define RLC_SERDES_WR_CTRL 0xC47C +#define BPM_ADDR(x) ((x) << 0) +#define BPM_ADDR_MASK (0xff << 0) +#define CGLS_ENABLE (1 << 16) +#define CGCG_OVERRIDE_0 (1 << 20) +#define MGCG_OVERRIDE_0 (1 << 22) +#define MGCG_OVERRIDE_1 (1 << 23) + #define RLC_SERDES_CU_MASTER_BUSY 0xC484 #define RLC_SERDES_NONCU_MASTER_BUSY 0xC488 # define SE_MASTER_BUSY_MASK 0x0000ffff @@ -979,6 +1048,8 @@ #define MQD_VMID(x) ((x) << 0) #define MQD_VMID_MASK (0xf << 0) +#define DB_RENDER_CONTROL 0x28000 + #define PA_SC_RASTER_CONFIG 0x28350 # define RASTER_CONFIG_RB_MAP_0 0 # define RASTER_CONFIG_RB_MAP_1 1 @@ -1072,6 +1143,16 @@ #define CP_PERFMON_CNTL 0x36020 +#define CGTS_SM_CTRL_REG 0x3c000 +#define SM_MODE(x) ((x) << 17) +#define SM_MODE_MASK (0x7 << 17) +#define SM_MODE_ENABLE (1 << 20) +#define CGTS_OVERRIDE (1 << 21) +#define CGTS_LS_OVERRIDE (1 << 22) +#define ON_MONITOR_ADD_EN (1 << 23) +#define ON_MONITOR_ADD(x) ((x) << 24) +#define ON_MONITOR_ADD_MASK (0xff << 24) + #define CGTS_TCC_DISABLE 0x3c00c #define CGTS_USER_TCC_DISABLE 0x3c010 #define TCC_DISABLE_MASK 0xFFFF0000 @@ -1304,6 +1385,8 @@ #define SDMA0_UCODE_ADDR 0xD000 #define SDMA0_UCODE_DATA 0xD004 +#define SDMA0_POWER_CNTL 0xD008 +#define SDMA0_CLK_CTRL 0xD00C #define SDMA0_CNTL 0xD010 # define TRAP_ENABLE (1 << 0) @@ -1428,6 +1511,13 @@ #define UVD_RBC_RB_RPTR 0xf690 #define UVD_RBC_RB_WPTR 0xf694 +#define UVD_CGC_CTRL 0xF4B0 +# define DCM (1 << 0) +# define CG_DT(x) ((x) << 2) +# define CG_DT_MASK (0xf << 2) +# define CLK_OD(x) ((x) << 6) +# define CLK_OD_MASK (0x1f << 6) + /* UVD clocks */ #define CG_DCLK_CNTL 0xC050009C @@ -1438,4 +1528,7 @@ #define CG_VCLK_CNTL 0xC05000A4 #define CG_VCLK_STATUS 0xC05000A8 +/* UVD CTX indirect */ +#define UVD_CGC_MEM_CTRL 0xC0 + #endif diff --git a/drivers/gpu/drm/radeon/clearstate_ci.h b/drivers/gpu/drm/radeon/clearstate_ci.h new file mode 100644 index 000000000000..c3982f9475fb --- /dev/null +++ b/drivers/gpu/drm/radeon/clearstate_ci.h @@ -0,0 +1,944 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +static const unsigned int ci_SECT_CONTEXT_def_1[] = +{ + 0x00000000, // DB_RENDER_CONTROL + 0x00000000, // DB_COUNT_CONTROL + 0x00000000, // DB_DEPTH_VIEW + 0x00000000, // DB_RENDER_OVERRIDE + 0x00000000, // DB_RENDER_OVERRIDE2 + 0x00000000, // DB_HTILE_DATA_BASE + 0, // HOLE + 0, // HOLE + 0x00000000, // DB_DEPTH_BOUNDS_MIN + 0x00000000, // DB_DEPTH_BOUNDS_MAX + 0x00000000, // DB_STENCIL_CLEAR + 0x00000000, // DB_DEPTH_CLEAR + 0x00000000, // PA_SC_SCREEN_SCISSOR_TL + 0x40004000, // PA_SC_SCREEN_SCISSOR_BR + 0, // HOLE + 0x00000000, // DB_DEPTH_INFO + 0x00000000, // DB_Z_INFO + 0x00000000, // DB_STENCIL_INFO + 0x00000000, // DB_Z_READ_BASE + 0x00000000, // DB_STENCIL_READ_BASE + 0x00000000, // DB_Z_WRITE_BASE + 0x00000000, // DB_STENCIL_WRITE_BASE + 0x00000000, // DB_DEPTH_SIZE + 0x00000000, // DB_DEPTH_SLICE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // TA_BC_BASE_ADDR + 0x00000000, // TA_BC_BASE_ADDR_HI + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // COHER_DEST_BASE_HI_0 + 0x00000000, // COHER_DEST_BASE_HI_1 + 0x00000000, // COHER_DEST_BASE_HI_2 + 0x00000000, // COHER_DEST_BASE_HI_3 + 0x00000000, // COHER_DEST_BASE_2 + 0x00000000, // COHER_DEST_BASE_3 + 0x00000000, // PA_SC_WINDOW_OFFSET + 0x80000000, // PA_SC_WINDOW_SCISSOR_TL + 0x40004000, // PA_SC_WINDOW_SCISSOR_BR + 0x0000ffff, // PA_SC_CLIPRECT_RULE + 0x00000000, // PA_SC_CLIPRECT_0_TL + 0x40004000, // PA_SC_CLIPRECT_0_BR + 0x00000000, // PA_SC_CLIPRECT_1_TL + 0x40004000, // PA_SC_CLIPRECT_1_BR + 0x00000000, // PA_SC_CLIPRECT_2_TL + 0x40004000, // PA_SC_CLIPRECT_2_BR + 0x00000000, // PA_SC_CLIPRECT_3_TL + 0x40004000, // PA_SC_CLIPRECT_3_BR + 0xaa99aaaa, // PA_SC_EDGERULE + 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET + 0xffffffff, // CB_TARGET_MASK + 0xffffffff, // CB_SHADER_MASK + 0x80000000, // PA_SC_GENERIC_SCISSOR_TL + 0x40004000, // PA_SC_GENERIC_SCISSOR_BR + 0x00000000, // COHER_DEST_BASE_0 + 0x00000000, // COHER_DEST_BASE_1 + 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR + 0x00000000, // PA_SC_VPORT_ZMIN_0 + 0x3f800000, // PA_SC_VPORT_ZMAX_0 + 0x00000000, // PA_SC_VPORT_ZMIN_1 + 0x3f800000, // PA_SC_VPORT_ZMAX_1 + 0x00000000, // PA_SC_VPORT_ZMIN_2 + 0x3f800000, // PA_SC_VPORT_ZMAX_2 + 0x00000000, // PA_SC_VPORT_ZMIN_3 + 0x3f800000, // PA_SC_VPORT_ZMAX_3 + 0x00000000, // PA_SC_VPORT_ZMIN_4 + 0x3f800000, // PA_SC_VPORT_ZMAX_4 + 0x00000000, // PA_SC_VPORT_ZMIN_5 + 0x3f800000, // PA_SC_VPORT_ZMAX_5 + 0x00000000, // PA_SC_VPORT_ZMIN_6 + 0x3f800000, // PA_SC_VPORT_ZMAX_6 + 0x00000000, // PA_SC_VPORT_ZMIN_7 + 0x3f800000, // PA_SC_VPORT_ZMAX_7 + 0x00000000, // PA_SC_VPORT_ZMIN_8 + 0x3f800000, // PA_SC_VPORT_ZMAX_8 + 0x00000000, // PA_SC_VPORT_ZMIN_9 + 0x3f800000, // PA_SC_VPORT_ZMAX_9 + 0x00000000, // PA_SC_VPORT_ZMIN_10 + 0x3f800000, // PA_SC_VPORT_ZMAX_10 + 0x00000000, // PA_SC_VPORT_ZMIN_11 + 0x3f800000, // PA_SC_VPORT_ZMAX_11 + 0x00000000, // PA_SC_VPORT_ZMIN_12 + 0x3f800000, // PA_SC_VPORT_ZMAX_12 + 0x00000000, // PA_SC_VPORT_ZMIN_13 + 0x3f800000, // PA_SC_VPORT_ZMAX_13 + 0x00000000, // PA_SC_VPORT_ZMIN_14 + 0x3f800000, // PA_SC_VPORT_ZMAX_14 + 0x00000000, // PA_SC_VPORT_ZMIN_15 + 0x3f800000, // PA_SC_VPORT_ZMAX_15 +}; +static const unsigned int ci_SECT_CONTEXT_def_2[] = +{ + 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL + 0, // HOLE + 0x00000000, // CP_PERFMON_CNTX_CNTL + 0x00000000, // CP_RINGID + 0x00000000, // CP_VMID + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0xffffffff, // VGT_MAX_VTX_INDX + 0x00000000, // VGT_MIN_VTX_INDX + 0x00000000, // VGT_INDX_OFFSET + 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX + 0, // HOLE + 0x00000000, // CB_BLEND_RED + 0x00000000, // CB_BLEND_GREEN + 0x00000000, // CB_BLEND_BLUE + 0x00000000, // CB_BLEND_ALPHA + 0, // HOLE + 0, // HOLE + 0x00000000, // DB_STENCIL_CONTROL + 0x00000000, // DB_STENCILREFMASK + 0x00000000, // DB_STENCILREFMASK_BF + 0, // HOLE + 0x00000000, // PA_CL_VPORT_XSCALE + 0x00000000, // PA_CL_VPORT_XOFFSET + 0x00000000, // PA_CL_VPORT_YSCALE + 0x00000000, // PA_CL_VPORT_YOFFSET + 0x00000000, // PA_CL_VPORT_ZSCALE + 0x00000000, // PA_CL_VPORT_ZOFFSET + 0x00000000, // PA_CL_VPORT_XSCALE_1 + 0x00000000, // PA_CL_VPORT_XOFFSET_1 + 0x00000000, // PA_CL_VPORT_YSCALE_1 + 0x00000000, // PA_CL_VPORT_YOFFSET_1 + 0x00000000, // PA_CL_VPORT_ZSCALE_1 + 0x00000000, // PA_CL_VPORT_ZOFFSET_1 + 0x00000000, // PA_CL_VPORT_XSCALE_2 + 0x00000000, // PA_CL_VPORT_XOFFSET_2 + 0x00000000, // PA_CL_VPORT_YSCALE_2 + 0x00000000, // PA_CL_VPORT_YOFFSET_2 + 0x00000000, // PA_CL_VPORT_ZSCALE_2 + 0x00000000, // PA_CL_VPORT_ZOFFSET_2 + 0x00000000, // PA_CL_VPORT_XSCALE_3 + 0x00000000, // PA_CL_VPORT_XOFFSET_3 + 0x00000000, // PA_CL_VPORT_YSCALE_3 + 0x00000000, // PA_CL_VPORT_YOFFSET_3 + 0x00000000, // PA_CL_VPORT_ZSCALE_3 + 0x00000000, // PA_CL_VPORT_ZOFFSET_3 + 0x00000000, // PA_CL_VPORT_XSCALE_4 + 0x00000000, // PA_CL_VPORT_XOFFSET_4 + 0x00000000, // PA_CL_VPORT_YSCALE_4 + 0x00000000, // PA_CL_VPORT_YOFFSET_4 + 0x00000000, // PA_CL_VPORT_ZSCALE_4 + 0x00000000, // PA_CL_VPORT_ZOFFSET_4 + 0x00000000, // PA_CL_VPORT_XSCALE_5 + 0x00000000, // PA_CL_VPORT_XOFFSET_5 + 0x00000000, // PA_CL_VPORT_YSCALE_5 + 0x00000000, // PA_CL_VPORT_YOFFSET_5 + 0x00000000, // PA_CL_VPORT_ZSCALE_5 + 0x00000000, // PA_CL_VPORT_ZOFFSET_5 + 0x00000000, // PA_CL_VPORT_XSCALE_6 + 0x00000000, // PA_CL_VPORT_XOFFSET_6 + 0x00000000, // PA_CL_VPORT_YSCALE_6 + 0x00000000, // PA_CL_VPORT_YOFFSET_6 + 0x00000000, // PA_CL_VPORT_ZSCALE_6 + 0x00000000, // PA_CL_VPORT_ZOFFSET_6 + 0x00000000, // PA_CL_VPORT_XSCALE_7 + 0x00000000, // PA_CL_VPORT_XOFFSET_7 + 0x00000000, // PA_CL_VPORT_YSCALE_7 + 0x00000000, // PA_CL_VPORT_YOFFSET_7 + 0x00000000, // PA_CL_VPORT_ZSCALE_7 + 0x00000000, // PA_CL_VPORT_ZOFFSET_7 + 0x00000000, // PA_CL_VPORT_XSCALE_8 + 0x00000000, // PA_CL_VPORT_XOFFSET_8 + 0x00000000, // PA_CL_VPORT_YSCALE_8 + 0x00000000, // PA_CL_VPORT_YOFFSET_8 + 0x00000000, // PA_CL_VPORT_ZSCALE_8 + 0x00000000, // PA_CL_VPORT_ZOFFSET_8 + 0x00000000, // PA_CL_VPORT_XSCALE_9 + 0x00000000, // PA_CL_VPORT_XOFFSET_9 + 0x00000000, // PA_CL_VPORT_YSCALE_9 + 0x00000000, // PA_CL_VPORT_YOFFSET_9 + 0x00000000, // PA_CL_VPORT_ZSCALE_9 + 0x00000000, // PA_CL_VPORT_ZOFFSET_9 + 0x00000000, // PA_CL_VPORT_XSCALE_10 + 0x00000000, // PA_CL_VPORT_XOFFSET_10 + 0x00000000, // PA_CL_VPORT_YSCALE_10 + 0x00000000, // PA_CL_VPORT_YOFFSET_10 + 0x00000000, // PA_CL_VPORT_ZSCALE_10 + 0x00000000, // PA_CL_VPORT_ZOFFSET_10 + 0x00000000, // PA_CL_VPORT_XSCALE_11 + 0x00000000, // PA_CL_VPORT_XOFFSET_11 + 0x00000000, // PA_CL_VPORT_YSCALE_11 + 0x00000000, // PA_CL_VPORT_YOFFSET_11 + 0x00000000, // PA_CL_VPORT_ZSCALE_11 + 0x00000000, // PA_CL_VPORT_ZOFFSET_11 + 0x00000000, // PA_CL_VPORT_XSCALE_12 + 0x00000000, // PA_CL_VPORT_XOFFSET_12 + 0x00000000, // PA_CL_VPORT_YSCALE_12 + 0x00000000, // PA_CL_VPORT_YOFFSET_12 + 0x00000000, // PA_CL_VPORT_ZSCALE_12 + 0x00000000, // PA_CL_VPORT_ZOFFSET_12 + 0x00000000, // PA_CL_VPORT_XSCALE_13 + 0x00000000, // PA_CL_VPORT_XOFFSET_13 + 0x00000000, // PA_CL_VPORT_YSCALE_13 + 0x00000000, // PA_CL_VPORT_YOFFSET_13 + 0x00000000, // PA_CL_VPORT_ZSCALE_13 + 0x00000000, // PA_CL_VPORT_ZOFFSET_13 + 0x00000000, // PA_CL_VPORT_XSCALE_14 + 0x00000000, // PA_CL_VPORT_XOFFSET_14 + 0x00000000, // PA_CL_VPORT_YSCALE_14 + 0x00000000, // PA_CL_VPORT_YOFFSET_14 + 0x00000000, // PA_CL_VPORT_ZSCALE_14 + 0x00000000, // PA_CL_VPORT_ZOFFSET_14 + 0x00000000, // PA_CL_VPORT_XSCALE_15 + 0x00000000, // PA_CL_VPORT_XOFFSET_15 + 0x00000000, // PA_CL_VPORT_YSCALE_15 + 0x00000000, // PA_CL_VPORT_YOFFSET_15 + 0x00000000, // PA_CL_VPORT_ZSCALE_15 + 0x00000000, // PA_CL_VPORT_ZOFFSET_15 + 0x00000000, // PA_CL_UCP_0_X + 0x00000000, // PA_CL_UCP_0_Y + 0x00000000, // PA_CL_UCP_0_Z + 0x00000000, // PA_CL_UCP_0_W + 0x00000000, // PA_CL_UCP_1_X + 0x00000000, // PA_CL_UCP_1_Y + 0x00000000, // PA_CL_UCP_1_Z + 0x00000000, // PA_CL_UCP_1_W + 0x00000000, // PA_CL_UCP_2_X + 0x00000000, // PA_CL_UCP_2_Y + 0x00000000, // PA_CL_UCP_2_Z + 0x00000000, // PA_CL_UCP_2_W + 0x00000000, // PA_CL_UCP_3_X + 0x00000000, // PA_CL_UCP_3_Y + 0x00000000, // PA_CL_UCP_3_Z + 0x00000000, // PA_CL_UCP_3_W + 0x00000000, // PA_CL_UCP_4_X + 0x00000000, // PA_CL_UCP_4_Y + 0x00000000, // PA_CL_UCP_4_Z + 0x00000000, // PA_CL_UCP_4_W + 0x00000000, // PA_CL_UCP_5_X + 0x00000000, // PA_CL_UCP_5_Y + 0x00000000, // PA_CL_UCP_5_Z + 0x00000000, // PA_CL_UCP_5_W + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_CNTL_0 + 0x00000000, // SPI_PS_INPUT_CNTL_1 + 0x00000000, // SPI_PS_INPUT_CNTL_2 + 0x00000000, // SPI_PS_INPUT_CNTL_3 + 0x00000000, // SPI_PS_INPUT_CNTL_4 + 0x00000000, // SPI_PS_INPUT_CNTL_5 + 0x00000000, // SPI_PS_INPUT_CNTL_6 + 0x00000000, // SPI_PS_INPUT_CNTL_7 + 0x00000000, // SPI_PS_INPUT_CNTL_8 + 0x00000000, // SPI_PS_INPUT_CNTL_9 + 0x00000000, // SPI_PS_INPUT_CNTL_10 + 0x00000000, // SPI_PS_INPUT_CNTL_11 + 0x00000000, // SPI_PS_INPUT_CNTL_12 + 0x00000000, // SPI_PS_INPUT_CNTL_13 + 0x00000000, // SPI_PS_INPUT_CNTL_14 + 0x00000000, // SPI_PS_INPUT_CNTL_15 + 0x00000000, // SPI_PS_INPUT_CNTL_16 + 0x00000000, // SPI_PS_INPUT_CNTL_17 + 0x00000000, // SPI_PS_INPUT_CNTL_18 + 0x00000000, // SPI_PS_INPUT_CNTL_19 + 0x00000000, // SPI_PS_INPUT_CNTL_20 + 0x00000000, // SPI_PS_INPUT_CNTL_21 + 0x00000000, // SPI_PS_INPUT_CNTL_22 + 0x00000000, // SPI_PS_INPUT_CNTL_23 + 0x00000000, // SPI_PS_INPUT_CNTL_24 + 0x00000000, // SPI_PS_INPUT_CNTL_25 + 0x00000000, // SPI_PS_INPUT_CNTL_26 + 0x00000000, // SPI_PS_INPUT_CNTL_27 + 0x00000000, // SPI_PS_INPUT_CNTL_28 + 0x00000000, // SPI_PS_INPUT_CNTL_29 + 0x00000000, // SPI_PS_INPUT_CNTL_30 + 0x00000000, // SPI_PS_INPUT_CNTL_31 + 0x00000000, // SPI_VS_OUT_CONFIG + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_ENA + 0x00000000, // SPI_PS_INPUT_ADDR + 0x00000000, // SPI_INTERP_CONTROL_0 + 0x00000002, // SPI_PS_IN_CONTROL + 0, // HOLE + 0x00000000, // SPI_BARYC_CNTL + 0, // HOLE + 0x00000000, // SPI_TMPRING_SIZE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_SHADER_POS_FORMAT + 0x00000000, // SPI_SHADER_Z_FORMAT + 0x00000000, // SPI_SHADER_COL_FORMAT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_BLEND0_CONTROL + 0x00000000, // CB_BLEND1_CONTROL + 0x00000000, // CB_BLEND2_CONTROL + 0x00000000, // CB_BLEND3_CONTROL + 0x00000000, // CB_BLEND4_CONTROL + 0x00000000, // CB_BLEND5_CONTROL + 0x00000000, // CB_BLEND6_CONTROL + 0x00000000, // CB_BLEND7_CONTROL +}; +static const unsigned int ci_SECT_CONTEXT_def_3[] = +{ + 0x00000000, // PA_CL_POINT_X_RAD + 0x00000000, // PA_CL_POINT_Y_RAD + 0x00000000, // PA_CL_POINT_SIZE + 0x00000000, // PA_CL_POINT_CULL_RAD + 0x00000000, // VGT_DMA_BASE_HI + 0x00000000, // VGT_DMA_BASE +}; +static const unsigned int ci_SECT_CONTEXT_def_4[] = +{ + 0x00000000, // DB_DEPTH_CONTROL + 0x00000000, // DB_EQAA + 0x00000000, // CB_COLOR_CONTROL + 0x00000000, // DB_SHADER_CONTROL + 0x00090000, // PA_CL_CLIP_CNTL + 0x00000004, // PA_SU_SC_MODE_CNTL + 0x00000000, // PA_CL_VTE_CNTL + 0x00000000, // PA_CL_VS_OUT_CNTL + 0x00000000, // PA_CL_NANINF_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_SCALE + 0x00000000, // PA_SU_PRIM_FILTER_CNTL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SU_POINT_SIZE + 0x00000000, // PA_SU_POINT_MINMAX + 0x00000000, // PA_SU_LINE_CNTL + 0x00000000, // PA_SC_LINE_STIPPLE + 0x00000000, // VGT_OUTPUT_PATH_CNTL + 0x00000000, // VGT_HOS_CNTL + 0x00000000, // VGT_HOS_MAX_TESS_LEVEL + 0x00000000, // VGT_HOS_MIN_TESS_LEVEL + 0x00000000, // VGT_HOS_REUSE_DEPTH + 0x00000000, // VGT_GROUP_PRIM_TYPE + 0x00000000, // VGT_GROUP_FIRST_DECR + 0x00000000, // VGT_GROUP_DECR + 0x00000000, // VGT_GROUP_VECT_0_CNTL + 0x00000000, // VGT_GROUP_VECT_1_CNTL + 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL + 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL + 0x00000000, // VGT_GS_MODE + 0x00000000, // VGT_GS_ONCHIP_CNTL + 0x00000000, // PA_SC_MODE_CNTL_0 + 0x00000000, // PA_SC_MODE_CNTL_1 + 0x00000000, // VGT_ENHANCE + 0x00000100, // VGT_GS_PER_ES + 0x00000080, // VGT_ES_PER_GS + 0x00000002, // VGT_GS_PER_VS + 0x00000000, // VGT_GSVS_RING_OFFSET_1 + 0x00000000, // VGT_GSVS_RING_OFFSET_2 + 0x00000000, // VGT_GSVS_RING_OFFSET_3 + 0x00000000, // VGT_GS_OUT_PRIM_TYPE + 0x00000000, // IA_ENHANCE +}; +static const unsigned int ci_SECT_CONTEXT_def_5[] = +{ + 0x00000000, // WD_ENHANCE + 0x00000000, // VGT_PRIMITIVEID_EN +}; +static const unsigned int ci_SECT_CONTEXT_def_6[] = +{ + 0x00000000, // VGT_PRIMITIVEID_RESET +}; +static const unsigned int ci_SECT_CONTEXT_def_7[] = +{ + 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_INSTANCE_STEP_RATE_0 + 0x00000000, // VGT_INSTANCE_STEP_RATE_1 + 0x000000ff, // IA_MULTI_VGT_PARAM + 0x00000000, // VGT_ESGS_RING_ITEMSIZE + 0x00000000, // VGT_GSVS_RING_ITEMSIZE + 0x00000000, // VGT_REUSE_OFF + 0x00000000, // VGT_VTX_CNT_EN + 0x00000000, // DB_HTILE_SURFACE + 0x00000000, // DB_SRESULTS_COMPARE_STATE0 + 0x00000000, // DB_SRESULTS_COMPARE_STATE1 + 0x00000000, // DB_PRELOAD_CONTROL + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE + 0, // HOLE + 0x00000000, // VGT_GS_MAX_VERT_OUT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_SHADER_STAGES_EN + 0x00000000, // VGT_LS_HS_CONFIG + 0x00000000, // VGT_GS_VERT_ITEMSIZE + 0x00000000, // VGT_GS_VERT_ITEMSIZE_1 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_2 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_3 + 0x00000000, // VGT_TF_PARAM + 0x00000000, // DB_ALPHA_TO_MASK + 0, // HOLE + 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL + 0x00000000, // PA_SU_POLY_OFFSET_CLAMP + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET + 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET + 0x00000000, // VGT_GS_INSTANCE_CNT + 0x00000000, // VGT_STRMOUT_CONFIG + 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SC_CENTROID_PRIORITY_0 + 0x00000000, // PA_SC_CENTROID_PRIORITY_1 + 0x00001000, // PA_SC_LINE_CNTL + 0x00000000, // PA_SC_AA_CONFIG + 0x00000005, // PA_SU_VTX_CNTL + 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ + 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ + 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ + 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 + 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0 + 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x0000000e, // VGT_VERTEX_REUSE_BLOCK_CNTL + 0x00000010, // VGT_OUT_DEALLOC_CNTL + 0x00000000, // CB_COLOR0_BASE + 0x00000000, // CB_COLOR0_PITCH + 0x00000000, // CB_COLOR0_SLICE + 0x00000000, // CB_COLOR0_VIEW + 0x00000000, // CB_COLOR0_INFO + 0x00000000, // CB_COLOR0_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR0_CMASK + 0x00000000, // CB_COLOR0_CMASK_SLICE + 0x00000000, // CB_COLOR0_FMASK + 0x00000000, // CB_COLOR0_FMASK_SLICE + 0x00000000, // CB_COLOR0_CLEAR_WORD0 + 0x00000000, // CB_COLOR0_CLEAR_WORD1 + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_COLOR1_BASE + 0x00000000, // CB_COLOR1_PITCH + 0x00000000, // CB_COLOR1_SLICE + 0x00000000, // CB_COLOR1_VIEW + 0x00000000, // CB_COLOR1_INFO + 0x00000000, // CB_COLOR1_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR1_CMASK + 0x00000000, // CB_COLOR1_CMASK_SLICE + 0x00000000, // CB_COLOR1_FMASK + 0x00000000, // CB_COLOR1_FMASK_SLICE + 0x00000000, // CB_COLOR1_CLEAR_WORD0 + 0x00000000, // CB_COLOR1_CLEAR_WORD1 + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_COLOR2_BASE + 0x00000000, // CB_COLOR2_PITCH + 0x00000000, // CB_COLOR2_SLICE + 0x00000000, // CB_COLOR2_VIEW + 0x00000000, // CB_COLOR2_INFO + 0x00000000, // CB_COLOR2_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR2_CMASK + 0x00000000, // CB_COLOR2_CMASK_SLICE + 0x00000000, // CB_COLOR2_FMASK + 0x00000000, // CB_COLOR2_FMASK_SLICE + 0x00000000, // CB_COLOR2_CLEAR_WORD0 + 0x00000000, // CB_COLOR2_CLEAR_WORD1 + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_COLOR3_BASE + 0x00000000, // CB_COLOR3_PITCH + 0x00000000, // CB_COLOR3_SLICE + 0x00000000, // CB_COLOR3_VIEW + 0x00000000, // CB_COLOR3_INFO + 0x00000000, // CB_COLOR3_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR3_CMASK + 0x00000000, // CB_COLOR3_CMASK_SLICE + 0x00000000, // CB_COLOR3_FMASK + 0x00000000, // CB_COLOR3_FMASK_SLICE + 0x00000000, // CB_COLOR3_CLEAR_WORD0 + 0x00000000, // CB_COLOR3_CLEAR_WORD1 + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_COLOR4_BASE + 0x00000000, // CB_COLOR4_PITCH + 0x00000000, // CB_COLOR4_SLICE + 0x00000000, // CB_COLOR4_VIEW + 0x00000000, // CB_COLOR4_INFO + 0x00000000, // CB_COLOR4_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR4_CMASK + 0x00000000, // CB_COLOR4_CMASK_SLICE + 0x00000000, // CB_COLOR4_FMASK + 0x00000000, // CB_COLOR4_FMASK_SLICE + 0x00000000, // CB_COLOR4_CLEAR_WORD0 + 0x00000000, // CB_COLOR4_CLEAR_WORD1 + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_COLOR5_BASE + 0x00000000, // CB_COLOR5_PITCH + 0x00000000, // CB_COLOR5_SLICE + 0x00000000, // CB_COLOR5_VIEW + 0x00000000, // CB_COLOR5_INFO + 0x00000000, // CB_COLOR5_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR5_CMASK + 0x00000000, // CB_COLOR5_CMASK_SLICE + 0x00000000, // CB_COLOR5_FMASK + 0x00000000, // CB_COLOR5_FMASK_SLICE + 0x00000000, // CB_COLOR5_CLEAR_WORD0 + 0x00000000, // CB_COLOR5_CLEAR_WORD1 + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_COLOR6_BASE + 0x00000000, // CB_COLOR6_PITCH + 0x00000000, // CB_COLOR6_SLICE + 0x00000000, // CB_COLOR6_VIEW + 0x00000000, // CB_COLOR6_INFO + 0x00000000, // CB_COLOR6_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR6_CMASK + 0x00000000, // CB_COLOR6_CMASK_SLICE + 0x00000000, // CB_COLOR6_FMASK + 0x00000000, // CB_COLOR6_FMASK_SLICE + 0x00000000, // CB_COLOR6_CLEAR_WORD0 + 0x00000000, // CB_COLOR6_CLEAR_WORD1 + 0, // HOLE + 0, // HOLE + 0x00000000, // CB_COLOR7_BASE + 0x00000000, // CB_COLOR7_PITCH + 0x00000000, // CB_COLOR7_SLICE + 0x00000000, // CB_COLOR7_VIEW + 0x00000000, // CB_COLOR7_INFO + 0x00000000, // CB_COLOR7_ATTRIB + 0, // HOLE + 0x00000000, // CB_COLOR7_CMASK + 0x00000000, // CB_COLOR7_CMASK_SLICE + 0x00000000, // CB_COLOR7_FMASK + 0x00000000, // CB_COLOR7_FMASK_SLICE + 0x00000000, // CB_COLOR7_CLEAR_WORD0 + 0x00000000, // CB_COLOR7_CLEAR_WORD1 +}; +static const struct cs_extent_def ci_SECT_CONTEXT_defs[] = +{ + {ci_SECT_CONTEXT_def_1, 0x0000a000, 212 }, + {ci_SECT_CONTEXT_def_2, 0x0000a0d6, 274 }, + {ci_SECT_CONTEXT_def_3, 0x0000a1f5, 6 }, + {ci_SECT_CONTEXT_def_4, 0x0000a200, 157 }, + {ci_SECT_CONTEXT_def_5, 0x0000a2a0, 2 }, + {ci_SECT_CONTEXT_def_6, 0x0000a2a3, 1 }, + {ci_SECT_CONTEXT_def_7, 0x0000a2a5, 233 }, + { 0, 0, 0 } +}; +static const struct cs_section_def ci_cs_data[] = { + { ci_SECT_CONTEXT_defs, SECT_CONTEXT }, + { 0, SECT_NONE } +}; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 6fc876a444d4..2ce12ee3e67f 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -140,6 +140,7 @@ extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev, int ring, u32 cp_int_cntl); extern void cayman_vm_decode_fault(struct radeon_device *rdev, u32 status, u32 addr); +void cik_init_cp_pg_table(struct radeon_device *rdev); static const u32 evergreen_golden_registers[] = { @@ -3893,8 +3894,22 @@ void sumo_rlc_fini(struct radeon_device *rdev) radeon_bo_unref(&rdev->rlc.clear_state_obj); rdev->rlc.clear_state_obj = NULL; } + + /* clear state block */ + if (rdev->rlc.cp_table_obj) { + r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false); + if (unlikely(r != 0)) + dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r); + radeon_bo_unpin(rdev->rlc.cp_table_obj); + radeon_bo_unreserve(rdev->rlc.cp_table_obj); + + radeon_bo_unref(&rdev->rlc.cp_table_obj); + rdev->rlc.cp_table_obj = NULL; + } } +#define CP_ME_TABLE_SIZE 96 + int sumo_rlc_init(struct radeon_device *rdev) { const u32 *src_ptr; @@ -3980,9 +3995,10 @@ int sumo_rlc_init(struct radeon_device *rdev) } reg_list_blk_index = (3 * reg_list_num + 2); dws += reg_list_blk_index; + rdev->rlc.clear_state_size = dws; if (rdev->rlc.clear_state_obj == NULL) { - r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, + r = radeon_bo_create(rdev, rdev->rlc.clear_state_size * 4, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); @@ -4046,6 +4062,45 @@ int sumo_rlc_init(struct radeon_device *rdev) radeon_bo_unreserve(rdev->rlc.clear_state_obj); } + if (rdev->rlc.cp_table_size) { + if (rdev->rlc.cp_table_obj == NULL) { + r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.cp_table_obj); + if (r) { + dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + } + + r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false); + if (unlikely(r != 0)) { + dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + r = radeon_bo_pin(rdev->rlc.cp_table_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->rlc.cp_table_gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->rlc.cp_table_obj); + dev_warn(rdev->dev, "(%d) pin RLC cp_table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + r = radeon_bo_kmap(rdev->rlc.cp_table_obj, (void **)&rdev->rlc.cp_table_ptr); + if (r) { + dev_warn(rdev->dev, "(%d) map RLC cp table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + + cik_init_cp_pg_table(rdev); + + radeon_bo_kunmap(rdev->rlc.cp_table_obj); + radeon_bo_unreserve(rdev->rlc.cp_table_obj); + + } + return 0; } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b9706e83e827..5941ada063d1 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -152,6 +152,14 @@ extern int radeon_aspm; #define RADEON_RESET_MC (1 << 10) #define RADEON_RESET_DISPLAY (1 << 11) +/* CG block flags */ +#define RADEON_CG_BLOCK_GFX (1 << 0) +#define RADEON_CG_BLOCK_MC (1 << 1) +#define RADEON_CG_BLOCK_SDMA (1 << 2) +#define RADEON_CG_BLOCK_UVD (1 << 3) +#define RADEON_CG_BLOCK_VCE (1 << 4) +#define RADEON_CG_BLOCK_HDP (1 << 5) + /* max cursor sizes (in pixels) */ #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 @@ -861,6 +869,12 @@ struct radeon_rlc { uint64_t clear_state_gpu_addr; volatile uint32_t *cs_ptr; const struct cs_section_def *cs_data; + u32 clear_state_size; + /* for cp tables */ + struct radeon_bo *cp_table_obj; + uint64_t cp_table_gpu_addr; + volatile uint32_t *cp_table_ptr; + u32 cp_table_size; }; int radeon_ib_get(struct radeon_device *rdev, int ring, diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 1926ec06a638..880551b6df61 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2780,6 +2780,7 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_BONAIRE: rdev->asic = &ci_asic; rdev->num_crtc = 6; + rdev->has_uvd = true; break; case CHIP_KAVERI: case CHIP_KABINI: @@ -2789,6 +2790,7 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->num_crtc = 4; else rdev->num_crtc = 2; + rdev->has_uvd = true; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 8b8963d4a732..4f91e1f4d814 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4901,7 +4901,7 @@ static void si_set_uvd_dcm(struct radeon_device *rdev, WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2); } -static void si_init_uvd_internal_cg(struct radeon_device *rdev) +void si_init_uvd_internal_cg(struct radeon_device *rdev) { bool hw_mode = true; -- cgit v1.2.3 From a412fce0548105f14e48d25094d98fc87f7c0df4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 22 Apr 2013 20:23:31 -0400 Subject: drm/radeon/cik: add rlc helpers for DPM Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/cikd.h | 9 +++++++++ 2 files changed, 38 insertions(+) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a36e98c9a875..727c296662f1 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5587,6 +5587,35 @@ static u32 cik_halt_rlc(struct radeon_device *rdev) return orig; } +void cik_enter_rlc_safe_mode(struct radeon_device *rdev) +{ + u32 tmp, i, mask; + + tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE); + WREG32(RLC_GPR_REG2, tmp); + + mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(RLC_GPM_STAT) & mask) == mask) + break; + udelay(1); + } + + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(RLC_GPR_REG2) & REQ) == 0) + break; + udelay(1); + } +} + +void cik_exit_rlc_safe_mode(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); + WREG32(RLC_GPR_REG2, tmp); +} + /** * cik_rlc_stop - stop the RLC ME * diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 63955abb1e11..116b3131a683 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -952,6 +952,8 @@ #define RLC_GPM_STAT 0xC400 # define RLC_GPM_BUSY (1 << 0) +# define GFX_POWER_STATUS (1 << 1) +# define GFX_CLOCK_STATUS (1 << 2) #define RLC_PG_CNTL 0xC40C # define GFX_PG_ENABLE (1 << 0) @@ -1004,6 +1006,13 @@ #define RLC_GPM_SCRATCH_ADDR 0xC4B0 #define RLC_GPM_SCRATCH_DATA 0xC4B4 +#define RLC_GPR_REG2 0xC4E8 +#define REQ 0x00000001 +#define MESSAGE(x) ((x) << 1) +#define MESSAGE_MASK 0x0000001e +#define MSG_ENTER_RLC_SAFE_MODE 1 +#define MSG_EXIT_RLC_SAFE_MODE 0 + #define CP_HPD_EOP_BASE_ADDR 0xC904 #define CP_HPD_EOP_BASE_ADDR_HI 0xC908 #define CP_HPD_EOP_VMID 0xC90C -- cgit v1.2.3 From 286d9cc67a87863ba510b22d3f32cbeed9864b85 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Jun 2013 15:50:47 -0400 Subject: drm/radeon: add get_temperature() callbacks for CIK (v2) This added support for the on-chip thermal sensors on CIK asics. v2: fix register offset. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 37 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/cikd.h | 8 ++++++++ drivers/gpu/drm/radeon/radeon_asic.c | 2 ++ drivers/gpu/drm/radeon/radeon_asic.h | 2 ++ drivers/gpu/drm/radeon/radeon_pm.c | 2 ++ 5 files changed, 51 insertions(+) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 727c296662f1..d0804f79efed 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -69,6 +69,43 @@ static void cik_program_aspm(struct radeon_device *rdev); static void cik_init_pg(struct radeon_device *rdev); static void cik_init_cg(struct radeon_device *rdev); +/* get temperature in millidegrees */ +int ci_get_temp(struct radeon_device *rdev) +{ + u32 temp; + int actual_temp = 0; + + temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> + CTF_TEMP_SHIFT; + + if (temp & 0x200) + actual_temp = 255; + else + actual_temp = temp & 0x1ff; + + actual_temp = actual_temp * 1000; + + return actual_temp; +} + +/* get temperature in millidegrees */ +int kv_get_temp(struct radeon_device *rdev) +{ + u32 temp; + int actual_temp = 0; + + temp = RREG32_SMC(0xC0300E0C); + + if (temp) + actual_temp = (temp / 8) - 49; + else + actual_temp = 0; + + actual_temp = actual_temp * 1000; + + return actual_temp; +} + /* * Indirect registers accessor */ diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 116b3131a683..65886caaf756 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -32,6 +32,14 @@ #define GENERAL_PWRMGT 0xC0200000 # define GPU_COUNTER_CLK (1 << 15) +#define CG_MULT_THERMAL_STATUS 0xC0300014 +#define ASIC_MAX_TEMP(x) ((x) << 0) +#define ASIC_MAX_TEMP_MASK 0x000001ff +#define ASIC_MAX_TEMP_SHIFT 0 +#define CTF_TEMP(x) ((x) << 9) +#define CTF_TEMP_MASK 0x0003fe00 +#define CTF_TEMP_SHIFT 9 + #define MPLL_BYPASSCLK_SEL 0xC050019C # define MPLL_CLKOUT_SEL(x) ((x) << 8) # define MPLL_CLKOUT_SEL_MASK 0xFF00 diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 880551b6df61..3a55540fe280 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2452,6 +2452,7 @@ static struct radeon_asic ci_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .get_temperature = &ci_get_temp, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, @@ -2607,6 +2608,7 @@ static struct radeon_asic kv_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .get_temperature = &kv_get_temp, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 3cf7d89c1bd8..d5c6c5b10edf 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -747,5 +747,7 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void cik_compute_ring_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); +int ci_get_temp(struct radeon_device *rdev); +int kv_get_temp(struct radeon_device *rdev); #endif diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 79a03de4ac0a..1408014dce8f 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -569,6 +569,8 @@ static int radeon_hwmon_init(struct radeon_device *rdev) case THERMAL_TYPE_NI: case THERMAL_TYPE_SUMO: case THERMAL_TYPE_SI: + case THERMAL_TYPE_CI: + case THERMAL_TYPE_KV: if (rdev->asic->pm.get_temperature == NULL) return err; rdev->pm.int_hwmon_dev = hwmon_device_register(rdev->dev); -- cgit v1.2.3 From 41a524abff2630dce0f9c38eb7340fbf2dc5bf27 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 14 Aug 2013 01:01:40 -0400 Subject: drm/radeon/kms: add dpm support for KB/KV This adds dpm support for KB/KV asics. This includes: - dynamic engine clock scaling - dynamic voltage scaling - power containment - shader power scaling Set radeon.dpm=1 to enable. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/cik.c | 30 +- drivers/gpu/drm/radeon/cikd.h | 72 + drivers/gpu/drm/radeon/kv_dpm.c | 2536 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/kv_dpm.h | 199 +++ drivers/gpu/drm/radeon/kv_smc.c | 207 +++ drivers/gpu/drm/radeon/ppsmc.h | 34 + drivers/gpu/drm/radeon/radeon_asic.c | 14 + drivers/gpu/drm/radeon/radeon_asic.h | 14 + drivers/gpu/drm/radeon/radeon_pm.c | 2 + drivers/gpu/drm/radeon/smu7.h | 170 +++ drivers/gpu/drm/radeon/smu7_fusion.h | 300 ++++ 12 files changed, 3576 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/radeon/kv_dpm.c create mode 100644 drivers/gpu/drm/radeon/kv_dpm.h create mode 100644 drivers/gpu/drm/radeon/kv_smc.c create mode 100644 drivers/gpu/drm/radeon/smu7.h create mode 100644 drivers/gpu/drm/radeon/smu7_fusion.h (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index bfabd69b4e39..d3265b5d4661 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -79,7 +79,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ - trinity_smc.o ni_dpm.o si_smc.o si_dpm.o + trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index d0804f79efed..87e5aeed6e88 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6593,6 +6593,7 @@ int cik_irq_set(struct radeon_device *rdev) u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; u32 grbm_int_cntl = 0; u32 dma_cntl, dma_cntl1; + u32 thermal_int; if (!rdev->irq.installed) { WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -6625,6 +6626,9 @@ int cik_irq_set(struct radeon_device *rdev) cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; + thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) & + ~(THERM_INTH_MASK | THERM_INTL_MASK); + /* enable CP interrupts on all rings */ if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("cik_irq_set: sw int gfx\n"); @@ -6782,6 +6786,11 @@ int cik_irq_set(struct radeon_device *rdev) hpd6 |= DC_HPDx_INT_EN; } + if (rdev->irq.dpm_thermal) { + DRM_DEBUG("dpm thermal\n"); + thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK; + } + WREG32(CP_INT_CNTL_RING0, cp_int_cntl); WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); @@ -6816,6 +6825,8 @@ int cik_irq_set(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, hpd5); WREG32(DC_HPD6_INT_CONTROL, hpd6); + WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int); + return 0; } @@ -7027,6 +7038,7 @@ int cik_irq_process(struct radeon_device *rdev) bool queue_hotplug = false; bool queue_reset = false; u32 addr, status, mc_client; + bool queue_thermal = false; if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE; @@ -7377,6 +7389,19 @@ restart_ih: break; } break; + case 230: /* thermal low to high */ + DRM_DEBUG("IH: thermal low to high\n"); + rdev->pm.dpm.thermal.high_to_low = false; + queue_thermal = true; + break; + case 231: /* thermal high to low */ + DRM_DEBUG("IH: thermal high to low\n"); + rdev->pm.dpm.thermal.high_to_low = true; + queue_thermal = true; + break; + case 233: /* GUI IDLE */ + DRM_DEBUG("IH: GUI idle\n"); + break; case 241: /* SDMA Privileged inst */ case 247: /* SDMA Privileged inst */ DRM_ERROR("Illegal instruction in SDMA command stream\n"); @@ -7416,9 +7441,6 @@ restart_ih: break; } break; - case 233: /* GUI IDLE */ - DRM_DEBUG("IH: GUI idle\n"); - break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); break; @@ -7432,6 +7454,8 @@ restart_ih: schedule_work(&rdev->hotplug_work); if (queue_reset) schedule_work(&rdev->reset_work); + if (queue_thermal) + schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); atomic_set(&rdev->ih.lock, 0); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 65886caaf756..179ca3625ae4 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -28,10 +28,59 @@ #define CIK_RB_BITMAP_WIDTH_PER_SH 2 +/* DIDT IND registers */ +#define DIDT_SQ_CTRL0 0x0 +# define DIDT_CTRL_EN (1 << 0) +#define DIDT_DB_CTRL0 0x20 +#define DIDT_TD_CTRL0 0x40 +#define DIDT_TCP_CTRL0 0x60 + /* SMC IND registers */ +#define NB_DPM_CONFIG_1 0x3F9E8 +# define Dpm0PgNbPsLo(x) ((x) << 0) +# define Dpm0PgNbPsLo_MASK 0x000000ff +# define Dpm0PgNbPsLo_SHIFT 0 +# define Dpm0PgNbPsHi(x) ((x) << 8) +# define Dpm0PgNbPsHi_MASK 0x0000ff00 +# define Dpm0PgNbPsHi_SHIFT 8 +# define DpmXNbPsLo(x) ((x) << 16) +# define DpmXNbPsLo_MASK 0x00ff0000 +# define DpmXNbPsLo_SHIFT 16 +# define DpmXNbPsHi(x) ((x) << 24) +# define DpmXNbPsHi_MASK 0xff000000 +# define DpmXNbPsHi_SHIFT 24 + +#define SMC_SYSCON_MSG_ARG_0 0x80000068 + #define GENERAL_PWRMGT 0xC0200000 +# define GLOBAL_PWRMGT_EN (1 << 0) # define GPU_COUNTER_CLK (1 << 15) +#define SCLK_PWRMGT_CNTL 0xC0200008 +# define RESET_BUSY_CNT (1 << 4) +# define RESET_SCLK_CNT (1 << 5) +# define DYNAMIC_PM_EN (1 << 21) + +#define CG_FTV_0 0xC02001A8 + +#define LCAC_SX0_OVR_SEL 0xC0400D04 +#define LCAC_SX0_OVR_VAL 0xC0400D08 + +#define LCAC_MC0_OVR_SEL 0xC0400D34 +#define LCAC_MC0_OVR_VAL 0xC0400D38 + +#define LCAC_MC1_OVR_SEL 0xC0400D40 +#define LCAC_MC1_OVR_VAL 0xC0400D44 + +#define LCAC_MC2_OVR_SEL 0xC0400D4C +#define LCAC_MC2_OVR_VAL 0xC0400D50 + +#define LCAC_MC3_OVR_SEL 0xC0400D58 +#define LCAC_MC3_OVR_VAL 0xC0400D5C + +#define LCAC_CPL_OVR_SEL 0xC0400D84 +#define LCAC_CPL_OVR_VAL 0xC0400D88 + #define CG_MULT_THERMAL_STATUS 0xC0300014 #define ASIC_MAX_TEMP(x) ((x) << 0) #define ASIC_MAX_TEMP_MASK 0x000001ff @@ -60,6 +109,16 @@ # define ZCLK_SEL(x) ((x) << 8) # define ZCLK_SEL_MASK 0xFF00 +#define CG_THERMAL_INT_CTRL 0xC2100028 +#define DIG_THERM_INTH(x) ((x) << 0) +#define DIG_THERM_INTH_MASK 0x000000FF +#define DIG_THERM_INTH_SHIFT 0 +#define DIG_THERM_INTL(x) ((x) << 8) +#define DIG_THERM_INTL_MASK 0x0000FF00 +#define DIG_THERM_INTL_SHIFT 8 +#define THERM_INTH_MASK (1 << 24) +#define THERM_INTL_MASK (1 << 25) + /* PCIE registers idx/data 0x38/0x3c */ #define PB0_PIF_PWRDOWN_0 0x1100012 /* PCIE */ # define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7) @@ -173,6 +232,19 @@ #define PCIE_INDEX 0x38 #define PCIE_DATA 0x3C +#define SMC_IND_INDEX_0 0x200 +#define SMC_IND_DATA_0 0x204 + +#define SMC_IND_ACCESS_CNTL 0x240 +#define AUTO_INCREMENT_IND_0 (1 << 0) + +#define SMC_MESSAGE_0 0x250 +#define SMC_MSG_MASK 0xffff +#define SMC_RESP_0 0x254 +#define SMC_RESP_MASK 0xffff + +#define SMC_MSG_ARG_0 0x290 + #define VGA_HDP_CONTROL 0x328 #define VGA_MEMORY_DISABLE (1 << 4) diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c new file mode 100644 index 000000000000..2e4016356dab --- /dev/null +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -0,0 +1,2536 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "drmP.h" +#include "radeon.h" +#include "cikd.h" +#include "r600_dpm.h" +#include "kv_dpm.h" + +#define KV_MAX_DEEPSLEEP_DIVIDER_ID 5 +#define KV_MINIMUM_ENGINE_CLOCK 800 +#define SMC_RAM_END 0x40000 + +static void kv_init_graphics_levels(struct radeon_device *rdev); +static int kv_calculate_ds_divider(struct radeon_device *rdev); +static int kv_calculate_nbps_level_settings(struct radeon_device *rdev); +static int kv_calculate_dpm_settings(struct radeon_device *rdev); +static void kv_enable_new_levels(struct radeon_device *rdev); +static void kv_program_nbps_index_settings(struct radeon_device *rdev, + struct radeon_ps *new_rps); +static int kv_set_enabled_levels(struct radeon_device *rdev); +static int kv_force_dpm_lowest(struct radeon_device *rdev); +static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps); +static int kv_set_thermal_temperature_range(struct radeon_device *rdev, + int min_temp, int max_temp); +static int kv_init_fps_limits(struct radeon_device *rdev); + +static void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); +static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate); +static void kv_dpm_powergate_samu(struct radeon_device *rdev, bool gate); +static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate); + +extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev); +extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); +extern void cik_update_cg(struct radeon_device *rdev, + u32 block, bool enable); + +static const struct kv_lcac_config_values sx_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 1, 4, 1 }, + { 2, 5, 1 }, + { 3, 4, 2 }, + { 4, 1, 1 }, + { 5, 5, 2 }, + { 6, 6, 1 }, + { 7, 9, 2 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc0_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc1_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc2_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc3_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values cpl_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 1, 4, 1 }, + { 2, 5, 1 }, + { 3, 4, 1 }, + { 4, 1, 1 }, + { 5, 5, 1 }, + { 6, 6, 1 }, + { 7, 9, 1 }, + { 8, 4, 1 }, + { 9, 2, 1 }, + { 10, 3, 1 }, + { 11, 6, 1 }, + { 12, 8, 2 }, + { 13, 1, 1 }, + { 14, 2, 1 }, + { 15, 3, 1 }, + { 16, 1, 1 }, + { 17, 4, 1 }, + { 18, 3, 1 }, + { 19, 1, 1 }, + { 20, 8, 1 }, + { 21, 5, 1 }, + { 22, 1, 1 }, + { 23, 1, 1 }, + { 24, 4, 1 }, + { 27, 6, 1 }, + { 28, 1, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_reg sx0_cac_config_reg[] = +{ + { 0xc0400d00, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc0_cac_config_reg[] = +{ + { 0xc0400d30, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc1_cac_config_reg[] = +{ + { 0xc0400d3c, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc2_cac_config_reg[] = +{ + { 0xc0400d48, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc3_cac_config_reg[] = +{ + { 0xc0400d54, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg cpl_cac_config_reg[] = +{ + { 0xc0400d80, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_pt_config_reg didt_config_kv[] = +{ + { 0x10, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x10, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x10, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x10, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x2, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x2, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x2, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x1, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x1, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x0, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x22, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x22, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x22, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x21, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x21, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x20, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x42, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x42, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x42, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x41, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x41, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x40, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x62, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x62, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x62, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x61, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x61, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x60, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0xFFFFFFFF } +}; + +static struct kv_ps *kv_get_ps(struct radeon_ps *rps) +{ + struct kv_ps *ps = rps->ps_priv; + + return ps; +} + +static struct kv_power_info *kv_get_pi(struct radeon_device *rdev) +{ + struct kv_power_info *pi = rdev->pm.dpm.priv; + + return pi; +} + +#if 0 +static void kv_program_local_cac_table(struct radeon_device *rdev, + const struct kv_lcac_config_values *local_cac_table, + const struct kv_lcac_config_reg *local_cac_reg) +{ + u32 i, count, data; + const struct kv_lcac_config_values *values = local_cac_table; + + while (values->block_id != 0xffffffff) { + count = values->signal_id; + for (i = 0; i < count; i++) { + data = ((values->block_id << local_cac_reg->block_shift) & + local_cac_reg->block_mask); + data |= ((i << local_cac_reg->signal_shift) & + local_cac_reg->signal_mask); + data |= ((values->t << local_cac_reg->t_shift) & + local_cac_reg->t_mask); + data |= ((1 << local_cac_reg->enable_shift) & + local_cac_reg->enable_mask); + WREG32_SMC(local_cac_reg->cntl, data); + } + values++; + } +} +#endif + +static int kv_program_pt_config_registers(struct radeon_device *rdev, + const struct kv_pt_config_reg *cac_config_regs) +{ + const struct kv_pt_config_reg *config_regs = cac_config_regs; + u32 data; + u32 cache = 0; + + if (config_regs == NULL) + return -EINVAL; + + while (config_regs->offset != 0xFFFFFFFF) { + if (config_regs->type == KV_CONFIGREG_CACHE) { + cache |= ((config_regs->value << config_regs->shift) & config_regs->mask); + } else { + switch (config_regs->type) { + case KV_CONFIGREG_SMC_IND: + data = RREG32_SMC(config_regs->offset); + break; + case KV_CONFIGREG_DIDT_IND: + data = RREG32_DIDT(config_regs->offset); + break; + default: + data = RREG32(config_regs->offset << 2); + break; + } + + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + data |= cache; + cache = 0; + + switch (config_regs->type) { + case KV_CONFIGREG_SMC_IND: + WREG32_SMC(config_regs->offset, data); + break; + case KV_CONFIGREG_DIDT_IND: + WREG32_DIDT(config_regs->offset, data); + break; + default: + WREG32(config_regs->offset << 2, data); + break; + } + } + config_regs++; + } + + return 0; +} + +static void kv_do_enable_didt(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 data; + + if (pi->caps_sq_ramping) { + data = RREG32_DIDT(DIDT_SQ_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_SQ_CTRL0, data); + } + + if (pi->caps_db_ramping) { + data = RREG32_DIDT(DIDT_DB_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_DB_CTRL0, data); + } + + if (pi->caps_td_ramping) { + data = RREG32_DIDT(DIDT_TD_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TD_CTRL0, data); + } + + if (pi->caps_tcp_ramping) { + data = RREG32_DIDT(DIDT_TCP_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TCP_CTRL0, data); + } +} + +static int kv_enable_didt(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + if (pi->caps_sq_ramping || + pi->caps_db_ramping || + pi->caps_td_ramping || + pi->caps_tcp_ramping) { + cik_enter_rlc_safe_mode(rdev); + + if (enable) { + ret = kv_program_pt_config_registers(rdev, didt_config_kv); + if (ret) { + cik_exit_rlc_safe_mode(rdev); + return ret; + } + } + + kv_do_enable_didt(rdev, enable); + + cik_exit_rlc_safe_mode(rdev); + } + + return 0; +} + +#if 0 +static void kv_initialize_hardware_cac_manager(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->caps_cac) { + WREG32_SMC(LCAC_SX0_OVR_SEL, 0); + WREG32_SMC(LCAC_SX0_OVR_VAL, 0); + kv_program_local_cac_table(rdev, sx_local_cac_cfg_kv, sx0_cac_config_reg); + + WREG32_SMC(LCAC_MC0_OVR_SEL, 0); + WREG32_SMC(LCAC_MC0_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc0_local_cac_cfg_kv, mc0_cac_config_reg); + + WREG32_SMC(LCAC_MC1_OVR_SEL, 0); + WREG32_SMC(LCAC_MC1_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc1_local_cac_cfg_kv, mc1_cac_config_reg); + + WREG32_SMC(LCAC_MC2_OVR_SEL, 0); + WREG32_SMC(LCAC_MC2_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc2_local_cac_cfg_kv, mc2_cac_config_reg); + + WREG32_SMC(LCAC_MC3_OVR_SEL, 0); + WREG32_SMC(LCAC_MC3_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc3_local_cac_cfg_kv, mc3_cac_config_reg); + + WREG32_SMC(LCAC_CPL_OVR_SEL, 0); + WREG32_SMC(LCAC_CPL_OVR_VAL, 0); + kv_program_local_cac_table(rdev, cpl_local_cac_cfg_kv, cpl_cac_config_reg); + } +} +#endif + +static int kv_enable_smc_cac(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + + if (pi->caps_cac) { + if (enable) { + ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_EnableCac); + if (ret) + pi->cac_enabled = false; + else + pi->cac_enabled = true; + } else if (pi->cac_enabled) { + kv_notify_message_to_smu(rdev, PPSMC_MSG_DisableCac); + pi->cac_enabled = false; + } + } + + return ret; +} + +static int kv_process_firmware_header(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 tmp; + int ret; + + ret = kv_read_smc_sram_dword(rdev, SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, DpmTable), + &tmp, pi->sram_end); + + if (ret == 0) + pi->dpm_table_start = tmp; + + ret = kv_read_smc_sram_dword(rdev, SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, SoftRegisters), + &tmp, pi->sram_end); + + if (ret == 0) + pi->soft_regs_start = tmp; + + return ret; +} + +static int kv_enable_dpm_voltage_scaling(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + pi->graphics_voltage_change_enable = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsVoltageChangeEnable), + &pi->graphics_voltage_change_enable, + sizeof(u8), pi->sram_end); + + return ret; +} + +static int kv_set_dpm_interval(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + pi->graphics_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsInterval), + &pi->graphics_interval, + sizeof(u8), pi->sram_end); + + return ret; +} + +static int kv_set_dpm_boot_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsBootLevel), + &pi->graphics_boot_level, + sizeof(u8), pi->sram_end); + + return ret; +} + +static void kv_program_vc(struct radeon_device *rdev) +{ + WREG32_SMC(CG_FTV_0, 0x3FFFC000); +} + +static void kv_clear_vc(struct radeon_device *rdev) +{ + WREG32_SMC(CG_FTV_0, 0); +} + +static int kv_set_divider_value(struct radeon_device *rdev, + u32 index, u32 sclk) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct atom_clock_dividers dividers; + int ret; + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + sclk, false, ÷rs); + if (ret) + return ret; + + pi->graphics_level[index].SclkDid = (u8)dividers.post_div; + pi->graphics_level[index].SclkFrequency = cpu_to_be32(sclk); + + return 0; +} + +static u16 kv_convert_8bit_index_to_voltage(struct radeon_device *rdev, + u16 voltage) +{ + return 6200 - (voltage * 25); +} + +static u16 kv_convert_2bit_index_to_voltage(struct radeon_device *rdev, + u32 vid_2bit) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 vid_8bit = sumo_convert_vid2_to_vid7(rdev, + &pi->sys_info.vid_mapping_table, + vid_2bit); + + return kv_convert_8bit_index_to_voltage(rdev, (u16)vid_8bit); +} + + +static int kv_set_vid(struct radeon_device *rdev, u32 index, u32 vid) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].VoltageDownH = (u8)pi->voltage_drop_t; + pi->graphics_level[index].MinVddNb = + cpu_to_be32(kv_convert_2bit_index_to_voltage(rdev, vid)); + + return 0; +} + +static int kv_set_at(struct radeon_device *rdev, u32 index, u32 at) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].AT = cpu_to_be16((u16)at); + + return 0; +} + +static void kv_dpm_power_level_enable(struct radeon_device *rdev, + u32 index, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].EnabledForActivity = enable ? 1 : 0; +} + +static void kv_start_dpm(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + tmp |= GLOBAL_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + + kv_smc_dpm_enable(rdev, true); +} + +static void kv_stop_dpm(struct radeon_device *rdev) +{ + kv_smc_dpm_enable(rdev, false); +} + +static void kv_start_am(struct radeon_device *rdev) +{ + u32 sclk_pwrmgt_cntl = RREG32_SMC(SCLK_PWRMGT_CNTL); + + sclk_pwrmgt_cntl &= ~(RESET_SCLK_CNT | RESET_BUSY_CNT); + sclk_pwrmgt_cntl |= DYNAMIC_PM_EN; + + WREG32_SMC(SCLK_PWRMGT_CNTL, sclk_pwrmgt_cntl); +} + +static void kv_reset_am(struct radeon_device *rdev) +{ + u32 sclk_pwrmgt_cntl = RREG32_SMC(SCLK_PWRMGT_CNTL); + + sclk_pwrmgt_cntl |= (RESET_SCLK_CNT | RESET_BUSY_CNT); + + WREG32_SMC(SCLK_PWRMGT_CNTL, sclk_pwrmgt_cntl); +} + +static int kv_freeze_sclk_dpm(struct radeon_device *rdev, bool freeze) +{ + return kv_notify_message_to_smu(rdev, freeze ? + PPSMC_MSG_SCLKDPM_FreezeLevel : PPSMC_MSG_SCLKDPM_UnfreezeLevel); +} + +static int kv_force_lowest_valid(struct radeon_device *rdev) +{ + return kv_force_dpm_lowest(rdev); +} + +static int kv_unforce_levels(struct radeon_device *rdev) +{ + return kv_notify_message_to_smu(rdev, PPSMC_MSG_NoForcedLevel); +} + +static int kv_update_sclk_t(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 low_sclk_interrupt_t = 0; + int ret = 0; + + if (pi->caps_sclk_throttle_low_notification) { + low_sclk_interrupt_t = cpu_to_be32(pi->low_sclk_interrupt_t); + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, LowSclkInterruptT), + (u8 *)&low_sclk_interrupt_t, + sizeof(u32), pi->sram_end); + } + return ret; +} + +static int kv_program_bootup_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { + if ((table->entries[i].clk == pi->boot_pl.sclk) || + (i == 0)) + break; + } + + pi->graphics_boot_level = (u8)i; + kv_dpm_power_level_enable(rdev, i, true); + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + if (table->num_max_dpm_entries == 0) + return -EINVAL; + + for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { + if ((table->entries[i].sclk_frequency == pi->boot_pl.sclk) || + (i == 0)) + break; + } + + pi->graphics_boot_level = (u8)i; + kv_dpm_power_level_enable(rdev, i, true); + } + return 0; +} + +static int kv_enable_auto_thermal_throttling(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + pi->graphics_therm_throttle_enable = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsThermThrottleEnable), + &pi->graphics_therm_throttle_enable, + sizeof(u8), pi->sram_end); + + return ret; +} + +static int kv_upload_dpm_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsLevel), + (u8 *)&pi->graphics_level, + sizeof(SMU7_Fusion_GraphicsLevel) * SMU7_MAX_LEVELS_GRAPHICS, + pi->sram_end); + + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsDpmLevelCount), + &pi->graphics_dpm_level_count, + sizeof(u8), pi->sram_end); + + return ret; +} + +static u32 kv_get_clock_difference(u32 a, u32 b) +{ + return (a >= b) ? a - b : b - a; +} + +static u32 kv_get_clk_bypass(struct radeon_device *rdev, u32 clk) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 value; + + if (pi->caps_enable_dfs_bypass) { + if (kv_get_clock_difference(clk, 40000) < 200) + value = 3; + else if (kv_get_clock_difference(clk, 30000) < 200) + value = 2; + else if (kv_get_clock_difference(clk, 20000) < 200) + value = 7; + else if (kv_get_clock_difference(clk, 15000) < 200) + value = 6; + else if (kv_get_clock_difference(clk, 10000) < 200) + value = 8; + else + value = 0; + } else { + value = 0; + } + + return value; +} + +static int kv_populate_uvd_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_uvd_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + int ret; + u32 i; + + if (table == NULL || table->count == 0) + return 0; + + pi->uvd_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < table->entries[i].v)) + break; + + pi->uvd_level[i].VclkFrequency = cpu_to_be32(table->entries[i].vclk); + pi->uvd_level[i].DclkFrequency = cpu_to_be32(table->entries[i].dclk); + pi->uvd_level[i].MinVddNb = cpu_to_be16(table->entries[i].v); + + pi->uvd_level[i].VClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].vclk); + pi->uvd_level[i].DClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].dclk); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].vclk, false, ÷rs); + if (ret) + return ret; + pi->uvd_level[i].VclkDivider = (u8)dividers.post_div; + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].dclk, false, ÷rs); + if (ret) + return ret; + pi->uvd_level[i].DclkDivider = (u8)dividers.post_div; + + pi->uvd_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UvdLevelCount), + (u8 *)&pi->uvd_level_count, + sizeof(u8), pi->sram_end); + if (ret) + return ret; + + pi->uvd_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UVDInterval), + &pi->uvd_interval, + sizeof(u8), pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UvdLevel), + (u8 *)&pi->uvd_level, + sizeof(SMU7_Fusion_UvdLevel) * SMU7_MAX_LEVELS_UVD, + pi->sram_end); + + return ret; + +} + +static int kv_populate_vce_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + u32 i; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + + if (table == NULL || table->count == 0) + return 0; + + pi->vce_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + pi->high_voltage_t < table->entries[i].v) + break; + + pi->vce_level[i].Frequency = cpu_to_be32(table->entries[i].evclk); + pi->vce_level[i].MinVoltage = cpu_to_be16(table->entries[i].v); + + pi->vce_level[i].ClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].evclk); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].evclk, false, ÷rs); + if (ret) + return ret; + pi->vce_level[i].Divider = (u8)dividers.post_div; + + pi->vce_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VceLevelCount), + (u8 *)&pi->vce_level_count, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + pi->vce_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VCEInterval), + (u8 *)&pi->vce_interval, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VceLevel), + (u8 *)&pi->vce_level, + sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_VCE, + pi->sram_end); + + return ret; +} + +static int kv_populate_samu_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + int ret; + u32 i; + + if (table == NULL || table->count == 0) + return 0; + + pi->samu_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + pi->high_voltage_t < table->entries[i].v) + break; + + pi->samu_level[i].Frequency = cpu_to_be32(table->entries[i].clk); + pi->samu_level[i].MinVoltage = cpu_to_be16(table->entries[i].v); + + pi->samu_level[i].ClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].clk); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].clk, false, ÷rs); + if (ret) + return ret; + pi->samu_level[i].Divider = (u8)dividers.post_div; + + pi->samu_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SamuLevelCount), + (u8 *)&pi->samu_level_count, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + pi->samu_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SAMUInterval), + (u8 *)&pi->samu_interval, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SamuLevel), + (u8 *)&pi->samu_level, + sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_SAMU, + pi->sram_end); + if (ret) + return ret; + + return ret; +} + + +static int kv_populate_acp_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + int ret; + u32 i; + + if (table == NULL || table->count == 0) + return 0; + + pi->acp_level_count = 0; + for (i = 0; i < table->count; i++) { + pi->acp_level[i].Frequency = cpu_to_be32(table->entries[i].clk); + pi->acp_level[i].MinVoltage = cpu_to_be16(table->entries[i].v); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].clk, false, ÷rs); + if (ret) + return ret; + pi->acp_level[i].Divider = (u8)dividers.post_div; + + pi->acp_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, AcpLevelCount), + (u8 *)&pi->acp_level_count, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + pi->acp_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, ACPInterval), + (u8 *)&pi->acp_interval, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, AcpLevel), + (u8 *)&pi->acp_level, + sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_ACP, + pi->sram_end); + if (ret) + return ret; + + return ret; +} + +static void kv_calculate_dfs_bypass_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + for (i = 0; i < pi->graphics_dpm_level_count; i++) { + if (pi->caps_enable_dfs_bypass) { + if (kv_get_clock_difference(table->entries[i].clk, 40000) < 200) + pi->graphics_level[i].ClkBypassCntl = 3; + else if (kv_get_clock_difference(table->entries[i].clk, 30000) < 200) + pi->graphics_level[i].ClkBypassCntl = 2; + else if (kv_get_clock_difference(table->entries[i].clk, 26600) < 200) + pi->graphics_level[i].ClkBypassCntl = 7; + else if (kv_get_clock_difference(table->entries[i].clk , 20000) < 200) + pi->graphics_level[i].ClkBypassCntl = 6; + else if (kv_get_clock_difference(table->entries[i].clk , 10000) < 200) + pi->graphics_level[i].ClkBypassCntl = 8; + else + pi->graphics_level[i].ClkBypassCntl = 0; + } else { + pi->graphics_level[i].ClkBypassCntl = 0; + } + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + for (i = 0; i < pi->graphics_dpm_level_count; i++) { + if (pi->caps_enable_dfs_bypass) { + if (kv_get_clock_difference(table->entries[i].sclk_frequency, 40000) < 200) + pi->graphics_level[i].ClkBypassCntl = 3; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 30000) < 200) + pi->graphics_level[i].ClkBypassCntl = 2; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 26600) < 200) + pi->graphics_level[i].ClkBypassCntl = 7; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 20000) < 200) + pi->graphics_level[i].ClkBypassCntl = 6; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 10000) < 200) + pi->graphics_level[i].ClkBypassCntl = 8; + else + pi->graphics_level[i].ClkBypassCntl = 0; + } else { + pi->graphics_level[i].ClkBypassCntl = 0; + } + } + } +} + +static int kv_enable_ulv(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_EnableULV : PPSMC_MSG_DisableULV); +} + +static void kv_update_current_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct kv_ps *new_ps = kv_get_ps(rps); + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->current_rps = *rps; + pi->current_ps = *new_ps; + pi->current_rps.ps_priv = &pi->current_ps; +} + +static void kv_update_requested_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct kv_ps *new_ps = kv_get_ps(rps); + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->requested_rps = *rps; + pi->requested_ps = *new_ps; + pi->requested_rps.ps_priv = &pi->requested_ps; +} + +int kv_dpm_enable(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + ret = kv_process_firmware_header(rdev); + if (ret) { + DRM_ERROR("kv_process_firmware_header failed\n"); + return ret; + } + kv_init_fps_limits(rdev); + kv_init_graphics_levels(rdev); + ret = kv_program_bootup_state(rdev); + if (ret) { + DRM_ERROR("kv_program_bootup_state failed\n"); + return ret; + } + kv_calculate_dfs_bypass_settings(rdev); + ret = kv_upload_dpm_settings(rdev); + if (ret) { + DRM_ERROR("kv_upload_dpm_settings failed\n"); + return ret; + } + ret = kv_populate_uvd_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_uvd_table failed\n"); + return ret; + } + ret = kv_populate_vce_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_vce_table failed\n"); + return ret; + } + ret = kv_populate_samu_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_samu_table failed\n"); + return ret; + } + ret = kv_populate_acp_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_acp_table failed\n"); + return ret; + } + kv_program_vc(rdev); +#if 0 + kv_initialize_hardware_cac_manager(rdev); +#endif + kv_start_am(rdev); + if (pi->enable_auto_thermal_throttling) { + ret = kv_enable_auto_thermal_throttling(rdev); + if (ret) { + DRM_ERROR("kv_enable_auto_thermal_throttling failed\n"); + return ret; + } + } + ret = kv_enable_dpm_voltage_scaling(rdev); + if (ret) { + DRM_ERROR("kv_enable_dpm_voltage_scaling failed\n"); + return ret; + } + ret = kv_set_dpm_interval(rdev); + if (ret) { + DRM_ERROR("kv_set_dpm_interval failed\n"); + return ret; + } + ret = kv_set_dpm_boot_state(rdev); + if (ret) { + DRM_ERROR("kv_set_dpm_boot_state failed\n"); + return ret; + } + ret = kv_enable_ulv(rdev, true); + if (ret) { + DRM_ERROR("kv_enable_ulv failed\n"); + return ret; + } + kv_start_dpm(rdev); + ret = kv_enable_didt(rdev, true); + if (ret) { + DRM_ERROR("kv_enable_didt failed\n"); + return ret; + } + ret = kv_enable_smc_cac(rdev, true); + if (ret) { + DRM_ERROR("kv_enable_smc_cac failed\n"); + return ret; + } + + if (rdev->irq.installed && + r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) { + ret = kv_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) { + DRM_ERROR("kv_set_thermal_temperature_range failed\n"); + return ret; + } + rdev->irq.dpm_thermal = true; + radeon_irq_set(rdev); + } + + /* powerdown unused blocks for now */ + kv_dpm_powergate_acp(rdev, true); + kv_dpm_powergate_samu(rdev, true); + kv_dpm_powergate_vce(rdev, true); + + kv_update_current_ps(rdev, rdev->pm.dpm.boot_ps); + + return ret; +} + +void kv_dpm_disable(struct radeon_device *rdev) +{ + kv_enable_smc_cac(rdev, false); + kv_enable_didt(rdev, false); + kv_clear_vc(rdev); + kv_stop_dpm(rdev); + kv_enable_ulv(rdev, false); + kv_reset_am(rdev); + + kv_update_current_ps(rdev, rdev->pm.dpm.boot_ps); +} + +#if 0 +static int kv_write_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 value) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + return kv_copy_bytes_to_smc(rdev, pi->soft_regs_start + reg_offset, + (u8 *)&value, sizeof(u16), pi->sram_end); +} + +static int kv_read_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 *value) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + return kv_read_smc_sram_dword(rdev, pi->soft_regs_start + reg_offset, + value, pi->sram_end); +} +#endif + +static void kv_init_sclk_t(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->low_sclk_interrupt_t = 0; +} + +static int kv_init_fps_limits(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + + if (pi->caps_fps) { + u16 tmp; + + tmp = 45; + pi->fps_high_t = cpu_to_be16(tmp); + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, FpsHighT), + (u8 *)&pi->fps_high_t, + sizeof(u16), pi->sram_end); + + tmp = 30; + pi->fps_low_t = cpu_to_be16(tmp); + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, FpsLowT), + (u8 *)&pi->fps_low_t, + sizeof(u16), pi->sram_end); + + } + return ret; +} + +static void kv_init_powergate_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->uvd_power_gated = false; + pi->vce_power_gated = false; + pi->samu_power_gated = false; + pi->acp_power_gated = false; + +} + +static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable); +} + +#if 0 +static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable); +} +#endif + +static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_SAMUDPM_Enable : PPSMC_MSG_SAMUDPM_Disable); +} + +static int kv_enable_acp_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_ACPDPM_Enable : PPSMC_MSG_ACPDPM_Disable); +} + +static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_uvd_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table; + int ret; + + if (!gate) { + if (!pi->caps_uvd_dpm || table->count || pi->caps_stable_p_state) + pi->uvd_boot_level = table->count - 1; + else + pi->uvd_boot_level = 0; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UvdBootLevel), + (uint8_t *)&pi->uvd_boot_level, + sizeof(u8), pi->sram_end); + if (ret) + return ret; + + if (!pi->caps_uvd_dpm || + pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_UVDDPM_SetEnabledMask, + (1 << pi->uvd_boot_level)); + } + + return kv_enable_uvd_dpm(rdev, !gate); +} + +#if 0 +static u8 kv_get_vce_boot_level(struct radeon_device *rdev) +{ + u8 i; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + for (i = 0; i < table->count; i++) { + if (table->entries[i].evclk >= 0) /* XXX */ + break; + } + + return i; +} + +static int kv_update_vce_dpm(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + int ret; + + if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { + if (pi->caps_stable_p_state) + pi->vce_boot_level = table->count - 1; + else + pi->vce_boot_level = kv_get_vce_boot_level(rdev); + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VceBootLevel), + (u8 *)&pi->vce_boot_level, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + if (pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_VCEDPM_SetEnabledMask, + (1 << pi->vce_boot_level)); + + kv_enable_vce_dpm(rdev, true); + } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { + kv_enable_vce_dpm(rdev, false); + } + + return 0; +} +#endif + +static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table; + int ret; + + if (!gate) { + if (pi->caps_stable_p_state) + pi->samu_boot_level = table->count - 1; + else + pi->samu_boot_level = 0; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SamuBootLevel), + (u8 *)&pi->samu_boot_level, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + if (pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + (1 << pi->samu_boot_level)); + } + + return kv_enable_samu_dpm(rdev, !gate); +} + +static int kv_update_acp_dpm(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; + int ret; + + if (!gate) { + if (pi->caps_stable_p_state) + pi->acp_boot_level = table->count - 1; + else + pi->acp_boot_level = 0; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, AcpBootLevel), + (u8 *)&pi->acp_boot_level, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + if (pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_ACPDPM_SetEnabledMask, + (1 << pi->acp_boot_level)); + } + + return kv_enable_acp_dpm(rdev, !gate); +} + +static void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->uvd_power_gated == gate) + return; + + pi->uvd_power_gated = gate; + + if (gate) { + kv_update_uvd_dpm(rdev, true); + if (pi->caps_uvd_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerOFF); + } else { + if (pi->caps_uvd_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerON); + kv_update_uvd_dpm(rdev, false); + } +} + +static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->vce_power_gated == gate) + return; + + pi->vce_power_gated = gate; + + if (gate) { + if (pi->caps_vce_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF); + } else { + if (pi->caps_vce_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON); + } +} + +static void kv_dpm_powergate_samu(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->samu_power_gated == gate) + return; + + pi->samu_power_gated = gate; + + if (gate) { + kv_update_samu_dpm(rdev, true); + if (pi->caps_samu_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_SAMPowerOFF); + } else { + if (pi->caps_samu_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_SAMPowerON); + kv_update_samu_dpm(rdev, false); + } +} + +static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->acp_power_gated == gate) + return; + + if (rdev->family == CHIP_KABINI) + return; + + pi->acp_power_gated = gate; + + if (gate) { + kv_update_acp_dpm(rdev, true); + if (pi->caps_acp_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_ACPPowerOFF); + } else { + if (pi->caps_acp_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_ACPPowerON); + kv_update_acp_dpm(rdev, false); + } +} + +static void kv_set_valid_clock_range(struct radeon_device *rdev, + struct radeon_ps *new_rps) +{ + struct kv_ps *new_ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + for (i = 0; i < pi->graphics_dpm_level_count; i++) { + if ((table->entries[i].clk >= new_ps->levels[0].sclk) || + (i == (pi->graphics_dpm_level_count - 1))) { + pi->lowest_valid = i; + break; + } + } + + for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { + if ((table->entries[i].clk <= new_ps->levels[new_ps->num_levels -1].sclk) || + (i == 0)) { + pi->highest_valid = i; + break; + } + } + + if (pi->lowest_valid > pi->highest_valid) { + if ((new_ps->levels[0].sclk - table->entries[pi->highest_valid].clk) > + (table->entries[pi->lowest_valid].clk - new_ps->levels[new_ps->num_levels - 1].sclk)) + pi->highest_valid = pi->lowest_valid; + else + pi->lowest_valid = pi->highest_valid; + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + for (i = 0; i < (int)pi->graphics_dpm_level_count; i++) { + if (table->entries[i].sclk_frequency >= new_ps->levels[0].sclk || + i == (int)(pi->graphics_dpm_level_count - 1)) { + pi->lowest_valid = i; + break; + } + } + + for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { + if (table->entries[i].sclk_frequency <= + new_ps->levels[new_ps->num_levels - 1].sclk || + i == 0) { + pi->highest_valid = i; + break; + } + } + + if (pi->lowest_valid > pi->highest_valid) { + if ((new_ps->levels[0].sclk - + table->entries[pi->highest_valid].sclk_frequency) > + (table->entries[pi->lowest_valid].sclk_frequency - + new_ps->levels[new_ps->num_levels -1].sclk)) + pi->highest_valid = pi->lowest_valid; + else + pi->lowest_valid = pi->highest_valid; + } + } +} + +static int kv_update_dfs_bypass_settings(struct radeon_device *rdev, + struct radeon_ps *new_rps) +{ + struct kv_ps *new_ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + u8 clk_bypass_cntl; + + if (pi->caps_enable_dfs_bypass) { + clk_bypass_cntl = new_ps->need_dfs_bypass ? + pi->graphics_level[pi->graphics_boot_level].ClkBypassCntl : 0; + ret = kv_copy_bytes_to_smc(rdev, + (pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsLevel) + + (pi->graphics_boot_level * sizeof(SMU7_Fusion_GraphicsLevel)) + + offsetof(SMU7_Fusion_GraphicsLevel, ClkBypassCntl)), + &clk_bypass_cntl, + sizeof(u8), pi->sram_end); + } + + return ret; +} + +static int kv_enable_nb_dpm(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + + if (pi->enable_nb_dpm && !pi->nb_dpm_enabled) { + ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_NBDPM_Enable); + if (ret == 0) + pi->nb_dpm_enabled = true; + } + + return ret; +} + +int kv_dpm_pre_set_power_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_ps requested_ps = *rdev->pm.dpm.requested_ps; + struct radeon_ps *new_ps = &requested_ps; + + kv_update_requested_ps(rdev, new_ps); + + kv_apply_state_adjust_rules(rdev, + &pi->requested_rps, + &pi->current_rps); + + return 0; +} + +int kv_dpm_set_power_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + /*struct radeon_ps *old_ps = &pi->current_rps;*/ + int ret; + + if (rdev->family == CHIP_KABINI) { + if (pi->enable_dpm) { + kv_set_valid_clock_range(rdev, new_ps); + kv_update_dfs_bypass_settings(rdev, new_ps); + ret = kv_calculate_ds_divider(rdev); + if (ret) { + DRM_ERROR("kv_calculate_ds_divider failed\n"); + return ret; + } + kv_calculate_nbps_level_settings(rdev); + kv_calculate_dpm_settings(rdev); + kv_force_lowest_valid(rdev); + kv_enable_new_levels(rdev); + kv_upload_dpm_settings(rdev); + kv_program_nbps_index_settings(rdev, new_ps); + kv_unforce_levels(rdev); + kv_set_enabled_levels(rdev); + kv_force_lowest_valid(rdev); + kv_unforce_levels(rdev); +#if 0 + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("kv_update_vce_dpm failed\n"); + return ret; + } +#endif + kv_update_uvd_dpm(rdev, false); + kv_update_sclk_t(rdev); + } + } else { + if (pi->enable_dpm) { + kv_set_valid_clock_range(rdev, new_ps); + kv_update_dfs_bypass_settings(rdev, new_ps); + ret = kv_calculate_ds_divider(rdev); + if (ret) { + DRM_ERROR("kv_calculate_ds_divider failed\n"); + return ret; + } + kv_calculate_nbps_level_settings(rdev); + kv_calculate_dpm_settings(rdev); + kv_freeze_sclk_dpm(rdev, true); + kv_upload_dpm_settings(rdev); + kv_program_nbps_index_settings(rdev, new_ps); + kv_freeze_sclk_dpm(rdev, false); + kv_set_enabled_levels(rdev); +#if 0 + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("kv_update_vce_dpm failed\n"); + return ret; + } +#endif + kv_update_uvd_dpm(rdev, false); + kv_update_sclk_t(rdev); + kv_enable_nb_dpm(rdev); + } + } + return 0; +} + +void kv_dpm_post_set_power_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + + kv_update_current_ps(rdev, new_ps); +} + +void kv_dpm_setup_asic(struct radeon_device *rdev) +{ + sumo_take_smu_control(rdev, true); + kv_init_powergate_state(rdev); + kv_init_sclk_t(rdev); +} + +void kv_dpm_reset_asic(struct radeon_device *rdev) +{ + kv_force_lowest_valid(rdev); + kv_init_graphics_levels(rdev); + kv_program_bootup_state(rdev); + kv_upload_dpm_settings(rdev); + kv_force_lowest_valid(rdev); + kv_unforce_levels(rdev); +} + +//XXX use sumo_dpm_display_configuration_changed + +static void kv_construct_max_power_limits_table(struct radeon_device *rdev, + struct radeon_clock_and_voltage_limits *table) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->sys_info.sclk_voltage_mapping_table.num_max_dpm_entries > 0) { + int idx = pi->sys_info.sclk_voltage_mapping_table.num_max_dpm_entries - 1; + table->sclk = + pi->sys_info.sclk_voltage_mapping_table.entries[idx].sclk_frequency; + table->vddc = + kv_convert_2bit_index_to_voltage(rdev, + pi->sys_info.sclk_voltage_mapping_table.entries[idx].vid_2bit); + } + + table->mclk = pi->sys_info.nbp_memory_clock[0]; +} + +static void kv_patch_voltage_values(struct radeon_device *rdev) +{ + int i; + struct radeon_uvd_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table; + + if (table->count) { + for (i = 0; i < table->count; i++) + table->entries[i].v = + kv_convert_8bit_index_to_voltage(rdev, + table->entries[i].v); + } + +} + +static void kv_construct_boot_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->boot_pl.sclk = pi->sys_info.bootup_sclk; + pi->boot_pl.vddc_index = pi->sys_info.bootup_nb_voltage_index; + pi->boot_pl.ds_divider_index = 0; + pi->boot_pl.ss_divider_index = 0; + pi->boot_pl.allow_gnb_slow = 1; + pi->boot_pl.force_nbp_state = 0; + pi->boot_pl.display_wm = 0; + pi->boot_pl.vce_wm = 0; +} + +static int kv_force_dpm_lowest(struct radeon_device *rdev) +{ + int ret; + u32 enable_mask, i; + + ret = kv_dpm_get_enable_mask(rdev, &enable_mask); + if (ret) + return ret; + + for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) { + if (enable_mask & (1 << i)) + break; + } + + return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); +} + +static u8 kv_get_sleep_divider_id_from_clock(struct radeon_device *rdev, + u32 sclk, u32 min_sclk_in_sr) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + u32 temp; + u32 min = (min_sclk_in_sr > KV_MINIMUM_ENGINE_CLOCK) ? + min_sclk_in_sr : KV_MINIMUM_ENGINE_CLOCK; + + if (sclk < min) + return 0; + + if (!pi->caps_sclk_ds) + return 0; + + for (i = KV_MAX_DEEPSLEEP_DIVIDER_ID; i <= 0; i--) { + temp = sclk / sumo_get_sleep_divider_from_id(i); + if ((temp >= min) || (i == 0)) + break; + } + + return (u8)i; +} + +static int kv_get_high_voltage_limit(struct radeon_device *rdev, int *limit) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + int i; + + if (table && table->count) { + for (i = table->count - 1; i >= 0; i--) { + if (pi->high_voltage_t && + (kv_convert_8bit_index_to_voltage(rdev, table->entries[i].v) <= + pi->high_voltage_t)) { + *limit = i; + return 0; + } + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + for (i = table->num_max_dpm_entries - 1; i >= 0; i--) { + if (pi->high_voltage_t && + (kv_convert_2bit_index_to_voltage(rdev, table->entries[i].vid_2bit) <= + pi->high_voltage_t)) { + *limit = i; + return 0; + } + } + } + + *limit = 0; + return 0; +} + +static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps) +{ + struct kv_ps *ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + u32 min_sclk = 10000; /* ??? */ + u32 sclk, mclk = 0; + int i, limit; + bool force_high; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + u32 stable_p_state_sclk = 0; + struct radeon_clock_and_voltage_limits *max_limits = + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + + mclk = max_limits->mclk; + sclk = min_sclk; + + if (pi->caps_stable_p_state) { + stable_p_state_sclk = (max_limits->sclk * 75) / 100; + + for (i = table->count - 1; i >= 0; i++) { + if (stable_p_state_sclk >= table->entries[i].clk) { + stable_p_state_sclk = table->entries[i].clk; + break; + } + } + + if (i > 0) + stable_p_state_sclk = table->entries[0].clk; + + sclk = stable_p_state_sclk; + } + + ps->need_dfs_bypass = true; + + for (i = 0; i < ps->num_levels; i++) { + if (ps->levels[i].sclk < sclk) + ps->levels[i].sclk = sclk; + } + + if (table && table->count) { + for (i = 0; i < ps->num_levels; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < + kv_convert_8bit_index_to_voltage(rdev, ps->levels[i].vddc_index))) { + kv_get_high_voltage_limit(rdev, &limit); + ps->levels[i].sclk = table->entries[limit].clk; + } + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + for (i = 0; i < ps->num_levels; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < + kv_convert_8bit_index_to_voltage(rdev, ps->levels[i].vddc_index))) { + kv_get_high_voltage_limit(rdev, &limit); + ps->levels[i].sclk = table->entries[limit].sclk_frequency; + } + } + } + + if (pi->caps_stable_p_state) { + for (i = 0; i < ps->num_levels; i++) { + ps->levels[i].sclk = stable_p_state_sclk; + } + } + + pi->video_start = new_rps->dclk || new_rps->vclk; + + if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == + ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) + pi->battery_state = true; + else + pi->battery_state = false; + + if (rdev->family == CHIP_KABINI) { + ps->dpm0_pg_nb_ps_lo = 0x1; + ps->dpm0_pg_nb_ps_hi = 0x0; + ps->dpmx_nb_ps_lo = 0x1; + ps->dpmx_nb_ps_hi = 0x0; + } else { + ps->dpm0_pg_nb_ps_lo = 0x1; + ps->dpm0_pg_nb_ps_hi = 0x0; + ps->dpmx_nb_ps_lo = 0x2; + ps->dpmx_nb_ps_hi = 0x1; + + if (pi->sys_info.nb_dpm_enable && pi->battery_state) { + force_high = (mclk >= pi->sys_info.nbp_memory_clock[3]) || + pi->video_start || (rdev->pm.dpm.new_active_crtc_count >= 3) || + pi->disable_nb_ps3_in_battery; + ps->dpm0_pg_nb_ps_lo = force_high ? 0x2 : 0x3; + ps->dpm0_pg_nb_ps_hi = 0x2; + ps->dpmx_nb_ps_lo = force_high ? 0x2 : 0x3; + ps->dpmx_nb_ps_hi = 0x2; + } + } +} + +static void kv_dpm_power_level_enabled_for_throttle(struct radeon_device *rdev, + u32 index, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].EnabledForThrottle = enable ? 1 : 0; +} + +static int kv_calculate_ds_divider(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 sclk_in_sr = 10000; /* ??? */ + u32 i; + + if (pi->lowest_valid > pi->highest_valid) + return -EINVAL; + + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) { + pi->graphics_level[i].DeepSleepDivId = + kv_get_sleep_divider_id_from_clock(rdev, + be32_to_cpu(pi->graphics_level[i].SclkFrequency), + sclk_in_sr); + } + return 0; +} + +static int kv_calculate_nbps_level_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + bool force_high; + struct radeon_clock_and_voltage_limits *max_limits = + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + u32 mclk = max_limits->mclk; + + if (pi->lowest_valid > pi->highest_valid) + return -EINVAL; + + if (rdev->family == CHIP_KABINI) { + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) { + pi->graphics_level[i].GnbSlow = 1; + pi->graphics_level[i].ForceNbPs1 = 0; + pi->graphics_level[i].UpH = 0; + } + + if (!pi->sys_info.nb_dpm_enable) + return 0; + + force_high = ((mclk >= pi->sys_info.nbp_memory_clock[3]) || + (rdev->pm.dpm.new_active_crtc_count >= 3) || pi->video_start); + + if (force_high) { + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) + pi->graphics_level[i].GnbSlow = 0; + } else { + if (pi->battery_state) + pi->graphics_level[0].ForceNbPs1 = 1; + + pi->graphics_level[1].GnbSlow = 0; + pi->graphics_level[2].GnbSlow = 0; + pi->graphics_level[3].GnbSlow = 0; + pi->graphics_level[4].GnbSlow = 0; + } + } else { + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) { + pi->graphics_level[i].GnbSlow = 1; + pi->graphics_level[i].ForceNbPs1 = 0; + pi->graphics_level[i].UpH = 0; + } + + if (pi->sys_info.nb_dpm_enable && pi->battery_state) { + pi->graphics_level[pi->lowest_valid].UpH = 0x28; + pi->graphics_level[pi->lowest_valid].GnbSlow = 0; + if (pi->lowest_valid != pi->highest_valid) + pi->graphics_level[pi->lowest_valid].ForceNbPs1 = 1; + } + } + return 0; +} + +static int kv_calculate_dpm_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + + if (pi->lowest_valid > pi->highest_valid) + return -EINVAL; + + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) + pi->graphics_level[i].DisplayWatermark = (i == pi->highest_valid) ? 1 : 0; + + return 0; +} + +static void kv_init_graphics_levels(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + u32 vid_2bit; + + pi->graphics_dpm_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < + kv_convert_8bit_index_to_voltage(rdev, table->entries[i].v))) + break; + + kv_set_divider_value(rdev, i, table->entries[i].clk); + vid_2bit = sumo_convert_vid7_to_vid2(rdev, + &pi->sys_info.vid_mapping_table, + table->entries[i].v); + kv_set_vid(rdev, i, vid_2bit); + kv_set_at(rdev, i, pi->at[i]); + kv_dpm_power_level_enabled_for_throttle(rdev, i, true); + pi->graphics_dpm_level_count++; + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + pi->graphics_dpm_level_count = 0; + for (i = 0; i < table->num_max_dpm_entries; i++) { + if (pi->high_voltage_t && + pi->high_voltage_t < + kv_convert_2bit_index_to_voltage(rdev, table->entries[i].vid_2bit)) + break; + + kv_set_divider_value(rdev, i, table->entries[i].sclk_frequency); + kv_set_vid(rdev, i, table->entries[i].vid_2bit); + kv_set_at(rdev, i, pi->at[i]); + kv_dpm_power_level_enabled_for_throttle(rdev, i, true); + pi->graphics_dpm_level_count++; + } + } + + for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) + kv_dpm_power_level_enable(rdev, i, false); +} + +static void kv_enable_new_levels(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + + for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) { + if (i >= pi->lowest_valid && i <= pi->highest_valid) + kv_dpm_power_level_enable(rdev, i, true); + } +} + +static int kv_set_enabled_levels(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i, new_mask = 0; + + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) + new_mask |= (1 << i); + + return kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + new_mask); +} + +static void kv_program_nbps_index_settings(struct radeon_device *rdev, + struct radeon_ps *new_rps) +{ + struct kv_ps *new_ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + u32 nbdpmconfig1; + + if (rdev->family == CHIP_KABINI) + return; + + if (pi->sys_info.nb_dpm_enable) { + nbdpmconfig1 = RREG32_SMC(NB_DPM_CONFIG_1); + nbdpmconfig1 &= ~(Dpm0PgNbPsLo_MASK | Dpm0PgNbPsHi_MASK | + DpmXNbPsLo_MASK | DpmXNbPsHi_MASK); + nbdpmconfig1 |= (Dpm0PgNbPsLo(new_ps->dpm0_pg_nb_ps_lo) | + Dpm0PgNbPsHi(new_ps->dpm0_pg_nb_ps_hi) | + DpmXNbPsLo(new_ps->dpmx_nb_ps_lo) | + DpmXNbPsHi(new_ps->dpmx_nb_ps_hi)); + WREG32_SMC(NB_DPM_CONFIG_1, nbdpmconfig1); + } +} + +static int kv_set_thermal_temperature_range(struct radeon_device *rdev, + int min_temp, int max_temp) +{ + int low_temp = 0 * 1000; + int high_temp = 255 * 1000; + u32 tmp; + + if (low_temp < min_temp) + low_temp = min_temp; + if (high_temp > max_temp) + high_temp = max_temp; + if (high_temp < low_temp) { + DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp); + return -EINVAL; + } + + tmp = RREG32_SMC(CG_THERMAL_INT_CTRL); + tmp &= ~(DIG_THERM_INTH_MASK | DIG_THERM_INTL_MASK); + tmp |= (DIG_THERM_INTH(49 + (high_temp / 1000)) | + DIG_THERM_INTL(49 + (low_temp / 1000))); + WREG32_SMC(CG_THERMAL_INT_CTRL, tmp); + + rdev->pm.dpm.thermal.min_temp = low_temp; + rdev->pm.dpm.thermal.max_temp = high_temp; + + return 0; +} + +union igp_info { + struct _ATOM_INTEGRATED_SYSTEM_INFO info; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V5 info_5; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; +}; + +static int kv_parse_sys_info_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); + union igp_info *igp_info; + u8 frev, crev; + u16 data_offset; + int i; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *)(mode_info->atom_context->bios + + data_offset); + + if (crev != 8) { + DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev); + return -EINVAL; + } + pi->sys_info.bootup_sclk = le32_to_cpu(igp_info->info_8.ulBootUpEngineClock); + pi->sys_info.bootup_uma_clk = le32_to_cpu(igp_info->info_8.ulBootUpUMAClock); + pi->sys_info.bootup_nb_voltage_index = + le16_to_cpu(igp_info->info_8.usBootUpNBVoltage); + if (igp_info->info_8.ucHtcTmpLmt == 0) + pi->sys_info.htc_tmp_lmt = 203; + else + pi->sys_info.htc_tmp_lmt = igp_info->info_8.ucHtcTmpLmt; + if (igp_info->info_8.ucHtcHystLmt == 0) + pi->sys_info.htc_hyst_lmt = 5; + else + pi->sys_info.htc_hyst_lmt = igp_info->info_8.ucHtcHystLmt; + if (pi->sys_info.htc_tmp_lmt <= pi->sys_info.htc_hyst_lmt) { + DRM_ERROR("The htcTmpLmt should be larger than htcHystLmt.\n"); + } + + if (le32_to_cpu(igp_info->info_8.ulSystemConfig) & (1 << 3)) + pi->sys_info.nb_dpm_enable = true; + else + pi->sys_info.nb_dpm_enable = false; + + for (i = 0; i < KV_NUM_NBPSTATES; i++) { + pi->sys_info.nbp_memory_clock[i] = + le32_to_cpu(igp_info->info_8.ulNbpStateMemclkFreq[i]); + pi->sys_info.nbp_n_clock[i] = + le32_to_cpu(igp_info->info_8.ulNbpStateNClkFreq[i]); + } + if (le32_to_cpu(igp_info->info_8.ulGPUCapInfo) & + SYS_INFO_GPUCAPS__ENABEL_DFS_BYPASS) + pi->caps_enable_dfs_bypass = true; + + sumo_construct_sclk_voltage_mapping_table(rdev, + &pi->sys_info.sclk_voltage_mapping_table, + igp_info->info_8.sAvail_SCLK); + + sumo_construct_vid_mapping_table(rdev, + &pi->sys_info.vid_mapping_table, + igp_info->info_8.sAvail_SCLK); + + kv_construct_max_power_limits_table(rdev, + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac); + } + return 0; +} + +union power_info { + struct _ATOM_POWERPLAY_INFO info; + struct _ATOM_POWERPLAY_INFO_V2 info_2; + struct _ATOM_POWERPLAY_INFO_V3 info_3; + struct _ATOM_PPLIB_POWERPLAYTABLE pplib; + struct _ATOM_PPLIB_POWERPLAYTABLE2 pplib2; + struct _ATOM_PPLIB_POWERPLAYTABLE3 pplib3; +}; + +union pplib_clock_info { + struct _ATOM_PPLIB_R600_CLOCK_INFO r600; + struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780; + struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen; + struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo; +}; + +union pplib_power_state { + struct _ATOM_PPLIB_STATE v1; + struct _ATOM_PPLIB_STATE_V2 v2; +}; + +static void kv_patch_boot_state(struct radeon_device *rdev, + struct kv_ps *ps) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + ps->num_levels = 1; + ps->levels[0] = pi->boot_pl; +} + +static void kv_parse_pplib_non_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info, + u8 table_rev) +{ + struct kv_ps *ps = kv_get_ps(rps); + + rps->caps = le32_to_cpu(non_clock_info->ulCapsAndSettings); + rps->class = le16_to_cpu(non_clock_info->usClassification); + rps->class2 = le16_to_cpu(non_clock_info->usClassification2); + + if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { + rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); + rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); + } else { + rps->vclk = 0; + rps->dclk = 0; + } + + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) { + rdev->pm.dpm.boot_ps = rps; + kv_patch_boot_state(rdev, ps); + } + if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) + rdev->pm.dpm.uvd_ps = rps; +} + +static void kv_parse_pplib_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, int index, + union pplib_clock_info *clock_info) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct kv_ps *ps = kv_get_ps(rps); + struct kv_pl *pl = &ps->levels[index]; + u32 sclk; + + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); + sclk |= clock_info->sumo.ucEngineClockHigh << 16; + pl->sclk = sclk; + pl->vddc_index = clock_info->sumo.vddcIndex; + + ps->num_levels = index + 1; + + if (pi->caps_sclk_ds) { + pl->ds_divider_index = 5; + pl->ss_divider_index = 5; + } +} + +static int kv_parse_power_table(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info; + union pplib_power_state *power_state; + int i, j, k, non_clock_array_index, clock_array_index; + union pplib_clock_info *clock_info; + struct _StateArray *state_array; + struct _ClockInfoArray *clock_info_array; + struct _NonClockInfoArray *non_clock_info_array; + union power_info *power_info; + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); + u16 data_offset; + u8 frev, crev; + u8 *power_state_offset; + struct kv_ps *ps; + + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) + return -EINVAL; + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); + + state_array = (struct _StateArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usStateArrayOffset)); + clock_info_array = (struct _ClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset)); + non_clock_info_array = (struct _NonClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); + + rdev->pm.dpm.ps = kzalloc(sizeof(struct radeon_ps) * + state_array->ucNumEntries, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + power_state = (union pplib_power_state *)power_state_offset; + non_clock_array_index = power_state->v2.nonClockInfoIndex; + non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) + &non_clock_info_array->nonClockInfo[non_clock_array_index]; + if (!rdev->pm.power_state[i].clock_info) + return -EINVAL; + ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL); + if (ps == NULL) { + kfree(rdev->pm.dpm.ps); + return -ENOMEM; + } + rdev->pm.dpm.ps[i].ps_priv = ps; + k = 0; + for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { + clock_array_index = power_state->v2.clockInfoIndex[j]; + if (clock_array_index >= clock_info_array->ucNumEntries) + continue; + if (k >= SUMO_MAX_HARDWARE_POWERLEVELS) + break; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + kv_parse_pplib_clock_info(rdev, + &rdev->pm.dpm.ps[i], k, + clock_info); + k++; + } + kv_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], + non_clock_info, + non_clock_info_array->ucEntrySize); + power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + } + rdev->pm.dpm.num_ps = state_array->ucNumEntries; + return 0; +} + +int kv_dpm_init(struct radeon_device *rdev) +{ + struct kv_power_info *pi; + int ret, i; + + pi = kzalloc(sizeof(struct kv_power_info), GFP_KERNEL); + if (pi == NULL) + return -ENOMEM; + rdev->pm.dpm.priv = pi; + + ret = r600_parse_extended_power_table(rdev); + if (ret) + return ret; + + for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) + pi->at[i] = TRINITY_AT_DFLT; + + pi->sram_end = SMC_RAM_END; + + if (rdev->family == CHIP_KABINI) + pi->high_voltage_t = 4001; + + pi->enable_nb_dpm = true; + + pi->caps_power_containment = true; + pi->caps_cac = true; + pi->enable_didt = false; + if (pi->enable_didt) { + pi->caps_sq_ramping = true; + pi->caps_db_ramping = true; + pi->caps_td_ramping = true; + pi->caps_tcp_ramping = true; + } + + pi->caps_sclk_ds = true; + pi->enable_auto_thermal_throttling = true; + pi->disable_nb_ps3_in_battery = false; + pi->bapm_enable = true; + pi->voltage_drop_t = 0; + pi->caps_sclk_throttle_low_notification = false; + pi->caps_fps = false; /* true? */ + pi->caps_uvd_pg = false; /* XXX */ + pi->caps_uvd_dpm = true; + pi->caps_vce_pg = false; + pi->caps_samu_pg = false; + pi->caps_acp_pg = false; + pi->caps_stable_p_state = false; + + ret = kv_parse_sys_info_table(rdev); + if (ret) + return ret; + + kv_patch_voltage_values(rdev); + kv_construct_boot_state(rdev); + + ret = kv_parse_power_table(rdev); + if (ret) + return ret; + + pi->enable_dpm = true; + + return 0; +} + +void kv_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + int i; + struct kv_ps *ps = kv_get_ps(rps); + + r600_dpm_print_class_info(rps->class, rps->class2); + r600_dpm_print_cap_info(rps->caps); + printk("\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + for (i = 0; i < ps->num_levels; i++) { + struct kv_pl *pl = &ps->levels[i]; + printk("\t\tpower level %d sclk: %u vddc: %u\n", + i, pl->sclk, + kv_convert_8bit_index_to_voltage(rdev, pl->vddc_index)); + } + r600_dpm_print_ps_status(rdev, rps); +} + +void kv_dpm_fini(struct radeon_device *rdev) +{ + int i; + + for (i = 0; i < rdev->pm.dpm.num_ps; i++) { + kfree(rdev->pm.dpm.ps[i].ps_priv); + } + kfree(rdev->pm.dpm.ps); + kfree(rdev->pm.dpm.priv); + r600_free_extended_power_table(rdev); +} + +void kv_dpm_display_configuration_changed(struct radeon_device *rdev) +{ + +} + +u32 kv_dpm_get_sclk(struct radeon_device *rdev, bool low) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct kv_ps *requested_state = kv_get_ps(&pi->requested_rps); + + if (low) + return requested_state->levels[0].sclk; + else + return requested_state->levels[requested_state->num_levels - 1].sclk; +} + +u32 kv_dpm_get_mclk(struct radeon_device *rdev, bool low) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + return pi->sys_info.bootup_uma_clk; +} + diff --git a/drivers/gpu/drm/radeon/kv_dpm.h b/drivers/gpu/drm/radeon/kv_dpm.h new file mode 100644 index 000000000000..32bb079572d7 --- /dev/null +++ b/drivers/gpu/drm/radeon/kv_dpm.h @@ -0,0 +1,199 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __KV_DPM_H__ +#define __KV_DPM_H__ + +#define SMU__NUM_SCLK_DPM_STATE 8 +#define SMU__NUM_MCLK_DPM_LEVELS 4 +#define SMU__NUM_LCLK_DPM_LEVELS 8 +#define SMU__NUM_PCIE_DPM_LEVELS 0 /* ??? */ +#include "smu7_fusion.h" +#include "trinity_dpm.h" +#include "ppsmc.h" + +#define KV_NUM_NBPSTATES 4 + +enum kv_pt_config_reg_type { + KV_CONFIGREG_MMR = 0, + KV_CONFIGREG_SMC_IND, + KV_CONFIGREG_DIDT_IND, + KV_CONFIGREG_CACHE, + KV_CONFIGREG_MAX +}; + +struct kv_pt_config_reg { + u32 offset; + u32 mask; + u32 shift; + u32 value; + enum kv_pt_config_reg_type type; +}; + +struct kv_lcac_config_values { + u32 block_id; + u32 signal_id; + u32 t; +}; + +struct kv_lcac_config_reg { + u32 cntl; + u32 block_mask; + u32 block_shift; + u32 signal_mask; + u32 signal_shift; + u32 t_mask; + u32 t_shift; + u32 enable_mask; + u32 enable_shift; +}; + +struct kv_pl { + u32 sclk; + u8 vddc_index; + u8 ds_divider_index; + u8 ss_divider_index; + u8 allow_gnb_slow; + u8 force_nbp_state; + u8 display_wm; + u8 vce_wm; +}; + +struct kv_ps { + struct kv_pl levels[SUMO_MAX_HARDWARE_POWERLEVELS]; + u32 num_levels; + bool need_dfs_bypass; + u8 dpm0_pg_nb_ps_lo; + u8 dpm0_pg_nb_ps_hi; + u8 dpmx_nb_ps_lo; + u8 dpmx_nb_ps_hi; +}; + +struct kv_sys_info { + u32 bootup_uma_clk; + u32 bootup_sclk; + u32 dentist_vco_freq; + u32 nb_dpm_enable; + u32 nbp_memory_clock[KV_NUM_NBPSTATES]; + u32 nbp_n_clock[KV_NUM_NBPSTATES]; + u16 bootup_nb_voltage_index; + u8 htc_tmp_lmt; + u8 htc_hyst_lmt; + struct sumo_sclk_voltage_mapping_table sclk_voltage_mapping_table; + struct sumo_vid_mapping_table vid_mapping_table; + u32 uma_channel_number; +}; + +struct kv_power_info { + u32 at[SUMO_MAX_HARDWARE_POWERLEVELS]; + u32 voltage_drop_t; + struct kv_sys_info sys_info; + struct kv_pl boot_pl; + bool enable_nb_ps_policy; + bool disable_nb_ps3_in_battery; + bool video_start; + bool battery_state; + u32 lowest_valid; + u32 highest_valid; + u16 high_voltage_t; + bool cac_enabled; + bool bapm_enable; + /* smc offsets */ + u32 sram_end; + u32 dpm_table_start; + u32 soft_regs_start; + /* dpm SMU tables */ + u8 graphics_dpm_level_count; + u8 uvd_level_count; + u8 vce_level_count; + u8 acp_level_count; + u8 samu_level_count; + u16 fps_high_t; + SMU7_Fusion_GraphicsLevel graphics_level[SMU__NUM_SCLK_DPM_STATE]; + SMU7_Fusion_ACPILevel acpi_level; + SMU7_Fusion_UvdLevel uvd_level[SMU7_MAX_LEVELS_UVD]; + SMU7_Fusion_ExtClkLevel vce_level[SMU7_MAX_LEVELS_VCE]; + SMU7_Fusion_ExtClkLevel acp_level[SMU7_MAX_LEVELS_ACP]; + SMU7_Fusion_ExtClkLevel samu_level[SMU7_MAX_LEVELS_SAMU]; + u8 uvd_boot_level; + u8 vce_boot_level; + u8 acp_boot_level; + u8 samu_boot_level; + u8 uvd_interval; + u8 vce_interval; + u8 acp_interval; + u8 samu_interval; + u8 graphics_boot_level; + u8 graphics_interval; + u8 graphics_therm_throttle_enable; + u8 graphics_voltage_change_enable; + u8 graphics_clk_slow_enable; + u8 graphics_clk_slow_divider; + u8 fps_low_t; + u32 low_sclk_interrupt_t; + bool uvd_power_gated; + bool vce_power_gated; + bool acp_power_gated; + bool samu_power_gated; + bool nb_dpm_enabled; + /* flags */ + bool enable_didt; + bool enable_dpm; + bool enable_auto_thermal_throttling; + bool enable_nb_dpm; + /* caps */ + bool caps_cac; + bool caps_power_containment; + bool caps_sq_ramping; + bool caps_db_ramping; + bool caps_td_ramping; + bool caps_tcp_ramping; + bool caps_sclk_throttle_low_notification; + bool caps_fps; + bool caps_uvd_dpm; + bool caps_uvd_pg; + bool caps_vce_pg; + bool caps_samu_pg; + bool caps_acp_pg; + bool caps_stable_p_state; + bool caps_enable_dfs_bypass; + bool caps_sclk_ds; + struct radeon_ps current_rps; + struct kv_ps current_ps; + struct radeon_ps requested_rps; + struct kv_ps requested_ps; +}; + + +/* kv_smc.c */ +int kv_notify_message_to_smu(struct radeon_device *rdev, u32 id); +int kv_dpm_get_enable_mask(struct radeon_device *rdev, u32 *enable_mask); +int kv_send_msg_to_smc_with_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 parameter); +int kv_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, + u32 *value, u32 limit); +int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable); +int kv_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit); + +#endif diff --git a/drivers/gpu/drm/radeon/kv_smc.c b/drivers/gpu/drm/radeon/kv_smc.c new file mode 100644 index 000000000000..34a226d7e34a --- /dev/null +++ b/drivers/gpu/drm/radeon/kv_smc.c @@ -0,0 +1,207 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ + +#include "drmP.h" +#include "radeon.h" +#include "cikd.h" +#include "kv_dpm.h" + +int kv_notify_message_to_smu(struct radeon_device *rdev, u32 id) +{ + u32 i; + u32 tmp = 0; + + WREG32(SMC_MESSAGE_0, id & SMC_MSG_MASK); + + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(SMC_RESP_0) & SMC_RESP_MASK) != 0) + break; + udelay(1); + } + tmp = RREG32(SMC_RESP_0) & SMC_RESP_MASK; + + if (tmp != 1) { + if (tmp == 0xFF) + return -EINVAL; + else if (tmp == 0xFE) + return -EINVAL; + } + + return 0; +} + +int kv_dpm_get_enable_mask(struct radeon_device *rdev, u32 *enable_mask) +{ + int ret; + + ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_SCLKDPM_GetEnabledMask); + + if (ret == 0) + *enable_mask = RREG32_SMC(SMC_SYSCON_MSG_ARG_0); + + return ret; +} + +int kv_send_msg_to_smc_with_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 parameter) +{ + + WREG32(SMC_MSG_ARG_0, parameter); + + return kv_notify_message_to_smu(rdev, msg); +} + +static int kv_set_smc_sram_address(struct radeon_device *rdev, + u32 smc_address, u32 limit) +{ + if (smc_address & 3) + return -EINVAL; + if ((smc_address + 3) > limit) + return -EINVAL; + + WREG32(SMC_IND_INDEX_0, smc_address); + WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + + return 0; +} + +int kv_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, + u32 *value, u32 limit) +{ + int ret; + + ret = kv_set_smc_sram_address(rdev, smc_address, limit); + if (ret) + return ret; + + *value = RREG32(SMC_IND_DATA_0); + return 0; +} + +int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + return kv_notify_message_to_smu(rdev, PPSMC_MSG_DPM_Enable); + else + return kv_notify_message_to_smu(rdev, PPSMC_MSG_DPM_Disable); +} + +int kv_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit) +{ + int ret; + u32 data, original_data, addr, extra_shift, t_byte, count, mask; + + if ((smc_start_address + byte_count) > limit) + return -EINVAL; + + addr = smc_start_address; + t_byte = addr & 3; + + /* RMW for the initial bytes */ + if (t_byte != 0) { + addr -= t_byte; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + original_data = RREG32(SMC_IND_DATA_0); + + data = 0; + mask = 0; + count = 4; + while (count > 0) { + if (t_byte > 0) { + mask = (mask << 8) | 0xff; + t_byte--; + } else if (byte_count > 0) { + data = (data << 8) + *src++; + byte_count--; + mask <<= 8; + } else { + data <<= 8; + mask = (mask << 8) | 0xff; + } + count--; + } + + data |= original_data & mask; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + + addr += 4; + } + + while (byte_count >= 4) { + /* SMC address space is BE */ + data = (src[0] << 24) + (src[1] << 16) + (src[2] << 8) + src[3]; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + + src += 4; + byte_count -= 4; + addr += 4; + } + + /* RMW for the final bytes */ + if (byte_count > 0) { + data = 0; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + original_data= RREG32(SMC_IND_DATA_0); + + extra_shift = 8 * (4 - byte_count); + + while (byte_count > 0) { + /* SMC address space is BE */ + data = (data << 8) + *src++; + byte_count--; + } + + data <<= extra_shift; + + data |= (original_data & ~((~0UL) << extra_shift)); + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + } + return 0; +} + diff --git a/drivers/gpu/drm/radeon/ppsmc.h b/drivers/gpu/drm/radeon/ppsmc.h index b5564a3645d2..6db6e320bc79 100644 --- a/drivers/gpu/drm/radeon/ppsmc.h +++ b/drivers/gpu/drm/radeon/ppsmc.h @@ -99,11 +99,45 @@ typedef uint8_t PPSMC_Result; #define PPSMC_MSG_ThrottleOVRDSCLKDS ((uint8_t)0x96) #define PPSMC_MSG_CancelThrottleOVRDSCLKDS ((uint8_t)0x97) +/* KV/KB */ +#define PPSMC_MSG_UVDDPM_SetEnabledMask ((uint16_t) 0x12D) +#define PPSMC_MSG_VCEDPM_SetEnabledMask ((uint16_t) 0x12E) +#define PPSMC_MSG_ACPDPM_SetEnabledMask ((uint16_t) 0x12F) +#define PPSMC_MSG_SAMUDPM_SetEnabledMask ((uint16_t) 0x130) +#define PPSMC_MSG_MCLKDPM_ForceState ((uint16_t) 0x131) +#define PPSMC_MSG_MCLKDPM_NoForcedLevel ((uint16_t) 0x132) +#define PPSMC_MSG_Voltage_Cntl_Disable ((uint16_t) 0x135) +#define PPSMC_MSG_PCIeDPM_Enable ((uint16_t) 0x136) +#define PPSMC_MSG_ACPPowerOFF ((uint16_t) 0x137) +#define PPSMC_MSG_ACPPowerON ((uint16_t) 0x138) +#define PPSMC_MSG_SAMPowerOFF ((uint16_t) 0x139) +#define PPSMC_MSG_SAMPowerON ((uint16_t) 0x13a) +#define PPSMC_MSG_PCIeDPM_Disable ((uint16_t) 0x13d) +#define PPSMC_MSG_NBDPM_Enable ((uint16_t) 0x140) +#define PPSMC_MSG_NBDPM_Disable ((uint16_t) 0x141) +#define PPSMC_MSG_SCLKDPM_SetEnabledMask ((uint16_t) 0x145) +#define PPSMC_MSG_DPM_Enable ((uint16_t) 0x14e) +#define PPSMC_MSG_DPM_Disable ((uint16_t) 0x14f) +#define PPSMC_MSG_UVDDPM_Enable ((uint16_t) 0x154) +#define PPSMC_MSG_UVDDPM_Disable ((uint16_t) 0x155) +#define PPSMC_MSG_SAMUDPM_Enable ((uint16_t) 0x156) +#define PPSMC_MSG_SAMUDPM_Disable ((uint16_t) 0x157) +#define PPSMC_MSG_ACPDPM_Enable ((uint16_t) 0x158) +#define PPSMC_MSG_ACPDPM_Disable ((uint16_t) 0x159) +#define PPSMC_MSG_VCEDPM_Enable ((uint16_t) 0x15a) +#define PPSMC_MSG_VCEDPM_Disable ((uint16_t) 0x15b) +#define PPSMC_MSG_SCLKDPM_GetEnabledMask ((uint16_t) 0x162) +#define PPSMC_MSG_SCLKDPM_FreezeLevel ((uint16_t) 0x189) +#define PPSMC_MSG_SCLKDPM_UnfreezeLevel ((uint16_t) 0x18A) + /* TN */ #define PPSMC_MSG_DPM_Config ((uint32_t) 0x102) #define PPSMC_MSG_DPM_ForceState ((uint32_t) 0x104) #define PPSMC_MSG_PG_SIMD_Config ((uint32_t) 0x108) #define PPSMC_MSG_DPM_N_LevelsDisabled ((uint32_t) 0x112) +#define PPSMC_MSG_Voltage_Cntl_Enable ((uint32_t) 0x109) +#define PPSMC_MSG_VCEPowerOFF ((uint32_t) 0x10e) +#define PPSMC_MSG_VCEPowerON ((uint32_t) 0x10f) #define PPSMC_MSG_DCE_RemoveVoltageAdjustment ((uint32_t) 0x11d) #define PPSMC_MSG_DCE_AllowVoltageAdjustment ((uint32_t) 0x11e) #define PPSMC_MSG_UVD_DPM_Config ((uint32_t) 0x124) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 3a55540fe280..9c83ecfd0eb7 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2610,6 +2610,20 @@ static struct radeon_asic kv_asic = { .set_uvd_clocks = &cik_set_uvd_clocks, .get_temperature = &kv_get_temp, }, + .dpm = { + .init = &kv_dpm_init, + .setup_asic = &kv_dpm_setup_asic, + .enable = &kv_dpm_enable, + .disable = &kv_dpm_disable, + .pre_set_power_state = &kv_dpm_pre_set_power_state, + .set_power_state = &kv_dpm_set_power_state, + .post_set_power_state = &kv_dpm_post_set_power_state, + .display_configuration_changed = &kv_dpm_display_configuration_changed, + .fini = &kv_dpm_fini, + .get_sclk = &kv_dpm_get_sclk, + .get_mclk = &kv_dpm_get_mclk, + .print_power_state = &kv_dpm_print_power_state, + }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, .page_flip = &evergreen_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index d5c6c5b10edf..68a1a1fb371d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -750,4 +750,18 @@ void cik_compute_ring_set_wptr(struct radeon_device *rdev, int ci_get_temp(struct radeon_device *rdev); int kv_get_temp(struct radeon_device *rdev); +int kv_dpm_init(struct radeon_device *rdev); +int kv_dpm_enable(struct radeon_device *rdev); +void kv_dpm_disable(struct radeon_device *rdev); +int kv_dpm_pre_set_power_state(struct radeon_device *rdev); +int kv_dpm_set_power_state(struct radeon_device *rdev); +void kv_dpm_post_set_power_state(struct radeon_device *rdev); +void kv_dpm_setup_asic(struct radeon_device *rdev); +void kv_dpm_display_configuration_changed(struct radeon_device *rdev); +void kv_dpm_fini(struct radeon_device *rdev); +u32 kv_dpm_get_sclk(struct radeon_device *rdev, bool low); +u32 kv_dpm_get_mclk(struct radeon_device *rdev, bool low); +void kv_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *ps); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 1408014dce8f..37d3d343f687 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1202,6 +1202,8 @@ int radeon_pm_init(struct radeon_device *rdev) case CHIP_VERDE: case CHIP_OLAND: case CHIP_HAINAN: + case CHIP_KABINI: + case CHIP_KAVERI: /* DPM requires the RLC, RV770+ dGPU requires SMC */ if (!rdev->rlc_fw) rdev->pm.pm_method = PM_METHOD_PROFILE; diff --git a/drivers/gpu/drm/radeon/smu7.h b/drivers/gpu/drm/radeon/smu7.h new file mode 100644 index 000000000000..75a380a15292 --- /dev/null +++ b/drivers/gpu/drm/radeon/smu7.h @@ -0,0 +1,170 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU7_H +#define SMU7_H + +#pragma pack(push, 1) + +#define SMU7_CONTEXT_ID_SMC 1 +#define SMU7_CONTEXT_ID_VBIOS 2 + + +#define SMU7_CONTEXT_ID_SMC 1 +#define SMU7_CONTEXT_ID_VBIOS 2 + +#define SMU7_MAX_LEVELS_VDDC 8 +#define SMU7_MAX_LEVELS_VDDCI 4 +#define SMU7_MAX_LEVELS_MVDD 4 +#define SMU7_MAX_LEVELS_VDDNB 8 + +#define SMU7_MAX_LEVELS_GRAPHICS SMU__NUM_SCLK_DPM_STATE // SCLK + SQ DPM + ULV +#define SMU7_MAX_LEVELS_MEMORY SMU__NUM_MCLK_DPM_LEVELS // MCLK Levels DPM +#define SMU7_MAX_LEVELS_GIO SMU__NUM_LCLK_DPM_LEVELS // LCLK Levels +#define SMU7_MAX_LEVELS_LINK SMU__NUM_PCIE_DPM_LEVELS // PCIe speed and number of lanes. +#define SMU7_MAX_LEVELS_UVD 8 // VCLK/DCLK levels for UVD. +#define SMU7_MAX_LEVELS_VCE 8 // ECLK levels for VCE. +#define SMU7_MAX_LEVELS_ACP 8 // ACLK levels for ACP. +#define SMU7_MAX_LEVELS_SAMU 8 // SAMCLK levels for SAMU. +#define SMU7_MAX_ENTRIES_SMIO 32 // Number of entries in SMIO table. + +#define DPM_NO_LIMIT 0 +#define DPM_NO_UP 1 +#define DPM_GO_DOWN 2 +#define DPM_GO_UP 3 + +#define SMU7_FIRST_DPM_GRAPHICS_LEVEL 0 +#define SMU7_FIRST_DPM_MEMORY_LEVEL 0 + +#define GPIO_CLAMP_MODE_VRHOT 1 +#define GPIO_CLAMP_MODE_THERM 2 +#define GPIO_CLAMP_MODE_DC 4 + +#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0 +#define SCRATCH_B_TARG_PCIE_INDEX_MASK (0x7< Date: Wed, 14 Aug 2013 01:03:41 -0400 Subject: drm/radeon: add dpm support for CI dGPUs (v2) This adds dpm support for btc asics. This includes: - dynamic engine clock scaling - dynamic memory clock scaling - dynamic voltage scaling - dynamic pcie gen switching Set radeon.dpm=1 to enable. v2: remove unused radeon_atombios.c changes, make missing smc ucode non-fatal Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/Makefile | 3 +- drivers/gpu/drm/radeon/ci_dpm.c | 5006 ++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/ci_dpm.h | 331 ++ drivers/gpu/drm/radeon/ci_smc.c | 262 ++ drivers/gpu/drm/radeon/cik.c | 41 +- drivers/gpu/drm/radeon/cikd.h | 259 +- drivers/gpu/drm/radeon/ppsmc.h | 24 +- drivers/gpu/drm/radeon/radeon.h | 6 + drivers/gpu/drm/radeon/radeon_asic.c | 14 + drivers/gpu/drm/radeon/radeon_asic.h | 14 + drivers/gpu/drm/radeon/radeon_atombios.c | 2 +- drivers/gpu/drm/radeon/radeon_pm.c | 1 + drivers/gpu/drm/radeon/radeon_ucode.h | 3 + drivers/gpu/drm/radeon/si_dpm.c | 10 +- drivers/gpu/drm/radeon/smu7_discrete.h | 486 +++ 15 files changed, 6447 insertions(+), 15 deletions(-) create mode 100644 drivers/gpu/drm/radeon/ci_dpm.c create mode 100644 drivers/gpu/drm/radeon/ci_dpm.h create mode 100644 drivers/gpu/drm/radeon/ci_smc.c create mode 100644 drivers/gpu/drm/radeon/smu7_discrete.h (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index d3265b5d4661..ea913cc681b4 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -79,7 +79,8 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ - trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o + trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ + ci_dpm.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c new file mode 100644 index 000000000000..72ab92b60e6e --- /dev/null +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -0,0 +1,5006 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "drmP.h" +#include "radeon.h" +#include "cikd.h" +#include "r600_dpm.h" +#include "ci_dpm.h" +#include "atom.h" + +#define MC_CG_ARB_FREQ_F0 0x0a +#define MC_CG_ARB_FREQ_F1 0x0b +#define MC_CG_ARB_FREQ_F2 0x0c +#define MC_CG_ARB_FREQ_F3 0x0d + +#define SMC_RAM_END 0x40000 + +#define VOLTAGE_SCALE 4 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 + +static const struct ci_pt_defaults defaults_bonaire_xt = +{ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, + { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61 }, + { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } +}; + +static const struct ci_pt_defaults defaults_bonaire_pro = +{ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062, + { 0x8C, 0x23F, 0x244, 0xA6, 0x83, 0x85, 0x86, 0x86, 0x83, 0xDB, 0xDB, 0xDA, 0x67, 0x60, 0x5F }, + { 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB } +}; + +static const struct ci_pt_defaults defaults_saturn_xt = +{ + 1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x70000, + { 0x8C, 0x247, 0x249, 0xA6, 0x80, 0x81, 0x8B, 0x89, 0x86, 0xC9, 0xCA, 0xC9, 0x4D, 0x4D, 0x4D }, + { 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 } +}; + +static const struct ci_pt_defaults defaults_saturn_pro = +{ + 1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000, + { 0x96, 0x21D, 0x23B, 0xA1, 0x85, 0x87, 0x83, 0x84, 0x81, 0xE6, 0xE6, 0xE6, 0x71, 0x6A, 0x6A }, + { 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 } +}; + +static const struct ci_pt_config_reg didt_config_ci[] = +{ + { 0x10, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x10, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x10, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x10, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x2, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x2, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x2, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x1, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x1, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x0, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x22, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x22, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x22, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x21, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x21, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x20, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x42, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x42, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x42, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x41, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x41, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x40, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x62, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x62, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x62, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x61, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x61, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x60, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0xFFFFFFFF } +}; + +extern u8 rv770_get_memory_module_index(struct radeon_device *rdev); +extern int ni_copy_and_switch_arb_sets(struct radeon_device *rdev, + u32 arb_freq_src, u32 arb_freq_dest); +extern u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock); +extern u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode); +extern void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, + u32 max_voltage_steps, + struct atom_voltage_table *voltage_table); +extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev); +extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); + +static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, + struct atom_voltage_table_entry *voltage_table, + u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd); +static int ci_set_power_limit(struct radeon_device *rdev, u32 n); +static int ci_set_overdrive_target_tdp(struct radeon_device *rdev, + u32 target_tdp); +static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate); + +static struct ci_power_info *ci_get_pi(struct radeon_device *rdev) +{ + struct ci_power_info *pi = rdev->pm.dpm.priv; + + return pi; +} + +static struct ci_ps *ci_get_ps(struct radeon_ps *rps) +{ + struct ci_ps *ps = rps->ps_priv; + + return ps; +} + +static void ci_initialize_powertune_defaults(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + switch (rdev->pdev->device) { + case 0x6650: + case 0x6658: + case 0x665C: + default: + pi->powertune_defaults = &defaults_bonaire_xt; + break; + case 0x6651: + case 0x665D: + pi->powertune_defaults = &defaults_bonaire_pro; + break; + case 0x6640: + pi->powertune_defaults = &defaults_saturn_xt; + break; + case 0x6641: + pi->powertune_defaults = &defaults_saturn_pro; + break; + } + + pi->dte_tj_offset = 0; + + pi->caps_power_containment = true; + pi->caps_cac = false; + pi->caps_sq_ramping = false; + pi->caps_db_ramping = false; + pi->caps_td_ramping = false; + pi->caps_tcp_ramping = false; + + if (pi->caps_power_containment) { + pi->caps_cac = true; + pi->enable_bapm_feature = true; + pi->enable_tdc_limit_feature = true; + pi->enable_pkg_pwr_tracking_feature = true; + } +} + +static u8 ci_convert_to_vid(u16 vddc) +{ + return (6200 - (vddc * VOLTAGE_SCALE)) / 25; +} + +static int ci_populate_bapm_vddc_vid_sidd(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd; + u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd; + u8 *hi2_vid = pi->smc_powertune_table.BapmVddCVidHiSidd2; + u32 i; + + if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries == NULL) + return -EINVAL; + if (rdev->pm.dpm.dyn_state.cac_leakage_table.count > 8) + return -EINVAL; + if (rdev->pm.dpm.dyn_state.cac_leakage_table.count != + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count) + return -EINVAL; + + for (i = 0; i < rdev->pm.dpm.dyn_state.cac_leakage_table.count; i++) { + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) { + lo_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc1); + hi_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc2); + hi2_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc3); + } else { + lo_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc); + hi_vid[i] = ci_convert_to_vid((u16)rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage); + } + } + return 0; +} + +static int ci_populate_vddc_vid(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 *vid = pi->smc_powertune_table.VddCVid; + u32 i; + + if (pi->vddc_voltage_table.count > 8) + return -EINVAL; + + for (i = 0; i < pi->vddc_voltage_table.count; i++) + vid[i] = ci_convert_to_vid(pi->vddc_voltage_table.entries[i].value); + + return 0; +} + +static int ci_populate_svi_load_line(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + + pi->smc_powertune_table.SviLoadLineEn = pt_defaults->svi_load_line_en; + pi->smc_powertune_table.SviLoadLineVddC = pt_defaults->svi_load_line_vddc; + pi->smc_powertune_table.SviLoadLineTrimVddC = 3; + pi->smc_powertune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int ci_populate_tdc_limit(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + u16 tdc_limit; + + tdc_limit = rdev->pm.dpm.dyn_state.cac_tdp_table->tdc * 256; + pi->smc_powertune_table.TDC_VDDC_PkgLimit = cpu_to_be16(tdc_limit); + pi->smc_powertune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + pt_defaults->tdc_vddc_throttle_release_limit_perc; + pi->smc_powertune_table.TDC_MAWt = pt_defaults->tdc_mawt; + + return 0; +} + +static int ci_populate_dw8(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + int ret; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, PmFuseTable) + + offsetof(SMU7_Discrete_PmFuses, TdcWaterfallCtl), + (u32 *)&pi->smc_powertune_table.TdcWaterfallCtl, + pi->sram_end); + if (ret) + return -EINVAL; + else + pi->smc_powertune_table.TdcWaterfallCtl = pt_defaults->tdc_waterfall_ctl; + + return 0; +} + +static int ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd; + u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd; + int i, min, max; + + min = max = hi_vid[0]; + for (i = 0; i < 8; i++) { + if (0 != hi_vid[i]) { + if (min > hi_vid[i]) + min = hi_vid[i]; + if (max < hi_vid[i]) + max = hi_vid[i]; + } + + if (0 != lo_vid[i]) { + if (min > lo_vid[i]) + min = lo_vid[i]; + if (max < lo_vid[i]) + max = lo_vid[i]; + } + } + + if ((min == 0) || (max == 0)) + return -EINVAL; + pi->smc_powertune_table.GnbLPMLMaxVid = (u8)max; + pi->smc_powertune_table.GnbLPMLMinVid = (u8)min; + + return 0; +} + +static int ci_populate_bapm_vddc_base_leakage_sidd(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u16 hi_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd; + u16 lo_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd; + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + + hi_sidd = cac_tdp_table->high_cac_leakage / 100 * 256; + lo_sidd = cac_tdp_table->low_cac_leakage / 100 * 256; + + pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd = cpu_to_be16(hi_sidd); + pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd = cpu_to_be16(lo_sidd); + + return 0; +} + +static int ci_populate_bapm_parameters_in_dpm_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + SMU7_Discrete_DpmTable *dpm_table = &pi->smc_state_table; + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + struct radeon_ppm_table *ppm = rdev->pm.dpm.dyn_state.ppm_table; + int i, j, k; + const u16 *def1; + const u16 *def2; + + dpm_table->DefaultTdp = cac_tdp_table->tdp * 256; + dpm_table->TargetTdp = cac_tdp_table->configurable_tdp * 256; + + dpm_table->DTETjOffset = (u8)pi->dte_tj_offset; + dpm_table->GpuTjMax = + (u8)(pi->thermal_temp_setting.temperature_high / 1000); + dpm_table->GpuTjHyst = 8; + + dpm_table->DTEAmbientTempBase = pt_defaults->dte_ambient_temp_base; + + if (ppm) { + dpm_table->PPM_PkgPwrLimit = cpu_to_be16((u16)ppm->dgpu_tdp * 256 / 1000); + dpm_table->PPM_TemperatureLimit = cpu_to_be16((u16)ppm->tj_max * 256); + } else { + dpm_table->PPM_PkgPwrLimit = cpu_to_be16(0); + dpm_table->PPM_TemperatureLimit = cpu_to_be16(0); + } + + dpm_table->BAPM_TEMP_GRADIENT = cpu_to_be32(pt_defaults->bapm_temp_gradient); + def1 = pt_defaults->bapmti_r; + def2 = pt_defaults->bapmti_rc; + + for (i = 0; i < SMU7_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU7_DTE_SOURCES; j++) { + for (k = 0; k < SMU7_DTE_SINKS; k++) { + dpm_table->BAPMTI_R[i][j][k] = cpu_to_be16(*def1); + dpm_table->BAPMTI_RC[i][j][k] = cpu_to_be16(*def2); + def1++; + def2++; + } + } + } + + return 0; +} + +static int ci_populate_pm_base(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 pm_fuse_table_offset; + int ret; + + if (pi->caps_power_containment) { + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, pi->sram_end); + if (ret) + return ret; + ret = ci_populate_bapm_vddc_vid_sidd(rdev); + if (ret) + return ret; + ret = ci_populate_vddc_vid(rdev); + if (ret) + return ret; + ret = ci_populate_svi_load_line(rdev); + if (ret) + return ret; + ret = ci_populate_tdc_limit(rdev); + if (ret) + return ret; + ret = ci_populate_dw8(rdev); + if (ret) + return ret; + ret = ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(rdev); + if (ret) + return ret; + ret = ci_populate_bapm_vddc_base_leakage_sidd(rdev); + if (ret) + return ret; + ret = ci_copy_bytes_to_smc(rdev, pm_fuse_table_offset, + (u8 *)&pi->smc_powertune_table, + sizeof(SMU7_Discrete_PmFuses), pi->sram_end); + if (ret) + return ret; + } + + return 0; +} + +static void ci_do_enable_didt(struct radeon_device *rdev, const bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 data; + + if (pi->caps_sq_ramping) { + data = RREG32_DIDT(DIDT_SQ_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_SQ_CTRL0, data); + } + + if (pi->caps_db_ramping) { + data = RREG32_DIDT(DIDT_DB_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_DB_CTRL0, data); + } + + if (pi->caps_td_ramping) { + data = RREG32_DIDT(DIDT_TD_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TD_CTRL0, data); + } + + if (pi->caps_tcp_ramping) { + data = RREG32_DIDT(DIDT_TCP_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TCP_CTRL0, data); + } +} + +static int ci_program_pt_config_registers(struct radeon_device *rdev, + const struct ci_pt_config_reg *cac_config_regs) +{ + const struct ci_pt_config_reg *config_regs = cac_config_regs; + u32 data; + u32 cache = 0; + + if (config_regs == NULL) + return -EINVAL; + + while (config_regs->offset != 0xFFFFFFFF) { + if (config_regs->type == CISLANDS_CONFIGREG_CACHE) { + cache |= ((config_regs->value << config_regs->shift) & config_regs->mask); + } else { + switch (config_regs->type) { + case CISLANDS_CONFIGREG_SMC_IND: + data = RREG32_SMC(config_regs->offset); + break; + case CISLANDS_CONFIGREG_DIDT_IND: + data = RREG32_DIDT(config_regs->offset); + break; + default: + data = RREG32(config_regs->offset << 2); + break; + } + + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + data |= cache; + + switch (config_regs->type) { + case CISLANDS_CONFIGREG_SMC_IND: + WREG32_SMC(config_regs->offset, data); + break; + case CISLANDS_CONFIGREG_DIDT_IND: + WREG32_DIDT(config_regs->offset, data); + break; + default: + WREG32(config_regs->offset << 2, data); + break; + } + cache = 0; + } + config_regs++; + } + return 0; +} + +static int ci_enable_didt(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + if (pi->caps_sq_ramping || pi->caps_db_ramping || + pi->caps_td_ramping || pi->caps_tcp_ramping) { + cik_enter_rlc_safe_mode(rdev); + + if (enable) { + ret = ci_program_pt_config_registers(rdev, didt_config_ci); + if (ret) { + cik_exit_rlc_safe_mode(rdev); + return ret; + } + } + + ci_do_enable_didt(rdev, enable); + + cik_exit_rlc_safe_mode(rdev); + } + + return 0; +} + +static int ci_enable_power_containment(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret = 0; + + if (enable) { + pi->power_containment_features = 0; + if (pi->caps_power_containment) { + if (pi->enable_bapm_feature) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableDTE); + if (smc_result != PPSMC_Result_OK) + ret = -EINVAL; + else + pi->power_containment_features |= POWERCONTAINMENT_FEATURE_BAPM; + } + + if (pi->enable_tdc_limit_feature) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_TDCLimitEnable); + if (smc_result != PPSMC_Result_OK) + ret = -EINVAL; + else + pi->power_containment_features |= POWERCONTAINMENT_FEATURE_TDCLimit; + } + + if (pi->enable_pkg_pwr_tracking_feature) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PkgPwrLimitEnable); + if (smc_result != PPSMC_Result_OK) { + ret = -EINVAL; + } else { + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + u32 default_pwr_limit = + (u32)(cac_tdp_table->maximum_power_delivery_limit * 256); + + pi->power_containment_features |= POWERCONTAINMENT_FEATURE_PkgPwrLimit; + + ci_set_power_limit(rdev, default_pwr_limit); + } + } + } + } else { + if (pi->caps_power_containment && pi->power_containment_features) { + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_TDCLimit) + ci_send_msg_to_smc(rdev, PPSMC_MSG_TDCLimitDisable); + + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM) + ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableDTE); + + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) + ci_send_msg_to_smc(rdev, PPSMC_MSG_PkgPwrLimitDisable); + pi->power_containment_features = 0; + } + } + + return ret; +} + +static int ci_enable_smc_cac(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret = 0; + + if (pi->caps_cac) { + if (enable) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableCac); + if (smc_result != PPSMC_Result_OK) { + ret = -EINVAL; + pi->cac_enabled = false; + } else { + pi->cac_enabled = true; + } + } else if (pi->cac_enabled) { + ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableCac); + pi->cac_enabled = false; + } + } + + return ret; +} + +static int ci_power_control_set_level(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + s32 adjust_percent; + s32 target_tdp; + int ret = 0; + bool adjust_polarity = false; /* ??? */ + + if (pi->caps_power_containment && + (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM)) { + adjust_percent = adjust_polarity ? + rdev->pm.dpm.tdp_adjustment : (-1 * rdev->pm.dpm.tdp_adjustment); + target_tdp = ((100 + adjust_percent) * + (s32)cac_tdp_table->configurable_tdp) / 100; + target_tdp *= 256; + + ret = ci_set_overdrive_target_tdp(rdev, (u32)target_tdp); + } + + return ret; +} + +static void ci_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) +{ + ci_update_uvd_dpm(rdev, gate); +} + +static void ci_apply_state_adjust_rules(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *ps = ci_get_ps(rps); + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_clock_and_voltage_limits *max_limits; + bool disable_mclk_switching; + u32 sclk, mclk; + int i; + + if (rdev->pm.dpm.new_active_crtc_count > 1) + disable_mclk_switching = true; + else + disable_mclk_switching = false; + + if ((rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) + pi->battery_state = true; + else + pi->battery_state = false; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (rdev->pm.dpm.ac_power == false) { + for (i = 0; i < ps->performance_level_count; i++) { + if (ps->performance_levels[i].mclk > max_limits->mclk) + ps->performance_levels[i].mclk = max_limits->mclk; + if (ps->performance_levels[i].sclk > max_limits->sclk) + ps->performance_levels[i].sclk = max_limits->sclk; + } + } + + /* XXX validate the min clocks required for display */ + + if (disable_mclk_switching) { + mclk = ps->performance_levels[ps->performance_level_count - 1].mclk; + sclk = ps->performance_levels[0].sclk; + } else { + mclk = ps->performance_levels[0].mclk; + sclk = ps->performance_levels[0].sclk; + } + + ps->performance_levels[0].sclk = sclk; + ps->performance_levels[0].mclk = mclk; + + if (ps->performance_levels[1].sclk < ps->performance_levels[0].sclk) + ps->performance_levels[1].sclk = ps->performance_levels[0].sclk; + + if (disable_mclk_switching) { + if (ps->performance_levels[0].mclk < ps->performance_levels[1].mclk) + ps->performance_levels[0].mclk = ps->performance_levels[1].mclk; + } else { + if (ps->performance_levels[1].mclk < ps->performance_levels[0].mclk) + ps->performance_levels[1].mclk = ps->performance_levels[0].mclk; + } +} + +static int ci_set_thermal_temperature_range(struct radeon_device *rdev, + int min_temp, int max_temp) +{ + int low_temp = 0 * 1000; + int high_temp = 255 * 1000; + u32 tmp; + + if (low_temp < min_temp) + low_temp = min_temp; + if (high_temp > max_temp) + high_temp = max_temp; + if (high_temp < low_temp) { + DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp); + return -EINVAL; + } + + tmp = RREG32_SMC(CG_THERMAL_INT); + tmp &= ~(CI_DIG_THERM_INTH_MASK | CI_DIG_THERM_INTL_MASK); + tmp |= CI_DIG_THERM_INTH(high_temp / 1000) | + CI_DIG_THERM_INTL(low_temp / 1000); + WREG32_SMC(CG_THERMAL_INT, tmp); + +#if 0 + /* XXX: need to figure out how to handle this properly */ + tmp = RREG32_SMC(CG_THERMAL_CTRL); + tmp &= DIG_THERM_DPM_MASK; + tmp |= DIG_THERM_DPM(high_temp / 1000); + WREG32_SMC(CG_THERMAL_CTRL, tmp); +#endif + + return 0; +} + +#if 0 +static int ci_read_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 *value) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + return ci_read_smc_sram_dword(rdev, + pi->soft_regs_start + reg_offset, + value, pi->sram_end); +} +#endif + +static int ci_write_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 value) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + return ci_write_smc_sram_dword(rdev, + pi->soft_regs_start + reg_offset, + value, pi->sram_end); +} + +static void ci_init_fps_limits(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + SMU7_Discrete_DpmTable *table = &pi->smc_state_table; + + if (pi->caps_fps) { + u16 tmp; + + tmp = 45; + table->FpsHighT = cpu_to_be16(tmp); + + tmp = 30; + table->FpsLowT = cpu_to_be16(tmp); + } +} + +static int ci_update_sclk_t(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret = 0; + u32 low_sclk_interrupt_t = 0; + + if (pi->caps_sclk_throttle_low_notification) { + low_sclk_interrupt_t = cpu_to_be32(pi->low_sclk_interrupt_t); + + ret = ci_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, LowSclkInterruptT), + (u8 *)&low_sclk_interrupt_t, + sizeof(u32), pi->sram_end); + + } + + return ret; +} + +static void ci_get_leakage_voltages(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u16 leakage_id, virtual_voltage_id; + u16 vddc, vddci; + int i; + + pi->vddc_leakage.count = 0; + pi->vddci_leakage.count = 0; + + if (radeon_atom_get_leakage_id_from_vbios(rdev, &leakage_id) == 0) { + for (i = 0; i < CISLANDS_MAX_LEAKAGE_COUNT; i++) { + virtual_voltage_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i; + if (radeon_atom_get_leakage_vddc_based_on_leakage_params(rdev, &vddc, &vddci, + virtual_voltage_id, + leakage_id) == 0) { + if (vddc != 0 && vddc != virtual_voltage_id) { + pi->vddc_leakage.actual_voltage[pi->vddc_leakage.count] = vddc; + pi->vddc_leakage.leakage_id[pi->vddc_leakage.count] = virtual_voltage_id; + pi->vddc_leakage.count++; + } + if (vddci != 0 && vddci != virtual_voltage_id) { + pi->vddci_leakage.actual_voltage[pi->vddci_leakage.count] = vddci; + pi->vddci_leakage.leakage_id[pi->vddci_leakage.count] = virtual_voltage_id; + pi->vddci_leakage.count++; + } + } + } + } +} + +static void ci_set_dpm_event_sources(struct radeon_device *rdev, u32 sources) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + bool want_thermal_protection; + enum radeon_dpm_event_src dpm_event_src; + u32 tmp; + + switch (sources) { + case 0: + default: + want_thermal_protection = false; + break; + case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL): + want_thermal_protection = true; + dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL; + break; + case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL): + want_thermal_protection = true; + dpm_event_src = RADEON_DPM_EVENT_SRC_EXTERNAL; + break; + case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) | + (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL)): + want_thermal_protection = true; + dpm_event_src = RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL; + break; + } + + if (want_thermal_protection) { +#if 0 + /* XXX: need to figure out how to handle this properly */ + tmp = RREG32_SMC(CG_THERMAL_CTRL); + tmp &= DPM_EVENT_SRC_MASK; + tmp |= DPM_EVENT_SRC(dpm_event_src); + WREG32_SMC(CG_THERMAL_CTRL, tmp); +#endif + + tmp = RREG32_SMC(GENERAL_PWRMGT); + if (pi->thermal_protection) + tmp &= ~THERMAL_PROTECTION_DIS; + else + tmp |= THERMAL_PROTECTION_DIS; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } else { + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp |= THERMAL_PROTECTION_DIS; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } +} + +static void ci_enable_auto_throttle_source(struct radeon_device *rdev, + enum radeon_dpm_auto_throttle_src source, + bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (enable) { + if (!(pi->active_auto_throttle_sources & (1 << source))) { + pi->active_auto_throttle_sources |= 1 << source; + ci_set_dpm_event_sources(rdev, pi->active_auto_throttle_sources); + } + } else { + if (pi->active_auto_throttle_sources & (1 << source)) { + pi->active_auto_throttle_sources &= ~(1 << source); + ci_set_dpm_event_sources(rdev, pi->active_auto_throttle_sources); + } + } +} + +static void ci_enable_vr_hot_gpio_interrupt(struct radeon_device *rdev) +{ + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REGULATOR_HOT) + ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableVRHotGPIOInterrupt); +} + +static int ci_unfreeze_sclk_mclk_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + + if (!pi->need_update_smu7_dpm_table) + return 0; + + if ((!pi->sclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_SCLKDPM_UnfreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if ((!pi->mclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_UnfreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + pi->need_update_smu7_dpm_table = 0; + return 0; +} + +static int ci_enable_sclk_mclk_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + + if (enable) { + if (!pi->sclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DPM_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if (!pi->mclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + + WREG32_P(MC_SEQ_CNTL_3, CAC_EN, ~CAC_EN); + + WREG32_SMC(LCAC_MC0_CNTL, 0x05); + WREG32_SMC(LCAC_MC1_CNTL, 0x05); + WREG32_SMC(LCAC_CPL_CNTL, 0x100005); + + udelay(10); + + WREG32_SMC(LCAC_MC0_CNTL, 0x400005); + WREG32_SMC(LCAC_MC1_CNTL, 0x400005); + WREG32_SMC(LCAC_CPL_CNTL, 0x500005); + } + } else { + if (!pi->sclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DPM_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if (!pi->mclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + } + + return 0; +} + +static int ci_start_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret; + u32 tmp; + + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp |= GLOBAL_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + + tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + tmp |= DYNAMIC_PM_EN; + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, VoltageChangeTimeout), 0x1000); + + WREG32_P(BIF_LNCNT_RESET, 0, ~RESET_LNCNT_EN); + + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Voltage_Cntl_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + + ret = ci_enable_sclk_mclk_dpm(rdev, true); + if (ret) + return ret; + + if (!pi->pcie_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_freeze_sclk_mclk_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + + if (!pi->need_update_smu7_dpm_table) + return 0; + + if ((!pi->sclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_SCLKDPM_FreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if ((!pi->mclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_FreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_stop_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret; + u32 tmp; + + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp &= ~GLOBAL_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + + tmp = RREG32(SCLK_PWRMGT_CNTL); + tmp &= ~DYNAMIC_PM_EN; + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + if (!pi->pcie_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + ret = ci_enable_sclk_mclk_dpm(rdev, false); + if (ret) + return ret; + + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Voltage_Cntl_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + + return 0; +} + +static void ci_enable_sclk_control(struct radeon_device *rdev, bool enable) +{ + u32 tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + + if (enable) + tmp &= ~SCLK_PWRMGT_OFF; + else + tmp |= SCLK_PWRMGT_OFF; + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); +} + +#if 0 +static int ci_notify_hw_of_power_source(struct radeon_device *rdev, + bool ac_power) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + u32 power_limit; + + if (ac_power) + power_limit = (u32)(cac_tdp_table->maximum_power_delivery_limit * 256); + else + power_limit = (u32)(cac_tdp_table->battery_power_limit * 256); + + ci_set_power_limit(rdev, power_limit); + + if (pi->caps_automatic_dc_transition) { + if (ac_power) + ci_send_msg_to_smc(rdev, PPSMC_MSG_RunningOnAC); + else + ci_send_msg_to_smc(rdev, PPSMC_MSG_Remove_DC_Clamp); + } + + return 0; +} +#endif + +static PPSMC_Result ci_send_msg_to_smc_with_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 parameter) +{ + WREG32(SMC_MSG_ARG_0, parameter); + return ci_send_msg_to_smc(rdev, msg); +} + +static PPSMC_Result ci_send_msg_to_smc_return_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 *parameter) +{ + PPSMC_Result smc_result; + + smc_result = ci_send_msg_to_smc(rdev, msg); + + if ((smc_result == PPSMC_Result_OK) && parameter) + *parameter = RREG32(SMC_MSG_ARG_0); + + return smc_result; +} + +static int ci_dpm_force_state_sclk(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->sclk_dpm_key_disabled) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_dpm_force_state_mclk(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->mclk_dpm_key_disabled) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_MCLKDPM_ForceState, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_dpm_force_state_pcie(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->pcie_dpm_key_disabled) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_PCIeDPM_ForceLevel, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_set_power_limit(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_PkgPwrSetLimit, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_set_overdrive_target_tdp(struct radeon_device *rdev, + u32 target_tdp) +{ + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_OverDriveSetTargetTdp, target_tdp); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + return 0; +} + +static int ci_set_boot_state(struct radeon_device *rdev) +{ + return ci_enable_sclk_mclk_dpm(rdev, false); +} + +static u32 ci_get_average_sclk_freq(struct radeon_device *rdev) +{ + u32 sclk_freq; + PPSMC_Result smc_result = + ci_send_msg_to_smc_return_parameter(rdev, + PPSMC_MSG_API_GetSclkFrequency, + &sclk_freq); + if (smc_result != PPSMC_Result_OK) + sclk_freq = 0; + + return sclk_freq; +} + +static u32 ci_get_average_mclk_freq(struct radeon_device *rdev) +{ + u32 mclk_freq; + PPSMC_Result smc_result = + ci_send_msg_to_smc_return_parameter(rdev, + PPSMC_MSG_API_GetMclkFrequency, + &mclk_freq); + if (smc_result != PPSMC_Result_OK) + mclk_freq = 0; + + return mclk_freq; +} + +static void ci_dpm_start_smc(struct radeon_device *rdev) +{ + int i; + + ci_program_jump_on_start(rdev); + ci_start_smc_clock(rdev); + ci_start_smc(rdev); + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32_SMC(FIRMWARE_FLAGS) & INTERRUPTS_ENABLED) + break; + } +} + +static void ci_dpm_stop_smc(struct radeon_device *rdev) +{ + ci_reset_smc(rdev); + ci_stop_smc_clock(rdev); +} + +static int ci_process_firmware_header(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + int ret; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, DpmTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->dpm_table_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, SoftRegisters), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->soft_regs_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, mcRegisterTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->mc_reg_table_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, FanTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->fan_table_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, mcArbDramTimingTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->arb_table_start = tmp; + + return 0; +} + +static void ci_read_clock_registers(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->clock_registers.cg_spll_func_cntl = + RREG32_SMC(CG_SPLL_FUNC_CNTL); + pi->clock_registers.cg_spll_func_cntl_2 = + RREG32_SMC(CG_SPLL_FUNC_CNTL_2); + pi->clock_registers.cg_spll_func_cntl_3 = + RREG32_SMC(CG_SPLL_FUNC_CNTL_3); + pi->clock_registers.cg_spll_func_cntl_4 = + RREG32_SMC(CG_SPLL_FUNC_CNTL_4); + pi->clock_registers.cg_spll_spread_spectrum = + RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM); + pi->clock_registers.cg_spll_spread_spectrum_2 = + RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM_2); + pi->clock_registers.dll_cntl = RREG32(DLL_CNTL); + pi->clock_registers.mclk_pwrmgt_cntl = RREG32(MCLK_PWRMGT_CNTL); + pi->clock_registers.mpll_ad_func_cntl = RREG32(MPLL_AD_FUNC_CNTL); + pi->clock_registers.mpll_dq_func_cntl = RREG32(MPLL_DQ_FUNC_CNTL); + pi->clock_registers.mpll_func_cntl = RREG32(MPLL_FUNC_CNTL); + pi->clock_registers.mpll_func_cntl_1 = RREG32(MPLL_FUNC_CNTL_1); + pi->clock_registers.mpll_func_cntl_2 = RREG32(MPLL_FUNC_CNTL_2); + pi->clock_registers.mpll_ss1 = RREG32(MPLL_SS1); + pi->clock_registers.mpll_ss2 = RREG32(MPLL_SS2); +} + +static void ci_init_sclk_t(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->low_sclk_interrupt_t = 0; +} + +static void ci_enable_thermal_protection(struct radeon_device *rdev, + bool enable) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + if (enable) + tmp &= ~THERMAL_PROTECTION_DIS; + else + tmp |= THERMAL_PROTECTION_DIS; + WREG32_SMC(GENERAL_PWRMGT, tmp); +} + +static void ci_enable_acpi_power_management(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + tmp |= STATIC_PM_EN; + + WREG32_SMC(GENERAL_PWRMGT, tmp); +} + +#if 0 +static int ci_enter_ulp_state(struct radeon_device *rdev) +{ + + WREG32(SMC_MESSAGE_0, PPSMC_MSG_SwitchToMinimumPower); + + udelay(25000); + + return 0; +} + +static int ci_exit_ulp_state(struct radeon_device *rdev) +{ + int i; + + WREG32(SMC_MESSAGE_0, PPSMC_MSG_ResumeFromMinimumPower); + + udelay(7000); + + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(SMC_RESP_0) == 1) + break; + udelay(1000); + } + + return 0; +} +#endif + +static int ci_notify_smc_display_change(struct radeon_device *rdev, + bool has_display) +{ + PPSMC_Msg msg = has_display ? PPSMC_MSG_HasDisplay : PPSMC_MSG_NoDisplay; + + return (ci_send_msg_to_smc(rdev, msg) == PPSMC_Result_OK) ? 0 : -EINVAL; +} + +static int ci_enable_ds_master_switch(struct radeon_device *rdev, + bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (enable) { + if (pi->caps_sclk_ds) { + if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_ON) != PPSMC_Result_OK) + return -EINVAL; + } else { + if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK) + return -EINVAL; + } + } else { + if (pi->caps_sclk_ds) { + if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK) + return -EINVAL; + } + } + + return 0; +} + +static void ci_program_display_gap(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL); + u32 pre_vbi_time_in_us; + u32 frame_time_in_us; + u32 ref_clock = rdev->clock.spll.reference_freq; + u32 refresh_rate = r600_dpm_get_vrefresh(rdev); + u32 vblank_time = r600_dpm_get_vblank_time(rdev); + + tmp &= ~DISP_GAP_MASK; + if (rdev->pm.dpm.new_active_crtc_count > 0) + tmp |= DISP_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + else + tmp |= DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE); + WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp); + + if (refresh_rate == 0) + refresh_rate = 60; + if (vblank_time == 0xffffffff) + vblank_time = 500; + frame_time_in_us = 1000000 / refresh_rate; + pre_vbi_time_in_us = + frame_time_in_us - 200 - vblank_time; + tmp = pre_vbi_time_in_us * (ref_clock / 100); + + WREG32_SMC(CG_DISPLAY_GAP_CNTL2, tmp); + ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, PreVBlankGap), 0x64); + ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us)); + + + ci_notify_smc_display_change(rdev, (rdev->pm.dpm.new_active_crtc_count == 1)); + +} + +static void ci_enable_spread_spectrum(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + if (enable) { + if (pi->caps_sclk_ss_support) { + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp |= DYN_SPREAD_SPECTRUM_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } + } else { + tmp = RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM); + tmp &= ~SSEN; + WREG32_SMC(CG_SPLL_SPREAD_SPECTRUM, tmp); + + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp &= ~DYN_SPREAD_SPECTRUM_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } +} + +static void ci_program_sstp(struct radeon_device *rdev) +{ + WREG32_SMC(CG_SSP, (SSTU(R600_SSTU_DFLT) | SST(R600_SST_DFLT))); +} + +static void ci_enable_display_gap(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL); + + tmp &= ~(DISP_GAP_MASK | DISP_GAP_MCHG_MASK); + tmp |= (DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE) | + DISP_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK)); + + WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp); +} + +static void ci_program_vc(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + tmp &= ~(RESET_SCLK_CNT | RESET_BUSY_CNT); + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + WREG32_SMC(CG_FTV_0, CISLANDS_VRC_DFLT0); + WREG32_SMC(CG_FTV_1, CISLANDS_VRC_DFLT1); + WREG32_SMC(CG_FTV_2, CISLANDS_VRC_DFLT2); + WREG32_SMC(CG_FTV_3, CISLANDS_VRC_DFLT3); + WREG32_SMC(CG_FTV_4, CISLANDS_VRC_DFLT4); + WREG32_SMC(CG_FTV_5, CISLANDS_VRC_DFLT5); + WREG32_SMC(CG_FTV_6, CISLANDS_VRC_DFLT6); + WREG32_SMC(CG_FTV_7, CISLANDS_VRC_DFLT7); +} + +static void ci_clear_vc(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + tmp |= (RESET_SCLK_CNT | RESET_BUSY_CNT); + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + WREG32_SMC(CG_FTV_0, 0); + WREG32_SMC(CG_FTV_1, 0); + WREG32_SMC(CG_FTV_2, 0); + WREG32_SMC(CG_FTV_3, 0); + WREG32_SMC(CG_FTV_4, 0); + WREG32_SMC(CG_FTV_5, 0); + WREG32_SMC(CG_FTV_6, 0); + WREG32_SMC(CG_FTV_7, 0); +} + +static int ci_upload_firmware(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int i, ret; + + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32_SMC(RCU_UC_EVENTS) & BOOT_SEQ_DONE) + break; + } + WREG32_SMC(SMC_SYSCON_MISC_CNTL, 1); + + ci_stop_smc_clock(rdev); + ci_reset_smc(rdev); + + ret = ci_load_smc_ucode(rdev, pi->sram_end); + + return ret; + +} + +static int ci_get_svi2_voltage_table(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *voltage_dependency_table, + struct atom_voltage_table *voltage_table) +{ + u32 i; + + if (voltage_dependency_table == NULL) + return -EINVAL; + + voltage_table->mask_low = 0; + voltage_table->phase_delay = 0; + + voltage_table->count = voltage_dependency_table->count; + for (i = 0; i < voltage_table->count; i++) { + voltage_table->entries[i].value = voltage_dependency_table->entries[i].v; + voltage_table->entries[i].smio_low = 0; + } + + return 0; +} + +static int ci_construct_voltage_tables(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { + ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDC, + VOLTAGE_OBJ_GPIO_LUT, + &pi->vddc_voltage_table); + if (ret) + return ret; + } else if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + ret = ci_get_svi2_voltage_table(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &pi->vddc_voltage_table); + if (ret) + return ret; + } + + if (pi->vddc_voltage_table.count > SMU7_MAX_LEVELS_VDDC) + si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_VDDC, + &pi->vddc_voltage_table); + + if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { + ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDCI, + VOLTAGE_OBJ_GPIO_LUT, + &pi->vddci_voltage_table); + if (ret) + return ret; + } else if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + ret = ci_get_svi2_voltage_table(rdev, + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &pi->vddci_voltage_table); + if (ret) + return ret; + } + + if (pi->vddci_voltage_table.count > SMU7_MAX_LEVELS_VDDCI) + si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_VDDCI, + &pi->vddci_voltage_table); + + if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { + ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_MVDDC, + VOLTAGE_OBJ_GPIO_LUT, + &pi->mvdd_voltage_table); + if (ret) + return ret; + } else if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + ret = ci_get_svi2_voltage_table(rdev, + &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, + &pi->mvdd_voltage_table); + if (ret) + return ret; + } + + if (pi->mvdd_voltage_table.count > SMU7_MAX_LEVELS_MVDD) + si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_MVDD, + &pi->mvdd_voltage_table); + + return 0; +} + +static void ci_populate_smc_voltage_table(struct radeon_device *rdev, + struct atom_voltage_table_entry *voltage_table, + SMU7_Discrete_VoltageLevel *smc_voltage_table) +{ + int ret; + + ret = ci_get_std_voltage_value_sidd(rdev, voltage_table, + &smc_voltage_table->StdVoltageHiSidd, + &smc_voltage_table->StdVoltageLoSidd); + + if (ret) { + smc_voltage_table->StdVoltageHiSidd = voltage_table->value * VOLTAGE_SCALE; + smc_voltage_table->StdVoltageLoSidd = voltage_table->value * VOLTAGE_SCALE; + } + + smc_voltage_table->Voltage = cpu_to_be16(voltage_table->value * VOLTAGE_SCALE); + smc_voltage_table->StdVoltageHiSidd = + cpu_to_be16(smc_voltage_table->StdVoltageHiSidd); + smc_voltage_table->StdVoltageLoSidd = + cpu_to_be16(smc_voltage_table->StdVoltageLoSidd); +} + +static int ci_populate_smc_vddc_table(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + unsigned int count; + + table->VddcLevelCount = pi->vddc_voltage_table.count; + for (count = 0; count < table->VddcLevelCount; count++) { + ci_populate_smc_voltage_table(rdev, + &pi->vddc_voltage_table.entries[count], + &table->VddcLevel[count]); + + if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) + table->VddcLevel[count].Smio |= + pi->vddc_voltage_table.entries[count].smio_low; + else + table->VddcLevel[count].Smio = 0; + } + table->VddcLevelCount = cpu_to_be32(table->VddcLevelCount); + + return 0; +} + +static int ci_populate_smc_vddci_table(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + unsigned int count; + struct ci_power_info *pi = ci_get_pi(rdev); + + table->VddciLevelCount = pi->vddci_voltage_table.count; + for (count = 0; count < table->VddciLevelCount; count++) { + ci_populate_smc_voltage_table(rdev, + &pi->vddci_voltage_table.entries[count], + &table->VddciLevel[count]); + + if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) + table->VddciLevel[count].Smio |= + pi->vddci_voltage_table.entries[count].smio_low; + else + table->VddciLevel[count].Smio = 0; + } + table->VddciLevelCount = cpu_to_be32(table->VddciLevelCount); + + return 0; +} + +static int ci_populate_smc_mvdd_table(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + unsigned int count; + + table->MvddLevelCount = pi->mvdd_voltage_table.count; + for (count = 0; count < table->MvddLevelCount; count++) { + ci_populate_smc_voltage_table(rdev, + &pi->mvdd_voltage_table.entries[count], + &table->MvddLevel[count]); + + if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) + table->MvddLevel[count].Smio |= + pi->mvdd_voltage_table.entries[count].smio_low; + else + table->MvddLevel[count].Smio = 0; + } + table->MvddLevelCount = cpu_to_be32(table->MvddLevelCount); + + return 0; +} + +static int ci_populate_smc_voltage_tables(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + int ret; + + ret = ci_populate_smc_vddc_table(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_vddci_table(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_mvdd_table(rdev, table); + if (ret) + return ret; + + return 0; +} + +static int ci_populate_mvdd_value(struct radeon_device *rdev, u32 mclk, + SMU7_Discrete_VoltageLevel *voltage) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i = 0; + + if (pi->mvdd_control != CISLANDS_VOLTAGE_CONTROL_NONE) { + for (i = 0; i < rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count; i++) { + if (mclk <= rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries[i].clk) { + voltage->Voltage = pi->mvdd_voltage_table.entries[i].value; + break; + } + } + + if (i >= rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count) + return -EINVAL; + } + + return -EINVAL; +} + +static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, + struct atom_voltage_table_entry *voltage_table, + u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd) +{ + u16 v_index, idx; + bool voltage_found = false; + *std_voltage_hi_sidd = voltage_table->value * VOLTAGE_SCALE; + *std_voltage_lo_sidd = voltage_table->value * VOLTAGE_SCALE; + + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries == NULL) + return -EINVAL; + + if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries) { + for (v_index = 0; (u32)v_index < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) { + if (voltage_table->value == + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) { + voltage_found = true; + if ((u32)v_index < rdev->pm.dpm.dyn_state.cac_leakage_table.count) + idx = v_index; + else + idx = rdev->pm.dpm.dyn_state.cac_leakage_table.count - 1; + *std_voltage_lo_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE; + *std_voltage_hi_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE; + break; + } + } + + if (!voltage_found) { + for (v_index = 0; (u32)v_index < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) { + if (voltage_table->value <= + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) { + voltage_found = true; + if ((u32)v_index < rdev->pm.dpm.dyn_state.cac_leakage_table.count) + idx = v_index; + else + idx = rdev->pm.dpm.dyn_state.cac_leakage_table.count - 1; + *std_voltage_lo_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE; + *std_voltage_hi_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE; + break; + } + } + } + } + + return 0; +} + +static void ci_populate_phase_value_based_on_sclk(struct radeon_device *rdev, + const struct radeon_phase_shedding_limits_table *limits, + u32 sclk, + u32 *phase_shedding) +{ + unsigned int i; + + *phase_shedding = 1; + + for (i = 0; i < limits->count; i++) { + if (sclk < limits->entries[i].sclk) { + *phase_shedding = i; + break; + } + } +} + +static void ci_populate_phase_value_based_on_mclk(struct radeon_device *rdev, + const struct radeon_phase_shedding_limits_table *limits, + u32 mclk, + u32 *phase_shedding) +{ + unsigned int i; + + *phase_shedding = 1; + + for (i = 0; i < limits->count; i++) { + if (mclk < limits->entries[i].mclk) { + *phase_shedding = i; + break; + } + } +} + +static int ci_init_arb_table_index(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + int ret; + + ret = ci_read_smc_sram_dword(rdev, pi->arb_table_start, + &tmp, pi->sram_end); + if (ret) + return ret; + + tmp &= 0x00FFFFFF; + tmp |= MC_CG_ARB_FREQ_F1 << 24; + + return ci_write_smc_sram_dword(rdev, pi->arb_table_start, + tmp, pi->sram_end); +} + +static int ci_get_dependency_volt_by_clk(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *allowed_clock_voltage_table, + u32 clock, u32 *voltage) +{ + u32 i = 0; + + if (allowed_clock_voltage_table->count == 0) + return -EINVAL; + + for (i = 0; i < allowed_clock_voltage_table->count; i++) { + if (allowed_clock_voltage_table->entries[i].clk >= clock) { + *voltage = allowed_clock_voltage_table->entries[i].v; + return 0; + } + } + + *voltage = allowed_clock_voltage_table->entries[i-1].v; + + return 0; +} + +static u8 ci_get_sleep_divider_id_from_clock(struct radeon_device *rdev, + u32 sclk, u32 min_sclk_in_sr) +{ + u32 i; + u32 tmp; + u32 min = (min_sclk_in_sr > CISLAND_MINIMUM_ENGINE_CLOCK) ? + min_sclk_in_sr : CISLAND_MINIMUM_ENGINE_CLOCK; + + if (sclk < min) + return 0; + + for (i = CISLAND_MAX_DEEPSLEEP_DIVIDER_ID; ; i--) { + tmp = sclk / (1 << i); + if (tmp >= min || i == 0) + break; + } + + return (u8)i; +} + +static int ci_initial_switch_from_arb_f0_to_f1(struct radeon_device *rdev) +{ + return ni_copy_and_switch_arb_sets(rdev, MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1); +} + +static int ci_reset_to_default(struct radeon_device *rdev) +{ + return (ci_send_msg_to_smc(rdev, PPSMC_MSG_ResetToDefaults) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +static int ci_force_switch_to_arb_f0(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = (RREG32_SMC(SMC_SCRATCH9) & 0x0000ff00) >> 8; + + if (tmp == MC_CG_ARB_FREQ_F0) + return 0; + + return ni_copy_and_switch_arb_sets(rdev, tmp, MC_CG_ARB_FREQ_F0); +} + +static int ci_populate_memory_timing_parameters(struct radeon_device *rdev, + u32 sclk, + u32 mclk, + SMU7_Discrete_MCArbDramTimingTableEntry *arb_regs) +{ + u32 dram_timing; + u32 dram_timing2; + u32 burst_time; + + radeon_atom_set_engine_dram_timings(rdev, sclk, mclk); + + dram_timing = RREG32(MC_ARB_DRAM_TIMING); + dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2); + burst_time = RREG32(MC_ARB_BURST_TIME) & STATE0_MASK; + + arb_regs->McArbDramTiming = cpu_to_be32(dram_timing); + arb_regs->McArbDramTiming2 = cpu_to_be32(dram_timing2); + arb_regs->McArbBurstTime = (u8)burst_time; + + return 0; +} + +static int ci_do_program_memory_timing_parameters(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + SMU7_Discrete_MCArbDramTimingTable arb_regs; + u32 i, j; + int ret = 0; + + memset(&arb_regs, 0, sizeof(SMU7_Discrete_MCArbDramTimingTable)); + + for (i = 0; i < pi->dpm_table.sclk_table.count; i++) { + for (j = 0; j < pi->dpm_table.mclk_table.count; j++) { + ret = ci_populate_memory_timing_parameters(rdev, + pi->dpm_table.sclk_table.dpm_levels[i].value, + pi->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + if (ret) + break; + } + } + + if (ret == 0) + ret = ci_copy_bytes_to_smc(rdev, + pi->arb_table_start, + (u8 *)&arb_regs, + sizeof(SMU7_Discrete_MCArbDramTimingTable), + pi->sram_end); + + return ret; +} + +static int ci_program_memory_timing_parameters(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (pi->need_update_smu7_dpm_table == 0) + return 0; + + return ci_do_program_memory_timing_parameters(rdev); +} + +static void ci_populate_smc_initial_state(struct radeon_device *rdev, + struct radeon_ps *radeon_boot_state) +{ + struct ci_ps *boot_state = ci_get_ps(radeon_boot_state); + struct ci_power_info *pi = ci_get_pi(rdev); + u32 level = 0; + + for (level = 0; level < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; level++) { + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[level].clk >= + boot_state->performance_levels[0].sclk) { + pi->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + for (level = 0; level < rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.count; level++) { + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries[level].clk >= + boot_state->performance_levels[0].mclk) { + pi->smc_state_table.MemoryBootLevel = level; + break; + } + } +} + +static u32 ci_get_dpm_level_enable_mask_value(struct ci_single_dpm_table *dpm_table) +{ + u32 i; + u32 mask_value = 0; + + for (i = dpm_table->count; i > 0; i--) { + mask_value = mask_value << 1; + if (dpm_table->dpm_levels[i-1].enabled) + mask_value |= 0x1; + else + mask_value &= 0xFFFFFFFE; + } + + return mask_value; +} + +static void ci_populate_smc_link_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_dpm_table *dpm_table = &pi->dpm_table; + u32 i; + + for (i = 0; i < dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (u8)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = + r600_encode_pci_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = 1; + table->LinkLevel[i].DownT = cpu_to_be32(5); + table->LinkLevel[i].UpT = cpu_to_be32(30); + } + + pi->smc_state_table.LinkLevelCount = (u8)dpm_table->pcie_speed_table.count; + pi->dpm_level_enable_mask.pcie_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); +} + +static int ci_populate_smc_uvd_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->UvdLevelCount = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count; + + for (count = 0; count < table->UvdLevelCount; count++) { + table->UvdLevel[count].VclkFrequency = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].vclk; + table->UvdLevel[count].DclkFrequency = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].dclk; + table->UvdLevel[count].MinVddc = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; + table->UvdLevel[count].MinVddcPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->UvdLevel[count].VclkFrequency, false, ÷rs); + if (ret) + return ret; + + table->UvdLevel[count].VclkDivider = (u8)dividers.post_divider; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->UvdLevel[count].DclkFrequency, false, ÷rs); + if (ret) + return ret; + + table->UvdLevel[count].DclkDivider = (u8)dividers.post_divider; + + table->UvdLevel[count].VclkFrequency = cpu_to_be32(table->UvdLevel[count].VclkFrequency); + table->UvdLevel[count].DclkFrequency = cpu_to_be32(table->UvdLevel[count].DclkFrequency); + table->UvdLevel[count].MinVddc = cpu_to_be16(table->UvdLevel[count].MinVddc); + } + + return ret; +} + +static int ci_populate_smc_vce_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->VceLevelCount = + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count; + + for (count = 0; count < table->VceLevelCount; count++) { + table->VceLevel[count].Frequency = + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].evclk; + table->VceLevel[count].MinVoltage = + (u16)rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; + table->VceLevel[count].MinPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->VceLevel[count].Frequency, false, ÷rs); + if (ret) + return ret; + + table->VceLevel[count].Divider = (u8)dividers.post_divider; + + table->VceLevel[count].Frequency = cpu_to_be32(table->VceLevel[count].Frequency); + table->VceLevel[count].MinVoltage = cpu_to_be16(table->VceLevel[count].MinVoltage); + } + + return ret; + +} + +static int ci_populate_smc_acp_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->AcpLevelCount = (u8) + (rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count); + + for (count = 0; count < table->AcpLevelCount; count++) { + table->AcpLevel[count].Frequency = + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].clk; + table->AcpLevel[count].MinVoltage = + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].v; + table->AcpLevel[count].MinPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->AcpLevel[count].Frequency, false, ÷rs); + if (ret) + return ret; + + table->AcpLevel[count].Divider = (u8)dividers.post_divider; + + table->AcpLevel[count].Frequency = cpu_to_be32(table->AcpLevel[count].Frequency); + table->AcpLevel[count].MinVoltage = cpu_to_be16(table->AcpLevel[count].MinVoltage); + } + + return ret; +} + +static int ci_populate_smc_samu_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->SamuLevelCount = + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count; + + for (count = 0; count < table->SamuLevelCount; count++) { + table->SamuLevel[count].Frequency = + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].clk; + table->SamuLevel[count].MinVoltage = + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; + table->SamuLevel[count].MinPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->SamuLevel[count].Frequency, false, ÷rs); + if (ret) + return ret; + + table->SamuLevel[count].Divider = (u8)dividers.post_divider; + + table->SamuLevel[count].Frequency = cpu_to_be32(table->SamuLevel[count].Frequency); + table->SamuLevel[count].MinVoltage = cpu_to_be16(table->SamuLevel[count].MinVoltage); + } + + return ret; +} + +static int ci_calculate_mclk_params(struct radeon_device *rdev, + u32 memory_clock, + SMU7_Discrete_MemoryLevel *mclk, + bool strobe_mode, + bool dll_state_on) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 dll_cntl = pi->clock_registers.dll_cntl; + u32 mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl; + u32 mpll_ad_func_cntl = pi->clock_registers.mpll_ad_func_cntl; + u32 mpll_dq_func_cntl = pi->clock_registers.mpll_dq_func_cntl; + u32 mpll_func_cntl = pi->clock_registers.mpll_func_cntl; + u32 mpll_func_cntl_1 = pi->clock_registers.mpll_func_cntl_1; + u32 mpll_func_cntl_2 = pi->clock_registers.mpll_func_cntl_2; + u32 mpll_ss1 = pi->clock_registers.mpll_ss1; + u32 mpll_ss2 = pi->clock_registers.mpll_ss2; + struct atom_mpll_param mpll_param; + int ret; + + ret = radeon_atom_get_memory_pll_dividers(rdev, memory_clock, strobe_mode, &mpll_param); + if (ret) + return ret; + + mpll_func_cntl &= ~BWCTRL_MASK; + mpll_func_cntl |= BWCTRL(mpll_param.bwcntl); + + mpll_func_cntl_1 &= ~(CLKF_MASK | CLKFRAC_MASK | VCO_MODE_MASK); + mpll_func_cntl_1 |= CLKF(mpll_param.clkf) | + CLKFRAC(mpll_param.clkfrac) | VCO_MODE(mpll_param.vco_mode); + + mpll_ad_func_cntl &= ~YCLK_POST_DIV_MASK; + mpll_ad_func_cntl |= YCLK_POST_DIV(mpll_param.post_div); + + if (pi->mem_gddr5) { + mpll_dq_func_cntl &= ~(YCLK_SEL_MASK | YCLK_POST_DIV_MASK); + mpll_dq_func_cntl |= YCLK_SEL(mpll_param.yclk_sel) | + YCLK_POST_DIV(mpll_param.post_div); + } + + if (pi->caps_mclk_ss_support) { + struct radeon_atom_ss ss; + u32 freq_nom; + u32 tmp; + u32 reference_clock = rdev->clock.mpll.reference_freq; + + if (pi->mem_gddr5) + freq_nom = memory_clock * 4; + else + freq_nom = memory_clock * 2; + + tmp = (freq_nom / reference_clock); + tmp = tmp * tmp; + if (radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_MEMORY_SS, freq_nom)) { + u32 clks = reference_clock * 5 / ss.rate; + u32 clkv = (u32)((((131 * ss.percentage * ss.rate) / 100) * tmp) / freq_nom); + + mpll_ss1 &= ~CLKV_MASK; + mpll_ss1 |= CLKV(clkv); + + mpll_ss2 &= ~CLKS_MASK; + mpll_ss2 |= CLKS(clks); + } + } + + mclk_pwrmgt_cntl &= ~DLL_SPEED_MASK; + mclk_pwrmgt_cntl |= DLL_SPEED(mpll_param.dll_speed); + + if (dll_state_on) + mclk_pwrmgt_cntl |= MRDCK0_PDNB | MRDCK1_PDNB; + else + mclk_pwrmgt_cntl &= ~(MRDCK0_PDNB | MRDCK1_PDNB); + + mclk->MclkFrequency = memory_clock; + mclk->MpllFuncCntl = mpll_func_cntl; + mclk->MpllFuncCntl_1 = mpll_func_cntl_1; + mclk->MpllFuncCntl_2 = mpll_func_cntl_2; + mclk->MpllAdFuncCntl = mpll_ad_func_cntl; + mclk->MpllDqFuncCntl = mpll_dq_func_cntl; + mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; + mclk->DllCntl = dll_cntl; + mclk->MpllSs1 = mpll_ss1; + mclk->MpllSs2 = mpll_ss2; + + return 0; +} + +static int ci_populate_single_memory_level(struct radeon_device *rdev, + u32 memory_clock, + SMU7_Discrete_MemoryLevel *memory_level) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + bool dll_state_on; + + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries) { + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + memory_clock, &memory_level->MinVddc); + if (ret) + return ret; + } + + if (rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries) { + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + memory_clock, &memory_level->MinVddci); + if (ret) + return ret; + } + + if (rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries) { + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, + memory_clock, &memory_level->MinMvdd); + if (ret) + return ret; + } + + memory_level->MinVddcPhases = 1; + + if (pi->vddc_phase_shed_control) + ci_populate_phase_value_based_on_mclk(rdev, + &rdev->pm.dpm.dyn_state.phase_shedding_limits_table, + memory_clock, + &memory_level->MinVddcPhases); + + memory_level->EnabledForThrottle = 1; + memory_level->EnabledForActivity = 1; + memory_level->UpH = 0; + memory_level->DownH = 100; + memory_level->VoltageDownH = 0; + memory_level->ActivityLevel = (u16)pi->mclk_activity_target; + + memory_level->StutterEnable = false; + memory_level->StrobeEnable = false; + memory_level->EdcReadEnable = false; + memory_level->EdcWriteEnable = false; + memory_level->RttEnable = false; + + memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + if (pi->mclk_stutter_mode_threshold && + (memory_clock <= pi->mclk_stutter_mode_threshold) && + (pi->uvd_enabled == false) && + (RREG32(DPG_PIPE_STUTTER_CONTROL) & STUTTER_ENABLE) && + (rdev->pm.dpm.new_active_crtc_count <= 2)) + memory_level->StutterEnable = true; + + if (pi->mclk_strobe_mode_threshold && + (memory_clock <= pi->mclk_strobe_mode_threshold)) + memory_level->StrobeEnable = 1; + + if (pi->mem_gddr5) { + memory_level->StrobeRatio = + si_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable); + if (pi->mclk_edc_enable_threshold && + (memory_clock > pi->mclk_edc_enable_threshold)) + memory_level->EdcReadEnable = true; + + if (pi->mclk_edc_wr_enable_threshold && + (memory_clock > pi->mclk_edc_wr_enable_threshold)) + memory_level->EdcWriteEnable = true; + + if (memory_level->StrobeEnable) { + if (si_get_mclk_frequency_ratio(memory_clock, true) >= + ((RREG32(MC_SEQ_MISC7) >> 16) & 0xf)) + dll_state_on = ((RREG32(MC_SEQ_MISC5) >> 1) & 0x1) ? true : false; + else + dll_state_on = ((RREG32(MC_SEQ_MISC6) >> 1) & 0x1) ? true : false; + } else { + dll_state_on = pi->dll_default_on; + } + } else { + memory_level->StrobeRatio = si_get_ddr3_mclk_frequency_ratio(memory_clock); + dll_state_on = ((RREG32(MC_SEQ_MISC5) >> 1) & 0x1) ? true : false; + } + + ret = ci_calculate_mclk_params(rdev, memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on); + if (ret) + return ret; + + memory_level->MinVddc = cpu_to_be32(memory_level->MinVddc * VOLTAGE_SCALE); + memory_level->MinVddcPhases = cpu_to_be32(memory_level->MinVddcPhases); + memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE); + memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE); + + memory_level->MclkFrequency = cpu_to_be32(memory_level->MclkFrequency); + memory_level->ActivityLevel = cpu_to_be16(memory_level->ActivityLevel); + memory_level->MpllFuncCntl = cpu_to_be32(memory_level->MpllFuncCntl); + memory_level->MpllFuncCntl_1 = cpu_to_be32(memory_level->MpllFuncCntl_1); + memory_level->MpllFuncCntl_2 = cpu_to_be32(memory_level->MpllFuncCntl_2); + memory_level->MpllAdFuncCntl = cpu_to_be32(memory_level->MpllAdFuncCntl); + memory_level->MpllDqFuncCntl = cpu_to_be32(memory_level->MpllDqFuncCntl); + memory_level->MclkPwrmgtCntl = cpu_to_be32(memory_level->MclkPwrmgtCntl); + memory_level->DllCntl = cpu_to_be32(memory_level->DllCntl); + memory_level->MpllSs1 = cpu_to_be32(memory_level->MpllSs1); + memory_level->MpllSs2 = cpu_to_be32(memory_level->MpllSs2); + + return 0; +} + +static int ci_populate_smc_acpi_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct atom_clock_dividers dividers; + SMU7_Discrete_VoltageLevel voltage_level; + u32 spll_func_cntl = pi->clock_registers.cg_spll_func_cntl; + u32 spll_func_cntl_2 = pi->clock_registers.cg_spll_func_cntl_2; + u32 dll_cntl = pi->clock_registers.dll_cntl; + u32 mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl; + int ret; + + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + if (pi->acpi_vddc) + table->ACPILevel.MinVddc = cpu_to_be32(pi->acpi_vddc * VOLTAGE_SCALE); + else + table->ACPILevel.MinVddc = cpu_to_be32(pi->min_vddc_in_pp_table * VOLTAGE_SCALE); + + table->ACPILevel.MinVddcPhases = pi->vddc_phase_shed_control ? 0 : 1; + + table->ACPILevel.SclkFrequency = rdev->clock.spll.reference_freq; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_SCLK, + table->ACPILevel.SclkFrequency, false, ÷rs); + if (ret) + return ret; + + table->ACPILevel.SclkDid = (u8)dividers.post_divider; + table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + table->ACPILevel.DeepSleepDivId = 0; + + spll_func_cntl &= ~SPLL_PWRON; + spll_func_cntl |= SPLL_RESET; + + spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= SCLK_MUX_SEL(4); + + table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; + table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; + table->ACPILevel.CgSpllFuncCntl3 = pi->clock_registers.cg_spll_func_cntl_3; + table->ACPILevel.CgSpllFuncCntl4 = pi->clock_registers.cg_spll_func_cntl_4; + table->ACPILevel.SpllSpreadSpectrum = pi->clock_registers.cg_spll_spread_spectrum; + table->ACPILevel.SpllSpreadSpectrum2 = pi->clock_registers.cg_spll_spread_spectrum_2; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + table->ACPILevel.Flags = cpu_to_be32(table->ACPILevel.Flags); + table->ACPILevel.MinVddcPhases = cpu_to_be32(table->ACPILevel.MinVddcPhases); + table->ACPILevel.SclkFrequency = cpu_to_be32(table->ACPILevel.SclkFrequency); + table->ACPILevel.CgSpllFuncCntl = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl); + table->ACPILevel.CgSpllFuncCntl2 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl2); + table->ACPILevel.CgSpllFuncCntl3 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl3); + table->ACPILevel.CgSpllFuncCntl4 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl4); + table->ACPILevel.SpllSpreadSpectrum = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum); + table->ACPILevel.SpllSpreadSpectrum2 = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum2); + table->ACPILevel.CcPwrDynRm = cpu_to_be32(table->ACPILevel.CcPwrDynRm); + table->ACPILevel.CcPwrDynRm1 = cpu_to_be32(table->ACPILevel.CcPwrDynRm1); + + table->MemoryACPILevel.MinVddc = table->ACPILevel.MinVddc; + table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases; + + if (pi->vddci_control != CISLANDS_VOLTAGE_CONTROL_NONE) { + if (pi->acpi_vddci) + table->MemoryACPILevel.MinVddci = + cpu_to_be32(pi->acpi_vddci * VOLTAGE_SCALE); + else + table->MemoryACPILevel.MinVddci = + cpu_to_be32(pi->min_vddci_in_pp_table * VOLTAGE_SCALE); + } + + if (ci_populate_mvdd_value(rdev, 0, &voltage_level)) + table->MemoryACPILevel.MinMvdd = 0; + else + table->MemoryACPILevel.MinMvdd = + cpu_to_be32(voltage_level.Voltage * VOLTAGE_SCALE); + + mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET; + mclk_pwrmgt_cntl &= ~(MRDCK0_PDNB | MRDCK1_PDNB); + + dll_cntl &= ~(MRDCK0_BYPASS | MRDCK1_BYPASS); + + table->MemoryACPILevel.DllCntl = cpu_to_be32(dll_cntl); + table->MemoryACPILevel.MclkPwrmgtCntl = cpu_to_be32(mclk_pwrmgt_cntl); + table->MemoryACPILevel.MpllAdFuncCntl = + cpu_to_be32(pi->clock_registers.mpll_ad_func_cntl); + table->MemoryACPILevel.MpllDqFuncCntl = + cpu_to_be32(pi->clock_registers.mpll_dq_func_cntl); + table->MemoryACPILevel.MpllFuncCntl = + cpu_to_be32(pi->clock_registers.mpll_func_cntl); + table->MemoryACPILevel.MpllFuncCntl_1 = + cpu_to_be32(pi->clock_registers.mpll_func_cntl_1); + table->MemoryACPILevel.MpllFuncCntl_2 = + cpu_to_be32(pi->clock_registers.mpll_func_cntl_2); + table->MemoryACPILevel.MpllSs1 = cpu_to_be32(pi->clock_registers.mpll_ss1); + table->MemoryACPILevel.MpllSs2 = cpu_to_be32(pi->clock_registers.mpll_ss2); + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpH = 0; + table->MemoryACPILevel.DownH = 100; + table->MemoryACPILevel.VoltageDownH = 0; + table->MemoryACPILevel.ActivityLevel = + cpu_to_be16((u16)pi->mclk_activity_target); + + table->MemoryACPILevel.StutterEnable = false; + table->MemoryACPILevel.StrobeEnable = false; + table->MemoryACPILevel.EdcReadEnable = false; + table->MemoryACPILevel.EdcWriteEnable = false; + table->MemoryACPILevel.RttEnable = false; + + return 0; +} + + +static int ci_enable_ulv(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ulv_parm *ulv = &pi->ulv; + + if (ulv->supported) { + if (enable) + return (ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableULV) == PPSMC_Result_OK) ? + 0 : -EINVAL; + else + return (ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableULV) == PPSMC_Result_OK) ? + 0 : -EINVAL; + } + + return 0; +} + +static int ci_populate_ulv_level(struct radeon_device *rdev, + SMU7_Discrete_Ulv *state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u16 ulv_voltage = rdev->pm.dpm.backbias_response_time; + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + if (ulv_voltage == 0) { + pi->ulv.supported = false; + return 0; + } + + if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + if (ulv_voltage > rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v) + state->VddcOffset = 0; + else + state->VddcOffset = + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage; + } else { + if (ulv_voltage > rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v) + state->VddcOffsetVid = 0; + else + state->VddcOffsetVid = (u8) + ((rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage) * + VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + } + state->VddcPhase = pi->vddc_phase_shed_control ? 0 : 1; + + state->CcPwrDynRm = cpu_to_be32(state->CcPwrDynRm); + state->CcPwrDynRm1 = cpu_to_be32(state->CcPwrDynRm1); + state->VddcOffset = cpu_to_be16(state->VddcOffset); + + return 0; +} + +static int ci_calculate_sclk_params(struct radeon_device *rdev, + u32 engine_clock, + SMU7_Discrete_GraphicsLevel *sclk) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct atom_clock_dividers dividers; + u32 spll_func_cntl_3 = pi->clock_registers.cg_spll_func_cntl_3; + u32 spll_func_cntl_4 = pi->clock_registers.cg_spll_func_cntl_4; + u32 cg_spll_spread_spectrum = pi->clock_registers.cg_spll_spread_spectrum; + u32 cg_spll_spread_spectrum_2 = pi->clock_registers.cg_spll_spread_spectrum_2; + u32 reference_clock = rdev->clock.spll.reference_freq; + u32 reference_divider; + u32 fbdiv; + int ret; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_SCLK, + engine_clock, false, ÷rs); + if (ret) + return ret; + + reference_divider = 1 + dividers.ref_div; + fbdiv = dividers.fb_div & 0x3FFFFFF; + + spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK; + spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv); + spll_func_cntl_3 |= SPLL_DITHEN; + + if (pi->caps_sclk_ss_support) { + struct radeon_atom_ss ss; + u32 vco_freq = engine_clock * dividers.post_div; + + if (radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_ENGINE_SS, vco_freq)) { + u32 clk_s = reference_clock * 5 / (reference_divider * ss.rate); + u32 clk_v = 4 * ss.percentage * fbdiv / (clk_s * 10000); + + cg_spll_spread_spectrum &= ~CLK_S_MASK; + cg_spll_spread_spectrum |= CLK_S(clk_s); + cg_spll_spread_spectrum |= SSEN; + + cg_spll_spread_spectrum_2 &= ~CLK_V_MASK; + cg_spll_spread_spectrum_2 |= CLK_V(clk_v); + } + } + + sclk->SclkFrequency = engine_clock; + sclk->CgSpllFuncCntl3 = spll_func_cntl_3; + sclk->CgSpllFuncCntl4 = spll_func_cntl_4; + sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; + sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; + sclk->SclkDid = (u8)dividers.post_divider; + + return 0; +} + +static int ci_populate_single_graphic_level(struct radeon_device *rdev, + u32 engine_clock, + u16 sclk_activity_level_t, + SMU7_Discrete_GraphicsLevel *graphic_level) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + ret = ci_calculate_sclk_params(rdev, engine_clock, graphic_level); + if (ret) + return ret; + + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + engine_clock, &graphic_level->MinVddc); + if (ret) + return ret; + + graphic_level->SclkFrequency = engine_clock; + + graphic_level->Flags = 0; + graphic_level->MinVddcPhases = 1; + + if (pi->vddc_phase_shed_control) + ci_populate_phase_value_based_on_sclk(rdev, + &rdev->pm.dpm.dyn_state.phase_shedding_limits_table, + engine_clock, + &graphic_level->MinVddcPhases); + + graphic_level->ActivityLevel = sclk_activity_level_t; + + graphic_level->CcPwrDynRm = 0; + graphic_level->CcPwrDynRm1 = 0; + graphic_level->EnabledForActivity = 1; + graphic_level->EnabledForThrottle = 1; + graphic_level->UpH = 0; + graphic_level->DownH = 0; + graphic_level->VoltageDownH = 0; + graphic_level->PowerThrottle = 0; + + if (pi->caps_sclk_ds) + graphic_level->DeepSleepDivId = ci_get_sleep_divider_id_from_clock(rdev, + engine_clock, + CISLAND_MINIMUM_ENGINE_CLOCK); + + graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + graphic_level->Flags = cpu_to_be32(graphic_level->Flags); + graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE); + graphic_level->MinVddcPhases = cpu_to_be32(graphic_level->MinVddcPhases); + graphic_level->SclkFrequency = cpu_to_be32(graphic_level->SclkFrequency); + graphic_level->ActivityLevel = cpu_to_be16(graphic_level->ActivityLevel); + graphic_level->CgSpllFuncCntl3 = cpu_to_be32(graphic_level->CgSpllFuncCntl3); + graphic_level->CgSpllFuncCntl4 = cpu_to_be32(graphic_level->CgSpllFuncCntl4); + graphic_level->SpllSpreadSpectrum = cpu_to_be32(graphic_level->SpllSpreadSpectrum); + graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2); + graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm); + graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1); + + return 0; +} + +static int ci_populate_all_graphic_levels(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_dpm_table *dpm_table = &pi->dpm_table; + u32 level_array_address = pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); + u32 level_array_size = sizeof(SMU7_Discrete_GraphicsLevel) * + SMU7_MAX_LEVELS_GRAPHICS; + SMU7_Discrete_GraphicsLevel *levels = pi->smc_state_table.GraphicsLevel; + u32 i, ret; + + memset(levels, 0, level_array_size); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + ret = ci_populate_single_graphic_level(rdev, + dpm_table->sclk_table.dpm_levels[i].value, + (u16)pi->activity_target[i], + &pi->smc_state_table.GraphicsLevel[i]); + if (ret) + return ret; + if (i == (dpm_table->sclk_table.count - 1)) + pi->smc_state_table.GraphicsLevel[i].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + } + + pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count; + pi->dpm_level_enable_mask.sclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + ret = ci_copy_bytes_to_smc(rdev, level_array_address, + (u8 *)levels, level_array_size, + pi->sram_end); + if (ret) + return ret; + + return 0; +} + +static int ci_populate_ulv_state(struct radeon_device *rdev, + SMU7_Discrete_Ulv *ulv_level) +{ + return ci_populate_ulv_level(rdev, ulv_level); +} + +static int ci_populate_all_memory_levels(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_dpm_table *dpm_table = &pi->dpm_table; + u32 level_array_address = pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, MemoryLevel); + u32 level_array_size = sizeof(SMU7_Discrete_MemoryLevel) * + SMU7_MAX_LEVELS_MEMORY; + SMU7_Discrete_MemoryLevel *levels = pi->smc_state_table.MemoryLevel; + u32 i, ret; + + memset(levels, 0, level_array_size); + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + if (dpm_table->mclk_table.dpm_levels[i].value == 0) + return -EINVAL; + ret = ci_populate_single_memory_level(rdev, + dpm_table->mclk_table.dpm_levels[i].value, + &pi->smc_state_table.MemoryLevel[i]); + if (ret) + return ret; + } + + pi->smc_state_table.MemoryLevel[0].ActivityLevel = cpu_to_be16(0x1F); + + pi->smc_state_table.MemoryDpmLevelCount = (u8)dpm_table->mclk_table.count; + pi->dpm_level_enable_mask.mclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + + pi->smc_state_table.MemoryLevel[dpm_table->mclk_table.count - 1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + ret = ci_copy_bytes_to_smc(rdev, level_array_address, + (u8 *)levels, level_array_size, + pi->sram_end); + if (ret) + return ret; + + return 0; +} + +static void ci_reset_single_dpm_table(struct radeon_device *rdev, + struct ci_single_dpm_table* dpm_table, + u32 count) +{ + u32 i; + + dpm_table->count = count; + for (i = 0; i < MAX_REGULAR_DPM_NUMBER; i++) + dpm_table->dpm_levels[i].enabled = false; +} + +static void ci_setup_pcie_table_entry(struct ci_single_dpm_table* dpm_table, + u32 index, u32 pcie_gen, u32 pcie_lanes) +{ + dpm_table->dpm_levels[index].value = pcie_gen; + dpm_table->dpm_levels[index].param1 = pcie_lanes; + dpm_table->dpm_levels[index].enabled = true; +} + +static int ci_setup_default_pcie_tables(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels) + return -EINVAL; + + if (pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels) { + pi->pcie_gen_powersaving = pi->pcie_gen_performance; + pi->pcie_lane_powersaving = pi->pcie_lane_performance; + } else if (!pi->use_pcie_performance_levels && pi->use_pcie_powersaving_levels) { + pi->pcie_gen_performance = pi->pcie_gen_powersaving; + pi->pcie_lane_performance = pi->pcie_lane_powersaving; + } + + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.pcie_speed_table, + SMU7_MAX_LEVELS_LINK); + + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0, + pi->pcie_gen_powersaving.min, + pi->pcie_lane_powersaving.min); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 1, + pi->pcie_gen_performance.min, + pi->pcie_lane_performance.min); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 2, + pi->pcie_gen_powersaving.min, + pi->pcie_lane_powersaving.max); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 3, + pi->pcie_gen_performance.min, + pi->pcie_lane_performance.max); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 4, + pi->pcie_gen_powersaving.max, + pi->pcie_lane_powersaving.max); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 5, + pi->pcie_gen_performance.max, + pi->pcie_lane_performance.max); + + pi->dpm_table.pcie_speed_table.count = 6; + + return 0; +} + +static int ci_setup_default_dpm_tables(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *allowed_sclk_vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + struct radeon_clock_voltage_dependency_table *allowed_mclk_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk; + struct radeon_cac_leakage_table *std_voltage_table = + &rdev->pm.dpm.dyn_state.cac_leakage_table; + u32 i; + + if (allowed_sclk_vddc_table == NULL) + return -EINVAL; + if (allowed_sclk_vddc_table->count < 1) + return -EINVAL; + if (allowed_mclk_table == NULL) + return -EINVAL; + if (allowed_mclk_table->count < 1) + return -EINVAL; + + memset(&pi->dpm_table, 0, sizeof(struct ci_dpm_table)); + + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.sclk_table, + SMU7_MAX_LEVELS_GRAPHICS); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.mclk_table, + SMU7_MAX_LEVELS_MEMORY); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.vddc_table, + SMU7_MAX_LEVELS_VDDC); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.vddci_table, + SMU7_MAX_LEVELS_VDDCI); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.mvdd_table, + SMU7_MAX_LEVELS_MVDD); + + pi->dpm_table.sclk_table.count = 0; + for (i = 0; i < allowed_sclk_vddc_table->count; i++) { + if ((i == 0) || + (pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count-1].value != + allowed_sclk_vddc_table->entries[i].clk)) { + pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].value = + allowed_sclk_vddc_table->entries[i].clk; + pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled = true; + pi->dpm_table.sclk_table.count++; + } + } + + pi->dpm_table.mclk_table.count = 0; + for (i = 0; i < allowed_mclk_table->count; i++) { + if ((i==0) || + (pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count-1].value != + allowed_mclk_table->entries[i].clk)) { + pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].value = + allowed_mclk_table->entries[i].clk; + pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled = true; + pi->dpm_table.mclk_table.count++; + } + } + + for (i = 0; i < allowed_sclk_vddc_table->count; i++) { + pi->dpm_table.vddc_table.dpm_levels[i].value = + allowed_sclk_vddc_table->entries[i].v; + pi->dpm_table.vddc_table.dpm_levels[i].param1 = + std_voltage_table->entries[i].leakage; + pi->dpm_table.vddc_table.dpm_levels[i].enabled = true; + } + pi->dpm_table.vddc_table.count = allowed_sclk_vddc_table->count; + + allowed_mclk_table = &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk; + if (allowed_mclk_table) { + for (i = 0; i < allowed_mclk_table->count; i++) { + pi->dpm_table.vddci_table.dpm_levels[i].value = + allowed_mclk_table->entries[i].v; + pi->dpm_table.vddci_table.dpm_levels[i].enabled = true; + } + pi->dpm_table.vddci_table.count = allowed_mclk_table->count; + } + + allowed_mclk_table = &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk; + if (allowed_mclk_table) { + for (i = 0; i < allowed_mclk_table->count; i++) { + pi->dpm_table.mvdd_table.dpm_levels[i].value = + allowed_mclk_table->entries[i].v; + pi->dpm_table.mvdd_table.dpm_levels[i].enabled = true; + } + pi->dpm_table.mvdd_table.count = allowed_mclk_table->count; + } + + ci_setup_default_pcie_tables(rdev); + + return 0; +} + +static int ci_find_boot_level(struct ci_single_dpm_table *table, + u32 value, u32 *boot_level) +{ + u32 i; + int ret = -EINVAL; + + for(i = 0; i < table->count; i++) { + if (value == table->dpm_levels[i].value) { + *boot_level = i; + ret = 0; + } + } + + return ret; +} + +static int ci_init_smc_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ulv_parm *ulv = &pi->ulv; + struct radeon_ps *radeon_boot_state = rdev->pm.dpm.boot_ps; + SMU7_Discrete_DpmTable *table = &pi->smc_state_table; + int ret; + + ret = ci_setup_default_dpm_tables(rdev); + if (ret) + return ret; + + if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) + ci_populate_smc_voltage_tables(rdev, table); + + ci_init_fps_limits(rdev); + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_HARDWAREDC) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (pi->mem_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + if (ulv->supported) { + ret = ci_populate_ulv_state(rdev, &pi->smc_state_table.Ulv); + if (ret) + return ret; + WREG32_SMC(CG_ULV_PARAMETER, ulv->cg_ulv_parameter); + } + + ret = ci_populate_all_graphic_levels(rdev); + if (ret) + return ret; + + ret = ci_populate_all_memory_levels(rdev); + if (ret) + return ret; + + ci_populate_smc_link_level(rdev, table); + + ret = ci_populate_smc_acpi_level(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_vce_level(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_acp_level(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_samu_level(rdev, table); + if (ret) + return ret; + + ret = ci_do_program_memory_timing_parameters(rdev); + if (ret) + return ret; + + ret = ci_populate_smc_uvd_level(rdev, table); + if (ret) + return ret; + + table->UvdBootLevel = 0; + table->VceBootLevel = 0; + table->AcpBootLevel = 0; + table->SamuBootLevel = 0; + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + ret = ci_find_boot_level(&pi->dpm_table.sclk_table, + pi->vbios_boot_state.sclk_bootup_value, + (u32 *)&pi->smc_state_table.GraphicsBootLevel); + + ret = ci_find_boot_level(&pi->dpm_table.mclk_table, + pi->vbios_boot_state.mclk_bootup_value, + (u32 *)&pi->smc_state_table.MemoryBootLevel); + + table->BootVddc = pi->vbios_boot_state.vddc_bootup_value; + table->BootVddci = pi->vbios_boot_state.vddci_bootup_value; + table->BootMVdd = pi->vbios_boot_state.mvdd_bootup_value; + + ci_populate_smc_initial_state(rdev, radeon_boot_state); + + ret = ci_populate_bapm_parameters_in_dpm_table(rdev); + if (ret) + return ret; + + table->UVDInterval = 1; + table->VCEInterval = 1; + table->ACPInterval = 1; + table->SAMUInterval = 1; + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = (u16)((pi->thermal_temp_setting.temperature_high * + CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000); + table->TemperatureLimitLow = (u16)((pi->thermal_temp_setting.temperature_low * + CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000); + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->VddcVddciDelta = 4000; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + table->PCIeBootLinkLevel = 0; + table->PCIeGenInterval = 1; + if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) + table->SVI2Enable = 1; + else + table->SVI2Enable = 0; + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + table->SystemFlags = cpu_to_be32(table->SystemFlags); + table->SmioMaskVddcVid = cpu_to_be32(table->SmioMaskVddcVid); + table->SmioMaskVddcPhase = cpu_to_be32(table->SmioMaskVddcPhase); + table->SmioMaskVddciVid = cpu_to_be32(table->SmioMaskVddciVid); + table->SmioMaskMvddVid = cpu_to_be32(table->SmioMaskMvddVid); + table->SclkStepSize = cpu_to_be32(table->SclkStepSize); + table->TemperatureLimitHigh = cpu_to_be16(table->TemperatureLimitHigh); + table->TemperatureLimitLow = cpu_to_be16(table->TemperatureLimitLow); + table->VddcVddciDelta = cpu_to_be16(table->VddcVddciDelta); + table->VoltageResponseTime = cpu_to_be16(table->VoltageResponseTime); + table->PhaseResponseTime = cpu_to_be16(table->PhaseResponseTime); + table->BootVddc = cpu_to_be16(table->BootVddc * VOLTAGE_SCALE); + table->BootVddci = cpu_to_be16(table->BootVddci * VOLTAGE_SCALE); + table->BootMVdd = cpu_to_be16(table->BootMVdd * VOLTAGE_SCALE); + + ret = ci_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, SystemFlags), + (u8 *)&table->SystemFlags, + sizeof(SMU7_Discrete_DpmTable) - 3 * sizeof(SMU7_PIDController), + pi->sram_end); + if (ret) + return ret; + + return 0; +} + +static void ci_trim_single_dpm_states(struct radeon_device *rdev, + struct ci_single_dpm_table *dpm_table, + u32 low_limit, u32 high_limit) +{ + u32 i; + + for (i = 0; i < dpm_table->count; i++) { + if ((dpm_table->dpm_levels[i].value < low_limit) || + (dpm_table->dpm_levels[i].value > high_limit)) + dpm_table->dpm_levels[i].enabled = false; + else + dpm_table->dpm_levels[i].enabled = true; + } +} + +static void ci_trim_pcie_dpm_states(struct radeon_device *rdev, + u32 speed_low, u32 lanes_low, + u32 speed_high, u32 lanes_high) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table; + u32 i, j; + + for (i = 0; i < pcie_table->count; i++) { + if ((pcie_table->dpm_levels[i].value < speed_low) || + (pcie_table->dpm_levels[i].param1 < lanes_low) || + (pcie_table->dpm_levels[i].value > speed_high) || + (pcie_table->dpm_levels[i].param1 > lanes_high)) + pcie_table->dpm_levels[i].enabled = false; + else + pcie_table->dpm_levels[i].enabled = true; + } + + for (i = 0; i < pcie_table->count; i++) { + if (pcie_table->dpm_levels[i].enabled) { + for (j = i + 1; j < pcie_table->count; j++) { + if (pcie_table->dpm_levels[j].enabled) { + if ((pcie_table->dpm_levels[i].value == pcie_table->dpm_levels[j].value) && + (pcie_table->dpm_levels[i].param1 == pcie_table->dpm_levels[j].param1)) + pcie_table->dpm_levels[j].enabled = false; + } + } + } + } +} + +static int ci_trim_dpm_states(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_ps *state = ci_get_ps(radeon_state); + struct ci_power_info *pi = ci_get_pi(rdev); + u32 high_limit_count; + + if (state->performance_level_count < 1) + return -EINVAL; + + if (state->performance_level_count == 1) + high_limit_count = 0; + else + high_limit_count = 1; + + ci_trim_single_dpm_states(rdev, + &pi->dpm_table.sclk_table, + state->performance_levels[0].sclk, + state->performance_levels[high_limit_count].sclk); + + ci_trim_single_dpm_states(rdev, + &pi->dpm_table.mclk_table, + state->performance_levels[0].mclk, + state->performance_levels[high_limit_count].mclk); + + ci_trim_pcie_dpm_states(rdev, + state->performance_levels[0].pcie_gen, + state->performance_levels[0].pcie_lane, + state->performance_levels[high_limit_count].pcie_gen, + state->performance_levels[high_limit_count].pcie_lane); + + return 0; +} + +static int ci_apply_disp_minimum_voltage_request(struct radeon_device *rdev) +{ + struct radeon_clock_voltage_dependency_table *disp_voltage_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk; + struct radeon_clock_voltage_dependency_table *vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + u32 requested_voltage = 0; + u32 i; + + if (disp_voltage_table == NULL) + return -EINVAL; + if (!disp_voltage_table->count) + return -EINVAL; + + for (i = 0; i < disp_voltage_table->count; i++) { + if (rdev->clock.current_dispclk == disp_voltage_table->entries[i].clk) + requested_voltage = disp_voltage_table->entries[i].v; + } + + for (i = 0; i < vddc_table->count; i++) { + if (requested_voltage <= vddc_table->entries[i].v) { + requested_voltage = vddc_table->entries[i].v; + return (ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_VddC_Request, + requested_voltage * VOLTAGE_SCALE) == PPSMC_Result_OK) ? + 0 : -EINVAL; + } + } + + return -EINVAL; +} + +static int ci_upload_dpm_level_enable_mask(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result result; + + if (!pi->sclk_dpm_key_disabled) { + if (pi->dpm_level_enable_mask.sclk_dpm_enable_mask) { + result = ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.sclk_dpm_enable_mask); + if (result != PPSMC_Result_OK) + return -EINVAL; + } + } + + if (!pi->mclk_dpm_key_disabled) { + if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask) { + result = ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.mclk_dpm_enable_mask); + if (result != PPSMC_Result_OK) + return -EINVAL; + } + } + + if (!pi->pcie_dpm_key_disabled) { + if (pi->dpm_level_enable_mask.pcie_dpm_enable_mask) { + result = ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_PCIeDPM_SetEnabledMask, + pi->dpm_level_enable_mask.pcie_dpm_enable_mask); + if (result != PPSMC_Result_OK) + return -EINVAL; + } + } + + ci_apply_disp_minimum_voltage_request(rdev); + + return 0; +} + +static void ci_find_dpm_states_clocks_in_dpm_table(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *state = ci_get_ps(radeon_state); + struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table; + u32 sclk = state->performance_levels[state->performance_level_count-1].sclk; + struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table; + u32 mclk = state->performance_levels[state->performance_level_count-1].mclk; + u32 i; + + pi->need_update_smu7_dpm_table = 0; + + for (i = 0; i < sclk_table->count; i++) { + if (sclk == sclk_table->dpm_levels[i].value) + break; + } + + if (i >= sclk_table->count) { + pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + } else { + /* XXX check display min clock requirements */ + if (0 != CISLAND_MINIMUM_ENGINE_CLOCK) + pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK; + } + + for (i = 0; i < mclk_table->count; i++) { + if (mclk == mclk_table->dpm_levels[i].value) + break; + } + + if (i >= mclk_table->count) + pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + + if (rdev->pm.dpm.current_active_crtc_count != + rdev->pm.dpm.new_active_crtc_count) + pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK; +} + +static int ci_populate_and_upload_sclk_mclk_dpm_levels(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *state = ci_get_ps(radeon_state); + u32 sclk = state->performance_levels[state->performance_level_count-1].sclk; + u32 mclk = state->performance_levels[state->performance_level_count-1].mclk; + struct ci_dpm_table *dpm_table = &pi->dpm_table; + int ret; + + if (!pi->need_update_smu7_dpm_table) + return 0; + + if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK) + dpm_table->sclk_table.dpm_levels[dpm_table->sclk_table.count-1].value = sclk; + + if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) + dpm_table->mclk_table.dpm_levels[dpm_table->mclk_table.count-1].value = mclk; + + if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK)) { + ret = ci_populate_all_graphic_levels(rdev); + if (ret) + return ret; + } + + if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_MCLK | DPMTABLE_UPDATE_MCLK)) { + ret = ci_populate_all_memory_levels(rdev); + if (ret) + return ret; + } + + return 0; +} + +static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.uvd_dpm_enable_mask = 0; + + for (i = rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.uvd_dpm_enable_mask |= 1 << i; + + if (!pi->caps_uvd_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_UVDDPM_SetEnabledMask, + pi->dpm_level_enable_mask.uvd_dpm_enable_mask); + + if (pi->last_mclk_dpm_enable_mask & 0x1) { + pi->uvd_enabled = true; + pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE; + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.mclk_dpm_enable_mask); + } + } else { + if (pi->last_mclk_dpm_enable_mask & 0x1) { + pi->uvd_enabled = false; + pi->dpm_level_enable_mask.mclk_dpm_enable_mask |= 1; + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.mclk_dpm_enable_mask); + } + } + + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +#if 0 +static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.vce_dpm_enable_mask = 0; + for (i = rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.vce_dpm_enable_mask |= 1 << i; + + if (!pi->caps_vce_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_VCEDPM_SetEnabledMask, + pi->dpm_level_enable_mask.vce_dpm_enable_mask); + } + + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.samu_dpm_enable_mask = 0; + for (i = rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.samu_dpm_enable_mask |= 1 << i; + + if (!pi->caps_samu_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + pi->dpm_level_enable_mask.samu_dpm_enable_mask); + } + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_SAMUDPM_Enable : PPSMC_MSG_SAMUDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +static int ci_enable_acp_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.acp_dpm_enable_mask = 0; + for (i = rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.acp_dpm_enable_mask |= 1 << i; + + if (!pi->caps_acp_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_ACPDPM_SetEnabledMask, + pi->dpm_level_enable_mask.acp_dpm_enable_mask); + } + + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_ACPDPM_Enable : PPSMC_MSG_ACPDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} +#endif + +static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + if (!gate) { + if (pi->caps_uvd_dpm || + (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count <= 0)) + pi->smc_state_table.UvdBootLevel = 0; + else + pi->smc_state_table.UvdBootLevel = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1; + + tmp = RREG32_SMC(DPM_TABLE_475); + tmp &= ~UvdBootLevel_MASK; + tmp |= UvdBootLevel(pi->smc_state_table.UvdBootLevel); + WREG32_SMC(DPM_TABLE_475, tmp); + } + + return ci_enable_uvd_dpm(rdev, !gate); +} + +#if 0 +static u8 ci_get_vce_boot_level(struct radeon_device *rdev) +{ + u8 i; + u32 min_evclk = 30000; /* ??? */ + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + for (i = 0; i < table->count; i++) { + if (table->entries[i].evclk >= min_evclk) + return i; + } + + return table->count - 1; +} + +static int ci_update_vce_dpm(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0); + bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0); + int ret = 0; + u32 tmp; + + if (new_vce_clock_non_zero != old_vce_clock_non_zero) { + if (new_vce_clock_non_zero) { + pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); + + tmp = RREG32_SMC(DPM_TABLE_475); + tmp &= ~VceBootLevel_MASK; + tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel); + WREG32_SMC(DPM_TABLE_475, tmp); + + ret = ci_enable_vce_dpm(rdev, true); + } else { + ret = ci_enable_vce_dpm(rdev, false); + } + } + return ret; +} + +static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate) +{ + return ci_enable_samu_dpm(rdev, gate); +} + +static int ci_update_acp_dpm(struct radeon_device *rdev, bool gate) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + if (!gate) { + pi->smc_state_table.AcpBootLevel = 0; + + tmp = RREG32_SMC(DPM_TABLE_475); + tmp &= ~AcpBootLevel_MASK; + tmp |= AcpBootLevel(pi->smc_state_table.AcpBootLevel); + WREG32_SMC(DPM_TABLE_475, tmp); + } + + return ci_enable_acp_dpm(rdev, !gate); +} +#endif + +static int ci_generate_dpm_level_enable_mask(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + ret = ci_trim_dpm_states(rdev, radeon_state); + if (ret) + return ret; + + pi->dpm_level_enable_mask.sclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&pi->dpm_table.sclk_table); + pi->dpm_level_enable_mask.mclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&pi->dpm_table.mclk_table); + pi->last_mclk_dpm_enable_mask = + pi->dpm_level_enable_mask.mclk_dpm_enable_mask; + if (pi->uvd_enabled) { + if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask & 1) + pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE; + } + pi->dpm_level_enable_mask.pcie_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&pi->dpm_table.pcie_speed_table); + + return 0; +} + +static int ci_set_mc_special_registers(struct radeon_device *rdev, + struct ci_mc_reg_table *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 i, j, k; + u32 temp_reg; + + for (i = 0, j = table->last; i < table->last; i++) { + if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + switch(table->mc_reg_address[i].s1 << 2) { + case MC_SEQ_MISC1: + temp_reg = RREG32(MC_PMG_CMD_EMRS); + table->mc_reg_address[j].s1 = MC_PMG_CMD_EMRS >> 2; + table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_EMRS_LP >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + ((temp_reg & 0xffff0000)) | ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); + } + j++; + if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + + temp_reg = RREG32(MC_PMG_CMD_MRS); + table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS >> 2; + table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS_LP >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + if (!pi->mem_gddr5) + table->mc_reg_table_entry[k].mc_data[j] |= 0x100; + } + j++; + if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + + if (!pi->mem_gddr5) { + table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD >> 2; + table->mc_reg_address[j].s0 = MC_PMG_AUTO_CMD >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; + } + j++; + if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + } + break; + case MC_SEQ_RESERVE_M: + temp_reg = RREG32(MC_PMG_CMD_MRS1); + table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS1 >> 2; + table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS1_LP >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + } + j++; + if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + break; + default: + break; + } + + } + + table->last = j; + + return 0; +} + +static bool ci_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg) +{ + bool result = true; + + switch(in_reg) { + case MC_SEQ_RAS_TIMING >> 2: + *out_reg = MC_SEQ_RAS_TIMING_LP >> 2; + break; + case MC_SEQ_DLL_STBY >> 2: + *out_reg = MC_SEQ_DLL_STBY_LP >> 2; + break; + case MC_SEQ_G5PDX_CMD0 >> 2: + *out_reg = MC_SEQ_G5PDX_CMD0_LP >> 2; + break; + case MC_SEQ_G5PDX_CMD1 >> 2: + *out_reg = MC_SEQ_G5PDX_CMD1_LP >> 2; + break; + case MC_SEQ_G5PDX_CTRL >> 2: + *out_reg = MC_SEQ_G5PDX_CTRL_LP >> 2; + break; + case MC_SEQ_CAS_TIMING >> 2: + *out_reg = MC_SEQ_CAS_TIMING_LP >> 2; + break; + case MC_SEQ_MISC_TIMING >> 2: + *out_reg = MC_SEQ_MISC_TIMING_LP >> 2; + break; + case MC_SEQ_MISC_TIMING2 >> 2: + *out_reg = MC_SEQ_MISC_TIMING2_LP >> 2; + break; + case MC_SEQ_PMG_DVS_CMD >> 2: + *out_reg = MC_SEQ_PMG_DVS_CMD_LP >> 2; + break; + case MC_SEQ_PMG_DVS_CTL >> 2: + *out_reg = MC_SEQ_PMG_DVS_CTL_LP >> 2; + break; + case MC_SEQ_RD_CTL_D0 >> 2: + *out_reg = MC_SEQ_RD_CTL_D0_LP >> 2; + break; + case MC_SEQ_RD_CTL_D1 >> 2: + *out_reg = MC_SEQ_RD_CTL_D1_LP >> 2; + break; + case MC_SEQ_WR_CTL_D0 >> 2: + *out_reg = MC_SEQ_WR_CTL_D0_LP >> 2; + break; + case MC_SEQ_WR_CTL_D1 >> 2: + *out_reg = MC_SEQ_WR_CTL_D1_LP >> 2; + break; + case MC_PMG_CMD_EMRS >> 2: + *out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2; + break; + case MC_PMG_CMD_MRS >> 2: + *out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2; + break; + case MC_PMG_CMD_MRS1 >> 2: + *out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2; + break; + case MC_SEQ_PMG_TIMING >> 2: + *out_reg = MC_SEQ_PMG_TIMING_LP >> 2; + break; + case MC_PMG_CMD_MRS2 >> 2: + *out_reg = MC_SEQ_PMG_CMD_MRS2_LP >> 2; + break; + case MC_SEQ_WR_CTL_2 >> 2: + *out_reg = MC_SEQ_WR_CTL_2_LP >> 2; + break; + default: + result = false; + break; + } + + return result; +} + +static void ci_set_valid_flag(struct ci_mc_reg_table *table) +{ + u8 i, j; + + for (i = 0; i < table->last; i++) { + for (j = 1; j < table->num_entries; j++) { + if (table->mc_reg_table_entry[j-1].mc_data[i] != + table->mc_reg_table_entry[j].mc_data[i]) { + table->valid_flag |= 1 << i; + break; + } + } + } +} + +static void ci_set_s0_mc_reg_index(struct ci_mc_reg_table *table) +{ + u32 i; + u16 address; + + for (i = 0; i < table->last; i++) { + table->mc_reg_address[i].s0 = + ci_check_s0_mc_reg_index(table->mc_reg_address[i].s1, &address) ? + address : table->mc_reg_address[i].s1; + } +} + +static int ci_copy_vbios_mc_reg_table(const struct atom_mc_reg_table *table, + struct ci_mc_reg_table *ci_table) +{ + u8 i, j; + + if (table->last > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + if (table->num_entries > MAX_AC_TIMING_ENTRIES) + return -EINVAL; + + for (i = 0; i < table->last; i++) + ci_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; + + ci_table->last = table->last; + + for (i = 0; i < table->num_entries; i++) { + ci_table->mc_reg_table_entry[i].mclk_max = + table->mc_reg_table_entry[i].mclk_max; + for (j = 0; j < table->last; j++) + ci_table->mc_reg_table_entry[i].mc_data[j] = + table->mc_reg_table_entry[i].mc_data[j]; + } + ci_table->num_entries = table->num_entries; + + return 0; +} + +static int ci_initialize_mc_reg_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct atom_mc_reg_table *table; + struct ci_mc_reg_table *ci_table = &pi->mc_reg_table; + u8 module_index = rv770_get_memory_module_index(rdev); + int ret; + + table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + WREG32(MC_SEQ_RAS_TIMING_LP, RREG32(MC_SEQ_RAS_TIMING)); + WREG32(MC_SEQ_CAS_TIMING_LP, RREG32(MC_SEQ_CAS_TIMING)); + WREG32(MC_SEQ_DLL_STBY_LP, RREG32(MC_SEQ_DLL_STBY)); + WREG32(MC_SEQ_G5PDX_CMD0_LP, RREG32(MC_SEQ_G5PDX_CMD0)); + WREG32(MC_SEQ_G5PDX_CMD1_LP, RREG32(MC_SEQ_G5PDX_CMD1)); + WREG32(MC_SEQ_G5PDX_CTRL_LP, RREG32(MC_SEQ_G5PDX_CTRL)); + WREG32(MC_SEQ_PMG_DVS_CMD_LP, RREG32(MC_SEQ_PMG_DVS_CMD)); + WREG32(MC_SEQ_PMG_DVS_CTL_LP, RREG32(MC_SEQ_PMG_DVS_CTL)); + WREG32(MC_SEQ_MISC_TIMING_LP, RREG32(MC_SEQ_MISC_TIMING)); + WREG32(MC_SEQ_MISC_TIMING2_LP, RREG32(MC_SEQ_MISC_TIMING2)); + WREG32(MC_SEQ_PMG_CMD_EMRS_LP, RREG32(MC_PMG_CMD_EMRS)); + WREG32(MC_SEQ_PMG_CMD_MRS_LP, RREG32(MC_PMG_CMD_MRS)); + WREG32(MC_SEQ_PMG_CMD_MRS1_LP, RREG32(MC_PMG_CMD_MRS1)); + WREG32(MC_SEQ_WR_CTL_D0_LP, RREG32(MC_SEQ_WR_CTL_D0)); + WREG32(MC_SEQ_WR_CTL_D1_LP, RREG32(MC_SEQ_WR_CTL_D1)); + WREG32(MC_SEQ_RD_CTL_D0_LP, RREG32(MC_SEQ_RD_CTL_D0)); + WREG32(MC_SEQ_RD_CTL_D1_LP, RREG32(MC_SEQ_RD_CTL_D1)); + WREG32(MC_SEQ_PMG_TIMING_LP, RREG32(MC_SEQ_PMG_TIMING)); + WREG32(MC_SEQ_PMG_CMD_MRS2_LP, RREG32(MC_PMG_CMD_MRS2)); + WREG32(MC_SEQ_WR_CTL_2_LP, RREG32(MC_SEQ_WR_CTL_2)); + + ret = radeon_atom_init_mc_reg_table(rdev, module_index, table); + if (ret) + goto init_mc_done; + + ret = ci_copy_vbios_mc_reg_table(table, ci_table); + if (ret) + goto init_mc_done; + + ci_set_s0_mc_reg_index(ci_table); + + ret = ci_set_mc_special_registers(rdev, ci_table); + if (ret) + goto init_mc_done; + + ci_set_valid_flag(ci_table); + +init_mc_done: + kfree(table); + + return ret; +} + +static int ci_populate_mc_reg_addresses(struct radeon_device *rdev, + SMU7_Discrete_MCRegisters *mc_reg_table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i, j; + + for (i = 0, j = 0; j < pi->mc_reg_table.last; j++) { + if (pi->mc_reg_table.valid_flag & (1 << j)) { + if (i >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + mc_reg_table->address[i].s0 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s0); + mc_reg_table->address[i].s1 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s1); + i++; + } + } + + mc_reg_table->last = (u8)i; + + return 0; +} + +static void ci_convert_mc_registers(const struct ci_mc_reg_entry *entry, + SMU7_Discrete_MCRegisterSet *data, + u32 num_entries, u32 valid_flag) +{ + u32 i, j; + + for (i = 0, j = 0; j < num_entries; j++) { + if (valid_flag & (1 << j)) { + data->value[i] = cpu_to_be32(entry->mc_data[j]); + i++; + } + } +} + +static void ci_convert_mc_reg_table_entry_to_smc(struct radeon_device *rdev, + const u32 memory_clock, + SMU7_Discrete_MCRegisterSet *mc_reg_table_data) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i = 0; + + for(i = 0; i < pi->mc_reg_table.num_entries; i++) { + if (memory_clock <= pi->mc_reg_table.mc_reg_table_entry[i].mclk_max) + break; + } + + if ((i == pi->mc_reg_table.num_entries) && (i > 0)) + --i; + + ci_convert_mc_registers(&pi->mc_reg_table.mc_reg_table_entry[i], + mc_reg_table_data, pi->mc_reg_table.last, + pi->mc_reg_table.valid_flag); +} + +static void ci_convert_mc_reg_table_to_smc(struct radeon_device *rdev, + SMU7_Discrete_MCRegisters *mc_reg_table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i; + + for (i = 0; i < pi->dpm_table.mclk_table.count; i++) + ci_convert_mc_reg_table_entry_to_smc(rdev, + pi->dpm_table.mclk_table.dpm_levels[i].value, + &mc_reg_table->data[i]); +} + +static int ci_populate_initial_mc_reg_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters)); + + ret = ci_populate_mc_reg_addresses(rdev, &pi->smc_mc_reg_table); + if (ret) + return ret; + ci_convert_mc_reg_table_to_smc(rdev, &pi->smc_mc_reg_table); + + return ci_copy_bytes_to_smc(rdev, + pi->mc_reg_table_start, + (u8 *)&pi->smc_mc_reg_table, + sizeof(SMU7_Discrete_MCRegisters), + pi->sram_end); +} + +static int ci_update_and_upload_mc_reg_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!(pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) + return 0; + + memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters)); + + ci_convert_mc_reg_table_to_smc(rdev, &pi->smc_mc_reg_table); + + return ci_copy_bytes_to_smc(rdev, + pi->mc_reg_table_start + + offsetof(SMU7_Discrete_MCRegisters, data[0]), + (u8 *)&pi->smc_mc_reg_table.data[0], + sizeof(SMU7_Discrete_MCRegisterSet) * + pi->dpm_table.mclk_table.count, + pi->sram_end); +} + +static void ci_enable_voltage_control(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + tmp |= VOLT_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); +} + +static enum radeon_pcie_gen ci_get_maximum_link_speed(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_ps *state = ci_get_ps(radeon_state); + int i; + u16 pcie_speed, max_speed = 0; + + for (i = 0; i < state->performance_level_count; i++) { + pcie_speed = state->performance_levels[i].pcie_gen; + if (max_speed < pcie_speed) + max_speed = pcie_speed; + } + + return max_speed; +} + +static u16 ci_get_current_pcie_speed(struct radeon_device *rdev) +{ + u32 speed_cntl = 0; + + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL) & LC_CURRENT_DATA_RATE_MASK; + speed_cntl >>= LC_CURRENT_DATA_RATE_SHIFT; + + return (u16)speed_cntl; +} + +static int ci_get_current_pcie_lane_number(struct radeon_device *rdev) +{ + u32 link_width = 0; + + link_width = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL) & LC_LINK_WIDTH_RD_MASK; + link_width >>= LC_LINK_WIDTH_RD_SHIFT; + + switch (link_width) { + case RADEON_PCIE_LC_LINK_WIDTH_X1: + return 1; + case RADEON_PCIE_LC_LINK_WIDTH_X2: + return 2; + case RADEON_PCIE_LC_LINK_WIDTH_X4: + return 4; + case RADEON_PCIE_LC_LINK_WIDTH_X8: + return 8; + case RADEON_PCIE_LC_LINK_WIDTH_X12: + /* not actually supported */ + return 12; + case RADEON_PCIE_LC_LINK_WIDTH_X0: + case RADEON_PCIE_LC_LINK_WIDTH_X16: + default: + return 16; + } +} + +static void ci_request_link_speed_change_before_state_change(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + enum radeon_pcie_gen target_link_speed = + ci_get_maximum_link_speed(rdev, radeon_new_state); + enum radeon_pcie_gen current_link_speed; + + if (pi->force_pcie_gen == RADEON_PCIE_GEN_INVALID) + current_link_speed = ci_get_maximum_link_speed(rdev, radeon_current_state); + else + current_link_speed = pi->force_pcie_gen; + + pi->force_pcie_gen = RADEON_PCIE_GEN_INVALID; + pi->pspp_notify_required = false; + if (target_link_speed > current_link_speed) { + switch (target_link_speed) { + case RADEON_PCIE_GEN3: + if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN3, false) == 0) + break; + pi->force_pcie_gen = RADEON_PCIE_GEN2; + if (current_link_speed == RADEON_PCIE_GEN2) + break; + case RADEON_PCIE_GEN2: + if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN2, false) == 0) + break; + default: + pi->force_pcie_gen = ci_get_current_pcie_speed(rdev); + break; + } + } else { + if (target_link_speed < current_link_speed) + pi->pspp_notify_required = true; + } +} + +static void ci_notify_link_speed_change_after_state_change(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + enum radeon_pcie_gen target_link_speed = + ci_get_maximum_link_speed(rdev, radeon_new_state); + u8 request; + + if (pi->pspp_notify_required) { + if (target_link_speed == RADEON_PCIE_GEN3) + request = PCIE_PERF_REQ_PECI_GEN3; + else if (target_link_speed == RADEON_PCIE_GEN2) + request = PCIE_PERF_REQ_PECI_GEN2; + else + request = PCIE_PERF_REQ_PECI_GEN1; + + if ((request == PCIE_PERF_REQ_PECI_GEN1) && + (ci_get_current_pcie_speed(rdev) > 0)) + return; + + radeon_acpi_pcie_performance_request(rdev, request, false); + } +} + +static int ci_set_private_data_variables_based_on_pptable(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *allowed_sclk_vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + struct radeon_clock_voltage_dependency_table *allowed_mclk_vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk; + struct radeon_clock_voltage_dependency_table *allowed_mclk_vddci_table = + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk; + + if (allowed_sclk_vddc_table == NULL) + return -EINVAL; + if (allowed_sclk_vddc_table->count < 1) + return -EINVAL; + if (allowed_mclk_vddc_table == NULL) + return -EINVAL; + if (allowed_mclk_vddc_table->count < 1) + return -EINVAL; + if (allowed_mclk_vddci_table == NULL) + return -EINVAL; + if (allowed_mclk_vddci_table->count < 1) + return -EINVAL; + + pi->min_vddc_in_pp_table = allowed_sclk_vddc_table->entries[0].v; + pi->max_vddc_in_pp_table = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; + + pi->min_vddci_in_pp_table = allowed_mclk_vddci_table->entries[0].v; + pi->max_vddci_in_pp_table = + allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v; + + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk; + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.mclk = + allowed_mclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk; + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddc = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci = + allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v; + + return 0; +} + +static void ci_patch_with_vddc_leakage(struct radeon_device *rdev, u16 *vddc) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_leakage_voltage *leakage_table = &pi->vddc_leakage; + u32 leakage_index; + + for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { + if (leakage_table->leakage_id[leakage_index] == *vddc) { + *vddc = leakage_table->actual_voltage[leakage_index]; + break; + } + } +} + +static void ci_patch_with_vddci_leakage(struct radeon_device *rdev, u16 *vddci) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_leakage_voltage *leakage_table = &pi->vddci_leakage; + u32 leakage_index; + + for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { + if (leakage_table->leakage_id[leakage_index] == *vddci) { + *vddci = leakage_table->actual_voltage[leakage_index]; + break; + } + } +} + +static void ci_patch_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_clock_voltage_dependency_table_with_vddci_leakage(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddci_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_vce_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_uvd_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_phase_shedding_limits_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].voltage); + } +} + +static void ci_patch_clock_voltage_limits_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_clock_and_voltage_limits *table) +{ + if (table) { + ci_patch_with_vddc_leakage(rdev, (u16 *)&table->vddc); + ci_patch_with_vddci_leakage(rdev, (u16 *)&table->vddci); + } +} + +static void ci_patch_cac_leakage_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_cac_leakage_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].vddc); + } +} + +static void ci_patch_dependency_tables_with_leakage(struct radeon_device *rdev) +{ + + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk); + ci_patch_clock_voltage_dependency_table_with_vddci_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk); + ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table); + ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table); + ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.phase_shedding_limits_table); + ci_patch_clock_voltage_limits_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac); + ci_patch_clock_voltage_limits_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc); + ci_patch_cac_leakage_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.cac_leakage_table); + +} + +static void ci_get_memory_type(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + tmp = RREG32(MC_SEQ_MISC0); + + if (((tmp & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT) == + MC_SEQ_MISC0_GDDR5_VALUE) + pi->mem_gddr5 = true; + else + pi->mem_gddr5 = false; + +} + +void ci_update_current_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *new_ps = ci_get_ps(rps); + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->current_rps = *rps; + pi->current_ps = *new_ps; + pi->current_rps.ps_priv = &pi->current_ps; +} + +void ci_update_requested_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *new_ps = ci_get_ps(rps); + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->requested_rps = *rps; + pi->requested_ps = *new_ps; + pi->requested_rps.ps_priv = &pi->requested_ps; +} + +int ci_dpm_pre_set_power_state(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps requested_ps = *rdev->pm.dpm.requested_ps; + struct radeon_ps *new_ps = &requested_ps; + + ci_update_requested_ps(rdev, new_ps); + + ci_apply_state_adjust_rules(rdev, &pi->requested_rps); + + return 0; +} + +void ci_dpm_post_set_power_state(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + + ci_update_current_ps(rdev, new_ps); +} + + +void ci_dpm_setup_asic(struct radeon_device *rdev) +{ + ci_read_clock_registers(rdev); + ci_get_memory_type(rdev); + ci_enable_acpi_power_management(rdev); + ci_init_sclk_t(rdev); +} + +int ci_dpm_enable(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps; + int ret; + + if (ci_is_smc_running(rdev)) + return -EINVAL; + if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) { + ci_enable_voltage_control(rdev); + ret = ci_construct_voltage_tables(rdev); + if (ret) { + DRM_ERROR("ci_construct_voltage_tables failed\n"); + return ret; + } + } + if (pi->caps_dynamic_ac_timing) { + ret = ci_initialize_mc_reg_table(rdev); + if (ret) + pi->caps_dynamic_ac_timing = false; + } + if (pi->dynamic_ss) + ci_enable_spread_spectrum(rdev, true); + if (pi->thermal_protection) + ci_enable_thermal_protection(rdev, true); + ci_program_sstp(rdev); + ci_enable_display_gap(rdev); + ci_program_vc(rdev); + ret = ci_upload_firmware(rdev); + if (ret) { + DRM_ERROR("ci_upload_firmware failed\n"); + return ret; + } + ret = ci_process_firmware_header(rdev); + if (ret) { + DRM_ERROR("ci_process_firmware_header failed\n"); + return ret; + } + ret = ci_initial_switch_from_arb_f0_to_f1(rdev); + if (ret) { + DRM_ERROR("ci_initial_switch_from_arb_f0_to_f1 failed\n"); + return ret; + } + ret = ci_init_smc_table(rdev); + if (ret) { + DRM_ERROR("ci_init_smc_table failed\n"); + return ret; + } + ret = ci_init_arb_table_index(rdev); + if (ret) { + DRM_ERROR("ci_init_arb_table_index failed\n"); + return ret; + } + if (pi->caps_dynamic_ac_timing) { + ret = ci_populate_initial_mc_reg_table(rdev); + if (ret) { + DRM_ERROR("ci_populate_initial_mc_reg_table failed\n"); + return ret; + } + } + ret = ci_populate_pm_base(rdev); + if (ret) { + DRM_ERROR("ci_populate_pm_base failed\n"); + return ret; + } + ci_dpm_start_smc(rdev); + ci_enable_vr_hot_gpio_interrupt(rdev); + ret = ci_notify_smc_display_change(rdev, false); + if (ret) { + DRM_ERROR("ci_notify_smc_display_change failed\n"); + return ret; + } + ci_enable_sclk_control(rdev, true); + ret = ci_enable_ulv(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_ulv failed\n"); + return ret; + } + ret = ci_enable_ds_master_switch(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_ds_master_switch failed\n"); + return ret; + } + ret = ci_start_dpm(rdev); + if (ret) { + DRM_ERROR("ci_start_dpm failed\n"); + return ret; + } + ret = ci_enable_didt(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_didt failed\n"); + return ret; + } + ret = ci_enable_smc_cac(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_smc_cac failed\n"); + return ret; + } + ret = ci_enable_power_containment(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_power_containment failed\n"); + return ret; + } + if (rdev->irq.installed && + r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) { +#if 0 + PPSMC_Result result; +#endif + ret = ci_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) { + DRM_ERROR("ci_set_thermal_temperature_range failed\n"); + return ret; + } + rdev->irq.dpm_thermal = true; + radeon_irq_set(rdev); +#if 0 + result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableThermalInterrupt); + + if (result != PPSMC_Result_OK) + DRM_DEBUG_KMS("Could not enable thermal interrupts.\n"); +#endif + } + + ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true); + + ci_update_current_ps(rdev, boot_ps); + + return 0; +} + +void ci_dpm_disable(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps; + + if (!ci_is_smc_running(rdev)) + return; + + if (pi->thermal_protection) + ci_enable_thermal_protection(rdev, false); + ci_enable_power_containment(rdev, false); + ci_enable_smc_cac(rdev, false); + ci_enable_didt(rdev, false); + ci_enable_spread_spectrum(rdev, false); + ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, false); + ci_stop_dpm(rdev); + ci_enable_ds_master_switch(rdev, true); + ci_enable_ulv(rdev, false); + ci_clear_vc(rdev); + ci_reset_to_default(rdev); + ci_dpm_stop_smc(rdev); + ci_force_switch_to_arb_f0(rdev); + + ci_update_current_ps(rdev, boot_ps); +} + +int ci_dpm_set_power_state(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + struct radeon_ps *old_ps = &pi->current_rps; + int ret; + + ci_find_dpm_states_clocks_in_dpm_table(rdev, new_ps); + if (pi->pcie_performance_request) + ci_request_link_speed_change_before_state_change(rdev, new_ps, old_ps); + ret = ci_freeze_sclk_mclk_dpm(rdev); + if (ret) { + DRM_ERROR("ci_freeze_sclk_mclk_dpm failed\n"); + return ret; + } + ret = ci_populate_and_upload_sclk_mclk_dpm_levels(rdev, new_ps); + if (ret) { + DRM_ERROR("ci_populate_and_upload_sclk_mclk_dpm_levels failed\n"); + return ret; + } + ret = ci_generate_dpm_level_enable_mask(rdev, new_ps); + if (ret) { + DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); + return ret; + } +#if 0 + ret = ci_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("ci_update_vce_dpm failed\n"); + return ret; + } +#endif + ret = ci_update_uvd_dpm(rdev, false); + if (ret) { + DRM_ERROR("ci_update_uvd_dpm failed\n"); + return ret; + } + ret = ci_update_sclk_t(rdev); + if (ret) { + DRM_ERROR("ci_update_sclk_t failed\n"); + return ret; + } + if (pi->caps_dynamic_ac_timing) { + ret = ci_update_and_upload_mc_reg_table(rdev); + if (ret) { + DRM_ERROR("ci_update_and_upload_mc_reg_table failed\n"); + return ret; + } + } + ret = ci_program_memory_timing_parameters(rdev); + if (ret) { + DRM_ERROR("ci_program_memory_timing_parameters failed\n"); + return ret; + } + ret = ci_unfreeze_sclk_mclk_dpm(rdev); + if (ret) { + DRM_ERROR("ci_unfreeze_sclk_mclk_dpm failed\n"); + return ret; + } + ret = ci_upload_dpm_level_enable_mask(rdev); + if (ret) { + DRM_ERROR("ci_upload_dpm_level_enable_mask failed\n"); + return ret; + } + if (pi->pcie_performance_request) + ci_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps); + + return 0; +} + +int ci_dpm_power_control_set_level(struct radeon_device *rdev) +{ + return ci_power_control_set_level(rdev); +} + +void ci_dpm_reset_asic(struct radeon_device *rdev) +{ + ci_set_boot_state(rdev); +} + +void ci_dpm_display_configuration_changed(struct radeon_device *rdev) +{ + ci_program_display_gap(rdev); +} + +union power_info { + struct _ATOM_POWERPLAY_INFO info; + struct _ATOM_POWERPLAY_INFO_V2 info_2; + struct _ATOM_POWERPLAY_INFO_V3 info_3; + struct _ATOM_PPLIB_POWERPLAYTABLE pplib; + struct _ATOM_PPLIB_POWERPLAYTABLE2 pplib2; + struct _ATOM_PPLIB_POWERPLAYTABLE3 pplib3; +}; + +union pplib_clock_info { + struct _ATOM_PPLIB_R600_CLOCK_INFO r600; + struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780; + struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen; + struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo; + struct _ATOM_PPLIB_SI_CLOCK_INFO si; + struct _ATOM_PPLIB_CI_CLOCK_INFO ci; +}; + +union pplib_power_state { + struct _ATOM_PPLIB_STATE v1; + struct _ATOM_PPLIB_STATE_V2 v2; +}; + +static void ci_parse_pplib_non_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info, + u8 table_rev) +{ + rps->caps = le32_to_cpu(non_clock_info->ulCapsAndSettings); + rps->class = le16_to_cpu(non_clock_info->usClassification); + rps->class2 = le16_to_cpu(non_clock_info->usClassification2); + + if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { + rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); + rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); + } else { + rps->vclk = 0; + rps->dclk = 0; + } + + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) + rdev->pm.dpm.boot_ps = rps; + if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) + rdev->pm.dpm.uvd_ps = rps; +} + +static void ci_parse_pplib_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, int index, + union pplib_clock_info *clock_info) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *ps = ci_get_ps(rps); + struct ci_pl *pl = &ps->performance_levels[index]; + + ps->performance_level_count = index + 1; + + pl->sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); + pl->sclk |= clock_info->ci.ucEngineClockHigh << 16; + pl->mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); + pl->mclk |= clock_info->ci.ucMemoryClockHigh << 16; + + pl->pcie_gen = r600_get_pcie_gen_support(rdev, + pi->sys_pcie_mask, + pi->vbios_boot_state.pcie_gen_bootup_value, + clock_info->ci.ucPCIEGen); + pl->pcie_lane = r600_get_pcie_lane_support(rdev, + pi->vbios_boot_state.pcie_lane_bootup_value, + le16_to_cpu(clock_info->ci.usPCIELane)); + + if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) { + pi->acpi_pcie_gen = pl->pcie_gen; + } + + if (rps->class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) { + pi->ulv.supported = true; + pi->ulv.pl = *pl; + pi->ulv.cg_ulv_parameter = CISLANDS_CGULVPARAMETER_DFLT; + } + + /* patch up boot state */ + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) { + pl->mclk = pi->vbios_boot_state.mclk_bootup_value; + pl->sclk = pi->vbios_boot_state.sclk_bootup_value; + pl->pcie_gen = pi->vbios_boot_state.pcie_gen_bootup_value; + pl->pcie_lane = pi->vbios_boot_state.pcie_lane_bootup_value; + } + + switch (rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) { + case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY: + pi->use_pcie_powersaving_levels = true; + if (pi->pcie_gen_powersaving.max < pl->pcie_gen) + pi->pcie_gen_powersaving.max = pl->pcie_gen; + if (pi->pcie_gen_powersaving.min > pl->pcie_gen) + pi->pcie_gen_powersaving.min = pl->pcie_gen; + if (pi->pcie_lane_powersaving.max < pl->pcie_lane) + pi->pcie_lane_powersaving.max = pl->pcie_lane; + if (pi->pcie_lane_powersaving.min > pl->pcie_lane) + pi->pcie_lane_powersaving.min = pl->pcie_lane; + break; + case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE: + pi->use_pcie_performance_levels = true; + if (pi->pcie_gen_performance.max < pl->pcie_gen) + pi->pcie_gen_performance.max = pl->pcie_gen; + if (pi->pcie_gen_performance.min > pl->pcie_gen) + pi->pcie_gen_performance.min = pl->pcie_gen; + if (pi->pcie_lane_performance.max < pl->pcie_lane) + pi->pcie_lane_performance.max = pl->pcie_lane; + if (pi->pcie_lane_performance.min > pl->pcie_lane) + pi->pcie_lane_performance.min = pl->pcie_lane; + break; + default: + break; + } +} + +static int ci_parse_power_table(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info; + union pplib_power_state *power_state; + int i, j, k, non_clock_array_index, clock_array_index; + union pplib_clock_info *clock_info; + struct _StateArray *state_array; + struct _ClockInfoArray *clock_info_array; + struct _NonClockInfoArray *non_clock_info_array; + union power_info *power_info; + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); + u16 data_offset; + u8 frev, crev; + u8 *power_state_offset; + struct ci_ps *ps; + + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) + return -EINVAL; + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); + + state_array = (struct _StateArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usStateArrayOffset)); + clock_info_array = (struct _ClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset)); + non_clock_info_array = (struct _NonClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); + + rdev->pm.dpm.ps = kzalloc(sizeof(struct radeon_ps) * + state_array->ucNumEntries, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + power_state = (union pplib_power_state *)power_state_offset; + non_clock_array_index = power_state->v2.nonClockInfoIndex; + non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) + &non_clock_info_array->nonClockInfo[non_clock_array_index]; + if (!rdev->pm.power_state[i].clock_info) + return -EINVAL; + ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL); + if (ps == NULL) { + kfree(rdev->pm.dpm.ps); + return -ENOMEM; + } + rdev->pm.dpm.ps[i].ps_priv = ps; + ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], + non_clock_info, + non_clock_info_array->ucEntrySize); + k = 0; + for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { + clock_array_index = power_state->v2.clockInfoIndex[j]; + if (clock_array_index >= clock_info_array->ucNumEntries) + continue; + if (k >= CISLANDS_MAX_HARDWARE_POWERLEVELS) + break; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + ci_parse_pplib_clock_info(rdev, + &rdev->pm.dpm.ps[i], k, + clock_info); + k++; + } + power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + } + rdev->pm.dpm.num_ps = state_array->ucNumEntries; + return 0; +} + +int ci_get_vbios_boot_values(struct radeon_device *rdev, + struct ci_vbios_boot_state *boot_state) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, FirmwareInfo); + ATOM_FIRMWARE_INFO_V2_2 *firmware_info; + u8 frev, crev; + u16 data_offset; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + firmware_info = + (ATOM_FIRMWARE_INFO_V2_2 *)(mode_info->atom_context->bios + + data_offset); + boot_state->mvdd_bootup_value = le16_to_cpu(firmware_info->usBootUpMVDDCVoltage); + boot_state->vddc_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCVoltage); + boot_state->vddci_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCIVoltage); + boot_state->pcie_gen_bootup_value = ci_get_current_pcie_speed(rdev); + boot_state->pcie_lane_bootup_value = ci_get_current_pcie_lane_number(rdev); + boot_state->sclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultEngineClock); + boot_state->mclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultMemoryClock); + + return 0; + } + return -EINVAL; +} + +void ci_dpm_fini(struct radeon_device *rdev) +{ + int i; + + for (i = 0; i < rdev->pm.dpm.num_ps; i++) { + kfree(rdev->pm.dpm.ps[i].ps_priv); + } + kfree(rdev->pm.dpm.ps); + kfree(rdev->pm.dpm.priv); + kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries); + r600_free_extended_power_table(rdev); +} + +int ci_dpm_init(struct radeon_device *rdev) +{ + int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info); + u16 data_offset, size; + u8 frev, crev; + struct ci_power_info *pi; + int ret; + u32 mask; + + pi = kzalloc(sizeof(struct ci_power_info), GFP_KERNEL); + if (pi == NULL) + return -ENOMEM; + rdev->pm.dpm.priv = pi; + + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret) + pi->sys_pcie_mask = 0; + else + pi->sys_pcie_mask = mask; + pi->force_pcie_gen = RADEON_PCIE_GEN_INVALID; + + pi->pcie_gen_performance.max = RADEON_PCIE_GEN1; + pi->pcie_gen_performance.min = RADEON_PCIE_GEN3; + pi->pcie_gen_powersaving.max = RADEON_PCIE_GEN1; + pi->pcie_gen_powersaving.min = RADEON_PCIE_GEN3; + + pi->pcie_lane_performance.max = 0; + pi->pcie_lane_performance.min = 16; + pi->pcie_lane_powersaving.max = 0; + pi->pcie_lane_powersaving.min = 16; + + ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } + ret = ci_parse_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } + ret = r600_parse_extended_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } + + pi->dll_default_on = false; + pi->sram_end = SMC_RAM_END; + + pi->activity_target[0] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[1] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[2] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[3] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[4] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[5] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[6] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[7] = CISLAND_TARGETACTIVITY_DFLT; + + pi->mclk_activity_target = CISLAND_MCLK_TARGETACTIVITY_DFLT; + + pi->sclk_dpm_key_disabled = 0; + pi->mclk_dpm_key_disabled = 0; + pi->pcie_dpm_key_disabled = 0; + + pi->caps_sclk_ds = true; + + pi->mclk_strobe_mode_threshold = 40000; + pi->mclk_stutter_mode_threshold = 40000; + pi->mclk_edc_enable_threshold = 40000; + pi->mclk_edc_wr_enable_threshold = 40000; + + ci_initialize_powertune_defaults(rdev); + + pi->caps_fps = false; + + pi->caps_sclk_throttle_low_notification = false; + + ci_get_leakage_voltages(rdev); + ci_patch_dependency_tables_with_leakage(rdev); + ci_set_private_data_variables_based_on_pptable(rdev); + + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries = + kzalloc(4 * sizeof(struct radeon_clock_voltage_dependency_entry), GFP_KERNEL); + if (!rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { + ci_dpm_fini(rdev); + return -ENOMEM; + } + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].clk = 36000; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].v = 720; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].clk = 54000; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].v = 810; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].clk = 72000; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].v = 900; + + rdev->pm.dpm.dyn_state.mclk_sclk_ratio = 4; + rdev->pm.dpm.dyn_state.sclk_mclk_delta = 15000; + rdev->pm.dpm.dyn_state.vddc_vddci_delta = 200; + + rdev->pm.dpm.dyn_state.valid_sclk_values.count = 0; + rdev->pm.dpm.dyn_state.valid_sclk_values.values = NULL; + rdev->pm.dpm.dyn_state.valid_mclk_values.count = 0; + rdev->pm.dpm.dyn_state.valid_mclk_values.values = NULL; + + pi->thermal_temp_setting.temperature_low = 99500; + pi->thermal_temp_setting.temperature_high = 100000; + pi->thermal_temp_setting.temperature_shutdown = 104000; + + pi->uvd_enabled = false; + + pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_NONE; + pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_NONE; + pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_NONE; + if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_GPIO_LUT)) + pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; + else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) + pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL) { + if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_GPIO_LUT)) + pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; + else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_SVID2)) + pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; + else + rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL; + } + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_MVDDCONTROL) { + if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_GPIO_LUT)) + pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; + else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_SVID2)) + pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; + else + rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_MVDDCONTROL; + } + + pi->vddc_phase_shed_control = true; + +#if defined(CONFIG_ACPI) + pi->pcie_performance_request = + radeon_acpi_is_pcie_performance_request_supported(rdev); +#else + pi->pcie_performance_request = false; +#endif + + if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size, + &frev, &crev, &data_offset)) { + pi->caps_sclk_ss_support = true; + pi->caps_mclk_ss_support = true; + pi->dynamic_ss = true; + } else { + pi->caps_sclk_ss_support = false; + pi->caps_mclk_ss_support = false; + pi->dynamic_ss = true; + } + + if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE) + pi->thermal_protection = true; + else + pi->thermal_protection = false; + + pi->caps_dynamic_ac_timing = true; + + return 0; +} + +void ci_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *ps = ci_get_ps(rps); + struct ci_pl *pl; + int i; + + r600_dpm_print_class_info(rps->class, rps->class2); + r600_dpm_print_cap_info(rps->caps); + printk("\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + for (i = 0; i < ps->performance_level_count; i++) { + pl = &ps->performance_levels[i]; + printk("\t\tpower level %d sclk: %u mclk: %u pcie gen: %u pcie lanes: %u\n", + i, pl->sclk, pl->mclk, pl->pcie_gen + 1, pl->pcie_lane); + } + r600_dpm_print_ps_status(rdev, rps); +} + +u32 ci_dpm_get_sclk(struct radeon_device *rdev, bool low) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); + + if (low) + return requested_state->performance_levels[0].sclk; + else + return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; +} + +u32 ci_dpm_get_mclk(struct radeon_device *rdev, bool low) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); + + if (low) + return requested_state->performance_levels[0].mclk; + else + return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk; +} diff --git a/drivers/gpu/drm/radeon/ci_dpm.h b/drivers/gpu/drm/radeon/ci_dpm.h new file mode 100644 index 000000000000..de504b5ac33f --- /dev/null +++ b/drivers/gpu/drm/radeon/ci_dpm.h @@ -0,0 +1,331 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __CI_DPM_H__ +#define __CI_DPM_H__ + +#include "ppsmc.h" + +#define SMU__NUM_SCLK_DPM_STATE 8 +#define SMU__NUM_MCLK_DPM_LEVELS 6 +#define SMU__NUM_LCLK_DPM_LEVELS 8 +#define SMU__NUM_PCIE_DPM_LEVELS 8 +#include "smu7_discrete.h" + +#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2 + +struct ci_pl { + u32 mclk; + u32 sclk; + enum radeon_pcie_gen pcie_gen; + u16 pcie_lane; +}; + +struct ci_ps { + u16 performance_level_count; + bool dc_compatible; + u32 sclk_t; + struct ci_pl performance_levels[CISLANDS_MAX_HARDWARE_POWERLEVELS]; +}; + +struct ci_dpm_level { + bool enabled; + u32 value; + u32 param1; +}; + +#define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5 +#define MAX_REGULAR_DPM_NUMBER 8 +#define CISLAND_MINIMUM_ENGINE_CLOCK 800 + +struct ci_single_dpm_table { + u32 count; + struct ci_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER]; +}; + +struct ci_dpm_table { + struct ci_single_dpm_table sclk_table; + struct ci_single_dpm_table mclk_table; + struct ci_single_dpm_table pcie_speed_table; + struct ci_single_dpm_table vddc_table; + struct ci_single_dpm_table vddci_table; + struct ci_single_dpm_table mvdd_table; +}; + +struct ci_mc_reg_entry { + u32 mclk_max; + u32 mc_data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; +}; + +struct ci_mc_reg_table { + u8 last; + u8 num_entries; + u16 valid_flag; + struct ci_mc_reg_entry mc_reg_table_entry[MAX_AC_TIMING_ENTRIES]; + SMU7_Discrete_MCRegisterAddress mc_reg_address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; +}; + +struct ci_ulv_parm +{ + bool supported; + u32 cg_ulv_parameter; + u32 volt_change_delay; + struct ci_pl pl; +}; + +#define CISLANDS_MAX_LEAKAGE_COUNT 8 + +struct ci_leakage_voltage { + u16 count; + u16 leakage_id[CISLANDS_MAX_LEAKAGE_COUNT]; + u16 actual_voltage[CISLANDS_MAX_LEAKAGE_COUNT]; +}; + +struct ci_dpm_level_enable_mask { + u32 uvd_dpm_enable_mask; + u32 vce_dpm_enable_mask; + u32 acp_dpm_enable_mask; + u32 samu_dpm_enable_mask; + u32 sclk_dpm_enable_mask; + u32 mclk_dpm_enable_mask; + u32 pcie_dpm_enable_mask; +}; + +struct ci_vbios_boot_state +{ + u16 mvdd_bootup_value; + u16 vddc_bootup_value; + u16 vddci_bootup_value; + u32 sclk_bootup_value; + u32 mclk_bootup_value; + u16 pcie_gen_bootup_value; + u16 pcie_lane_bootup_value; +}; + +struct ci_clock_registers { + u32 cg_spll_func_cntl; + u32 cg_spll_func_cntl_2; + u32 cg_spll_func_cntl_3; + u32 cg_spll_func_cntl_4; + u32 cg_spll_spread_spectrum; + u32 cg_spll_spread_spectrum_2; + u32 dll_cntl; + u32 mclk_pwrmgt_cntl; + u32 mpll_ad_func_cntl; + u32 mpll_dq_func_cntl; + u32 mpll_func_cntl; + u32 mpll_func_cntl_1; + u32 mpll_func_cntl_2; + u32 mpll_ss1; + u32 mpll_ss2; +}; + +struct ci_thermal_temperature_setting { + s32 temperature_low; + s32 temperature_high; + s32 temperature_shutdown; +}; + +struct ci_pcie_perf_range { + u16 max; + u16 min; +}; + +enum ci_pt_config_reg_type { + CISLANDS_CONFIGREG_MMR = 0, + CISLANDS_CONFIGREG_SMC_IND, + CISLANDS_CONFIGREG_DIDT_IND, + CISLANDS_CONFIGREG_CACHE, + CISLANDS_CONFIGREG_MAX +}; + +#define POWERCONTAINMENT_FEATURE_BAPM 0x00000001 +#define POWERCONTAINMENT_FEATURE_TDCLimit 0x00000002 +#define POWERCONTAINMENT_FEATURE_PkgPwrLimit 0x00000004 + +struct ci_pt_config_reg { + u32 offset; + u32 mask; + u32 shift; + u32 value; + enum ci_pt_config_reg_type type; +}; + +struct ci_pt_defaults { + u8 svi_load_line_en; + u8 svi_load_line_vddc; + u8 tdc_vddc_throttle_release_limit_perc; + u8 tdc_mawt; + u8 tdc_waterfall_ctl; + u8 dte_ambient_temp_base; + u32 display_cac; + u32 bapm_temp_gradient; + u16 bapmti_r[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS]; + u16 bapmti_rc[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS]; +}; + +#define DPMTABLE_OD_UPDATE_SCLK 0x00000001 +#define DPMTABLE_OD_UPDATE_MCLK 0x00000002 +#define DPMTABLE_UPDATE_SCLK 0x00000004 +#define DPMTABLE_UPDATE_MCLK 0x00000008 + +struct ci_power_info { + struct ci_dpm_table dpm_table; + u32 voltage_control; + u32 mvdd_control; + u32 vddci_control; + u32 active_auto_throttle_sources; + struct ci_clock_registers clock_registers; + u16 acpi_vddc; + u16 acpi_vddci; + enum radeon_pcie_gen force_pcie_gen; + enum radeon_pcie_gen acpi_pcie_gen; + struct ci_leakage_voltage vddc_leakage; + struct ci_leakage_voltage vddci_leakage; + u16 max_vddc_in_pp_table; + u16 min_vddc_in_pp_table; + u16 max_vddci_in_pp_table; + u16 min_vddci_in_pp_table; + u32 mclk_strobe_mode_threshold; + u32 mclk_stutter_mode_threshold; + u32 mclk_edc_enable_threshold; + u32 mclk_edc_wr_enable_threshold; + struct ci_vbios_boot_state vbios_boot_state; + /* smc offsets */ + u32 sram_end; + u32 dpm_table_start; + u32 soft_regs_start; + u32 mc_reg_table_start; + u32 fan_table_start; + u32 arb_table_start; + /* smc tables */ + SMU7_Discrete_DpmTable smc_state_table; + SMU7_Discrete_MCRegisters smc_mc_reg_table; + SMU7_Discrete_PmFuses smc_powertune_table; + /* other stuff */ + struct ci_mc_reg_table mc_reg_table; + struct atom_voltage_table vddc_voltage_table; + struct atom_voltage_table vddci_voltage_table; + struct atom_voltage_table mvdd_voltage_table; + struct ci_ulv_parm ulv; + u32 power_containment_features; + const struct ci_pt_defaults *powertune_defaults; + u32 dte_tj_offset; + bool vddc_phase_shed_control; + struct ci_thermal_temperature_setting thermal_temp_setting; + struct ci_dpm_level_enable_mask dpm_level_enable_mask; + u32 need_update_smu7_dpm_table; + u32 sclk_dpm_key_disabled; + u32 mclk_dpm_key_disabled; + u32 pcie_dpm_key_disabled; + struct ci_pcie_perf_range pcie_gen_performance; + struct ci_pcie_perf_range pcie_lane_performance; + struct ci_pcie_perf_range pcie_gen_powersaving; + struct ci_pcie_perf_range pcie_lane_powersaving; + u32 activity_target[SMU7_MAX_LEVELS_GRAPHICS]; + u32 mclk_activity_target; + u32 low_sclk_interrupt_t; + u32 last_mclk_dpm_enable_mask; + u32 sys_pcie_mask; + /* caps */ + bool caps_power_containment; + bool caps_cac; + bool caps_sq_ramping; + bool caps_db_ramping; + bool caps_td_ramping; + bool caps_tcp_ramping; + bool caps_fps; + bool caps_sclk_ds; + bool caps_sclk_ss_support; + bool caps_mclk_ss_support; + bool caps_uvd_dpm; + bool caps_vce_dpm; + bool caps_samu_dpm; + bool caps_acp_dpm; + bool caps_automatic_dc_transition; + bool caps_sclk_throttle_low_notification; + bool caps_dynamic_ac_timing; + /* flags */ + bool thermal_protection; + bool pcie_performance_request; + bool dynamic_ss; + bool dll_default_on; + bool cac_enabled; + bool uvd_enabled; + bool battery_state; + bool pspp_notify_required; + bool mem_gddr5; + bool enable_bapm_feature; + bool enable_tdc_limit_feature; + bool enable_pkg_pwr_tracking_feature; + bool use_pcie_performance_levels; + bool use_pcie_powersaving_levels; + /* driver states */ + struct radeon_ps current_rps; + struct ci_ps current_ps; + struct radeon_ps requested_rps; + struct ci_ps requested_ps; +}; + +#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0 +#define CISLANDS_VOLTAGE_CONTROL_BY_GPIO 0x1 +#define CISLANDS_VOLTAGE_CONTROL_BY_SVID2 0x2 + +#define CISLANDS_Q88_FORMAT_CONVERSION_UNIT 256 + +#define CISLANDS_VRC_DFLT0 0x3FFFC000 +#define CISLANDS_VRC_DFLT1 0x000400 +#define CISLANDS_VRC_DFLT2 0xC00080 +#define CISLANDS_VRC_DFLT3 0xC00200 +#define CISLANDS_VRC_DFLT4 0xC01680 +#define CISLANDS_VRC_DFLT5 0xC00033 +#define CISLANDS_VRC_DFLT6 0xC00033 +#define CISLANDS_VRC_DFLT7 0x3FFFC000 + +#define CISLANDS_CGULVPARAMETER_DFLT 0x00040035 +#define CISLAND_TARGETACTIVITY_DFLT 30 +#define CISLAND_MCLK_TARGETACTIVITY_DFLT 10 + +#define PCIE_PERF_REQ_REMOVE_REGISTRY 0 +#define PCIE_PERF_REQ_FORCE_LOWPOWER 1 +#define PCIE_PERF_REQ_PECI_GEN1 2 +#define PCIE_PERF_REQ_PECI_GEN2 3 +#define PCIE_PERF_REQ_PECI_GEN3 4 + +int ci_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit); +void ci_start_smc(struct radeon_device *rdev); +void ci_reset_smc(struct radeon_device *rdev); +int ci_program_jump_on_start(struct radeon_device *rdev); +void ci_stop_smc_clock(struct radeon_device *rdev); +void ci_start_smc_clock(struct radeon_device *rdev); +bool ci_is_smc_running(struct radeon_device *rdev); +PPSMC_Result ci_send_msg_to_smc(struct radeon_device *rdev, PPSMC_Msg msg); +PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev); +int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit); +int ci_read_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 *value, u32 limit); +int ci_write_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 value, u32 limit); + +#endif diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c new file mode 100644 index 000000000000..53b43dd3cf1e --- /dev/null +++ b/drivers/gpu/drm/radeon/ci_smc.c @@ -0,0 +1,262 @@ +/* + * Copyright 2011 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ + +#include +#include "drmP.h" +#include "radeon.h" +#include "cikd.h" +#include "ppsmc.h" +#include "radeon_ucode.h" + +static int ci_set_smc_sram_address(struct radeon_device *rdev, + u32 smc_address, u32 limit) +{ + if (smc_address & 3) + return -EINVAL; + if ((smc_address + 3) > limit) + return -EINVAL; + + WREG32(SMC_IND_INDEX_0, smc_address); + WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + + return 0; +} + +int ci_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit) +{ + u32 data, original_data; + u32 addr; + u32 extra_shift; + int ret; + + if (smc_start_address & 3) + return -EINVAL; + if ((smc_start_address + byte_count) > limit) + return -EINVAL; + + addr = smc_start_address; + + while (byte_count >= 4) { + /* SMC address space is BE */ + data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; + + ret = ci_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + + src += 4; + byte_count -= 4; + addr += 4; + } + + /* RMW for the final bytes */ + if (byte_count > 0) { + data = 0; + + ret = ci_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + original_data = RREG32(SMC_IND_DATA_0); + + extra_shift = 8 * (4 - byte_count); + + while (byte_count > 0) { + data = (data << 8) + *src++; + byte_count--; + } + + data <<= extra_shift; + + data |= (original_data & ~((~0UL) << extra_shift)); + + ret = ci_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + } + return 0; +} + +void ci_start_smc(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL); + + tmp &= ~RST_REG; + WREG32_SMC(SMC_SYSCON_RESET_CNTL, tmp); +} + +void ci_reset_smc(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL); + + tmp |= RST_REG; + WREG32_SMC(SMC_SYSCON_RESET_CNTL, tmp); +} + +int ci_program_jump_on_start(struct radeon_device *rdev) +{ + static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 }; + + return ci_copy_bytes_to_smc(rdev, 0x0, data, 4, sizeof(data)+1); +} + +void ci_stop_smc_clock(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + + tmp |= CK_DISABLE; + + WREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0, tmp); +} + +void ci_start_smc_clock(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + + tmp &= ~CK_DISABLE; + + WREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0, tmp); +} + +bool ci_is_smc_running(struct radeon_device *rdev) +{ + u32 clk = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + u32 pc_c = RREG32_SMC(SMC_PC_C); + + if (!(clk & CK_DISABLE) && (0x20100 <= pc_c)) + return true; + + return false; +} + +PPSMC_Result ci_send_msg_to_smc(struct radeon_device *rdev, PPSMC_Msg msg) +{ + u32 tmp; + int i; + + if (!ci_is_smc_running(rdev)) + return PPSMC_Result_Failed; + + WREG32(SMC_MESSAGE_0, msg); + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(SMC_RESP_0); + if (tmp != 0) + break; + udelay(1); + } + tmp = RREG32(SMC_RESP_0); + + return (PPSMC_Result)tmp; +} + +PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev) +{ + u32 tmp; + int i; + + if (!ci_is_smc_running(rdev)) + return PPSMC_Result_OK; + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + if ((tmp & CKEN) == 0) + break; + udelay(1); + } + + return PPSMC_Result_OK; +} + +int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit) +{ + u32 ucode_start_address; + u32 ucode_size; + const u8 *src; + u32 data; + + if (!rdev->smc_fw) + return -EINVAL; + + switch (rdev->family) { + case CHIP_BONAIRE: + ucode_start_address = BONAIRE_SMC_UCODE_START; + ucode_size = BONAIRE_SMC_UCODE_SIZE; + break; + default: + DRM_ERROR("unknown asic in smc ucode loader\n"); + BUG(); + } + + if (ucode_size & 3) + return -EINVAL; + + src = (const u8 *)rdev->smc_fw->data; + WREG32(SMC_IND_INDEX_0, ucode_start_address); + WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0); + while (ucode_size >= 4) { + /* SMC address space is BE */ + data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; + + WREG32(SMC_IND_DATA_0, data); + + src += 4; + ucode_size -= 4; + } + WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + + return 0; +} + +int ci_read_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 *value, u32 limit) +{ + int ret; + + ret = ci_set_smc_sram_address(rdev, smc_address, limit); + if (ret) + return ret; + + *value = RREG32(SMC_IND_DATA_0); + return 0; +} + +int ci_write_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 value, u32 limit) +{ + int ret; + + ret = ci_set_smc_sram_address(rdev, smc_address, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, value); + return 0; +} diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 87e5aeed6e88..736a416b51a7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -40,6 +40,7 @@ MODULE_FIRMWARE("radeon/BONAIRE_mec.bin"); MODULE_FIRMWARE("radeon/BONAIRE_mc.bin"); MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin"); MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin"); +MODULE_FIRMWARE("radeon/BONAIRE_smc.bin"); MODULE_FIRMWARE("radeon/KAVERI_pfp.bin"); MODULE_FIRMWARE("radeon/KAVERI_me.bin"); MODULE_FIRMWARE("radeon/KAVERI_ce.bin"); @@ -1545,7 +1546,7 @@ static int cik_init_microcode(struct radeon_device *rdev) const char *chip_name; size_t pfp_req_size, me_req_size, ce_req_size, mec_req_size, rlc_req_size, mc_req_size, - sdma_req_size; + sdma_req_size, smc_req_size; char fw_name[30]; int err; @@ -1561,6 +1562,7 @@ static int cik_init_microcode(struct radeon_device *rdev) rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; mc_req_size = CIK_MC_UCODE_SIZE * 4; sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; + smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4); break; case CHIP_KAVERI: chip_name = "KAVERI"; @@ -1652,7 +1654,7 @@ static int cik_init_microcode(struct radeon_device *rdev) err = -EINVAL; } - /* No MC ucode on APUs */ + /* No SMC, MC ucode on APUs */ if (!(rdev->flags & RADEON_IS_IGP)) { snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); @@ -1664,6 +1666,21 @@ static int cik_init_microcode(struct radeon_device *rdev) rdev->mc_fw->size, fw_name); err = -EINVAL; } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); + err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); + if (err) { + printk(KERN_ERR + "smc: error loading firmware \"%s\"\n", + fw_name); + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; + } else if (rdev->smc_fw->size != smc_req_size) { + printk(KERN_ERR + "cik_smc: Bogus length %zu in firmware \"%s\"\n", + rdev->smc_fw->size, fw_name); + err = -EINVAL; + } } out: @@ -1682,6 +1699,8 @@ out: rdev->rlc_fw = NULL; release_firmware(rdev->mc_fw); rdev->mc_fw = NULL; + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; } return err; } @@ -6626,8 +6645,12 @@ int cik_irq_set(struct radeon_device *rdev) cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) & - ~(THERM_INTH_MASK | THERM_INTL_MASK); + if (rdev->flags & RADEON_IS_IGP) + thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) & + ~(THERM_INTH_MASK | THERM_INTL_MASK); + else + thermal_int = RREG32_SMC(CG_THERMAL_INT) & + ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW); /* enable CP interrupts on all rings */ if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { @@ -6788,7 +6811,10 @@ int cik_irq_set(struct radeon_device *rdev) if (rdev->irq.dpm_thermal) { DRM_DEBUG("dpm thermal\n"); - thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK; + if (rdev->flags & RADEON_IS_IGP) + thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK; + else + thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW; } WREG32(CP_INT_CNTL_RING0, cp_int_cntl); @@ -6825,7 +6851,10 @@ int cik_irq_set(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, hpd5); WREG32(DC_HPD6_INT_CONTROL, hpd6); - WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int); + if (rdev->flags & RADEON_IS_IGP) + WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int); + else + WREG32_SMC(CG_THERMAL_INT, thermal_int); return 0; } diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 179ca3625ae4..861fb3ec161c 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -36,6 +36,23 @@ #define DIDT_TCP_CTRL0 0x60 /* SMC IND registers */ +#define DPM_TABLE_475 0x3F768 +# define SamuBootLevel(x) ((x) << 0) +# define SamuBootLevel_MASK 0x000000ff +# define SamuBootLevel_SHIFT 0 +# define AcpBootLevel(x) ((x) << 8) +# define AcpBootLevel_MASK 0x0000ff00 +# define AcpBootLevel_SHIFT 8 +# define VceBootLevel(x) ((x) << 16) +# define VceBootLevel_MASK 0x00ff0000 +# define VceBootLevel_SHIFT 16 +# define UvdBootLevel(x) ((x) << 24) +# define UvdBootLevel_MASK 0xff000000 +# define UvdBootLevel_SHIFT 24 + +#define FIRMWARE_FLAGS 0x3F800 +# define INTERRUPTS_ENABLED (1 << 0) + #define NB_DPM_CONFIG_1 0x3F9E8 # define Dpm0PgNbPsLo(x) ((x) << 0) # define Dpm0PgNbPsLo_MASK 0x000000ff @@ -50,25 +67,85 @@ # define DpmXNbPsHi_MASK 0xff000000 # define DpmXNbPsHi_SHIFT 24 +#define SMC_SYSCON_RESET_CNTL 0x80000000 +# define RST_REG (1 << 0) +#define SMC_SYSCON_CLOCK_CNTL_0 0x80000004 +# define CK_DISABLE (1 << 0) +# define CKEN (1 << 24) + +#define SMC_SYSCON_MISC_CNTL 0x80000010 + #define SMC_SYSCON_MSG_ARG_0 0x80000068 +#define SMC_PC_C 0x80000370 + +#define SMC_SCRATCH9 0x80000424 + +#define RCU_UC_EVENTS 0xC0000004 +# define BOOT_SEQ_DONE (1 << 7) + #define GENERAL_PWRMGT 0xC0200000 # define GLOBAL_PWRMGT_EN (1 << 0) +# define STATIC_PM_EN (1 << 1) +# define THERMAL_PROTECTION_DIS (1 << 2) +# define THERMAL_PROTECTION_TYPE (1 << 3) +# define SW_SMIO_INDEX(x) ((x) << 6) +# define SW_SMIO_INDEX_MASK (1 << 6) +# define SW_SMIO_INDEX_SHIFT 6 +# define VOLT_PWRMGT_EN (1 << 10) # define GPU_COUNTER_CLK (1 << 15) +# define DYN_SPREAD_SPECTRUM_EN (1 << 23) + +#define CNB_PWRMGT_CNTL 0xC0200004 +# define GNB_SLOW_MODE(x) ((x) << 0) +# define GNB_SLOW_MODE_MASK (3 << 0) +# define GNB_SLOW_MODE_SHIFT 0 +# define GNB_SLOW (1 << 2) +# define FORCE_NB_PS1 (1 << 3) +# define DPM_ENABLED (1 << 4) #define SCLK_PWRMGT_CNTL 0xC0200008 +# define SCLK_PWRMGT_OFF (1 << 0) # define RESET_BUSY_CNT (1 << 4) # define RESET_SCLK_CNT (1 << 5) # define DYNAMIC_PM_EN (1 << 21) +#define CG_SSP 0xC0200044 +# define SST(x) ((x) << 0) +# define SST_MASK (0xffff << 0) +# define SSTU(x) ((x) << 16) +# define SSTU_MASK (0xf << 16) + +#define CG_DISPLAY_GAP_CNTL 0xC0200060 +# define DISP_GAP(x) ((x) << 0) +# define DISP_GAP_MASK (3 << 0) +# define VBI_TIMER_COUNT(x) ((x) << 4) +# define VBI_TIMER_COUNT_MASK (0x3fff << 4) +# define VBI_TIMER_UNIT(x) ((x) << 20) +# define VBI_TIMER_UNIT_MASK (7 << 20) +# define DISP_GAP_MCHG(x) ((x) << 24) +# define DISP_GAP_MCHG_MASK (3 << 24) + +#define CG_ULV_PARAMETER 0xC0200158 + #define CG_FTV_0 0xC02001A8 +#define CG_FTV_1 0xC02001AC +#define CG_FTV_2 0xC02001B0 +#define CG_FTV_3 0xC02001B4 +#define CG_FTV_4 0xC02001B8 +#define CG_FTV_5 0xC02001BC +#define CG_FTV_6 0xC02001C0 +#define CG_FTV_7 0xC02001C4 + +#define CG_DISPLAY_GAP_CNTL2 0xC0200230 #define LCAC_SX0_OVR_SEL 0xC0400D04 #define LCAC_SX0_OVR_VAL 0xC0400D08 +#define LCAC_MC0_CNTL 0xC0400D30 #define LCAC_MC0_OVR_SEL 0xC0400D34 #define LCAC_MC0_OVR_VAL 0xC0400D38 - +#define LCAC_MC1_CNTL 0xC0400D3C #define LCAC_MC1_OVR_SEL 0xC0400D40 #define LCAC_MC1_OVR_VAL 0xC0400D44 @@ -78,9 +155,28 @@ #define LCAC_MC3_OVR_SEL 0xC0400D58 #define LCAC_MC3_OVR_VAL 0xC0400D5C +#define LCAC_CPL_CNTL 0xC0400D80 #define LCAC_CPL_OVR_SEL 0xC0400D84 #define LCAC_CPL_OVR_VAL 0xC0400D88 +/* dGPU */ +#define CG_THERMAL_CTRL 0xC0300004 +#define DPM_EVENT_SRC(x) ((x) << 0) +#define DPM_EVENT_SRC_MASK (7 << 0) +#define DIG_THERM_DPM(x) ((x) << 14) +#define DIG_THERM_DPM_MASK 0x003FC000 +#define DIG_THERM_DPM_SHIFT 14 + +#define CG_THERMAL_INT 0xC030000C +#define CI_DIG_THERM_INTH(x) ((x) << 8) +#define CI_DIG_THERM_INTH_MASK 0x0000FF00 +#define CI_DIG_THERM_INTH_SHIFT 8 +#define CI_DIG_THERM_INTL(x) ((x) << 16) +#define CI_DIG_THERM_INTL_MASK 0x00FF0000 +#define CI_DIG_THERM_INTL_SHIFT 16 +#define THERM_INT_MASK_HIGH (1 << 24) +#define THERM_INT_MASK_LOW (1 << 25) + #define CG_MULT_THERMAL_STATUS 0xC0300014 #define ASIC_MAX_TEMP(x) ((x) << 0) #define ASIC_MAX_TEMP_MASK 0x000001ff @@ -89,6 +185,35 @@ #define CTF_TEMP_MASK 0x0003fe00 #define CTF_TEMP_SHIFT 9 +#define CG_SPLL_FUNC_CNTL 0xC0500140 +#define SPLL_RESET (1 << 0) +#define SPLL_PWRON (1 << 1) +#define SPLL_BYPASS_EN (1 << 3) +#define SPLL_REF_DIV(x) ((x) << 5) +#define SPLL_REF_DIV_MASK (0x3f << 5) +#define SPLL_PDIV_A(x) ((x) << 20) +#define SPLL_PDIV_A_MASK (0x7f << 20) +#define SPLL_PDIV_A_SHIFT 20 +#define CG_SPLL_FUNC_CNTL_2 0xC0500144 +#define SCLK_MUX_SEL(x) ((x) << 0) +#define SCLK_MUX_SEL_MASK (0x1ff << 0) +#define CG_SPLL_FUNC_CNTL_3 0xC0500148 +#define SPLL_FB_DIV(x) ((x) << 0) +#define SPLL_FB_DIV_MASK (0x3ffffff << 0) +#define SPLL_FB_DIV_SHIFT 0 +#define SPLL_DITHEN (1 << 28) +#define CG_SPLL_FUNC_CNTL_4 0xC050014C + +#define CG_SPLL_SPREAD_SPECTRUM 0xC0500164 +#define SSEN (1 << 0) +#define CLK_S(x) ((x) << 4) +#define CLK_S_MASK (0xfff << 4) +#define CLK_S_SHIFT 4 +#define CG_SPLL_SPREAD_SPECTRUM_2 0xC0500168 +#define CLK_V(x) ((x) << 0) +#define CLK_V_MASK (0x3ffffff << 0) +#define CLK_V_SHIFT 0 + #define MPLL_BYPASSCLK_SEL 0xC050019C # define MPLL_CLKOUT_SEL(x) ((x) << 8) # define MPLL_CLKOUT_SEL_MASK 0xFF00 @@ -109,6 +234,7 @@ # define ZCLK_SEL(x) ((x) << 8) # define ZCLK_SEL_MASK 0xFF00 +/* KV/KB */ #define CG_THERMAL_INT_CTRL 0xC2100028 #define DIG_THERM_INTH(x) ((x) << 0) #define DIG_THERM_INTH_MASK 0x000000FF @@ -437,9 +563,37 @@ #define NOOFGROUPS_SHIFT 12 #define NOOFGROUPS_MASK 0x00001000 +#define MC_ARB_DRAM_TIMING 0x2774 +#define MC_ARB_DRAM_TIMING2 0x2778 + +#define MC_ARB_BURST_TIME 0x2808 +#define STATE0(x) ((x) << 0) +#define STATE0_MASK (0x1f << 0) +#define STATE0_SHIFT 0 +#define STATE1(x) ((x) << 5) +#define STATE1_MASK (0x1f << 5) +#define STATE1_SHIFT 5 +#define STATE2(x) ((x) << 10) +#define STATE2_MASK (0x1f << 10) +#define STATE2_SHIFT 10 +#define STATE3(x) ((x) << 15) +#define STATE3_MASK (0x1f << 15) +#define STATE3_SHIFT 15 + +#define MC_SEQ_RAS_TIMING 0x28a0 +#define MC_SEQ_CAS_TIMING 0x28a4 +#define MC_SEQ_MISC_TIMING 0x28a8 +#define MC_SEQ_MISC_TIMING2 0x28ac +#define MC_SEQ_PMG_TIMING 0x28b0 +#define MC_SEQ_RD_CTL_D0 0x28b4 +#define MC_SEQ_RD_CTL_D1 0x28b8 +#define MC_SEQ_WR_CTL_D0 0x28bc +#define MC_SEQ_WR_CTL_D1 0x28c0 + #define MC_SEQ_SUP_CNTL 0x28c8 #define RUN_MASK (1 << 0) #define MC_SEQ_SUP_PGM 0x28cc +#define MC_PMG_AUTO_CMD 0x28d0 #define MC_SEQ_TRAIN_WAKEUP_CNTL 0x28e8 #define TRAIN_DONE_D0 (1 << 30) @@ -448,9 +602,90 @@ #define MC_IO_PAD_CNTL_D0 0x29d0 #define MEM_FALL_OUT_CMD (1 << 8) +#define MC_SEQ_MISC0 0x2a00 +#define MC_SEQ_MISC0_VEN_ID_SHIFT 8 +#define MC_SEQ_MISC0_VEN_ID_MASK 0x00000f00 +#define MC_SEQ_MISC0_VEN_ID_VALUE 3 +#define MC_SEQ_MISC0_REV_ID_SHIFT 12 +#define MC_SEQ_MISC0_REV_ID_MASK 0x0000f000 +#define MC_SEQ_MISC0_REV_ID_VALUE 1 +#define MC_SEQ_MISC0_GDDR5_SHIFT 28 +#define MC_SEQ_MISC0_GDDR5_MASK 0xf0000000 +#define MC_SEQ_MISC0_GDDR5_VALUE 5 +#define MC_SEQ_MISC1 0x2a04 +#define MC_SEQ_RESERVE_M 0x2a08 +#define MC_PMG_CMD_EMRS 0x2a0c + #define MC_SEQ_IO_DEBUG_INDEX 0x2a44 #define MC_SEQ_IO_DEBUG_DATA 0x2a48 +#define MC_SEQ_MISC5 0x2a54 +#define MC_SEQ_MISC6 0x2a58 + +#define MC_SEQ_MISC7 0x2a64 + +#define MC_SEQ_RAS_TIMING_LP 0x2a6c +#define MC_SEQ_CAS_TIMING_LP 0x2a70 +#define MC_SEQ_MISC_TIMING_LP 0x2a74 +#define MC_SEQ_MISC_TIMING2_LP 0x2a78 +#define MC_SEQ_WR_CTL_D0_LP 0x2a7c +#define MC_SEQ_WR_CTL_D1_LP 0x2a80 +#define MC_SEQ_PMG_CMD_EMRS_LP 0x2a84 +#define MC_SEQ_PMG_CMD_MRS_LP 0x2a88 + +#define MC_PMG_CMD_MRS 0x2aac + +#define MC_SEQ_RD_CTL_D0_LP 0x2b1c +#define MC_SEQ_RD_CTL_D1_LP 0x2b20 + +#define MC_PMG_CMD_MRS1 0x2b44 +#define MC_SEQ_PMG_CMD_MRS1_LP 0x2b48 +#define MC_SEQ_PMG_TIMING_LP 0x2b4c + +#define MC_SEQ_WR_CTL_2 0x2b54 +#define MC_SEQ_WR_CTL_2_LP 0x2b58 +#define MC_PMG_CMD_MRS2 0x2b5c +#define MC_SEQ_PMG_CMD_MRS2_LP 0x2b60 + +#define MCLK_PWRMGT_CNTL 0x2ba0 +# define DLL_SPEED(x) ((x) << 0) +# define DLL_SPEED_MASK (0x1f << 0) +# define DLL_READY (1 << 6) +# define MC_INT_CNTL (1 << 7) +# define MRDCK0_PDNB (1 << 8) +# define MRDCK1_PDNB (1 << 9) +# define MRDCK0_RESET (1 << 16) +# define MRDCK1_RESET (1 << 17) +# define DLL_READY_READ (1 << 24) +#define DLL_CNTL 0x2ba4 +# define MRDCK0_BYPASS (1 << 24) +# define MRDCK1_BYPASS (1 << 25) + +#define MPLL_FUNC_CNTL 0x2bb4 +#define BWCTRL(x) ((x) << 20) +#define BWCTRL_MASK (0xff << 20) +#define MPLL_FUNC_CNTL_1 0x2bb8 +#define VCO_MODE(x) ((x) << 0) +#define VCO_MODE_MASK (3 << 0) +#define CLKFRAC(x) ((x) << 4) +#define CLKFRAC_MASK (0xfff << 4) +#define CLKF(x) ((x) << 16) +#define CLKF_MASK (0xfff << 16) +#define MPLL_FUNC_CNTL_2 0x2bbc +#define MPLL_AD_FUNC_CNTL 0x2bc0 +#define YCLK_POST_DIV(x) ((x) << 0) +#define YCLK_POST_DIV_MASK (7 << 0) +#define MPLL_DQ_FUNC_CNTL 0x2bc4 +#define YCLK_SEL(x) ((x) << 4) +#define YCLK_SEL_MASK (1 << 4) + +#define MPLL_SS1 0x2bcc +#define CLKV(x) ((x) << 0) +#define CLKV_MASK (0x3ffffff << 0) +#define MPLL_SS2 0x2bd0 +#define CLKS(x) ((x) << 0) +#define CLKS_MASK (0xfff << 0) + #define HDP_HOST_PATH_CNTL 0x2C00 #define CLOCK_GATING_DIS (1 << 23) #define HDP_NONSURFACE_BASE 0x2C04 @@ -465,6 +700,22 @@ #define ATC_MISC_CG 0x3350 +#define MC_SEQ_CNTL_3 0x3600 +# define CAC_EN (1 << 31) +#define MC_SEQ_G5PDX_CTRL 0x3604 +#define MC_SEQ_G5PDX_CTRL_LP 0x3608 +#define MC_SEQ_G5PDX_CMD0 0x360c +#define MC_SEQ_G5PDX_CMD0_LP 0x3610 +#define MC_SEQ_G5PDX_CMD1 0x3614 +#define MC_SEQ_G5PDX_CMD1_LP 0x3618 + +#define MC_SEQ_PMG_DVS_CTL 0x3628 +#define MC_SEQ_PMG_DVS_CTL_LP 0x362c +#define MC_SEQ_PMG_DVS_CMD 0x3630 +#define MC_SEQ_PMG_DVS_CMD_LP 0x3634 +#define MC_SEQ_DLL_STBY 0x3638 +#define MC_SEQ_DLL_STBY_LP 0x363c + #define IH_RB_CNTL 0x3e00 # define IH_RB_ENABLE (1 << 0) # define IH_RB_SIZE(x) ((x) << 1) /* log2 */ @@ -492,6 +743,9 @@ # define MC_WR_CLEAN_CNT(x) ((x) << 20) # define MC_VMID(x) ((x) << 25) +#define BIF_LNCNT_RESET 0x5220 +# define RESET_LNCNT_EN (1 << 0) + #define CONFIG_MEMSIZE 0x5428 #define INTERRUPT_CNTL 0x5468 @@ -628,6 +882,9 @@ # define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) # define DC_HPDx_EN (1 << 28) +#define DPG_PIPE_STUTTER_CONTROL 0x6cd4 +# define STUTTER_ENABLE (1 << 0) + #define GRBM_CNTL 0x8000 #define GRBM_READ_TIMEOUT(x) ((x) << 0) diff --git a/drivers/gpu/drm/radeon/ppsmc.h b/drivers/gpu/drm/radeon/ppsmc.h index 6db6e320bc79..4c1ee6df09a0 100644 --- a/drivers/gpu/drm/radeon/ppsmc.h +++ b/drivers/gpu/drm/radeon/ppsmc.h @@ -99,7 +99,7 @@ typedef uint8_t PPSMC_Result; #define PPSMC_MSG_ThrottleOVRDSCLKDS ((uint8_t)0x96) #define PPSMC_MSG_CancelThrottleOVRDSCLKDS ((uint8_t)0x97) -/* KV/KB */ +/* CI/KV/KB */ #define PPSMC_MSG_UVDDPM_SetEnabledMask ((uint16_t) 0x12D) #define PPSMC_MSG_VCEDPM_SetEnabledMask ((uint16_t) 0x12E) #define PPSMC_MSG_ACPDPM_SetEnabledMask ((uint16_t) 0x12F) @@ -108,6 +108,7 @@ typedef uint8_t PPSMC_Result; #define PPSMC_MSG_MCLKDPM_NoForcedLevel ((uint16_t) 0x132) #define PPSMC_MSG_Voltage_Cntl_Disable ((uint16_t) 0x135) #define PPSMC_MSG_PCIeDPM_Enable ((uint16_t) 0x136) +#define PPSMC_MSG_PCIeDPM_Disable ((uint16_t) 0x13d) #define PPSMC_MSG_ACPPowerOFF ((uint16_t) 0x137) #define PPSMC_MSG_ACPPowerON ((uint16_t) 0x138) #define PPSMC_MSG_SAMPowerOFF ((uint16_t) 0x139) @@ -116,8 +117,13 @@ typedef uint8_t PPSMC_Result; #define PPSMC_MSG_NBDPM_Enable ((uint16_t) 0x140) #define PPSMC_MSG_NBDPM_Disable ((uint16_t) 0x141) #define PPSMC_MSG_SCLKDPM_SetEnabledMask ((uint16_t) 0x145) +#define PPSMC_MSG_MCLKDPM_SetEnabledMask ((uint16_t) 0x146) +#define PPSMC_MSG_PCIeDPM_ForceLevel ((uint16_t) 0x147) +#define PPSMC_MSG_EnableVRHotGPIOInterrupt ((uint16_t) 0x14a) #define PPSMC_MSG_DPM_Enable ((uint16_t) 0x14e) #define PPSMC_MSG_DPM_Disable ((uint16_t) 0x14f) +#define PPSMC_MSG_MCLKDPM_Enable ((uint16_t) 0x150) +#define PPSMC_MSG_MCLKDPM_Disable ((uint16_t) 0x151) #define PPSMC_MSG_UVDDPM_Enable ((uint16_t) 0x154) #define PPSMC_MSG_UVDDPM_Disable ((uint16_t) 0x155) #define PPSMC_MSG_SAMUDPM_Enable ((uint16_t) 0x156) @@ -126,9 +132,25 @@ typedef uint8_t PPSMC_Result; #define PPSMC_MSG_ACPDPM_Disable ((uint16_t) 0x159) #define PPSMC_MSG_VCEDPM_Enable ((uint16_t) 0x15a) #define PPSMC_MSG_VCEDPM_Disable ((uint16_t) 0x15b) +#define PPSMC_MSG_VddC_Request ((uint16_t) 0x15f) #define PPSMC_MSG_SCLKDPM_GetEnabledMask ((uint16_t) 0x162) +#define PPSMC_MSG_PCIeDPM_SetEnabledMask ((uint16_t) 0x167) +#define PPSMC_MSG_TDCLimitEnable ((uint16_t) 0x169) +#define PPSMC_MSG_TDCLimitDisable ((uint16_t) 0x16a) +#define PPSMC_MSG_PkgPwrLimitEnable ((uint16_t) 0x185) +#define PPSMC_MSG_PkgPwrLimitDisable ((uint16_t) 0x186) +#define PPSMC_MSG_PkgPwrSetLimit ((uint16_t) 0x187) +#define PPSMC_MSG_OverDriveSetTargetTdp ((uint16_t) 0x188) #define PPSMC_MSG_SCLKDPM_FreezeLevel ((uint16_t) 0x189) #define PPSMC_MSG_SCLKDPM_UnfreezeLevel ((uint16_t) 0x18A) +#define PPSMC_MSG_MCLKDPM_FreezeLevel ((uint16_t) 0x18B) +#define PPSMC_MSG_MCLKDPM_UnfreezeLevel ((uint16_t) 0x18C) +#define PPSMC_MSG_MASTER_DeepSleep_ON ((uint16_t) 0x18F) +#define PPSMC_MSG_MASTER_DeepSleep_OFF ((uint16_t) 0x190) +#define PPSMC_MSG_Remove_DC_Clamp ((uint16_t) 0x191) + +#define PPSMC_MSG_API_GetSclkFrequency ((uint16_t) 0x200) +#define PPSMC_MSG_API_GetMclkFrequency ((uint16_t) 0x201) /* TN */ #define PPSMC_MSG_DPM_Config ((uint32_t) 0x102) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b6bac497f001..930650ec769c 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -246,6 +246,12 @@ int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type, int radeon_atom_get_leakage_vddc_based_on_leakage_idx(struct radeon_device *rdev, u16 *voltage, u16 leakage_idx); +int radeon_atom_get_leakage_id_from_vbios(struct radeon_device *rdev, + u16 *leakage_id); +int radeon_atom_get_leakage_vddc_based_on_leakage_params(struct radeon_device *rdev, + u16 *vddc, u16 *vddci, + u16 virtual_voltage_id, + u16 vbios_voltage_id); int radeon_atom_round_to_true_voltage(struct radeon_device *rdev, u8 voltage_type, u16 nominal_voltage, diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 9c83ecfd0eb7..c633fa53def0 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2454,6 +2454,20 @@ static struct radeon_asic ci_asic = { .set_uvd_clocks = &cik_set_uvd_clocks, .get_temperature = &ci_get_temp, }, + .dpm = { + .init = &ci_dpm_init, + .setup_asic = &ci_dpm_setup_asic, + .enable = &ci_dpm_enable, + .disable = &ci_dpm_disable, + .pre_set_power_state = &ci_dpm_pre_set_power_state, + .set_power_state = &ci_dpm_set_power_state, + .post_set_power_state = &ci_dpm_post_set_power_state, + .display_configuration_changed = &ci_dpm_display_configuration_changed, + .fini = &ci_dpm_fini, + .get_sclk = &ci_dpm_get_sclk, + .get_mclk = &ci_dpm_get_mclk, + .print_power_state = &ci_dpm_print_power_state, + }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, .page_flip = &evergreen_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 68a1a1fb371d..350da1704964 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -750,6 +750,20 @@ void cik_compute_ring_set_wptr(struct radeon_device *rdev, int ci_get_temp(struct radeon_device *rdev); int kv_get_temp(struct radeon_device *rdev); +int ci_dpm_init(struct radeon_device *rdev); +int ci_dpm_enable(struct radeon_device *rdev); +void ci_dpm_disable(struct radeon_device *rdev); +int ci_dpm_pre_set_power_state(struct radeon_device *rdev); +int ci_dpm_set_power_state(struct radeon_device *rdev); +void ci_dpm_post_set_power_state(struct radeon_device *rdev); +void ci_dpm_setup_asic(struct radeon_device *rdev); +void ci_dpm_display_configuration_changed(struct radeon_device *rdev); +void ci_dpm_fini(struct radeon_device *rdev); +u32 ci_dpm_get_sclk(struct radeon_device *rdev, bool low); +u32 ci_dpm_get_mclk(struct radeon_device *rdev, bool low); +void ci_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *ps); + int kv_dpm_init(struct radeon_device *rdev); int kv_dpm_enable(struct radeon_device *rdev); void kv_dpm_disable(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 6247b5e2d074..7ba439e9f30f 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -211,7 +211,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) } static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, - u8 id) + u8 id) { struct atom_context *ctx = rdev->mode_info.atom_context; struct radeon_gpio_rec gpio; diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 37d3d343f687..66b04af16949 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1202,6 +1202,7 @@ int radeon_pm_init(struct radeon_device *rdev) case CHIP_VERDE: case CHIP_OLAND: case CHIP_HAINAN: + case CHIP_BONAIRE: case CHIP_KABINI: case CHIP_KAVERI: /* DPM requires the RLC, RV770+ dGPU requires SMC */ diff --git a/drivers/gpu/drm/radeon/radeon_ucode.h b/drivers/gpu/drm/radeon/radeon_ucode.h index fad27c051bbf..33858364fe89 100644 --- a/drivers/gpu/drm/radeon/radeon_ucode.h +++ b/drivers/gpu/drm/radeon/radeon_ucode.h @@ -140,4 +140,7 @@ #define HAINAN_SMC_UCODE_START 0x10000 #define HAINAN_SMC_UCODE_SIZE 0xe67C +#define BONAIRE_SMC_UCODE_START 0x20000 +#define BONAIRE_SMC_UCODE_SIZE 0x1FDEC + #endif diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 0f8be48c2ef4..96d96f5df9e7 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3663,7 +3663,7 @@ static void si_clear_vc(struct radeon_device *rdev) WREG32(CG_FTV, 0); } -static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) +u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) { u8 mc_para_index; @@ -3676,7 +3676,7 @@ static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) return mc_para_index; } -static u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode) +u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode) { u8 mc_para_index; @@ -3758,9 +3758,9 @@ static bool si_validate_phase_shedding_tables(struct radeon_device *rdev, return true; } -static void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, - u32 max_voltage_steps, - struct atom_voltage_table *voltage_table) +void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, + u32 max_voltage_steps, + struct atom_voltage_table *voltage_table) { unsigned int i, diff; diff --git a/drivers/gpu/drm/radeon/smu7_discrete.h b/drivers/gpu/drm/radeon/smu7_discrete.h new file mode 100644 index 000000000000..82f70c90a9ee --- /dev/null +++ b/drivers/gpu/drm/radeon/smu7_discrete.h @@ -0,0 +1,486 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU7_DISCRETE_H +#define SMU7_DISCRETE_H + +#include "smu7.h" + +#pragma pack(push, 1) + +#define SMU7_DTE_ITERATIONS 5 +#define SMU7_DTE_SOURCES 3 +#define SMU7_DTE_SINKS 1 +#define SMU7_NUM_CPU_TES 0 +#define SMU7_NUM_GPU_TES 1 +#define SMU7_NUM_NON_TES 2 + +struct SMU7_SoftRegisters +{ + uint32_t RefClockFrequency; + uint32_t PmTimerP; + uint32_t FeatureEnables; + uint32_t PreVBlankGap; + uint32_t VBlankTimeout; + uint32_t TrainTimeGap; + + uint32_t MvddSwitchTime; + uint32_t LongestAcpiTrainTime; + uint32_t AcpiDelay; + uint32_t G5TrainTime; + uint32_t DelayMpllPwron; + uint32_t VoltageChangeTimeout; + uint32_t HandshakeDisables; + + uint8_t DisplayPhy1Config; + uint8_t DisplayPhy2Config; + uint8_t DisplayPhy3Config; + uint8_t DisplayPhy4Config; + + uint8_t DisplayPhy5Config; + uint8_t DisplayPhy6Config; + uint8_t DisplayPhy7Config; + uint8_t DisplayPhy8Config; + + uint32_t AverageGraphicsA; + uint32_t AverageMemoryA; + uint32_t AverageGioA; + + uint8_t SClkDpmEnabledLevels; + uint8_t MClkDpmEnabledLevels; + uint8_t LClkDpmEnabledLevels; + uint8_t PCIeDpmEnabledLevels; + + uint8_t UVDDpmEnabledLevels; + uint8_t SAMUDpmEnabledLevels; + uint8_t ACPDpmEnabledLevels; + uint8_t VCEDpmEnabledLevels; + + uint32_t DRAM_LOG_ADDR_H; + uint32_t DRAM_LOG_ADDR_L; + uint32_t DRAM_LOG_PHY_ADDR_H; + uint32_t DRAM_LOG_PHY_ADDR_L; + uint32_t DRAM_LOG_BUFF_SIZE; + uint32_t UlvEnterC; + uint32_t UlvTime; + uint32_t Reserved[3]; + +}; + +typedef struct SMU7_SoftRegisters SMU7_SoftRegisters; + +struct SMU7_Discrete_VoltageLevel +{ + uint16_t Voltage; + uint16_t StdVoltageHiSidd; + uint16_t StdVoltageLoSidd; + uint8_t Smio; + uint8_t padding; +}; + +typedef struct SMU7_Discrete_VoltageLevel SMU7_Discrete_VoltageLevel; + +struct SMU7_Discrete_GraphicsLevel +{ + uint32_t Flags; + uint32_t MinVddc; + uint32_t MinVddcPhases; + + uint32_t SclkFrequency; + + uint8_t padding1[2]; + uint16_t ActivityLevel; + + uint32_t CgSpllFuncCntl3; + uint32_t CgSpllFuncCntl4; + uint32_t SpllSpreadSpectrum; + uint32_t SpllSpreadSpectrum2; + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + uint8_t SclkDid; + uint8_t DisplayWatermark; + uint8_t EnabledForActivity; + uint8_t EnabledForThrottle; + uint8_t UpH; + uint8_t DownH; + uint8_t VoltageDownH; + uint8_t PowerThrottle; + uint8_t DeepSleepDivId; + uint8_t padding[3]; +}; + +typedef struct SMU7_Discrete_GraphicsLevel SMU7_Discrete_GraphicsLevel; + +struct SMU7_Discrete_ACPILevel +{ + uint32_t Flags; + uint32_t MinVddc; + uint32_t MinVddcPhases; + uint32_t SclkFrequency; + uint8_t SclkDid; + uint8_t DisplayWatermark; + uint8_t DeepSleepDivId; + uint8_t padding; + uint32_t CgSpllFuncCntl; + uint32_t CgSpllFuncCntl2; + uint32_t CgSpllFuncCntl3; + uint32_t CgSpllFuncCntl4; + uint32_t SpllSpreadSpectrum; + uint32_t SpllSpreadSpectrum2; + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; +}; + +typedef struct SMU7_Discrete_ACPILevel SMU7_Discrete_ACPILevel; + +struct SMU7_Discrete_Ulv +{ + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + uint16_t VddcOffset; + uint8_t VddcOffsetVid; + uint8_t VddcPhase; + uint32_t Reserved; +}; + +typedef struct SMU7_Discrete_Ulv SMU7_Discrete_Ulv; + +struct SMU7_Discrete_MemoryLevel +{ + uint32_t MinVddc; + uint32_t MinVddcPhases; + uint32_t MinVddci; + uint32_t MinMvdd; + + uint32_t MclkFrequency; + + uint8_t EdcReadEnable; + uint8_t EdcWriteEnable; + uint8_t RttEnable; + uint8_t StutterEnable; + + uint8_t StrobeEnable; + uint8_t StrobeRatio; + uint8_t EnabledForThrottle; + uint8_t EnabledForActivity; + + uint8_t UpH; + uint8_t DownH; + uint8_t VoltageDownH; + uint8_t padding; + + uint16_t ActivityLevel; + uint8_t DisplayWatermark; + uint8_t padding1; + + uint32_t MpllFuncCntl; + uint32_t MpllFuncCntl_1; + uint32_t MpllFuncCntl_2; + uint32_t MpllAdFuncCntl; + uint32_t MpllDqFuncCntl; + uint32_t MclkPwrmgtCntl; + uint32_t DllCntl; + uint32_t MpllSs1; + uint32_t MpllSs2; +}; + +typedef struct SMU7_Discrete_MemoryLevel SMU7_Discrete_MemoryLevel; + +struct SMU7_Discrete_LinkLevel +{ + uint8_t PcieGenSpeed; + uint8_t PcieLaneCount; + uint8_t EnabledForActivity; + uint8_t Padding; + uint32_t DownT; + uint32_t UpT; + uint32_t Reserved; +}; + +typedef struct SMU7_Discrete_LinkLevel SMU7_Discrete_LinkLevel; + + +struct SMU7_Discrete_MCArbDramTimingTableEntry +{ + uint32_t McArbDramTiming; + uint32_t McArbDramTiming2; + uint8_t McArbBurstTime; + uint8_t padding[3]; +}; + +typedef struct SMU7_Discrete_MCArbDramTimingTableEntry SMU7_Discrete_MCArbDramTimingTableEntry; + +struct SMU7_Discrete_MCArbDramTimingTable +{ + SMU7_Discrete_MCArbDramTimingTableEntry entries[SMU__NUM_SCLK_DPM_STATE][SMU__NUM_MCLK_DPM_LEVELS]; +}; + +typedef struct SMU7_Discrete_MCArbDramTimingTable SMU7_Discrete_MCArbDramTimingTable; + +struct SMU7_Discrete_UvdLevel +{ + uint32_t VclkFrequency; + uint32_t DclkFrequency; + uint16_t MinVddc; + uint8_t MinVddcPhases; + uint8_t VclkDivider; + uint8_t DclkDivider; + uint8_t padding[3]; +}; + +typedef struct SMU7_Discrete_UvdLevel SMU7_Discrete_UvdLevel; + +struct SMU7_Discrete_ExtClkLevel +{ + uint32_t Frequency; + uint16_t MinVoltage; + uint8_t MinPhases; + uint8_t Divider; +}; + +typedef struct SMU7_Discrete_ExtClkLevel SMU7_Discrete_ExtClkLevel; + +struct SMU7_Discrete_StateInfo +{ + uint32_t SclkFrequency; + uint32_t MclkFrequency; + uint32_t VclkFrequency; + uint32_t DclkFrequency; + uint32_t SamclkFrequency; + uint32_t AclkFrequency; + uint32_t EclkFrequency; + uint16_t MvddVoltage; + uint16_t padding16; + uint8_t DisplayWatermark; + uint8_t McArbIndex; + uint8_t McRegIndex; + uint8_t SeqIndex; + uint8_t SclkDid; + int8_t SclkIndex; + int8_t MclkIndex; + uint8_t PCIeGen; + +}; + +typedef struct SMU7_Discrete_StateInfo SMU7_Discrete_StateInfo; + + +struct SMU7_Discrete_DpmTable +{ + SMU7_PIDController GraphicsPIDController; + SMU7_PIDController MemoryPIDController; + SMU7_PIDController LinkPIDController; + + uint32_t SystemFlags; + + + uint32_t SmioMaskVddcVid; + uint32_t SmioMaskVddcPhase; + uint32_t SmioMaskVddciVid; + uint32_t SmioMaskMvddVid; + + uint32_t VddcLevelCount; + uint32_t VddciLevelCount; + uint32_t MvddLevelCount; + + SMU7_Discrete_VoltageLevel VddcLevel [SMU7_MAX_LEVELS_VDDC]; +// SMU7_Discrete_VoltageLevel VddcStandardReference [SMU7_MAX_LEVELS_VDDC]; + SMU7_Discrete_VoltageLevel VddciLevel [SMU7_MAX_LEVELS_VDDCI]; + SMU7_Discrete_VoltageLevel MvddLevel [SMU7_MAX_LEVELS_MVDD]; + + uint8_t GraphicsDpmLevelCount; + uint8_t MemoryDpmLevelCount; + uint8_t LinkLevelCount; + uint8_t UvdLevelCount; + uint8_t VceLevelCount; + uint8_t AcpLevelCount; + uint8_t SamuLevelCount; + uint8_t MasterDeepSleepControl; + uint32_t Reserved[5]; +// uint32_t SamuDefaultLevel; + + SMU7_Discrete_GraphicsLevel GraphicsLevel [SMU7_MAX_LEVELS_GRAPHICS]; + SMU7_Discrete_MemoryLevel MemoryACPILevel; + SMU7_Discrete_MemoryLevel MemoryLevel [SMU7_MAX_LEVELS_MEMORY]; + SMU7_Discrete_LinkLevel LinkLevel [SMU7_MAX_LEVELS_LINK]; + SMU7_Discrete_ACPILevel ACPILevel; + SMU7_Discrete_UvdLevel UvdLevel [SMU7_MAX_LEVELS_UVD]; + SMU7_Discrete_ExtClkLevel VceLevel [SMU7_MAX_LEVELS_VCE]; + SMU7_Discrete_ExtClkLevel AcpLevel [SMU7_MAX_LEVELS_ACP]; + SMU7_Discrete_ExtClkLevel SamuLevel [SMU7_MAX_LEVELS_SAMU]; + SMU7_Discrete_Ulv Ulv; + + uint32_t SclkStepSize; + uint32_t Smio [SMU7_MAX_ENTRIES_SMIO]; + + uint8_t UvdBootLevel; + uint8_t VceBootLevel; + uint8_t AcpBootLevel; + uint8_t SamuBootLevel; + + uint8_t UVDInterval; + uint8_t VCEInterval; + uint8_t ACPInterval; + uint8_t SAMUInterval; + + uint8_t GraphicsBootLevel; + uint8_t GraphicsVoltageChangeEnable; + uint8_t GraphicsThermThrottleEnable; + uint8_t GraphicsInterval; + + uint8_t VoltageInterval; + uint8_t ThermalInterval; + uint16_t TemperatureLimitHigh; + + uint16_t TemperatureLimitLow; + uint8_t MemoryBootLevel; + uint8_t MemoryVoltageChangeEnable; + + uint8_t MemoryInterval; + uint8_t MemoryThermThrottleEnable; + uint16_t VddcVddciDelta; + + uint16_t VoltageResponseTime; + uint16_t PhaseResponseTime; + + uint8_t PCIeBootLinkLevel; + uint8_t PCIeGenInterval; + uint8_t DTEInterval; + uint8_t DTEMode; + + uint8_t SVI2Enable; + uint8_t VRHotGpio; + uint8_t AcDcGpio; + uint8_t ThermGpio; + + uint16_t PPM_PkgPwrLimit; + uint16_t PPM_TemperatureLimit; + + uint16_t DefaultTdp; + uint16_t TargetTdp; + + uint16_t FpsHighT; + uint16_t FpsLowT; + + uint16_t BAPMTI_R [SMU7_DTE_ITERATIONS][SMU7_DTE_SOURCES][SMU7_DTE_SINKS]; + uint16_t BAPMTI_RC [SMU7_DTE_ITERATIONS][SMU7_DTE_SOURCES][SMU7_DTE_SINKS]; + + uint8_t DTEAmbientTempBase; + uint8_t DTETjOffset; + uint8_t GpuTjMax; + uint8_t GpuTjHyst; + + uint16_t BootVddc; + uint16_t BootVddci; + + uint16_t BootMVdd; + uint16_t padding; + + uint32_t BAPM_TEMP_GRADIENT; + + uint32_t LowSclkInterruptT; +}; + +typedef struct SMU7_Discrete_DpmTable SMU7_Discrete_DpmTable; + +#define SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE 16 +#define SMU7_DISCRETE_MC_REGISTER_ARRAY_SET_COUNT SMU7_MAX_LEVELS_MEMORY + +struct SMU7_Discrete_MCRegisterAddress +{ + uint16_t s0; + uint16_t s1; +}; + +typedef struct SMU7_Discrete_MCRegisterAddress SMU7_Discrete_MCRegisterAddress; + +struct SMU7_Discrete_MCRegisterSet +{ + uint32_t value[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; +}; + +typedef struct SMU7_Discrete_MCRegisterSet SMU7_Discrete_MCRegisterSet; + +struct SMU7_Discrete_MCRegisters +{ + uint8_t last; + uint8_t reserved[3]; + SMU7_Discrete_MCRegisterAddress address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; + SMU7_Discrete_MCRegisterSet data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SET_COUNT]; +}; + +typedef struct SMU7_Discrete_MCRegisters SMU7_Discrete_MCRegisters; + +struct SMU7_Discrete_PmFuses { + // dw0-dw1 + uint8_t BapmVddCVidHiSidd[8]; + + // dw2-dw3 + uint8_t BapmVddCVidLoSidd[8]; + + // dw4-dw5 + uint8_t VddCVid[8]; + + // dw6 + uint8_t SviLoadLineEn; + uint8_t SviLoadLineVddC; + uint8_t SviLoadLineTrimVddC; + uint8_t SviLoadLineOffsetVddC; + + // dw7 + uint16_t TDC_VDDC_PkgLimit; + uint8_t TDC_VDDC_ThrottleReleaseLimitPerc; + uint8_t TDC_MAWt; + + // dw8 + uint8_t TdcWaterfallCtl; + uint8_t LPMLTemperatureMin; + uint8_t LPMLTemperatureMax; + uint8_t Reserved; + + // dw9-dw10 + uint8_t BapmVddCVidHiSidd2[8]; + + // dw11-dw12 + uint32_t Reserved6[2]; + + // dw13-dw16 + uint8_t GnbLPML[16]; + + // dw17 + uint8_t GnbLPMLMaxVid; + uint8_t GnbLPMLMinVid; + uint8_t Reserved1[2]; + + // dw18 + uint16_t BapmVddCBaseLeakageHiSidd; + uint16_t BapmVddCBaseLeakageLoSidd; +}; + +typedef struct SMU7_Discrete_PmFuses SMU7_Discrete_PmFuses; + + +#pragma pack(pop) + +#endif + -- cgit v1.2.3 From 5e884f606cdba9c599c9c9373808f272ae794088 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 6 Aug 2013 11:39:38 -0400 Subject: drm/radeon: restructure UVD code to handle UVD PG (v2) When we PG (powergate) UVD, we need to re-initialize it before we can use it again. v2: rebase on UVD stop fixes Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 14 +++---- drivers/gpu/drm/radeon/evergreen.c | 2 +- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/r600.c | 74 +++++++++++++++++++----------------- drivers/gpu/drm/radeon/radeon_asic.h | 4 +- drivers/gpu/drm/radeon/rv770.c | 2 +- drivers/gpu/drm/radeon/si.c | 2 +- 7 files changed, 50 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 736a416b51a7..59b866aa08d9 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -69,6 +69,7 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); static void cik_init_pg(struct radeon_device *rdev); static void cik_init_cg(struct radeon_device *rdev); +static void cik_uvd_resume(struct radeon_device *rdev); /* get temperature in millidegrees */ int ci_get_temp(struct radeon_device *rdev) @@ -7619,8 +7620,9 @@ static int cik_startup(struct radeon_device *rdev) return r; } - r = cik_uvd_resume(rdev); + r = radeon_uvd_resume(rdev); if (!r) { + cik_uvd_resume(rdev); r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); if (r) @@ -7708,7 +7710,7 @@ static int cik_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = r600_uvd_init(rdev, true); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } @@ -8598,15 +8600,10 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) return r; } -int cik_uvd_resume(struct radeon_device *rdev) +static void cik_uvd_resume(struct radeon_device *rdev) { uint64_t addr; uint32_t size; - int r; - - r = radeon_uvd_resume(rdev); - if (r) - return r; /* programm the VCPU memory controller bits 0-27 */ addr = rdev->uvd.gpu_addr >> 3; @@ -8632,7 +8629,6 @@ int cik_uvd_resume(struct radeon_device *rdev) addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); - return 0; } static void cik_pcie_gen3_enable(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 2ce12ee3e67f..710c1d4ae5db 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5296,7 +5296,7 @@ static int evergreen_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = r600_uvd_init(rdev, true); if (r) DRM_ERROR("radeon: error initializing UVD (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 21f2eceff2c6..bc298a3500a4 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2230,7 +2230,7 @@ static int cayman_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = r600_uvd_init(rdev, true); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 3db2e4ddb2d6..8a600153ef6c 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2623,7 +2623,7 @@ void r600_dma_fini(struct radeon_device *rdev) /* * UVD */ -int r600_uvd_rbc_start(struct radeon_device *rdev) +static int r600_uvd_rbc_start(struct radeon_device *rdev, bool ring_test) { struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; uint64_t rptr_addr; @@ -2664,47 +2664,47 @@ int r600_uvd_rbc_start(struct radeon_device *rdev) rb_bufsz = (0x1 << 8) | rb_bufsz; WREG32(UVD_RBC_RB_CNTL, rb_bufsz); - ring->ready = true; - r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); - if (r) { - ring->ready = false; - return r; - } + if (ring_test) { + ring->ready = true; + r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } - r = radeon_ring_lock(rdev, ring, 10); - if (r) { - DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); - return r; - } + r = radeon_ring_lock(rdev, ring, 10); + if (r) { + DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); + return r; + } - tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); + tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); - tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); + tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); - tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); + tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); - /* Clear timeout status bits */ - radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); - radeon_ring_write(ring, 0x8); + /* Clear timeout status bits */ + radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); + radeon_ring_write(ring, 0x8); - radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); - radeon_ring_write(ring, 3); + radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); + radeon_ring_write(ring, 3); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring); + } return 0; } -void r600_uvd_stop(struct radeon_device *rdev) +void r600_do_uvd_stop(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - /* force RBC into idle state */ WREG32(UVD_RBC_RB_CNTL, 0x11010101); @@ -2723,11 +2723,17 @@ void r600_uvd_stop(struct radeon_device *rdev) /* Unstall UMC and register bus */ WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); +} +void r600_uvd_stop(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + + r600_do_uvd_stop(rdev); ring->ready = false; } -int r600_uvd_init(struct radeon_device *rdev) +int r600_uvd_init(struct radeon_device *rdev, bool ring_test) { int i, j, r; /* disable byte swapping */ @@ -2815,17 +2821,17 @@ int r600_uvd_init(struct radeon_device *rdev) if (r) { DRM_ERROR("UVD not responding, giving up!!!\n"); - radeon_set_uvd_clocks(rdev, 0, 0); - return r; + goto done; } /* enable interupt */ WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); - r = r600_uvd_rbc_start(rdev); + r = r600_uvd_rbc_start(rdev, ring_test); if (!r) DRM_INFO("UVD initialized successfully.\n"); +done: /* lower clocks again */ radeon_set_uvd_clocks(rdev, 0, 0); diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 1e386c48ae2d..3570817a5847 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -424,8 +424,7 @@ void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rde struct seq_file *m); /* uvd */ -int r600_uvd_init(struct radeon_device *rdev); -int r600_uvd_rbc_start(struct radeon_device *rdev); +int r600_uvd_init(struct radeon_device *rdev, bool ring_test); void r600_uvd_stop(struct radeon_device *rdev); int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void r600_uvd_fence_emit(struct radeon_device *rdev, @@ -696,7 +695,6 @@ u32 cik_get_xclk(struct radeon_device *rdev); uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); -int cik_uvd_resume(struct radeon_device *rdev); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 95590bd07afb..52253b2ab0d5 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1928,7 +1928,7 @@ static int rv770_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = r600_uvd_init(rdev, true); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 4f91e1f4d814..da23ce8f4388 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6421,7 +6421,7 @@ static int si_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = r600_uvd_init(rdev, true); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } -- cgit v1.2.3 From 77df508a98834d8e2fe4c7c4e1089a1ce66ccaa1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 9 Aug 2013 10:02:40 -0400 Subject: drm/radeon/dpm: implement UVD powergating for KB/KV Powergate the UVD block when not in use to save power. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 4 ++-- drivers/gpu/drm/radeon/kv_dpm.c | 22 +++++++++++++++------- drivers/gpu/drm/radeon/radeon_asic.c | 1 + drivers/gpu/drm/radeon/radeon_asic.h | 1 + 4 files changed, 19 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 59b866aa08d9..e661aec734b2 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -69,7 +69,7 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); static void cik_init_pg(struct radeon_device *rdev); static void cik_init_cg(struct radeon_device *rdev); -static void cik_uvd_resume(struct radeon_device *rdev); +void cik_uvd_resume(struct radeon_device *rdev); /* get temperature in millidegrees */ int ci_get_temp(struct radeon_device *rdev) @@ -8600,7 +8600,7 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) return r; } -static void cik_uvd_resume(struct radeon_device *rdev) +void cik_uvd_resume(struct radeon_device *rdev) { uint64_t addr; uint32_t size; diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index c26c4e3005e7..3e232a4d3f4c 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -49,7 +49,7 @@ static int kv_set_thermal_temperature_range(struct radeon_device *rdev, int min_temp, int max_temp); static int kv_init_fps_limits(struct radeon_device *rdev); -static void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); +void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate); static void kv_dpm_powergate_samu(struct radeon_device *rdev, bool gate); static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate); @@ -59,6 +59,10 @@ extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); extern void cik_update_cg(struct radeon_device *rdev, u32 block, bool enable); +extern void cik_uvd_resume(struct radeon_device *rdev); +extern int r600_uvd_init(struct radeon_device *rdev, bool ring_test); +extern void r600_do_uvd_stop(struct radeon_device *rdev); + static const struct kv_lcac_config_values sx_local_cac_cfg_kv[] = { { 0, 4, 1 }, @@ -1201,6 +1205,7 @@ int kv_dpm_enable(struct radeon_device *rdev) kv_dpm_powergate_acp(rdev, true); kv_dpm_powergate_samu(rdev, true); kv_dpm_powergate_vce(rdev, true); + kv_dpm_powergate_uvd(rdev, true); kv_update_current_ps(rdev, rdev->pm.dpm.boot_ps); @@ -1458,7 +1463,7 @@ static int kv_update_acp_dpm(struct radeon_device *rdev, bool gate) return kv_enable_acp_dpm(rdev, !gate); } -static void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) +void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) { struct kv_power_info *pi = kv_get_pi(rdev); @@ -1468,13 +1473,18 @@ static void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) pi->uvd_power_gated = gate; if (gate) { - kv_update_uvd_dpm(rdev, true); + r600_do_uvd_stop(rdev); + cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, false); + kv_update_uvd_dpm(rdev, gate); if (pi->caps_uvd_pg) kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerOFF); } else { if (pi->caps_uvd_pg) kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerON); - kv_update_uvd_dpm(rdev, false); + cik_uvd_resume(rdev); + r600_uvd_init(rdev, false); + cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, true); + kv_update_uvd_dpm(rdev, gate); } } @@ -1714,7 +1724,6 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) return ret; } #endif - kv_update_uvd_dpm(rdev, false); kv_update_sclk_t(rdev); } } else { @@ -1740,7 +1749,6 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) return ret; } #endif - kv_update_uvd_dpm(rdev, false); kv_update_sclk_t(rdev); kv_enable_nb_dpm(rdev); } @@ -2502,7 +2510,7 @@ int kv_dpm_init(struct radeon_device *rdev) pi->voltage_drop_t = 0; pi->caps_sclk_throttle_low_notification = false; pi->caps_fps = false; /* true? */ - pi->caps_uvd_pg = false; /* XXX */ + pi->caps_uvd_pg = true; pi->caps_uvd_dpm = true; pi->caps_vce_pg = false; pi->caps_samu_pg = false; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 1dad80c0e17f..63b6aae66236 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2642,6 +2642,7 @@ static struct radeon_asic kv_asic = { .print_power_state = &kv_dpm_print_power_state, .debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level, .force_performance_level = &kv_dpm_force_performance_level, + .powergate_uvd = &kv_dpm_powergate_uvd, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 3570817a5847..9060757e4dc1 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -784,5 +784,6 @@ void kv_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m); int kv_dpm_force_performance_level(struct radeon_device *rdev, enum radeon_dpm_forced_level level); +void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); #endif -- cgit v1.2.3 From 02c9f7fa4e7230fc4ae8bf26f64e45aa76011f9c Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 13 Aug 2013 11:56:51 +0200 Subject: drm/radeon: rework UVD writeback & [rw]ptr handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware just doesn't support this correctly. Disable it before we accidentally write anywhere we shouldn't. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 3 +-- drivers/gpu/drm/radeon/evergreen.c | 3 +-- drivers/gpu/drm/radeon/ni.c | 3 +-- drivers/gpu/drm/radeon/r600.c | 33 ++++++++++++++++++++------------- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_asic.c | 12 ++++++------ drivers/gpu/drm/radeon/radeon_asic.h | 6 ++++++ drivers/gpu/drm/radeon/radeon_ring.c | 2 +- drivers/gpu/drm/radeon/rv770.c | 3 +-- drivers/gpu/drm/radeon/si.c | 3 +-- 10 files changed, 38 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e661aec734b2..ce7036ae9f5a 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7705,8 +7705,7 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 710c1d4ae5db..2139f6c64341 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5291,8 +5291,7 @@ static int evergreen_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index bc298a3500a4..f543f4ca4dda 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2225,8 +2225,7 @@ static int cayman_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 8a600153ef6c..c1b0aba4431a 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2623,31 +2623,38 @@ void r600_dma_fini(struct radeon_device *rdev) /* * UVD */ +uint32_t r600_uvd_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return RREG32(UVD_RBC_RB_RPTR); +} + +uint32_t r600_uvd_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return RREG32(UVD_RBC_RB_WPTR); +} + +void r600_uvd_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + WREG32(UVD_RBC_RB_WPTR, ring->wptr); +} + static int r600_uvd_rbc_start(struct radeon_device *rdev, bool ring_test) { struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - uint64_t rptr_addr; uint32_t rb_bufsz, tmp; int r; - rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET; - - if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) { - DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n"); - return -EINVAL; - } - /* force RBC into idle state */ WREG32(UVD_RBC_RB_CNTL, 0x11010101); /* Set the write pointer delay */ WREG32(UVD_RBC_RB_WPTR_CNTL, 0); - /* set the wb address */ - WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2); - /* programm the 4GB memory segment for rptr and ring buffer */ - WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) | + WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | (0x7 << 16) | (0x1 << 31)); /* Initialize the ring buffer's read and write pointers */ @@ -2662,7 +2669,7 @@ static int r600_uvd_rbc_start(struct radeon_device *rdev, bool ring_test) /* Set ring buffer size */ rb_bufsz = drm_order(ring->ring_size); rb_bufsz = (0x1 << 8) | rb_bufsz; - WREG32(UVD_RBC_RB_CNTL, rb_bufsz); + WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f); if (ring_test) { ring->ready = true; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b26a20fe2859..2eab174bf22e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1027,7 +1027,6 @@ struct radeon_wb { #define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_IH_WPTR_OFFSET 2048 #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 -#define R600_WB_UVD_RPTR_OFFSET 2560 #define R600_WB_EVENT_OFFSET 3072 #define CIK_WB_CP1_WPTR_OFFSET 3328 #define CIK_WB_CP2_WPTR_OFFSET 3584 diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 012fe7218c74..7432247a812a 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1157,9 +1157,9 @@ static struct radeon_asic_ring rv770_uvd_ring = { .ring_test = &r600_uvd_ring_test, .ib_test = &r600_uvd_ib_test, .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, + .get_rptr = &r600_uvd_get_rptr, + .get_wptr = &r600_uvd_get_wptr, + .set_wptr = &r600_uvd_set_wptr, }; static struct radeon_asic rv770_asic = { @@ -1593,9 +1593,9 @@ static struct radeon_asic_ring cayman_uvd_ring = { .ring_test = &r600_uvd_ring_test, .ib_test = &r600_uvd_ib_test, .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, + .get_rptr = &r600_uvd_get_rptr, + .get_wptr = &r600_uvd_get_wptr, + .set_wptr = &r600_uvd_set_wptr, }; static struct radeon_asic cayman_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 5630291c4b06..37baf9c696f0 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -424,6 +424,12 @@ void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rde struct seq_file *m); /* uvd */ +uint32_t r600_uvd_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t r600_uvd_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void r600_uvd_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); int r600_uvd_init(struct radeon_device *rdev, bool ring_test); void r600_uvd_stop(struct radeon_device *rdev); int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index fb5ea6208970..cb4b931d8d9f 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -363,7 +363,7 @@ u32 radeon_ring_generic_get_rptr(struct radeon_device *rdev, { u32 rptr; - if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX]) + if (rdev->wb.enabled) rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); else rptr = RREG32(ring->rptr_reg); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 52253b2ab0d5..1e8cf49d5871 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1923,8 +1923,7 @@ static int rv770_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index da23ce8f4388..4ff59c8f508f 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6416,8 +6416,7 @@ static int si_startup(struct radeon_device *rdev) if (rdev->has_uvd) { ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (!r) -- cgit v1.2.3 From 2e1e6dad6a6d437e4c40611fdcc4e6cd9e2f969e Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 13 Aug 2013 11:56:52 +0200 Subject: drm/radeon: remove special handling for the DMA ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have callbacks for [rw]ptr handling we can remove the special handling for the DMA rings and use the callbacks instead. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 20 ++++++--------- drivers/gpu/drm/radeon/evergreen.c | 6 ++--- drivers/gpu/drm/radeon/ni.c | 8 +++--- drivers/gpu/drm/radeon/r100.c | 2 +- drivers/gpu/drm/radeon/r600.c | 47 ++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon.h | 5 +--- drivers/gpu/drm/radeon/radeon_asic.c | 30 +++++++++++------------ drivers/gpu/drm/radeon/radeon_asic.h | 7 ++++++ drivers/gpu/drm/radeon/radeon_ring.c | 11 ++------- drivers/gpu/drm/radeon/rv770.c | 6 ++--- drivers/gpu/drm/radeon/si.c | 12 ++++----- 11 files changed, 95 insertions(+), 59 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ce7036ae9f5a..34be795de173 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3414,7 +3414,6 @@ u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, cik_srbm_select(rdev, 0, 0, 0, 0); mutex_unlock(&rdev->srbm_mutex); } - rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return rptr; } @@ -3433,7 +3432,6 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, cik_srbm_select(rdev, 0, 0, 0, 0); mutex_unlock(&rdev->srbm_mutex); } - wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return wptr; } @@ -3441,10 +3439,8 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, void cik_compute_ring_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { - u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask; - - rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr); - WDOORBELL32(ring->doorbell_offset, wptr); + rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); + WDOORBELL32(ring->doorbell_offset, ring->wptr); } /** @@ -7649,7 +7645,7 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; @@ -7658,7 +7654,7 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, - 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF)); + PACKET3(PACKET3_NOP, 0x3FFF)); if (r) return r; ring->me = 1; /* first MEC */ @@ -7670,7 +7666,7 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, - 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF)); + PACKET3(PACKET3_NOP, 0x3FFF)); if (r) return r; /* dGPU only have 1 MEC */ @@ -7683,7 +7679,7 @@ static int cik_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, - 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); if (r) return r; @@ -7691,7 +7687,7 @@ static int cik_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, - 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); if (r) return r; @@ -7707,7 +7703,7 @@ static int cik_startup(struct radeon_device *rdev) if (ring->ring_size) { r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) r = r600_uvd_init(rdev, true); if (r) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 2139f6c64341..389f5a981358 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5268,14 +5268,14 @@ static int evergreen_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR, DMA_RB_WPTR, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0)); if (r) return r; @@ -5293,7 +5293,7 @@ static int evergreen_startup(struct radeon_device *rdev) if (ring->ring_size) { r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) r = r600_uvd_init(rdev, true); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index f543f4ca4dda..e04b17338336 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2192,7 +2192,7 @@ static int cayman_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; @@ -2200,7 +2200,7 @@ static int cayman_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR + DMA0_REGISTER_OFFSET, DMA_RB_WPTR + DMA0_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; @@ -2208,7 +2208,7 @@ static int cayman_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, DMA_RB_RPTR + DMA1_REGISTER_OFFSET, DMA_RB_WPTR + DMA1_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; @@ -2227,7 +2227,7 @@ static int cayman_startup(struct radeon_device *rdev) if (ring->ring_size) { r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) r = r600_uvd_init(rdev, true); if (r) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 75349cdaa84b..2cbc512645d4 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1102,7 +1102,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) r100_cp_load_microcode(rdev); r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET, RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR, - 0, 0x7fffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c1b0aba4431a..30849eca6e07 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2504,6 +2504,49 @@ void r600_cp_fini(struct radeon_device *rdev) * solid fills, and a number of other things. It also * has support for tiling/detiling of buffers. */ + +/** + * r600_dma_get_rptr - get the current read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Get the current rptr from the hardware (r6xx+). + */ +uint32_t r600_dma_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2; +} + +/** + * r600_dma_get_wptr - get the current write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Get the current wptr from the hardware (r6xx+). + */ +uint32_t r600_dma_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2; +} + +/** + * r600_dma_set_wptr - commit the write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Write the wptr back to the hardware (r6xx+). + */ +void r600_dma_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc); +} + /** * r600_dma_stop - stop the async dma engine * @@ -3386,14 +3429,14 @@ static int r600_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR, DMA_RB_WPTR, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 2eab174bf22e..791cc8de6395 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -760,8 +760,6 @@ struct radeon_ring { uint32_t align_mask; uint32_t ptr_mask; bool ready; - u32 ptr_reg_shift; - u32 ptr_reg_mask; u32 nop; u32 idx; u64 last_semaphore_signal_addr; @@ -912,8 +910,7 @@ unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, unsigned size, uint32_t *data); int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size, - unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, - u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop); + unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop); void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 7432247a812a..785b7a7add77 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -896,9 +896,9 @@ static struct radeon_asic_ring r600_dma_ring = { .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, }; static struct radeon_asic r600_asic = { @@ -1275,9 +1275,9 @@ static struct radeon_asic_ring evergreen_dma_ring = { .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &evergreen_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, }; static struct radeon_asic evergreen_asic = { @@ -1580,9 +1580,9 @@ static struct radeon_asic_ring cayman_dma_ring = { .ib_test = &r600_dma_ib_test, .is_lockup = &cayman_dma_is_lockup, .vm_flush = &cayman_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr }; static struct radeon_asic_ring cayman_uvd_ring = { @@ -1822,9 +1822,9 @@ static struct radeon_asic_ring si_dma_ring = { .ib_test = &r600_dma_ib_test, .is_lockup = &si_dma_is_lockup, .vm_flush = &si_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, }; static struct radeon_asic si_asic = { @@ -1966,9 +1966,9 @@ static struct radeon_asic_ring ci_dma_ring = { .ib_test = &cik_sdma_ib_test, .is_lockup = &cik_sdma_is_lockup, .vm_flush = &cik_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, }; static struct radeon_asic ci_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 37baf9c696f0..5c53eb78b22d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -392,6 +392,13 @@ uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); int rv6xx_get_temp(struct radeon_device *rdev); int r600_dpm_pre_set_power_state(struct radeon_device *rdev); void r600_dpm_post_set_power_state(struct radeon_device *rdev); +/* r600 dma */ +uint32_t r600_dma_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t r600_dma_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void r600_dma_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); /* rv6xx dpm */ int rv6xx_dpm_init(struct radeon_device *rdev); int rv6xx_dpm_enable(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index cb4b931d8d9f..46a25f037b84 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -367,7 +367,6 @@ u32 radeon_ring_generic_get_rptr(struct radeon_device *rdev, rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); else rptr = RREG32(ring->rptr_reg); - rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return rptr; } @@ -378,7 +377,6 @@ u32 radeon_ring_generic_get_wptr(struct radeon_device *rdev, u32 wptr; wptr = RREG32(ring->wptr_reg); - wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return wptr; } @@ -386,7 +384,7 @@ u32 radeon_ring_generic_get_wptr(struct radeon_device *rdev, void radeon_ring_generic_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { - WREG32(ring->wptr_reg, (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask); + WREG32(ring->wptr_reg, ring->wptr); (void)RREG32(ring->wptr_reg); } @@ -719,16 +717,13 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, * @rptr_offs: offset of the rptr writeback location in the WB buffer * @rptr_reg: MMIO offset of the rptr register * @wptr_reg: MMIO offset of the wptr register - * @ptr_reg_shift: bit offset of the rptr/wptr values - * @ptr_reg_mask: bit mask of the rptr/wptr values * @nop: nop packet for this ring * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. */ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size, - unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, - u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop) + unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop) { int r; @@ -736,8 +731,6 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig ring->rptr_offs = rptr_offs; ring->rptr_reg = rptr_reg; ring->wptr_reg = wptr_reg; - ring->ptr_reg_shift = ptr_reg_shift; - ring->ptr_reg_mask = ptr_reg_mask; ring->nop = nop; /* Allocate ring buffer */ if (ring->ring_obj == NULL) { diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 1e8cf49d5871..fd9dcb2d182b 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1899,14 +1899,14 @@ static int rv770_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR, DMA_RB_WPTR, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; @@ -1925,7 +1925,7 @@ static int rv770_startup(struct radeon_device *rdev) if (ring->ring_size) { r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) r = r600_uvd_init(rdev, true); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 4ff59c8f508f..ae232be62921 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6368,21 +6368,21 @@ static int si_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, CP_RB1_RPTR, CP_RB1_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, CP_RB2_RPTR, CP_RB2_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; @@ -6390,7 +6390,7 @@ static int si_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR + DMA0_REGISTER_OFFSET, DMA_RB_WPTR + DMA0_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); if (r) return r; @@ -6398,7 +6398,7 @@ static int si_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, DMA_RB_RPTR + DMA1_REGISTER_OFFSET, DMA_RB_WPTR + DMA1_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); if (r) return r; @@ -6418,7 +6418,7 @@ static int si_startup(struct radeon_device *rdev) if (ring->ring_size) { r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) r = r600_uvd_init(rdev, true); if (r) -- cgit v1.2.3 From e409b128625732926c112cc9b709fb7bb1aa387f Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 13 Aug 2013 11:56:53 +0200 Subject: drm/radeon: separate UVD code v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our different hardware blocks are actually completely separated, so it doesn't make much sense any more to structure the code by pure chipset generations. Start restructuring the code by separating our the UVD block. v2: updated commit message v3: rebased and restructurized start/stop functions for kv dpm. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/Makefile | 8 + drivers/gpu/drm/radeon/cik.c | 41 +--- drivers/gpu/drm/radeon/evergreen.c | 8 +- drivers/gpu/drm/radeon/kv_dpm.c | 11 +- drivers/gpu/drm/radeon/ni.c | 25 +- drivers/gpu/drm/radeon/r600.c | 345 ---------------------------- drivers/gpu/drm/radeon/radeon_asic.c | 32 +-- drivers/gpu/drm/radeon/radeon_asic.h | 55 +++-- drivers/gpu/drm/radeon/rv770.c | 105 +-------- drivers/gpu/drm/radeon/rv770d.h | 16 ++ drivers/gpu/drm/radeon/si.c | 8 +- drivers/gpu/drm/radeon/uvd_v1_0.c | 434 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/uvd_v2_2.c | 165 +++++++++++++ drivers/gpu/drm/radeon/uvd_v3_1.c | 55 +++++ drivers/gpu/drm/radeon/uvd_v4_2.c | 73 ++++++ 15 files changed, 826 insertions(+), 555 deletions(-) create mode 100644 drivers/gpu/drm/radeon/uvd_v1_0.c create mode 100644 drivers/gpu/drm/radeon/uvd_v2_2.c create mode 100644 drivers/gpu/drm/radeon/uvd_v3_1.c create mode 100644 drivers/gpu/drm/radeon/uvd_v4_2.c (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index ea913cc681b4..1e23b18d549a 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -82,6 +82,14 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ ci_dpm.o +# add UVD block +radeon-y += \ + radeon_uvd.o \ + uvd_v1_0.o \ + uvd_v2_2.o \ + uvd_v3_1.o \ + uvd_v4_2.o + radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 34be795de173..1400b5203db1 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -69,7 +69,6 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); static void cik_init_pg(struct radeon_device *rdev); static void cik_init_cg(struct radeon_device *rdev); -void cik_uvd_resume(struct radeon_device *rdev); /* get temperature in millidegrees */ int ci_get_temp(struct radeon_device *rdev) @@ -7616,9 +7615,8 @@ static int cik_startup(struct radeon_device *rdev) return r; } - r = radeon_uvd_resume(rdev); + r = uvd_v4_2_resume(rdev); if (!r) { - cik_uvd_resume(rdev); r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); if (r) @@ -7705,7 +7703,7 @@ static int cik_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev, true); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } @@ -7770,7 +7768,7 @@ int cik_suspend(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); cik_cp_enable(rdev, false); cik_sdma_enable(rdev, false); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); cik_irq_suspend(rdev); radeon_wb_disable(rdev); @@ -7934,7 +7932,7 @@ void cik_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); cik_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -8595,37 +8593,6 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) return r; } -void cik_uvd_resume(struct radeon_device *rdev) -{ - uint64_t addr; - uint32_t size; - - /* programm the VCPU memory controller bits 0-27 */ - addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET0, addr); - WREG32(UVD_VCPU_CACHE_SIZE0, size); - - addr += size; - size = RADEON_UVD_STACK_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET1, addr); - WREG32(UVD_VCPU_CACHE_SIZE1, size); - - addr += size; - size = RADEON_UVD_HEAP_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET2, addr); - WREG32(UVD_VCPU_CACHE_SIZE2, size); - - /* bits 28-31 */ - addr = (rdev->uvd.gpu_addr >> 28) & 0xF; - WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); - - /* bits 32-39 */ - addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; - WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); - -} - static void cik_pcie_gen3_enable(struct radeon_device *rdev) { struct pci_dev *root = rdev->pdev->bus->self; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 389f5a981358..52ed22333f0d 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5239,7 +5239,7 @@ static int evergreen_startup(struct radeon_device *rdev) return r; } - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -5295,7 +5295,7 @@ static int evergreen_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev, true); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: error initializing UVD (%d).\n", r); @@ -5350,7 +5350,7 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); @@ -5487,7 +5487,7 @@ void evergreen_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 3e232a4d3f4c..ef6c901690da 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -26,6 +26,7 @@ #include "cikd.h" #include "r600_dpm.h" #include "kv_dpm.h" +#include "radeon_asic.h" #include #define KV_MAX_DEEPSLEEP_DIVIDER_ID 5 @@ -59,10 +60,6 @@ extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); extern void cik_update_cg(struct radeon_device *rdev, u32 block, bool enable); -extern void cik_uvd_resume(struct radeon_device *rdev); -extern int r600_uvd_init(struct radeon_device *rdev, bool ring_test); -extern void r600_do_uvd_stop(struct radeon_device *rdev); - static const struct kv_lcac_config_values sx_local_cac_cfg_kv[] = { { 0, 4, 1 }, @@ -1473,7 +1470,7 @@ void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) pi->uvd_power_gated = gate; if (gate) { - r600_do_uvd_stop(rdev); + uvd_v1_0_stop(rdev); cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, false); kv_update_uvd_dpm(rdev, gate); if (pi->caps_uvd_pg) @@ -1481,8 +1478,8 @@ void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) } else { if (pi->caps_uvd_pg) kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerON); - cik_uvd_resume(rdev); - r600_uvd_init(rdev, false); + uvd_v4_2_resume(rdev); + uvd_v1_0_start(rdev); cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, true); kv_update_uvd_dpm(rdev, gate); } diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index e04b17338336..0205fa1594fa 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1373,23 +1373,6 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_write(ring, 10); /* poll interval */ } -void cayman_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); - radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); - radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); - radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); -} - static void cayman_cp_enable(struct radeon_device *rdev, bool enable) { if (enable) @@ -2141,7 +2124,7 @@ static int cayman_startup(struct radeon_device *rdev) return r; } - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -2229,7 +2212,7 @@ static int cayman_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev, true); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } @@ -2283,7 +2266,7 @@ int cayman_suspend(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); cayman_cp_enable(rdev, false); cayman_dma_stop(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); @@ -2414,7 +2397,7 @@ void cayman_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 30849eca6e07..3a08ef92d33f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2663,231 +2663,6 @@ void r600_dma_fini(struct radeon_device *rdev) radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); } -/* - * UVD - */ -uint32_t r600_uvd_get_rptr(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - return RREG32(UVD_RBC_RB_RPTR); -} - -uint32_t r600_uvd_get_wptr(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - return RREG32(UVD_RBC_RB_WPTR); -} - -void r600_uvd_set_wptr(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - WREG32(UVD_RBC_RB_WPTR, ring->wptr); -} - -static int r600_uvd_rbc_start(struct radeon_device *rdev, bool ring_test) -{ - struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - uint32_t rb_bufsz, tmp; - int r; - - /* force RBC into idle state */ - WREG32(UVD_RBC_RB_CNTL, 0x11010101); - - /* Set the write pointer delay */ - WREG32(UVD_RBC_RB_WPTR_CNTL, 0); - - /* programm the 4GB memory segment for rptr and ring buffer */ - WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | - (0x7 << 16) | (0x1 << 31)); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(UVD_RBC_RB_RPTR, 0x0); - - ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); - WREG32(UVD_RBC_RB_WPTR, ring->wptr); - - /* set the ring address */ - WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); - - /* Set ring buffer size */ - rb_bufsz = drm_order(ring->ring_size); - rb_bufsz = (0x1 << 8) | rb_bufsz; - WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f); - - if (ring_test) { - ring->ready = true; - r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); - if (r) { - ring->ready = false; - return r; - } - - r = radeon_ring_lock(rdev, ring, 10); - if (r) { - DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); - return r; - } - - tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); - - /* Clear timeout status bits */ - radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); - radeon_ring_write(ring, 0x8); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); - radeon_ring_write(ring, 3); - - radeon_ring_unlock_commit(rdev, ring); - } - - return 0; -} - -void r600_do_uvd_stop(struct radeon_device *rdev) -{ - /* force RBC into idle state */ - WREG32(UVD_RBC_RB_CNTL, 0x11010101); - - /* Stall UMC and register bus before resetting VCPU */ - WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); - WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); - mdelay(1); - - /* put VCPU into reset */ - WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); - mdelay(5); - - /* disable VCPU clock */ - WREG32(UVD_VCPU_CNTL, 0x0); - - /* Unstall UMC and register bus */ - WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); - WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); -} - -void r600_uvd_stop(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - - r600_do_uvd_stop(rdev); - ring->ready = false; -} - -int r600_uvd_init(struct radeon_device *rdev, bool ring_test) -{ - int i, j, r; - /* disable byte swapping */ - u32 lmi_swap_cntl = 0; - u32 mp_swap_cntl = 0; - - /* raise clocks while booting up the VCPU */ - radeon_set_uvd_clocks(rdev, 53300, 40000); - - /* disable clock gating */ - WREG32(UVD_CGC_GATE, 0); - - /* disable interupt */ - WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); - - /* Stall UMC and register bus before resetting VCPU */ - WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); - WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); - mdelay(1); - - /* put LMI, VCPU, RBC etc... into reset */ - WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | - LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | - CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); - mdelay(5); - - /* take UVD block out of reset */ - WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); - mdelay(5); - - /* initialize UVD memory controller */ - WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | - (1 << 21) | (1 << 9) | (1 << 20)); - -#ifdef __BIG_ENDIAN - /* swap (8 in 32) RB and IB */ - lmi_swap_cntl = 0xa; - mp_swap_cntl = 0; -#endif - WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl); - WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl); - - WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); - WREG32(UVD_MPC_SET_MUXA1, 0x0); - WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); - WREG32(UVD_MPC_SET_MUXB1, 0x0); - WREG32(UVD_MPC_SET_ALU, 0); - WREG32(UVD_MPC_SET_MUX, 0x88); - - /* take all subblocks out of reset, except VCPU */ - WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); - mdelay(5); - - /* enable VCPU clock */ - WREG32(UVD_VCPU_CNTL, 1 << 9); - - /* enable UMC */ - WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); - - /* boot up the VCPU */ - WREG32(UVD_SOFT_RESET, 0); - mdelay(10); - - WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); - - for (i = 0; i < 10; ++i) { - uint32_t status; - for (j = 0; j < 100; ++j) { - status = RREG32(UVD_STATUS); - if (status & 2) - break; - mdelay(10); - } - r = 0; - if (status & 2) - break; - - DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); - WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); - mdelay(10); - WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); - mdelay(10); - r = -1; - } - - if (r) { - DRM_ERROR("UVD not responding, giving up!!!\n"); - goto done; - } - - /* enable interupt */ - WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); - - r = r600_uvd_rbc_start(rdev, ring_test); - if (!r) - DRM_INFO("UVD initialized successfully.\n"); - -done: - /* lower clocks again */ - radeon_set_uvd_clocks(rdev, 0, 0); - - return r; -} - /* * GPU scratch registers helpers function. */ @@ -2997,40 +2772,6 @@ int r600_dma_ring_test(struct radeon_device *rdev, return r; } -int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) -{ - uint32_t tmp = 0; - unsigned i; - int r; - - WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); - r = radeon_ring_lock(rdev, ring, 3); - if (r) { - DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", - ring->idx, r); - return r; - } - radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); - radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(UVD_CONTEXT_ID); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - - if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - return r; -} - /* * CP fences/semaphores */ @@ -3082,30 +2823,6 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } } -void r600_uvd_fence_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; - - radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); - radeon_ring_write(ring, fence->seq); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); - radeon_ring_write(ring, addr & 0xffffffff); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); - radeon_ring_write(ring, upper_32_bits(addr) & 0xff); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); - radeon_ring_write(ring, 2); - return; -} - void r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -3175,23 +2892,6 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, upper_32_bits(addr) & 0xff); } -void r600_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); - radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); - radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); - radeon_ring_write(ring, emit_wait ? 1 : 0); -} - /** * r600_copy_cpdma - copy pages using the CP DMA engine * @@ -3656,16 +3356,6 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_write(ring, ib->length_dw); } -void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); - radeon_ring_write(ring, ib->gpu_addr); - radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); - radeon_ring_write(ring, ib->length_dw); -} - int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_ib ib; @@ -3783,41 +3473,6 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } -int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) -{ - struct radeon_fence *fence = NULL; - int r; - - r = radeon_set_uvd_clocks(rdev, 53300, 40000); - if (r) { - DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r); - return r; - } - - r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); - if (r) { - DRM_ERROR("radeon: failed to get create msg (%d).\n", r); - goto error; - } - - r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); - if (r) { - DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); - goto error; - } - - r = radeon_fence_wait(fence, false); - if (r) { - DRM_ERROR("radeon: fence wait failed (%d).\n", r); - goto error; - } - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); -error: - radeon_fence_unref(&fence); - radeon_set_uvd_clocks(rdev, 0, 0); - return r; -} - /** * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine * diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 785b7a7add77..da755bf37421 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1150,16 +1150,16 @@ static struct radeon_asic rs780_asic = { }; static struct radeon_asic_ring rv770_uvd_ring = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &r600_uvd_semaphore_emit, + .ib_execute = &uvd_v1_0_ib_execute, + .emit_fence = &uvd_v2_2_fence_emit, + .emit_semaphore = &uvd_v1_0_semaphore_emit, .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, + .ring_test = &uvd_v1_0_ring_test, + .ib_test = &uvd_v1_0_ib_test, .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &r600_uvd_get_rptr, - .get_wptr = &r600_uvd_get_wptr, - .set_wptr = &r600_uvd_set_wptr, + .get_rptr = &uvd_v1_0_get_rptr, + .get_wptr = &uvd_v1_0_get_wptr, + .set_wptr = &uvd_v1_0_set_wptr, }; static struct radeon_asic rv770_asic = { @@ -1586,16 +1586,16 @@ static struct radeon_asic_ring cayman_dma_ring = { }; static struct radeon_asic_ring cayman_uvd_ring = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &cayman_uvd_semaphore_emit, + .ib_execute = &uvd_v1_0_ib_execute, + .emit_fence = &uvd_v2_2_fence_emit, + .emit_semaphore = &uvd_v3_1_semaphore_emit, .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, + .ring_test = &uvd_v1_0_ring_test, + .ib_test = &uvd_v1_0_ib_test, .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &r600_uvd_get_rptr, - .get_wptr = &r600_uvd_get_wptr, - .set_wptr = &r600_uvd_set_wptr, + .get_rptr = &uvd_v1_0_get_rptr, + .get_wptr = &uvd_v1_0_get_wptr, + .set_wptr = &uvd_v1_0_set_wptr, }; static struct radeon_asic cayman_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 5c53eb78b22d..e69f00a7f153 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -336,7 +336,6 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); -int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); int r600_copy_cpdma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence **fence); @@ -430,24 +429,6 @@ void rs780_dpm_print_power_state(struct radeon_device *rdev, void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m); -/* uvd */ -uint32_t r600_uvd_get_rptr(struct radeon_device *rdev, - struct radeon_ring *ring); -uint32_t r600_uvd_get_wptr(struct radeon_device *rdev, - struct radeon_ring *ring); -void r600_uvd_set_wptr(struct radeon_device *rdev, - struct radeon_ring *ring); -int r600_uvd_init(struct radeon_device *rdev, bool ring_test); -void r600_uvd_stop(struct radeon_device *rdev); -int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); -void r600_uvd_fence_emit(struct radeon_device *rdev, - struct radeon_fence *fence); -void r600_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait); -void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); - /* * rv770,rv730,rv710,rv740 */ @@ -465,7 +446,6 @@ int rv770_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence **fence); u32 rv770_get_xclk(struct radeon_device *rdev); -int rv770_uvd_resume(struct radeon_device *rdev); int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); int rv770_get_temp(struct radeon_device *rdev); /* rv7xx pm */ @@ -800,4 +780,39 @@ int kv_dpm_force_performance_level(struct radeon_device *rdev, enum radeon_dpm_forced_level level); void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); +/* uvd v1.0 */ +uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void uvd_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); + +int uvd_v1_0_init(struct radeon_device *rdev); +void uvd_v1_0_fini(struct radeon_device *rdev); +int uvd_v1_0_start(struct radeon_device *rdev); +void uvd_v1_0_stop(struct radeon_device *rdev); + +int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); +void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); + +/* uvd v2.2 */ +int uvd_v2_2_resume(struct radeon_device *rdev); +void uvd_v2_2_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); + +/* uvd v3.1 */ +void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); + +/* uvd v4.2 */ +int uvd_v4_2_resume(struct radeon_device *rdev); + #endif diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index fd9dcb2d182b..aaab7b1bba27 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -801,103 +801,6 @@ u32 rv770_get_xclk(struct radeon_device *rdev) return reference_clock; } -int rv770_uvd_resume(struct radeon_device *rdev) -{ - uint64_t addr; - uint32_t chip_id, size; - int r; - - r = radeon_uvd_resume(rdev); - if (r) - return r; - - /* programm the VCPU memory controller bits 0-27 */ - addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET0, addr); - WREG32(UVD_VCPU_CACHE_SIZE0, size); - - addr += size; - size = RADEON_UVD_STACK_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET1, addr); - WREG32(UVD_VCPU_CACHE_SIZE1, size); - - addr += size; - size = RADEON_UVD_HEAP_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET2, addr); - WREG32(UVD_VCPU_CACHE_SIZE2, size); - - /* bits 28-31 */ - addr = (rdev->uvd.gpu_addr >> 28) & 0xF; - WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); - - /* bits 32-39 */ - addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; - WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); - - /* tell firmware which hardware it is running on */ - switch (rdev->family) { - default: - return -EINVAL; - case CHIP_RV710: - chip_id = 0x01000005; - break; - case CHIP_RV730: - chip_id = 0x01000006; - break; - case CHIP_RV740: - chip_id = 0x01000007; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - chip_id = 0x01000008; - break; - case CHIP_JUNIPER: - chip_id = 0x01000009; - break; - case CHIP_REDWOOD: - chip_id = 0x0100000a; - break; - case CHIP_CEDAR: - chip_id = 0x0100000b; - break; - case CHIP_SUMO: - case CHIP_SUMO2: - chip_id = 0x0100000c; - break; - case CHIP_PALM: - chip_id = 0x0100000e; - break; - case CHIP_CAYMAN: - chip_id = 0x0100000f; - break; - case CHIP_BARTS: - chip_id = 0x01000010; - break; - case CHIP_TURKS: - chip_id = 0x01000011; - break; - case CHIP_CAICOS: - chip_id = 0x01000012; - break; - case CHIP_TAHITI: - chip_id = 0x01000014; - break; - case CHIP_VERDE: - chip_id = 0x01000015; - break; - case CHIP_PITCAIRN: - chip_id = 0x01000016; - break; - case CHIP_ARUBA: - chip_id = 0x01000017; - break; - } - WREG32(UVD_VCPU_CHIP_ID, chip_id); - - return 0; -} - u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; @@ -1870,7 +1773,7 @@ static int rv770_startup(struct radeon_device *rdev) return r; } - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -1927,7 +1830,7 @@ static int rv770_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev, true); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); @@ -1977,7 +1880,7 @@ int rv770_resume(struct radeon_device *rdev) int rv770_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); @@ -2092,7 +1995,7 @@ void rv770_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index 6bef2b7d601b..9fe60e542922 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -971,7 +971,21 @@ # define TARGET_LINK_SPEED_MASK (0xf << 0) # define SELECTABLE_DEEMPHASIS (1 << 6) +/* + * PM4 + */ +#define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ + (((reg) >> 2) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + /* UVD */ +#define UVD_GPCOM_VCPU_CMD 0xef0c +#define UVD_GPCOM_VCPU_DATA0 0xef10 +#define UVD_GPCOM_VCPU_DATA1 0xef14 + #define UVD_LMI_EXT40_ADDR 0xf498 #define UVD_VCPU_CHIP_ID 0xf4d4 #define UVD_VCPU_CACHE_OFFSET0 0xf4d8 @@ -985,4 +999,6 @@ #define UVD_RBC_RB_RPTR 0xf690 #define UVD_RBC_RB_WPTR 0xf694 +#define UVD_CONTEXT_ID 0xf6f4 + #endif diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index ae232be62921..f3f79089405e 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6339,7 +6339,7 @@ static int si_startup(struct radeon_device *rdev) } if (rdev->has_uvd) { - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -6420,7 +6420,7 @@ static int si_startup(struct radeon_device *rdev) UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev, true); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } @@ -6473,7 +6473,7 @@ int si_suspend(struct radeon_device *rdev) si_cp_enable(rdev, false); cayman_dma_stop(rdev); if (rdev->has_uvd) { - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); } si_irq_suspend(rdev); @@ -6616,7 +6616,7 @@ void si_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->has_uvd) { - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); } si_pcie_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c new file mode 100644 index 000000000000..76ca669f0c8e --- /dev/null +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -0,0 +1,434 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "r600d.h" + +/** + * uvd_v1_0_get_rptr - get read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware read pointer + */ +uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return RREG32(UVD_RBC_RB_RPTR); +} + +/** + * uvd_v1_0_get_wptr - get write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware write pointer + */ +uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return RREG32(UVD_RBC_RB_WPTR); +} + +/** + * uvd_v1_0_set_wptr - set write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Commits the write pointer to the hardware + */ +void uvd_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + WREG32(UVD_RBC_RB_WPTR, ring->wptr); +} + +/** + * uvd_v1_0_init - start and test UVD block + * + * @rdev: radeon_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +int uvd_v1_0_init(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + uint32_t tmp; + int r; + + /* raise clocks while booting up the VCPU */ + radeon_set_uvd_clocks(rdev, 53300, 40000); + + uvd_v1_0_start(rdev); + + ring->ready = true; + r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); + if (r) { + ring->ready = false; + goto done; + } + + r = radeon_ring_lock(rdev, ring, 10); + if (r) { + DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); + goto done; + } + + tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); + radeon_ring_write(ring, 0x8); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); + radeon_ring_write(ring, 3); + + radeon_ring_unlock_commit(rdev, ring); + +done: + /* lower clocks again */ + radeon_set_uvd_clocks(rdev, 0, 0); + + if (!r) + DRM_INFO("UVD initialized successfully.\n"); + + return r; +} + +/** + * uvd_v1_0_fini - stop the hardware block + * + * @rdev: radeon_device pointer + * + * Stop the UVD block, mark ring as not ready any more + */ +void uvd_v1_0_fini(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + + uvd_v1_0_stop(rdev); + ring->ready = false; +} + +/** + * uvd_v1_0_start - start UVD block + * + * @rdev: radeon_device pointer + * + * Setup and start the UVD block + */ +int uvd_v1_0_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + uint32_t rb_bufsz; + int i, j, r; + + /* disable byte swapping */ + u32 lmi_swap_cntl = 0; + u32 mp_swap_cntl = 0; + + /* disable clock gating */ + WREG32(UVD_CGC_GATE, 0); + + /* disable interupt */ + WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); + mdelay(1); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | + LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | + CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); + mdelay(5); + + /* take UVD block out of reset */ + WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); + mdelay(5); + + /* initialize UVD memory controller */ + WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | + (1 << 21) | (1 << 9) | (1 << 20)); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; + mp_swap_cntl = 0; +#endif + WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl); + + WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); + WREG32(UVD_MPC_SET_MUXA1, 0x0); + WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); + WREG32(UVD_MPC_SET_MUXB1, 0x0); + WREG32(UVD_MPC_SET_ALU, 0); + WREG32(UVD_MPC_SET_MUX, 0x88); + + /* take all subblocks out of reset, except VCPU */ + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); + mdelay(5); + + /* enable VCPU clock */ + WREG32(UVD_VCPU_CNTL, 1 << 9); + + /* enable UMC */ + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + + /* boot up the VCPU */ + WREG32(UVD_SOFT_RESET, 0); + mdelay(10); + + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(UVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); + WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); + mdelay(10); + WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("UVD not responding, giving up!!!\n"); + return r; + } + + /* enable interupt */ + WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); + + /* force RBC into idle state */ + WREG32(UVD_RBC_RB_CNTL, 0x11010101); + + /* Set the write pointer delay */ + WREG32(UVD_RBC_RB_WPTR_CNTL, 0); + + /* programm the 4GB memory segment for rptr and ring buffer */ + WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | + (0x7 << 16) | (0x1 << 31)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(UVD_RBC_RB_RPTR, 0x0); + + ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); + WREG32(UVD_RBC_RB_WPTR, ring->wptr); + + /* set the ring address */ + WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); + + /* Set ring buffer size */ + rb_bufsz = drm_order(ring->ring_size); + rb_bufsz = (0x1 << 8) | rb_bufsz; + WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f); + + return 0; +} + +/** + * uvd_v1_0_stop - stop UVD block + * + * @rdev: radeon_device pointer + * + * stop the UVD block + */ +void uvd_v1_0_stop(struct radeon_device *rdev) +{ + /* force RBC into idle state */ + WREG32(UVD_RBC_RB_CNTL, 0x11010101); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); + mdelay(1); + + /* put VCPU into reset */ + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); + mdelay(5); + + /* disable VCPU clock */ + WREG32(UVD_VCPU_CNTL, 0x0); + + /* Unstall UMC and register bus */ + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); +} + +/** + * uvd_v1_0_ring_test - register write test + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Test if we can successfully write to the context register + */ +int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, ring, 3); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(UVD_CONTEXT_ID); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/** + * uvd_v1_0_semaphore_emit - emit semaphore command + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * @semaphore: semaphore to emit commands for + * @emit_wait: true if we should emit a wait command + * + * Emit a semaphore command (either wait or signal) to the UVD ring. + */ +void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, emit_wait ? 1 : 0); +} + +/** + * uvd_v1_0_ib_execute - execute indirect buffer + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer + */ +void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); + radeon_ring_write(ring, ib->gpu_addr); + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); + radeon_ring_write(ring, ib->length_dw); +} + +/** + * uvd_v1_0_ib_test - test ib execution + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Test if we can successfully execute an IB + */ +int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_fence *fence = NULL; + int r; + + r = radeon_set_uvd_clocks(rdev, 53300, 40000); + if (r) { + DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r); + return r; + } + + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); + if (r) { + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); + goto error; + } + + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); + if (r) { + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); + goto error; + } + + r = radeon_fence_wait(fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + goto error; + } + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); +error: + radeon_fence_unref(&fence); + radeon_set_uvd_clocks(rdev, 0, 0); + return r; +} diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c new file mode 100644 index 000000000000..b19ef4951085 --- /dev/null +++ b/drivers/gpu/drm/radeon/uvd_v2_2.c @@ -0,0 +1,165 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "rv770d.h" + +/** + * uvd_v2_2_fence_emit - emit an fence & trap command + * + * @rdev: radeon_device pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +void uvd_v2_2_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 2); + return; +} + +/** + * uvd_v2_2_resume - memory controller programming + * + * @rdev: radeon_device pointer + * + * Let the UVD memory controller know it's offsets + */ +int uvd_v2_2_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t chip_id, size; + int r; + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + /* programm the VCPU memory controller bits 0-27 */ + addr = rdev->uvd.gpu_addr >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + /* tell firmware which hardware it is running on */ + switch (rdev->family) { + default: + return -EINVAL; + case CHIP_RV710: + chip_id = 0x01000005; + break; + case CHIP_RV730: + chip_id = 0x01000006; + break; + case CHIP_RV740: + chip_id = 0x01000007; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + chip_id = 0x01000008; + break; + case CHIP_JUNIPER: + chip_id = 0x01000009; + break; + case CHIP_REDWOOD: + chip_id = 0x0100000a; + break; + case CHIP_CEDAR: + chip_id = 0x0100000b; + break; + case CHIP_SUMO: + case CHIP_SUMO2: + chip_id = 0x0100000c; + break; + case CHIP_PALM: + chip_id = 0x0100000e; + break; + case CHIP_CAYMAN: + chip_id = 0x0100000f; + break; + case CHIP_BARTS: + chip_id = 0x01000010; + break; + case CHIP_TURKS: + chip_id = 0x01000011; + break; + case CHIP_CAICOS: + chip_id = 0x01000012; + break; + case CHIP_TAHITI: + chip_id = 0x01000014; + break; + case CHIP_VERDE: + chip_id = 0x01000015; + break; + case CHIP_PITCAIRN: + chip_id = 0x01000016; + break; + case CHIP_ARUBA: + chip_id = 0x01000017; + break; + } + WREG32(UVD_VCPU_CHIP_ID, chip_id); + + return 0; +} diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c new file mode 100644 index 000000000000..5b6fa1f62d4e --- /dev/null +++ b/drivers/gpu/drm/radeon/uvd_v3_1.c @@ -0,0 +1,55 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "nid.h" + +/** + * uvd_v3_1_semaphore_emit - emit semaphore command + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * @semaphore: semaphore to emit commands for + * @emit_wait: true if we should emit a wait command + * + * Emit a semaphore command (either wait or signal) to the UVD ring. + */ +void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); +} diff --git a/drivers/gpu/drm/radeon/uvd_v4_2.c b/drivers/gpu/drm/radeon/uvd_v4_2.c new file mode 100644 index 000000000000..d7e480786098 --- /dev/null +++ b/drivers/gpu/drm/radeon/uvd_v4_2.c @@ -0,0 +1,73 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" + +/** + * uvd_v4_2_resume - memory controller programming + * + * @rdev: radeon_device pointer + * + * Let the UVD memory controller know it's offsets + */ +int uvd_v4_2_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t size; + int r; + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + /* programm the VCPU memory controller bits 0-27 */ + addr = rdev->uvd.gpu_addr >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + return 0; +} -- cgit v1.2.3 From 2483b4ea982efe8a544697d3f9642932e9af4dc1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 13 Aug 2013 11:56:54 +0200 Subject: drm/radeon: separate DMA code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to separating the UVD code, just put the DMA functions into separate files. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/Makefile | 9 + drivers/gpu/drm/radeon/cik.c | 736 +------------------------------ drivers/gpu/drm/radeon/cik_sdma.c | 785 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/evergreen.c | 161 +------ drivers/gpu/drm/radeon/evergreen_dma.c | 190 ++++++++ drivers/gpu/drm/radeon/ni.c | 293 +----------- drivers/gpu/drm/radeon/ni_dma.c | 338 ++++++++++++++ drivers/gpu/drm/radeon/r600.c | 473 +------------------- drivers/gpu/drm/radeon/r600_dma.c | 497 +++++++++++++++++++++ drivers/gpu/drm/radeon/rv770.c | 74 ---- drivers/gpu/drm/radeon/rv770_dma.c | 101 +++++ drivers/gpu/drm/radeon/si.c | 188 +------- drivers/gpu/drm/radeon/si_dma.c | 235 ++++++++++ 13 files changed, 2181 insertions(+), 1899 deletions(-) create mode 100644 drivers/gpu/drm/radeon/cik_sdma.c create mode 100644 drivers/gpu/drm/radeon/evergreen_dma.c create mode 100644 drivers/gpu/drm/radeon/ni_dma.c create mode 100644 drivers/gpu/drm/radeon/r600_dma.c create mode 100644 drivers/gpu/drm/radeon/rv770_dma.c create mode 100644 drivers/gpu/drm/radeon/si_dma.c (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 1e23b18d549a..da2a8e9e9308 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -82,6 +82,15 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ ci_dpm.o +# add async DMA block +radeon-y += \ + r600_dma.o \ + rv770_dma.o \ + evergreen_dma.o \ + ni_dma.o \ + si_dma.o \ + cik_sdma.o \ + # add UVD block radeon-y += \ radeon_uvd.o \ diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 1400b5203db1..692e31b95d34 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -64,6 +64,14 @@ extern int sumo_rlc_init(struct radeon_device *rdev); extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern void si_rlc_reset(struct radeon_device *rdev); extern void si_init_uvd_internal_cg(struct radeon_device *rdev); +extern int cik_sdma_resume(struct radeon_device *rdev); +extern void cik_sdma_enable(struct radeon_device *rdev, bool enable); +extern void cik_sdma_fini(struct radeon_device *rdev); +extern void cik_sdma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); @@ -3987,579 +3995,6 @@ static int cik_cp_resume(struct radeon_device *rdev) return 0; } -/* - * sDMA - System DMA - * Starting with CIK, the GPU has new asynchronous - * DMA engines. These engines are used for compute - * and gfx. There are two DMA engines (SDMA0, SDMA1) - * and each one supports 1 ring buffer used for gfx - * and 2 queues used for compute. - * - * The programming model is very similar to the CP - * (ring buffer, IBs, etc.), but sDMA has it's own - * packet format that is different from the PM4 format - * used by the CP. sDMA supports copying data, writing - * embedded data, solid fills, and a number of other - * things. It also has support for tiling/detiling of - * buffers. - */ -/** - * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ib: IB object to schedule - * - * Schedule an IB in the DMA ring (CIK). - */ -void cik_sdma_ring_ib_execute(struct radeon_device *rdev, - struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; - - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 5; - while ((next_rptr & 7) != 4) - next_rptr++; - next_rptr += 4; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); - radeon_ring_write(ring, 1); /* number of DWs to follow */ - radeon_ring_write(ring, next_rptr); - } - - /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 4) - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); - radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ - radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); - radeon_ring_write(ring, ib->length_dw); - -} - -/** - * cik_sdma_fence_ring_emit - emit a fence on the DMA ring - * - * @rdev: radeon_device pointer - * @fence: radeon fence object - * - * Add a DMA fence packet to the ring to write - * the fence seq number and DMA trap packet to generate - * an interrupt if needed (CIK). - */ -void cik_sdma_fence_ring_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - u64 addr = rdev->fence_drv[fence->ring].gpu_addr; - u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | - SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ - u32 ref_and_mask; - - if (fence->ring == R600_RING_TYPE_DMA_INDEX) - ref_and_mask = SDMA0; - else - ref_and_mask = SDMA1; - - /* write the fence */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); - radeon_ring_write(ring, addr & 0xffffffff); - radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); - radeon_ring_write(ring, fence->seq); - /* generate an interrupt */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); - /* flush HDP */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); - radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); - radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); - radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ - radeon_ring_write(ring, ref_and_mask); /* MASK */ - radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ -} - -/** - * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * @semaphore: radeon semaphore object - * @emit_wait: wait or signal semaphore - * - * Add a DMA semaphore packet to the ring wait on or signal - * other rings (CIK). - */ -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - u64 addr = semaphore->gpu_addr; - u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S; - - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); - radeon_ring_write(ring, addr & 0xfffffff8); - radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); -} - -/** - * cik_sdma_gfx_stop - stop the gfx async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the gfx async dma ring buffers (CIK). - */ -static void cik_sdma_gfx_stop(struct radeon_device *rdev) -{ - u32 rb_cntl, reg_offset; - int i; - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - - for (i = 0; i < 2; i++) { - if (i == 0) - reg_offset = SDMA0_REGISTER_OFFSET; - else - reg_offset = SDMA1_REGISTER_OFFSET; - rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset); - rb_cntl &= ~SDMA_RB_ENABLE; - WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); - WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); - } -} - -/** - * cik_sdma_rlc_stop - stop the compute async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the compute async dma queues (CIK). - */ -static void cik_sdma_rlc_stop(struct radeon_device *rdev) -{ - /* XXX todo */ -} - -/** - * cik_sdma_enable - stop the async dma engines - * - * @rdev: radeon_device pointer - * @enable: enable/disable the DMA MEs. - * - * Halt or unhalt the async dma engines (CIK). - */ -static void cik_sdma_enable(struct radeon_device *rdev, bool enable) -{ - u32 me_cntl, reg_offset; - int i; - - for (i = 0; i < 2; i++) { - if (i == 0) - reg_offset = SDMA0_REGISTER_OFFSET; - else - reg_offset = SDMA1_REGISTER_OFFSET; - me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset); - if (enable) - me_cntl &= ~SDMA_HALT; - else - me_cntl |= SDMA_HALT; - WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl); - } -} - -/** - * cik_sdma_gfx_resume - setup and start the async dma engines - * - * @rdev: radeon_device pointer - * - * Set up the gfx DMA ring buffers and enable them (CIK). - * Returns 0 for success, error for failure. - */ -static int cik_sdma_gfx_resume(struct radeon_device *rdev) -{ - struct radeon_ring *ring; - u32 rb_cntl, ib_cntl; - u32 rb_bufsz; - u32 reg_offset, wb_offset; - int i, r; - - for (i = 0; i < 2; i++) { - if (i == 0) { - ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; - reg_offset = SDMA0_REGISTER_OFFSET; - wb_offset = R600_WB_DMA_RPTR_OFFSET; - } else { - ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; - reg_offset = SDMA1_REGISTER_OFFSET; - wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; - } - - WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); - WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); - - /* Set ring buffer size in dwords */ - rb_bufsz = drm_order(ring->ring_size / 4); - rb_cntl = rb_bufsz << 1; -#ifdef __BIG_ENDIAN - rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE; -#endif - WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0); - WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0); - - /* set the wb address whether it's enabled or not */ - WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset, - upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset, - ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); - - if (rdev->wb.enabled) - rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE; - - WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8); - WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40); - - ring->wptr = 0; - WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); - - ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; - - /* enable DMA RB */ - WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); - - ib_cntl = SDMA_IB_ENABLE; -#ifdef __BIG_ENDIAN - ib_cntl |= SDMA_IB_SWAP_ENABLE; -#endif - /* enable DMA IBs */ - WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl); - - ring->ready = true; - - r = radeon_ring_test(rdev, ring->idx, ring); - if (r) { - ring->ready = false; - return r; - } - } - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - - return 0; -} - -/** - * cik_sdma_rlc_resume - setup and start the async dma engines - * - * @rdev: radeon_device pointer - * - * Set up the compute DMA queues and enable them (CIK). - * Returns 0 for success, error for failure. - */ -static int cik_sdma_rlc_resume(struct radeon_device *rdev) -{ - /* XXX todo */ - return 0; -} - -/** - * cik_sdma_load_microcode - load the sDMA ME ucode - * - * @rdev: radeon_device pointer - * - * Loads the sDMA0/1 ucode. - * Returns 0 for success, -EINVAL if the ucode is not available. - */ -static int cik_sdma_load_microcode(struct radeon_device *rdev) -{ - const __be32 *fw_data; - int i; - - if (!rdev->sdma_fw) - return -EINVAL; - - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); - - /* halt the MEs */ - cik_sdma_enable(rdev, false); - - /* sdma0 */ - fw_data = (const __be32 *)rdev->sdma_fw->data; - WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); - for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) - WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++)); - WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); - - /* sdma1 */ - fw_data = (const __be32 *)rdev->sdma_fw->data; - WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); - for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) - WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++)); - WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); - - WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); - WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); - return 0; -} - -/** - * cik_sdma_resume - setup and start the async dma engines - * - * @rdev: radeon_device pointer - * - * Set up the DMA engines and enable them (CIK). - * Returns 0 for success, error for failure. - */ -static int cik_sdma_resume(struct radeon_device *rdev) -{ - int r; - - /* Reset dma */ - WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - RREG32(SRBM_SOFT_RESET); - - r = cik_sdma_load_microcode(rdev); - if (r) - return r; - - /* unhalt the MEs */ - cik_sdma_enable(rdev, true); - - /* start the gfx rings and rlc compute queues */ - r = cik_sdma_gfx_resume(rdev); - if (r) - return r; - r = cik_sdma_rlc_resume(rdev); - if (r) - return r; - - return 0; -} - -/** - * cik_sdma_fini - tear down the async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the async dma engines and free the rings (CIK). - */ -static void cik_sdma_fini(struct radeon_device *rdev) -{ - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); - /* halt the MEs */ - cik_sdma_enable(rdev, false); - radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); - radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); - /* XXX - compute dma queue tear down */ -} - -/** - * cik_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (CIK). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int cik_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_bytes, cur_size_in_bytes; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); - num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); - r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_bytes = size_in_bytes; - if (cur_size_in_bytes > 0x1fffff) - cur_size_in_bytes = 0x1fffff; - size_in_bytes -= cur_size_in_bytes; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); - radeon_ring_write(ring, cur_size_in_bytes); - radeon_ring_write(ring, 0); /* src/dst endian swap */ - radeon_ring_write(ring, src_offset & 0xffffffff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff); - src_offset += cur_size_in_bytes; - dst_offset += cur_size_in_bytes; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - -/** - * cik_sdma_ring_test - simple async dma engine test - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Test the DMA engine by writing using it to write an - * value to memory. (CIK). - * Returns 0 for success, error for failure. - */ -int cik_sdma_ring_test(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - unsigned i; - int r; - void __iomem *ptr = (void *)rdev->vram_scratch.ptr; - u32 tmp; - - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - writel(tmp, ptr); - - r = radeon_ring_lock(rdev, ring, 4); - if (r) { - DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); - return r; - } - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); - radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff); - radeon_ring_write(ring, 1); /* number of DWs to follow */ - radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); - - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = readl(ptr); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - - if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - return r; -} - -/** - * cik_sdma_ib_test - test an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Test a simple IB in the DMA ring (CIK). - * Returns 0 on success, error on failure. - */ -int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) -{ - struct radeon_ib ib; - unsigned i; - int r; - void __iomem *ptr = (void *)rdev->vram_scratch.ptr; - u32 tmp = 0; - - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - writel(tmp, ptr); - - r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); - if (r) { - DRM_ERROR("radeon: failed to get ib (%d).\n", r); - return r; - } - - ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); - ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; - ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff; - ib.ptr[3] = 1; - ib.ptr[4] = 0xDEADBEEF; - ib.length_dw = 5; - - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); - return r; - } - r = radeon_fence_wait(ib.fence, false); - if (r) { - DRM_ERROR("radeon: fence wait failed (%d).\n", r); - return r; - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = readl(ptr); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); - } else { - DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); - r = -EINVAL; - } - radeon_ib_free(rdev, &ib); - return r; -} - - static void cik_print_gpu_status_regs(struct radeon_device *rdev) { dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", @@ -4609,7 +4044,7 @@ static void cik_print_gpu_status_regs(struct radeon_device *rdev) * mask to be used by cik_gpu_soft_reset(). * Returns a mask of the blocks to be reset. */ -static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev) +u32 cik_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -4860,34 +4295,6 @@ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } -/** - * cik_sdma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up (CIK). - * Returns true if the engine appears to be locked up, false if not. - */ -bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = cik_gpu_check_soft_reset(rdev); - u32 mask; - - if (ring->idx == R600_RING_TYPE_DMA_INDEX) - mask = RADEON_RESET_DMA; - else - mask = RADEON_RESET_DMA1; - - if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - /* MC */ /** * cik_mc_program - program the GPU memory controller @@ -5424,131 +4831,8 @@ void cik_vm_set_page(struct radeon_device *rdev, } } else { /* DMA */ - if (flags & RADEON_VM_PAGE_SYSTEM) { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - /* for non-physically contiguous pages (system) */ - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = ndw; - for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - } else { - value = 0; - } - addr += incr; - value |= r600_flags; - ib->ptr[ib->length_dw++] = value; - ib->ptr[ib->length_dw++] = upper_32_bits(value); - } - } - } else { - while (count) { - ndw = count; - if (ndw > 0x7FFFF) - ndw = 0x7FFFF; - - if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = r600_flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = ndw; /* number of entries */ - pe += ndw * 8; - addr += ndw * incr; - count -= ndw; - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); - } -} - -/** - * cik_dma_vm_flush - cik vm flush using sDMA - * - * @rdev: radeon_device pointer - * - * Update the page table base and flush the VM TLB - * using sDMA (CIK). - */ -void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{ - struct radeon_ring *ring = &rdev->ring[ridx]; - u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | - SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ - u32 ref_and_mask; - - if (vm == NULL) - return; - - if (ridx == R600_RING_TYPE_DMA_INDEX) - ref_and_mask = SDMA0; - else - ref_and_mask = SDMA1; - - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vm->id < 8) { - radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); - } else { - radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); - - /* update SH_MEM_* regs */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, VMID(vm->id)); - - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_BASES >> 2); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_CONFIG >> 2); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2); - radeon_ring_write(ring, 1); - - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, VMID(0)); - - /* flush HDP */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); - radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); - radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); - radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ - radeon_ring_write(ring, ref_and_mask); /* MASK */ - radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ - - /* flush TLB */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); - radeon_ring_write(ring, 1 << vm->id); } /* diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c new file mode 100644 index 000000000000..8925185a0049 --- /dev/null +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -0,0 +1,785 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" + +/* sdma */ +#define CIK_SDMA_UCODE_SIZE 1050 +#define CIK_SDMA_UCODE_VERSION 64 + +u32 cik_gpu_check_soft_reset(struct radeon_device *rdev); + +/* + * sDMA - System DMA + * Starting with CIK, the GPU has new asynchronous + * DMA engines. These engines are used for compute + * and gfx. There are two DMA engines (SDMA0, SDMA1) + * and each one supports 1 ring buffer used for gfx + * and 2 queues used for compute. + * + * The programming model is very similar to the CP + * (ring buffer, IBs, etc.), but sDMA has it's own + * packet format that is different from the PM4 format + * used by the CP. sDMA supports copying data, writing + * embedded data, solid fills, and a number of other + * things. It also has support for tiling/detiling of + * buffers. + */ + +/** + * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (CIK). + */ +void cik_sdma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 5; + while ((next_rptr & 7) != 4) + next_rptr++; + next_rptr += 4; + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); + radeon_ring_write(ring, 1); /* number of DWs to follow */ + radeon_ring_write(ring, next_rptr); + } + + /* IB packet must end on a 8 DW boundary */ + while ((ring->wptr & 7) != 4) + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); + radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); + radeon_ring_write(ring, ib->length_dw); + +} + +/** + * cik_sdma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (CIK). + */ +void cik_sdma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | + SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ + u32 ref_and_mask; + + if (fence->ring == R600_RING_TYPE_DMA_INDEX) + ref_and_mask = SDMA0; + else + ref_and_mask = SDMA1; + + /* write the fence */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + radeon_ring_write(ring, fence->seq); + /* generate an interrupt */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); + /* flush HDP */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); + radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); + radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); + radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ + radeon_ring_write(ring, ref_and_mask); /* MASK */ + radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ +} + +/** + * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @semaphore: radeon semaphore object + * @emit_wait: wait or signal semaphore + * + * Add a DMA semaphore packet to the ring wait on or signal + * other rings (CIK). + */ +void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + u64 addr = semaphore->gpu_addr; + u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S; + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); + radeon_ring_write(ring, addr & 0xfffffff8); + radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); +} + +/** + * cik_sdma_gfx_stop - stop the gfx async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the gfx async dma ring buffers (CIK). + */ +static void cik_sdma_gfx_stop(struct radeon_device *rdev) +{ + u32 rb_cntl, reg_offset; + int i; + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + for (i = 0; i < 2; i++) { + if (i == 0) + reg_offset = SDMA0_REGISTER_OFFSET; + else + reg_offset = SDMA1_REGISTER_OFFSET; + rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset); + rb_cntl &= ~SDMA_RB_ENABLE; + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); + WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); + } +} + +/** + * cik_sdma_rlc_stop - stop the compute async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the compute async dma queues (CIK). + */ +static void cik_sdma_rlc_stop(struct radeon_device *rdev) +{ + /* XXX todo */ +} + +/** + * cik_sdma_enable - stop the async dma engines + * + * @rdev: radeon_device pointer + * @enable: enable/disable the DMA MEs. + * + * Halt or unhalt the async dma engines (CIK). + */ +void cik_sdma_enable(struct radeon_device *rdev, bool enable) +{ + u32 me_cntl, reg_offset; + int i; + + for (i = 0; i < 2; i++) { + if (i == 0) + reg_offset = SDMA0_REGISTER_OFFSET; + else + reg_offset = SDMA1_REGISTER_OFFSET; + me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset); + if (enable) + me_cntl &= ~SDMA_HALT; + else + me_cntl |= SDMA_HALT; + WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl); + } +} + +/** + * cik_sdma_gfx_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the gfx DMA ring buffers and enable them (CIK). + * Returns 0 for success, error for failure. + */ +static int cik_sdma_gfx_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + u32 rb_cntl, ib_cntl; + u32 rb_bufsz; + u32 reg_offset, wb_offset; + int i, r; + + for (i = 0; i < 2; i++) { + if (i == 0) { + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + reg_offset = SDMA0_REGISTER_OFFSET; + wb_offset = R600_WB_DMA_RPTR_OFFSET; + } else { + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + reg_offset = SDMA1_REGISTER_OFFSET; + wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; + } + + WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); + WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = drm_order(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0); + WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset, + upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset, + ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE; + + WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8); + WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40); + + ring->wptr = 0; + WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); + + ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; + + /* enable DMA RB */ + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); + + ib_cntl = SDMA_IB_ENABLE; +#ifdef __BIG_ENDIAN + ib_cntl |= SDMA_IB_SWAP_ENABLE; +#endif + /* enable DMA IBs */ + WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl); + + ring->ready = true; + + r = radeon_ring_test(rdev, ring->idx, ring); + if (r) { + ring->ready = false; + return r; + } + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * cik_sdma_rlc_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the compute DMA queues and enable them (CIK). + * Returns 0 for success, error for failure. + */ +static int cik_sdma_rlc_resume(struct radeon_device *rdev) +{ + /* XXX todo */ + return 0; +} + +/** + * cik_sdma_load_microcode - load the sDMA ME ucode + * + * @rdev: radeon_device pointer + * + * Loads the sDMA0/1 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int cik_sdma_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->sdma_fw) + return -EINVAL; + + /* stop the gfx rings and rlc compute queues */ + cik_sdma_gfx_stop(rdev); + cik_sdma_rlc_stop(rdev); + + /* halt the MEs */ + cik_sdma_enable(rdev, false); + + /* sdma0 */ + fw_data = (const __be32 *)rdev->sdma_fw->data; + WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); + for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) + WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++)); + WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); + + /* sdma1 */ + fw_data = (const __be32 *)rdev->sdma_fw->data; + WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); + for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) + WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++)); + WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); + + WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); + WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); + return 0; +} + +/** + * cik_sdma_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the DMA engines and enable them (CIK). + * Returns 0 for success, error for failure. + */ +int cik_sdma_resume(struct radeon_device *rdev) +{ + int r; + + /* Reset dma */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + RREG32(SRBM_SOFT_RESET); + + r = cik_sdma_load_microcode(rdev); + if (r) + return r; + + /* unhalt the MEs */ + cik_sdma_enable(rdev, true); + + /* start the gfx rings and rlc compute queues */ + r = cik_sdma_gfx_resume(rdev); + if (r) + return r; + r = cik_sdma_rlc_resume(rdev); + if (r) + return r; + + return 0; +} + +/** + * cik_sdma_fini - tear down the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines and free the rings (CIK). + */ +void cik_sdma_fini(struct radeon_device *rdev) +{ + /* stop the gfx rings and rlc compute queues */ + cik_sdma_gfx_stop(rdev); + cik_sdma_rlc_stop(rdev); + /* halt the MEs */ + cik_sdma_enable(rdev, false); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); + /* XXX - compute dma queue tear down */ +} + +/** + * cik_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (CIK). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int cik_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_bytes, cur_size_in_bytes; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); + num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); + r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_bytes = size_in_bytes; + if (cur_size_in_bytes > 0x1fffff) + cur_size_in_bytes = 0x1fffff; + size_in_bytes -= cur_size_in_bytes; + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); + radeon_ring_write(ring, cur_size_in_bytes); + radeon_ring_write(ring, 0); /* src/dst endian swap */ + radeon_ring_write(ring, src_offset & 0xffffffff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff); + src_offset += cur_size_in_bytes; + dst_offset += cur_size_in_bytes; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} + +/** + * cik_sdma_ring_test - simple async dma engine test + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. (CIK). + * Returns 0 for success, error for failure. + */ +int cik_sdma_ring_test(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + unsigned i; + int r; + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; + u32 tmp; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ring_lock(rdev, ring, 4); + if (r) { + DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); + return r; + } + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); + radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff); + radeon_ring_write(ring, 1); /* number of DWs to follow */ + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/** + * cik_sdma_ib_test - test an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test a simple IB in the DMA ring (CIK). + * Returns 0 on success, error on failure. + */ +int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_ib ib; + unsigned i; + int r; + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; + u32 tmp = 0; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; + ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff; + ib.ptr[3] = 1; + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + return r; + } + r = radeon_fence_wait(ib.fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); + } else { + DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + radeon_ib_free(rdev, &ib); + return r; +} + +/** + * cik_sdma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up (CIK). + * Returns true if the engine appears to be locked up, false if not. + */ +bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = cik_gpu_check_soft_reset(rdev); + u32 mask; + + if (ring->idx == R600_RING_TYPE_DMA_INDEX) + mask = RADEON_RESET_DMA; + else + mask = RADEON_RESET_DMA1; + + if (!(reset_mask & mask)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + +/** + * cik_sdma_vm_set_page - update the page tables using sDMA + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA (CIK). + */ +void cik_sdma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) +{ + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + + if (flags & RADEON_VM_PAGE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + while (count) { + ndw = count; + if (ndw > 0x7FFFF) + ndw = 0x7FFFF; + + if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = ndw; /* number of entries */ + pe += ndw * 8; + addr += ndw * incr; + count -= ndw; + } + } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); +} + +/** + * cik_dma_vm_flush - cik vm flush using sDMA + * + * @rdev: radeon_device pointer + * + * Update the page table base and flush the VM TLB + * using sDMA (CIK). + */ +void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | + SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ + u32 ref_and_mask; + + if (vm == NULL) + return; + + if (ridx == R600_RING_TYPE_DMA_INDEX) + ref_and_mask = SDMA0; + else + ref_and_mask = SDMA1; + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + if (vm->id < 8) { + radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + } else { + radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + } + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* update SH_MEM_* regs */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); + radeon_ring_write(ring, VMID(vm->id)); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_BASES >> 2); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_CONFIG >> 2); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2); + radeon_ring_write(ring, 1); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); + radeon_ring_write(ring, VMID(0)); + + /* flush HDP */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); + radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); + radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); + radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ + radeon_ring_write(ring, ref_and_mask); /* MASK */ + radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ + + /* flush TLB */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 1 << vm->id); +} + diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 52ed22333f0d..bbaa4f2056ce 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3613,7 +3613,7 @@ bool evergreen_is_display_hung(struct radeon_device *rdev) return true; } -static u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev) +u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -3839,28 +3839,6 @@ bool evergreen_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin return radeon_ring_test_lockup(rdev, ring); } -/** - * evergreen_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = evergreen_gpu_check_soft_reset(rdev); - - if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - /* * RLC */ @@ -5024,143 +5002,6 @@ restart_ih: return IRQ_HANDLED; } -/** - * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring - * - * @rdev: radeon_device pointer - * @fence: radeon fence object - * - * Add a DMA fence packet to the ring to write - * the fence seq number and DMA trap packet to generate - * an interrupt if needed (evergreen-SI). - */ -void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - u64 addr = rdev->fence_drv[fence->ring].gpu_addr; - /* write the fence */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0)); - radeon_ring_write(ring, addr & 0xfffffffc); - radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); - radeon_ring_write(ring, fence->seq); - /* generate an interrupt */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0)); - /* flush HDP */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); - radeon_ring_write(ring, 1); -} - -/** - * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ib: IB object to schedule - * - * Schedule an IB in the DMA ring (evergreen). - */ -void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, - struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 4; - while ((next_rptr & 7) != 5) - next_rptr++; - next_rptr += 3; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); - radeon_ring_write(ring, next_rptr); - } - - /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. - * Pad as necessary with NOPs. - */ - while ((ring->wptr & 7) != 5) - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0)); - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0)); - radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); - radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); - -} - -/** - * evergreen_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (evergreen-cayman). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int evergreen_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_dw, cur_size_in_dw; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; - num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); - r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_dw = size_in_dw; - if (cur_size_in_dw > 0xFFFFF) - cur_size_in_dw = 0xFFFFF; - size_in_dw -= cur_size_in_dw; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw)); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, src_offset & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); - src_offset += cur_size_in_dw * 4; - dst_offset += cur_size_in_dw * 4; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - static int evergreen_startup(struct radeon_device *rdev) { struct radeon_ring *ring; diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c new file mode 100644 index 000000000000..6a0656d00ed0 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -0,0 +1,190 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "evergreend.h" + +u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev); + +/** + * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (evergreen-SI). + */ +void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + /* write the fence */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); + radeon_ring_write(ring, fence->seq); + /* generate an interrupt */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0)); + /* flush HDP */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); +} + +/** + * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (evergreen). + */ +void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0)); + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * evergreen_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (evergreen-cayman). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int evergreen_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFFF) + cur_size_in_dw = 0xFFFFF; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw)); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} + +/** + * evergreen_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = evergreen_gpu_check_soft_reset(rdev); + + if (!(reset_mask & RADEON_RESET_DMA)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + + diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 0205fa1594fa..2db8ce0023ac 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -174,6 +174,11 @@ extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev); extern void evergreen_program_aspm(struct radeon_device *rdev); extern void sumo_rlc_fini(struct radeon_device *rdev); extern int sumo_rlc_init(struct radeon_device *rdev); +extern void cayman_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags); /* Firmware Names */ MODULE_FIRMWARE("radeon/BARTS_pfp.bin"); @@ -1595,186 +1600,7 @@ static int cayman_cp_resume(struct radeon_device *rdev) return 0; } -/* - * DMA - * Starting with R600, the GPU has an asynchronous - * DMA engine. The programming model is very similar - * to the 3D engine (ring buffer, IBs, etc.), but the - * DMA controller has it's own packet format that is - * different form the PM4 format used by the 3D engine. - * It supports copying data, writing embedded data, - * solid fills, and a number of other things. It also - * has support for tiling/detiling of buffers. - * Cayman and newer support two asynchronous DMA engines. - */ -/** - * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ib: IB object to schedule - * - * Schedule an IB in the DMA ring (cayman-SI). - */ -void cayman_dma_ring_ib_execute(struct radeon_device *rdev, - struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 4; - while ((next_rptr & 7) != 5) - next_rptr++; - next_rptr += 3; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); - radeon_ring_write(ring, next_rptr); - } - - /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. - * Pad as necessary with NOPs. - */ - while ((ring->wptr & 7) != 5) - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); - radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); - radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); - -} - -/** - * cayman_dma_stop - stop the async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the async dma engines (cayman-SI). - */ -void cayman_dma_stop(struct radeon_device *rdev) -{ - u32 rb_cntl; - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - - /* dma0 */ - rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); - - /* dma1 */ - rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); - - rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; - rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; -} - -/** - * cayman_dma_resume - setup and start the async dma engines - * - * @rdev: radeon_device pointer - * - * Set up the DMA ring buffers and enable them. (cayman-SI). - * Returns 0 for success, error for failure. - */ -int cayman_dma_resume(struct radeon_device *rdev) -{ - struct radeon_ring *ring; - u32 rb_cntl, dma_cntl, ib_cntl; - u32 rb_bufsz; - u32 reg_offset, wb_offset; - int i, r; - - /* Reset dma */ - WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - - for (i = 0; i < 2; i++) { - if (i == 0) { - ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; - reg_offset = DMA0_REGISTER_OFFSET; - wb_offset = R600_WB_DMA_RPTR_OFFSET; - } else { - ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; - reg_offset = DMA1_REGISTER_OFFSET; - wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; - } - - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); - - /* Set ring buffer size in dwords */ - rb_bufsz = drm_order(ring->ring_size / 4); - rb_cntl = rb_bufsz << 1; -#ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; -#endif - WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR + reg_offset, 0); - WREG32(DMA_RB_WPTR + reg_offset, 0); - - /* set the wb address whether it's enabled or not */ - WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, - upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); - WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, - ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); - - if (rdev->wb.enabled) - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; - - WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); - - /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; -#ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; -#endif - WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); - - dma_cntl = RREG32(DMA_CNTL + reg_offset); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL + reg_offset, dma_cntl); - - ring->wptr = 0; - WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); - - ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; - - WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); - - ring->ready = true; - - r = radeon_ring_test(rdev, ring->idx, ring); - if (r) { - ring->ready = false; - return r; - } - } - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - - return 0; -} - -/** - * cayman_dma_fini - tear down the async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the async dma engines and free the rings (cayman-SI). - */ -void cayman_dma_fini(struct radeon_device *rdev) -{ - cayman_dma_stop(rdev); - radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); - radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); -} - -static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev) +u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -2027,34 +1853,6 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } -/** - * cayman_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = cayman_gpu_check_soft_reset(rdev); - u32 mask; - - if (ring->idx == R600_RING_TYPE_DMA_INDEX) - mask = RADEON_RESET_DMA; - else - mask = RADEON_RESET_DMA1; - - if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - static int cayman_startup(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -2658,61 +2456,7 @@ void cayman_vm_set_page(struct radeon_device *rdev, } } } else { - if ((flags & RADEON_VM_PAGE_SYSTEM) || - (count == 1)) { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - /* for non-physically contiguous pages (system) */ - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - } else { - value = 0; - } - addr += incr; - value |= r600_flags; - ib->ptr[ib->length_dw++] = value; - ib->ptr[ib->length_dw++] = upper_32_bits(value); - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); - } else { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - ib->ptr[ib->length_dw++] = r600_flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - pe += ndw * 4; - addr += (ndw / 2) * incr; - count -= ndw / 2; - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); + cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); } } @@ -2746,26 +2490,3 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); } - -void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{ - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); - - /* flush hdp cache */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); - radeon_ring_write(ring, 1); - - /* bits 0-7 are the VM contexts0-7 */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); -} - diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c new file mode 100644 index 000000000000..0f3c0baea4a6 --- /dev/null +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -0,0 +1,338 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "nid.h" + +u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev); + +/* + * DMA + * Starting with R600, the GPU has an asynchronous + * DMA engine. The programming model is very similar + * to the 3D engine (ring buffer, IBs, etc.), but the + * DMA controller has it's own packet format that is + * different form the PM4 format used by the 3D engine. + * It supports copying data, writing embedded data, + * solid fills, and a number of other things. It also + * has support for tiling/detiling of buffers. + * Cayman and newer support two asynchronous DMA engines. + */ + +/** + * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (cayman-SI). + */ +void cayman_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * cayman_dma_stop - stop the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines (cayman-SI). + */ +void cayman_dma_stop(struct radeon_device *rdev) +{ + u32 rb_cntl; + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + /* dma0 */ + rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); + + /* dma1 */ + rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); + + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; +} + +/** + * cayman_dma_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the DMA ring buffers and enable them. (cayman-SI). + * Returns 0 for success, error for failure. + */ +int cayman_dma_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + u32 rb_cntl, dma_cntl, ib_cntl; + u32 rb_bufsz; + u32 reg_offset, wb_offset; + int i, r; + + /* Reset dma */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + + for (i = 0; i < 2; i++) { + if (i == 0) { + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + reg_offset = DMA0_REGISTER_OFFSET; + wb_offset = R600_WB_DMA_RPTR_OFFSET; + } else { + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + reg_offset = DMA1_REGISTER_OFFSET; + wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; + } + + WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); + WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = drm_order(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(DMA_RB_RPTR + reg_offset, 0); + WREG32(DMA_RB_WPTR + reg_offset, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, + upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); + WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, + ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + + WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); + + /* enable DMA IBs */ + ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; +#ifdef __BIG_ENDIAN + ib_cntl |= DMA_IB_SWAP_ENABLE; +#endif + WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); + + dma_cntl = RREG32(DMA_CNTL + reg_offset); + dma_cntl &= ~CTXEMPTY_INT_ENABLE; + WREG32(DMA_CNTL + reg_offset, dma_cntl); + + ring->wptr = 0; + WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); + + ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; + + WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); + + ring->ready = true; + + r = radeon_ring_test(rdev, ring->idx, ring); + if (r) { + ring->ready = false; + return r; + } + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * cayman_dma_fini - tear down the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines and free the rings (cayman-SI). + */ +void cayman_dma_fini(struct radeon_device *rdev) +{ + cayman_dma_stop(rdev); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); +} + +/** + * cayman_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = cayman_gpu_check_soft_reset(rdev); + u32 mask; + + if (ring->idx == R600_RING_TYPE_DMA_INDEX) + mask = RADEON_RESET_DMA; + else + mask = RADEON_RESET_DMA1; + + if (!(reset_mask & mask)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + +/** + * cayman_dma_vm_set_page - update the page tables using the DMA + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * @r600_flags: hw access flags + * + * Update the page tables using the DMA (cayman/TN). + */ +void cayman_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) +{ + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + + if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + pe += ndw * 4; + addr += (ndw / 2) * incr; + count -= ndw / 2; + } + } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); +} + +void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + + if (vm == NULL) + return; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* flush hdp cache */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 1 << vm->id); +} + diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 3a08ef92d33f..087cff444ba2 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1374,7 +1374,7 @@ static bool r600_is_display_hung(struct radeon_device *rdev) return true; } -static u32 r600_gpu_check_soft_reset(struct radeon_device *rdev) +u32 r600_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -1622,28 +1622,6 @@ bool r600_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } -/** - * r600_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = r600_gpu_check_soft_reset(rdev); - - if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - u32 r6xx_remap_render_backend(struct radeon_device *rdev, u32 tiling_pipe_num, u32 max_rb_num, @@ -2493,176 +2471,6 @@ void r600_cp_fini(struct radeon_device *rdev) radeon_scratch_free(rdev, ring->rptr_save_reg); } -/* - * DMA - * Starting with R600, the GPU has an asynchronous - * DMA engine. The programming model is very similar - * to the 3D engine (ring buffer, IBs, etc.), but the - * DMA controller has it's own packet format that is - * different form the PM4 format used by the 3D engine. - * It supports copying data, writing embedded data, - * solid fills, and a number of other things. It also - * has support for tiling/detiling of buffers. - */ - -/** - * r600_dma_get_rptr - get the current read pointer - * - * @rdev: radeon_device pointer - * @ring: radeon ring pointer - * - * Get the current rptr from the hardware (r6xx+). - */ -uint32_t r600_dma_get_rptr(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2; -} - -/** - * r600_dma_get_wptr - get the current write pointer - * - * @rdev: radeon_device pointer - * @ring: radeon ring pointer - * - * Get the current wptr from the hardware (r6xx+). - */ -uint32_t r600_dma_get_wptr(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2; -} - -/** - * r600_dma_set_wptr - commit the write pointer - * - * @rdev: radeon_device pointer - * @ring: radeon ring pointer - * - * Write the wptr back to the hardware (r6xx+). - */ -void r600_dma_set_wptr(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc); -} - -/** - * r600_dma_stop - stop the async dma engine - * - * @rdev: radeon_device pointer - * - * Stop the async dma engine (r6xx-evergreen). - */ -void r600_dma_stop(struct radeon_device *rdev) -{ - u32 rb_cntl = RREG32(DMA_RB_CNTL); - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL, rb_cntl); - - rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; -} - -/** - * r600_dma_resume - setup and start the async dma engine - * - * @rdev: radeon_device pointer - * - * Set up the DMA ring buffer and enable it. (r6xx-evergreen). - * Returns 0 for success, error for failure. - */ -int r600_dma_resume(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; - u32 rb_cntl, dma_cntl, ib_cntl; - u32 rb_bufsz; - int r; - - /* Reset dma */ - if (rdev->family >= CHIP_RV770) - WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); - else - WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); - - /* Set ring buffer size in dwords */ - rb_bufsz = drm_order(ring->ring_size / 4); - rb_cntl = rb_bufsz << 1; -#ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; -#endif - WREG32(DMA_RB_CNTL, rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR, 0); - WREG32(DMA_RB_WPTR, 0); - - /* set the wb address whether it's enabled or not */ - WREG32(DMA_RB_RPTR_ADDR_HI, - upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); - WREG32(DMA_RB_RPTR_ADDR_LO, - ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); - - if (rdev->wb.enabled) - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; - - WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); - - /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE; -#ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; -#endif - WREG32(DMA_IB_CNTL, ib_cntl); - - dma_cntl = RREG32(DMA_CNTL); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL, dma_cntl); - - if (rdev->family >= CHIP_RV770) - WREG32(DMA_MODE, 1); - - ring->wptr = 0; - WREG32(DMA_RB_WPTR, ring->wptr << 2); - - ring->rptr = RREG32(DMA_RB_RPTR) >> 2; - - WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); - - ring->ready = true; - - r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); - if (r) { - ring->ready = false; - return r; - } - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - - return 0; -} - -/** - * r600_dma_fini - tear down the async dma engine - * - * @rdev: radeon_device pointer - * - * Stop the async dma engine and free the ring (r6xx-evergreen). - */ -void r600_dma_fini(struct radeon_device *rdev) -{ - r600_dma_stop(rdev); - radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); -} - /* * GPU scratch registers helpers function. */ @@ -2718,60 +2526,6 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } -/** - * r600_dma_ring_test - simple async dma engine test - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Test the DMA engine by writing using it to write an - * value to memory. (r6xx-SI). - * Returns 0 for success, error for failure. - */ -int r600_dma_ring_test(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - unsigned i; - int r; - void __iomem *ptr = (void *)rdev->vram_scratch.ptr; - u32 tmp; - - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - writel(tmp, ptr); - - r = radeon_ring_lock(rdev, ring, 4); - if (r) { - DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); - return r; - } - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); - radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); - radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); - - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = readl(ptr); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - - if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - return r; -} - /* * CP fences/semaphores */ @@ -2839,59 +2593,6 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); } -/* - * DMA fences/semaphores - */ - -/** - * r600_dma_fence_ring_emit - emit a fence on the DMA ring - * - * @rdev: radeon_device pointer - * @fence: radeon fence object - * - * Add a DMA fence packet to the ring to write - * the fence seq number and DMA trap packet to generate - * an interrupt if needed (r6xx-r7xx). - */ -void r600_dma_fence_ring_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - u64 addr = rdev->fence_drv[fence->ring].gpu_addr; - - /* write the fence */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); - radeon_ring_write(ring, addr & 0xfffffffc); - radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); - radeon_ring_write(ring, lower_32_bits(fence->seq)); - /* generate an interrupt */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); -} - -/** - * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * @semaphore: radeon semaphore object - * @emit_wait: wait or signal semaphore - * - * Add a DMA semaphore packet to the ring wait on or signal - * other rings (r6xx-SI). - */ -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - u64 addr = semaphore->gpu_addr; - u32 s = emit_wait ? 0 : 1; - - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); - radeon_ring_write(ring, addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(addr) & 0xff); -} - /** * r600_copy_cpdma - copy pages using the CP DMA engine * @@ -2976,80 +2677,6 @@ int r600_copy_cpdma(struct radeon_device *rdev, return r; } -/** - * r600_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (r6xx). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int r600_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_dw, cur_size_in_dw; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; - num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); - r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_dw = size_in_dw; - if (cur_size_in_dw > 0xFFFE) - cur_size_in_dw = 0xFFFE; - size_in_dw -= cur_size_in_dw; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, src_offset & 0xfffffffc); - radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | - (upper_32_bits(src_offset) & 0xff))); - src_offset += cur_size_in_dw * 4; - dst_offset += cur_size_in_dw * 4; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size) @@ -3409,104 +3036,6 @@ free_scratch: return r; } -/** - * r600_dma_ib_test - test an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Test a simple IB in the DMA ring (r6xx-SI). - * Returns 0 on success, error on failure. - */ -int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) -{ - struct radeon_ib ib; - unsigned i; - int r; - void __iomem *ptr = (void *)rdev->vram_scratch.ptr; - u32 tmp = 0; - - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - writel(tmp, ptr); - - r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); - if (r) { - DRM_ERROR("radeon: failed to get ib (%d).\n", r); - return r; - } - - ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); - ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; - ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; - ib.ptr[3] = 0xDEADBEEF; - ib.length_dw = 4; - - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); - return r; - } - r = radeon_fence_wait(ib.fence, false); - if (r) { - DRM_ERROR("radeon: fence wait failed (%d).\n", r); - return r; - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = readl(ptr); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); - } else { - DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); - r = -EINVAL; - } - radeon_ib_free(rdev, &ib); - return r; -} - -/** - * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ib: IB object to schedule - * - * Schedule an IB in the DMA ring (r6xx-r7xx). - */ -void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 4; - while ((next_rptr & 7) != 5) - next_rptr++; - next_rptr += 3; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); - radeon_ring_write(ring, next_rptr); - } - - /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. - * Pad as necessary with NOPs. - */ - while ((ring->wptr & 7) != 5) - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); - radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); - radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); - -} - /* * Interrupts * diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c new file mode 100644 index 000000000000..bff05576266b --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -0,0 +1,497 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "r600d.h" + +u32 r600_gpu_check_soft_reset(struct radeon_device *rdev); + +/* + * DMA + * Starting with R600, the GPU has an asynchronous + * DMA engine. The programming model is very similar + * to the 3D engine (ring buffer, IBs, etc.), but the + * DMA controller has it's own packet format that is + * different form the PM4 format used by the 3D engine. + * It supports copying data, writing embedded data, + * solid fills, and a number of other things. It also + * has support for tiling/detiling of buffers. + */ + +/** + * r600_dma_get_rptr - get the current read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Get the current rptr from the hardware (r6xx+). + */ +uint32_t r600_dma_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2; +} + +/** + * r600_dma_get_wptr - get the current write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Get the current wptr from the hardware (r6xx+). + */ +uint32_t r600_dma_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2; +} + +/** + * r600_dma_set_wptr - commit the write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Write the wptr back to the hardware (r6xx+). + */ +void r600_dma_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc); +} + +/** + * r600_dma_stop - stop the async dma engine + * + * @rdev: radeon_device pointer + * + * Stop the async dma engine (r6xx-evergreen). + */ +void r600_dma_stop(struct radeon_device *rdev) +{ + u32 rb_cntl = RREG32(DMA_RB_CNTL); + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL, rb_cntl); + + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; +} + +/** + * r600_dma_resume - setup and start the async dma engine + * + * @rdev: radeon_device pointer + * + * Set up the DMA ring buffer and enable it. (r6xx-evergreen). + * Returns 0 for success, error for failure. + */ +int r600_dma_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + u32 rb_cntl, dma_cntl, ib_cntl; + u32 rb_bufsz; + int r; + + /* Reset dma */ + if (rdev->family >= CHIP_RV770) + WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); + else + WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + + WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); + WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = drm_order(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(DMA_RB_CNTL, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(DMA_RB_RPTR, 0); + WREG32(DMA_RB_WPTR, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(DMA_RB_RPTR_ADDR_HI, + upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); + WREG32(DMA_RB_RPTR_ADDR_LO, + ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + + WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); + + /* enable DMA IBs */ + ib_cntl = DMA_IB_ENABLE; +#ifdef __BIG_ENDIAN + ib_cntl |= DMA_IB_SWAP_ENABLE; +#endif + WREG32(DMA_IB_CNTL, ib_cntl); + + dma_cntl = RREG32(DMA_CNTL); + dma_cntl &= ~CTXEMPTY_INT_ENABLE; + WREG32(DMA_CNTL, dma_cntl); + + if (rdev->family >= CHIP_RV770) + WREG32(DMA_MODE, 1); + + ring->wptr = 0; + WREG32(DMA_RB_WPTR, ring->wptr << 2); + + ring->rptr = RREG32(DMA_RB_RPTR) >> 2; + + WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); + + ring->ready = true; + + r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * r600_dma_fini - tear down the async dma engine + * + * @rdev: radeon_device pointer + * + * Stop the async dma engine and free the ring (r6xx-evergreen). + */ +void r600_dma_fini(struct radeon_device *rdev) +{ + r600_dma_stop(rdev); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); +} + +/** + * r600_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = r600_gpu_check_soft_reset(rdev); + + if (!(reset_mask & RADEON_RESET_DMA)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + + +/** + * r600_dma_ring_test - simple async dma engine test + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. (r6xx-SI). + * Returns 0 for success, error for failure. + */ +int r600_dma_ring_test(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + unsigned i; + int r; + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; + u32 tmp; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ring_lock(rdev, ring, 4); + if (r) { + DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); + return r; + } + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/** + * r600_dma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (r6xx-r7xx). + */ +void r600_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + + /* write the fence */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); + radeon_ring_write(ring, lower_32_bits(fence->seq)); + /* generate an interrupt */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); +} + +/** + * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @semaphore: radeon semaphore object + * @emit_wait: wait or signal semaphore + * + * Add a DMA semaphore packet to the ring wait on or signal + * other rings (r6xx-SI). + */ +void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + u64 addr = semaphore->gpu_addr; + u32 s = emit_wait ? 0 : 1; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); +} + +/** + * r600_dma_ib_test - test an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test a simple IB in the DMA ring (r6xx-SI). + * Returns 0 on success, error on failure. + */ +int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_ib ib; + unsigned i; + int r; + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; + u32 tmp = 0; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); + ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; + ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; + ib.ptr[3] = 0xDEADBEEF; + ib.length_dw = 4; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + return r; + } + r = radeon_fence_wait(ib.fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); + } else { + DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + radeon_ib_free(rdev, &ib); + return r; +} + +/** + * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (r6xx-r7xx). + */ +void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * r600_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (r6xx). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); + r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFE) + cur_size_in_dw = 0xFFFE; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | + (upper_32_bits(src_offset) & 0xff))); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index aaab7b1bba27..b811296462a3 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1650,80 +1650,6 @@ static int rv770_mc_init(struct radeon_device *rdev) return 0; } -/** - * rv770_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (r7xx). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int rv770_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_dw, cur_size_in_dw; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; - num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); - r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_dw = size_in_dw; - if (cur_size_in_dw > 0xFFFF) - cur_size_in_dw = 0xFFFF; - size_in_dw -= cur_size_in_dw; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, src_offset & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); - src_offset += cur_size_in_dw * 4; - dst_offset += cur_size_in_dw * 4; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - static int rv770_startup(struct radeon_device *rdev) { struct radeon_ring *ring; diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c new file mode 100644 index 000000000000..f9b02e3d6830 --- /dev/null +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -0,0 +1,101 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "rv770d.h" + +/** + * rv770_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (r7xx). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int rv770_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFF) + cur_size_in_dw = 0xFFFF; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f3f79089405e..f5307e6bb92b 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -78,6 +78,11 @@ extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_ extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev); extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev); extern bool evergreen_is_display_hung(struct radeon_device *rdev); +extern void si_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags); static const u32 verde_rlc_save_restore_register_list[] = { @@ -3495,7 +3500,7 @@ static int si_cp_resume(struct radeon_device *rdev) return 0; } -static u32 si_gpu_check_soft_reset(struct radeon_device *rdev) +u32 si_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -3744,34 +3749,6 @@ bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } -/** - * si_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = si_gpu_check_soft_reset(rdev); - u32 mask; - - if (ring->idx == R600_RING_TYPE_DMA_INDEX) - mask = RADEON_RESET_DMA; - else - mask = RADEON_RESET_DMA1; - - if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - /* MC */ static void si_mc_program(struct radeon_device *rdev) { @@ -4710,58 +4687,7 @@ void si_vm_set_page(struct radeon_device *rdev, } } else { /* DMA */ - if (flags & RADEON_VM_PAGE_SYSTEM) { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - /* for non-physically contiguous pages (system) */ - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - } else { - value = 0; - } - addr += incr; - value |= r600_flags; - ib->ptr[ib->length_dw++] = value; - ib->ptr[ib->length_dw++] = upper_32_bits(value); - } - } - } else { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - ib->ptr[ib->length_dw++] = r600_flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - pe += ndw * 4; - addr += (ndw / 2) * incr; - count -= ndw / 2; - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); + si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); } } @@ -4808,32 +4734,6 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 0x0); } -void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{ - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - if (vm->id < 8) { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); - } else { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); - } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); - - /* flush hdp cache */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); - radeon_ring_write(ring, 1); - - /* bits 0-7 are the VM contexts0-7 */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); -} - /* * Power and clock gating */ @@ -6177,80 +6077,6 @@ restart_ih: return IRQ_HANDLED; } -/** - * si_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (SI). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int si_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_bytes, cur_size_in_bytes; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); - num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); - r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_bytes = size_in_bytes; - if (cur_size_in_bytes > 0xFFFFF) - cur_size_in_bytes = 0xFFFFF; - size_in_bytes -= cur_size_in_bytes; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes)); - radeon_ring_write(ring, dst_offset & 0xffffffff); - radeon_ring_write(ring, src_offset & 0xffffffff); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); - src_offset += cur_size_in_bytes; - dst_offset += cur_size_in_bytes; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - /* * startup/shutdown callbacks */ diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c new file mode 100644 index 000000000000..49909d23dfce --- /dev/null +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -0,0 +1,235 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +u32 si_gpu_check_soft_reset(struct radeon_device *rdev); + +/** + * si_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = si_gpu_check_soft_reset(rdev); + u32 mask; + + if (ring->idx == R600_RING_TYPE_DMA_INDEX) + mask = RADEON_RESET_DMA; + else + mask = RADEON_RESET_DMA1; + + if (!(reset_mask & mask)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + +/** + * si_dma_vm_set_page - update the page tables using the DMA + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using the DMA (SI). + */ +void si_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) +{ + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + + if (flags & RADEON_VM_PAGE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + pe += ndw * 4; + addr += (ndw / 2) * incr; + count -= ndw / 2; + } + } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); +} + +void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + + if (vm == NULL) + return; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + if (vm->id < 8) { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + } else { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); + } + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* flush hdp cache */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 1 << vm->id); +} + +/** + * si_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (SI). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int si_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_bytes, cur_size_in_bytes; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); + num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_bytes = size_in_bytes; + if (cur_size_in_bytes > 0xFFFFF) + cur_size_in_bytes = 0xFFFFF; + size_in_bytes -= cur_size_in_bytes; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes)); + radeon_ring_write(ring, dst_offset & 0xffffffff); + radeon_ring_write(ring, src_offset & 0xffffffff); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_bytes; + dst_offset += cur_size_in_bytes; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} + -- cgit v1.2.3 From b530602fd4625f763344e455902981b22f85f609 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 31 Jul 2013 16:51:33 -0400 Subject: drm/radeon: add audio support for DCE6/8 GPUs (v12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to DCE4/5, but supports multiple audio pins which can be assigned per afmt block. v2: rework the driver to handle more than one audio pin. v3: try different dto reg v4: properly program dto v5 (ck): change dto programming order v6: program speaker allocation block v7: rebase v8: rebase on Rafał's changes v9: integrated Rafał's comments, update to latest drm_edid_to_speaker_allocation API v10: add missing line break in error message v11: add back audio enabled messages v12: fix copy paste typo in r600_audio_enable Signed-off-by: Alex Deucher Signed-off-by: Christian König Acked-by: Rafał Miłecki --- drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/atombios_encoders.c | 11 +- drivers/gpu/drm/radeon/cik.c | 5 + drivers/gpu/drm/radeon/dce6_afmt.c | 251 +++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/evergreen_hdmi.c | 54 +++++-- drivers/gpu/drm/radeon/ni.c | 17 +- drivers/gpu/drm/radeon/r600_audio.c | 60 ++++--- drivers/gpu/drm/radeon/r600_hdmi.c | 7 +- drivers/gpu/drm/radeon/radeon.h | 18 ++- drivers/gpu/drm/radeon/radeon_asic.c | 8 + drivers/gpu/drm/radeon/radeon_asic.h | 4 +- drivers/gpu/drm/radeon/radeon_display.c | 13 +- drivers/gpu/drm/radeon/radeon_mode.h | 3 +- drivers/gpu/drm/radeon/si.c | 5 + drivers/gpu/drm/radeon/sid.h | 59 +++++++ 15 files changed, 455 insertions(+), 62 deletions(-) create mode 100644 drivers/gpu/drm/radeon/dce6_afmt.c (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index da2a8e9e9308..306364a1ecda 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o + ci_dpm.o dce6_afmt.o # add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 092275d53d4a..dfac7965ea28 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -682,8 +682,6 @@ atombios_digital_setup(struct drm_encoder *encoder, int action) int atombios_get_encoder_mode(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct drm_connector *connector; struct radeon_connector *radeon_connector; @@ -710,8 +708,7 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_HDMIB: /* HDMI-B is basically DL-DVI; analog works fine */ if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio && - !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */ + radeon_audio) return ATOM_ENCODER_MODE_HDMI; else if (radeon_connector->use_digital) return ATOM_ENCODER_MODE_DVI; @@ -722,8 +719,7 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) case DRM_MODE_CONNECTOR_HDMIA: default: if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio && - !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */ + radeon_audio) return ATOM_ENCODER_MODE_HDMI; else return ATOM_ENCODER_MODE_DVI; @@ -737,8 +733,7 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) return ATOM_ENCODER_MODE_DP; else if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio && - !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */ + radeon_audio) return ATOM_ENCODER_MODE_HDMI; else return ATOM_ENCODER_MODE_DVI; diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 692e31b95d34..2b6049d55233 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7004,6 +7004,10 @@ static int cik_startup(struct radeon_device *rdev) return r; } + r = dce6_audio_init(rdev); + if (r) + return r; + return 0; } @@ -7049,6 +7053,7 @@ int cik_resume(struct radeon_device *rdev) */ int cik_suspend(struct radeon_device *rdev) { + dce6_audio_fini(rdev); radeon_vm_manager_fini(rdev); cik_cp_enable(rdev, false); cik_sdma_enable(rdev, false); diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c new file mode 100644 index 000000000000..0d9a6a21088c --- /dev/null +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -0,0 +1,251 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include +#include "radeon.h" +#include "sid.h" + +static u32 dce6_endpoint_rreg(struct radeon_device *rdev, + u32 block_offset, u32 reg) +{ + u32 r; + + WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); + r = RREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset); + return r; +} + +static void dce6_endpoint_wreg(struct radeon_device *rdev, + u32 block_offset, u32 reg, u32 v) +{ + if (ASIC_IS_DCE8(rdev)) + WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); + else + WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, + AZ_ENDPOINT_REG_WRITE_EN | AZ_ENDPOINT_REG_INDEX(reg)); + WREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset, v); +} + +#define RREG32_ENDPOINT(block, reg) dce6_endpoint_rreg(rdev, (block), (reg)) +#define WREG32_ENDPOINT(block, reg, v) dce6_endpoint_wreg(rdev, (block), (reg), (v)) + + +static void dce6_afmt_get_connected_pins(struct radeon_device *rdev) +{ + int i; + u32 offset, tmp; + + for (i = 0; i < rdev->audio.num_pins; i++) { + offset = rdev->audio.pin[i].offset; + tmp = RREG32_ENDPOINT(offset, + AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); + if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1) + rdev->audio.pin[i].connected = false; + else + rdev->audio.pin[i].connected = true; + } +} + +struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev) +{ + int i; + + dce6_afmt_get_connected_pins(rdev); + + for (i = 0; i < rdev->audio.num_pins; i++) { + if (rdev->audio.pin[i].connected) + return &rdev->audio.pin[i]; + } + DRM_ERROR("No connected audio pins found!\n"); + return NULL; +} + +void dce6_afmt_select_pin(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + u32 offset = dig->afmt->offset; + u32 id = dig->afmt->pin->id; + + if (!dig->afmt->pin) + return; + + WREG32(AFMT_AUDIO_SRC_CONTROL + offset, AFMT_AUDIO_SRC_SELECT(id)); +} + +void dce6_afmt_write_sad_regs(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + u32 offset, tmp; + struct drm_connector *connector; + struct radeon_connector *radeon_connector = NULL; + struct cea_sad *sads; + int i, sad_count, sadb_count; + u8 *sadb; + + static const u16 eld_reg_to_type[][2] = { + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, + }; + + if (!dig->afmt->pin) + return; + + offset = dig->afmt->pin->offset; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) + radeon_connector = to_radeon_connector(connector); + } + + if (!radeon_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); + if (sad_count < 0) { + DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + return; + } + BUG_ON(!sads); + + sadb_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb); + if (sadb_count < 0) { + DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sadb_count); + return; + } + + /* program the speaker allocation */ + tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK); + /* set HDMI mode */ + tmp |= HDMI_CONNECTION; + if (sadb_count) + tmp |= SPEAKER_ALLOCATION(sadb[0]); + else + tmp |= SPEAKER_ALLOCATION(5); /* stereo */ + WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + + for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { + u32 value = 0; + int j; + + for (j = 0; j < sad_count; j++) { + struct cea_sad *sad = &sads[j]; + + if (sad->format == eld_reg_to_type[i][1]) { + value = MAX_CHANNELS(sad->channels) | + DESCRIPTOR_BYTE_2(sad->byte2) | + SUPPORTED_FREQUENCIES(sad->freq); + if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) + value |= SUPPORTED_FREQUENCIES_STEREO(sad->freq); + break; + } + } + WREG32_ENDPOINT(offset, eld_reg_to_type[i][0], value); + } + + kfree(sads); + kfree(sadb); +} + +static int dce6_audio_chipset_supported(struct radeon_device *rdev) +{ + return !ASIC_IS_NODCE(rdev); +} + +static void dce6_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable) +{ + WREG32_ENDPOINT(pin->offset, AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL, + AUDIO_ENABLED); + DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id); +} + +static const u32 pin_offsets[7] = +{ + (0x5e00 - 0x5e00), + (0x5e18 - 0x5e00), + (0x5e30 - 0x5e00), + (0x5e48 - 0x5e00), + (0x5e60 - 0x5e00), + (0x5e78 - 0x5e00), + (0x5e90 - 0x5e00), +}; + +int dce6_audio_init(struct radeon_device *rdev) +{ + int i; + + if (!radeon_audio || !dce6_audio_chipset_supported(rdev)) + return 0; + + rdev->audio.enabled = true; + + if (ASIC_IS_DCE8(rdev)) + rdev->audio.num_pins = 7; + else + rdev->audio.num_pins = 6; + + for (i = 0; i < rdev->audio.num_pins; i++) { + rdev->audio.pin[i].channels = -1; + rdev->audio.pin[i].rate = -1; + rdev->audio.pin[i].bits_per_sample = -1; + rdev->audio.pin[i].status_bits = 0; + rdev->audio.pin[i].category_code = 0; + rdev->audio.pin[i].connected = false; + rdev->audio.pin[i].offset = pin_offsets[i]; + rdev->audio.pin[i].id = i; + dce6_audio_enable(rdev, &rdev->audio.pin[i], true); + } + + return 0; +} + +void dce6_audio_fini(struct radeon_device *rdev) +{ + int i; + + if (!rdev->audio.enabled) + return; + + for (i = 0; i < rdev->audio.num_pins; i++) + dce6_audio_enable(rdev, &rdev->audio.pin[i], false); + + rdev->audio.enabled = false; +} diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index b0e280058b9b..c5acdf0a301a 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -32,6 +32,9 @@ #include "evergreend.h" #include "atom.h" +extern void dce6_afmt_write_sad_regs(struct drm_encoder *encoder); +extern void dce6_afmt_select_pin(struct drm_encoder *encoder); + /* * update the N and CTS parameters for a given pixel clock rate */ @@ -157,22 +160,26 @@ static void evergreen_audio_set_dto(struct drm_encoder *encoder, u32 clock) if (!dig || !dig->afmt) return; - if (max_ratio >= 8) { - dto_phase = 192 * 1000; - wallclock_ratio = 3; - } else if (max_ratio >= 4) { - dto_phase = 96 * 1000; - wallclock_ratio = 2; - } else if (max_ratio >= 2) { - dto_phase = 48 * 1000; - wallclock_ratio = 1; - } else { + if (ASIC_IS_DCE6(rdev)) { dto_phase = 24 * 1000; - wallclock_ratio = 0; + } else { + if (max_ratio >= 8) { + dto_phase = 192 * 1000; + wallclock_ratio = 3; + } else if (max_ratio >= 4) { + dto_phase = 96 * 1000; + wallclock_ratio = 2; + } else if (max_ratio >= 2) { + dto_phase = 48 * 1000; + wallclock_ratio = 1; + } else { + dto_phase = 24 * 1000; + wallclock_ratio = 0; + } + dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK; + dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio); + WREG32(DCCG_AUDIO_DTO0_CNTL, dto_cntl); } - dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK; - dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio); - WREG32(DCCG_AUDIO_DTO0_CNTL, dto_cntl); /* XXX two dtos; generally use dto0 for hdmi */ /* Express [24MHz / target pixel clock] as an exact rational @@ -266,7 +273,13 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode AFMT_AUDIO_CHANNEL_ENABLE(0xff)); /* fglrx sets 0x40 in 0x5f80 here */ - evergreen_hdmi_write_sad_regs(encoder); + + if (ASIC_IS_DCE6(rdev)) { + dce6_afmt_select_pin(encoder); + dce6_afmt_write_sad_regs(encoder); + } else { + evergreen_hdmi_write_sad_regs(encoder); + } err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); if (err < 0) { @@ -302,6 +315,8 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) { + struct drm_device *dev = encoder->dev; + struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; @@ -314,6 +329,15 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!enable && !dig->afmt->enabled) return; + if (enable) { + if (ASIC_IS_DCE6(rdev)) + dig->afmt->pin = dce6_audio_get_pin(rdev); + else + dig->afmt->pin = r600_audio_get_pin(rdev); + } else { + dig->afmt->pin = NULL; + } + dig->afmt->enabled = enable; DRM_DEBUG("%sabling HDMI interface @ 0x%04X for encoder 0x%x\n", diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 2db8ce0023ac..69499fff06b0 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2027,9 +2027,15 @@ static int cayman_startup(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) - return r; + if (ASIC_IS_DCE6(rdev)) { + r = dce6_audio_init(rdev); + if (r) + return r; + } else { + r = r600_audio_init(rdev); + if (r) + return r; + } return 0; } @@ -2060,7 +2066,10 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { - r600_audio_fini(rdev); + if (ASIC_IS_DCE6(rdev)) + dce6_audio_fini(rdev); + else + r600_audio_fini(rdev); radeon_vm_manager_fini(rdev); cayman_cp_enable(rdev, false); cayman_dma_stop(rdev); diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index c92eb86a8e55..47fc2b886979 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -57,12 +57,12 @@ static bool radeon_dig_encoder(struct drm_encoder *encoder) */ static int r600_audio_chipset_supported(struct radeon_device *rdev) { - return ASIC_IS_DCE2(rdev) && !ASIC_IS_DCE6(rdev); + return ASIC_IS_DCE2(rdev) && !ASIC_IS_NODCE(rdev); } -struct r600_audio r600_audio_status(struct radeon_device *rdev) +struct r600_audio_pin r600_audio_status(struct radeon_device *rdev) { - struct r600_audio status; + struct r600_audio_pin status; uint32_t value; value = RREG32(R600_AUDIO_RATE_BPS_CHANNEL); @@ -120,16 +120,16 @@ void r600_audio_update_hdmi(struct work_struct *work) struct radeon_device *rdev = container_of(work, struct radeon_device, audio_work); struct drm_device *dev = rdev->ddev; - struct r600_audio audio_status = r600_audio_status(rdev); + struct r600_audio_pin audio_status = r600_audio_status(rdev); struct drm_encoder *encoder; bool changed = false; - if (rdev->audio_status.channels != audio_status.channels || - rdev->audio_status.rate != audio_status.rate || - rdev->audio_status.bits_per_sample != audio_status.bits_per_sample || - rdev->audio_status.status_bits != audio_status.status_bits || - rdev->audio_status.category_code != audio_status.category_code) { - rdev->audio_status = audio_status; + if (rdev->audio.pin[0].channels != audio_status.channels || + rdev->audio.pin[0].rate != audio_status.rate || + rdev->audio.pin[0].bits_per_sample != audio_status.bits_per_sample || + rdev->audio.pin[0].status_bits != audio_status.status_bits || + rdev->audio.pin[0].category_code != audio_status.category_code) { + rdev->audio.pin[0] = audio_status; changed = true; } @@ -141,13 +141,13 @@ void r600_audio_update_hdmi(struct work_struct *work) } } -/* - * turn on/off audio engine - */ -static void r600_audio_engine_enable(struct radeon_device *rdev, bool enable) +/* enable the audio stream */ +static void r600_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable) { u32 value = 0; - DRM_INFO("%s audio support\n", enable ? "Enabling" : "Disabling"); + if (ASIC_IS_DCE4(rdev)) { if (enable) { value |= 0x81000000; /* Required to enable audio */ @@ -158,7 +158,7 @@ static void r600_audio_engine_enable(struct radeon_device *rdev, bool enable) WREG32_P(R600_AUDIO_ENABLE, enable ? 0x81000000 : 0x0, ~0x81000000); } - rdev->audio_enabled = enable; + DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id); } /* @@ -169,13 +169,17 @@ int r600_audio_init(struct radeon_device *rdev) if (!radeon_audio || !r600_audio_chipset_supported(rdev)) return 0; - r600_audio_engine_enable(rdev, true); + rdev->audio.enabled = true; + + rdev->audio.num_pins = 1; + rdev->audio.pin[0].channels = -1; + rdev->audio.pin[0].rate = -1; + rdev->audio.pin[0].bits_per_sample = -1; + rdev->audio.pin[0].status_bits = 0; + rdev->audio.pin[0].category_code = 0; + rdev->audio.pin[0].id = 0; - rdev->audio_status.channels = -1; - rdev->audio_status.rate = -1; - rdev->audio_status.bits_per_sample = -1; - rdev->audio_status.status_bits = 0; - rdev->audio_status.category_code = 0; + r600_audio_enable(rdev, &rdev->audio.pin[0], true); return 0; } @@ -186,8 +190,16 @@ int r600_audio_init(struct radeon_device *rdev) */ void r600_audio_fini(struct radeon_device *rdev) { - if (!rdev->audio_enabled) + if (!rdev->audio.enabled) return; - r600_audio_engine_enable(rdev, false); + r600_audio_enable(rdev, &rdev->audio.pin[0], false); + + rdev->audio.enabled = false; +} + +struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev) +{ + /* only one pin on 6xx-NI */ + return &rdev->audio.pin[0]; } diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f264df5470f7..e1dec1339461 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -382,7 +382,7 @@ void r600_hdmi_update_audio_settings(struct drm_encoder *encoder) struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - struct r600_audio audio = r600_audio_status(rdev); + struct r600_audio_pin audio = r600_audio_status(rdev); uint8_t buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AUDIO_INFOFRAME_SIZE]; struct hdmi_audio_infoframe frame; uint32_t offset; @@ -491,6 +491,11 @@ void r600_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!enable && !dig->afmt->enabled) return; + if (enable) + dig->afmt->pin = r600_audio_get_pin(rdev); + else + dig->afmt->pin = NULL; + /* Older chipsets require setting HDMI and routing manually */ if (!ASIC_IS_DCE3(rdev)) { if (enable) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 791cc8de6395..82fef854b686 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -696,7 +696,7 @@ union radeon_irq_stat_regs { #define RADEON_MAX_HPD_PINS 6 #define RADEON_MAX_CRTCS 6 -#define RADEON_MAX_AFMT_BLOCKS 6 +#define RADEON_MAX_AFMT_BLOCKS 7 struct radeon_irq { bool installed; @@ -1537,12 +1537,21 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, unsigned cg_upll_func_cntl); -struct r600_audio { +struct r600_audio_pin { int channels; int rate; int bits_per_sample; u8 status_bits; u8 category_code; + u32 offset; + bool connected; + u32 id; +}; + +struct r600_audio { + bool enabled; + struct r600_audio_pin pin[RADEON_MAX_AFMT_BLOCKS]; + int num_pins; }; /* @@ -2128,9 +2137,8 @@ struct radeon_device { struct work_struct reset_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ - bool audio_enabled; bool has_uvd; - struct r600_audio audio_status; /* audio stuff */ + struct r600_audio audio; /* audio stuff */ struct notifier_block acpi_nb; /* only one userspace can use Hyperz features or CMASK at a time */ struct drm_file *hyperz_filp; @@ -2594,6 +2602,8 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, /* audio */ void r600_audio_update_hdmi(struct work_struct *work); +struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev); +struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev); /* * R600 vram scratch functions diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index da755bf37421..69f3122779c3 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1739,6 +1739,8 @@ static struct radeon_asic trinity_asic = { .wait_for_vblank = &dce4_wait_for_vblank, .set_backlight_level = &atombios_set_backlight_level, .get_backlight_level = &atombios_get_backlight_level, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { .blit = &r600_copy_cpdma, @@ -1867,6 +1869,8 @@ static struct radeon_asic si_asic = { .wait_for_vblank = &dce4_wait_for_vblank, .set_backlight_level = &atombios_set_backlight_level, .get_backlight_level = &atombios_get_backlight_level, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { .blit = NULL, @@ -2009,6 +2013,8 @@ static struct radeon_asic ci_asic = { .bandwidth_update = &dce8_bandwidth_update, .get_vblank_counter = &evergreen_get_vblank_counter, .wait_for_vblank = &dce4_wait_for_vblank, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { .blit = NULL, @@ -2107,6 +2113,8 @@ static struct radeon_asic kv_asic = { .bandwidth_update = &dce8_bandwidth_update, .get_vblank_counter = &evergreen_get_vblank_counter, .wait_for_vblank = &dce4_wait_for_vblank, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { .blit = NULL, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e69f00a7f153..818bbe6b884b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -379,7 +379,7 @@ void r600_disable_interrupts(struct radeon_device *rdev); void r600_rlc_stop(struct radeon_device *rdev); /* r600 audio */ int r600_audio_init(struct radeon_device *rdev); -struct r600_audio r600_audio_status(struct radeon_device *rdev); +struct r600_audio_pin r600_audio_status(struct radeon_device *rdev); void r600_audio_fini(struct radeon_device *rdev); int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); @@ -628,6 +628,8 @@ int trinity_dpm_force_performance_level(struct radeon_device *rdev, /* DCE6 - SI */ void dce6_bandwidth_update(struct radeon_device *rdev); +int dce6_audio_init(struct radeon_device *rdev); +void dce6_audio_fini(struct radeon_device *rdev); /* * si diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 31d9fbe85c72..af9cd6a57efc 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1254,8 +1254,8 @@ static void radeon_afmt_init(struct radeon_device *rdev) for (i = 0; i < RADEON_MAX_AFMT_BLOCKS; i++) rdev->mode_info.afmt[i] = NULL; - if (ASIC_IS_DCE6(rdev)) { - /* todo */ + if (ASIC_IS_NODCE(rdev)) { + /* nothing to do */ } else if (ASIC_IS_DCE4(rdev)) { static uint32_t eg_offsets[] = { EVERGREEN_CRTC0_REGISTER_OFFSET, @@ -1264,12 +1264,19 @@ static void radeon_afmt_init(struct radeon_device *rdev) EVERGREEN_CRTC3_REGISTER_OFFSET, EVERGREEN_CRTC4_REGISTER_OFFSET, EVERGREEN_CRTC5_REGISTER_OFFSET, + 0x13830 - 0x7030, }; int num_afmt; + /* DCE8 has 7 audio blocks tied to DIG encoders */ + /* DCE6 has 6 audio blocks tied to DIG encoders */ /* DCE4/5 has 6 audio blocks tied to DIG encoders */ /* DCE4.1 has 2 audio blocks tied to DIG encoders */ - if (ASIC_IS_DCE5(rdev)) + if (ASIC_IS_DCE8(rdev)) + num_afmt = 7; + else if (ASIC_IS_DCE6(rdev)) + num_afmt = 6; + else if (ASIC_IS_DCE5(rdev)) num_afmt = 6; else if (ASIC_IS_DCE41(rdev)) num_afmt = 2; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 8296632a4235..d908d8d68f6b 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -225,6 +225,7 @@ struct radeon_afmt { int offset; bool last_buffer_filled_status; int id; + struct r600_audio_pin *pin; }; struct radeon_mode_info { @@ -233,7 +234,7 @@ struct radeon_mode_info { enum radeon_connector_table connector_table; bool mode_config_initialized; struct radeon_crtc *crtcs[6]; - struct radeon_afmt *afmt[6]; + struct radeon_afmt *afmt[7]; /* DVI-I properties */ struct drm_property *coherent_mode_property; /* DAC enable load detect */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f5307e6bb92b..fb2058c9670d 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6264,6 +6264,10 @@ static int si_startup(struct radeon_device *rdev) return r; } + r = dce6_audio_init(rdev); + if (r) + return r; + return 0; } @@ -6295,6 +6299,7 @@ int si_resume(struct radeon_device *rdev) int si_suspend(struct radeon_device *rdev) { + dce6_audio_fini(rdev); radeon_vm_manager_fini(rdev); si_cp_enable(rdev, false); cayman_dma_stop(rdev); diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 2c8da27a929f..968cf699c29e 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -635,6 +635,54 @@ #define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +/* DCE6 ELD audio interface */ +#define AZ_F0_CODEC_ENDPOINT_INDEX 0x5E00 +# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0) +# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8) +#define AZ_F0_CODEC_ENDPOINT_DATA 0x5E04 + +#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25 +#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0) +#define SPEAKER_ALLOCATION_MASK (0x7f << 0) +#define SPEAKER_ALLOCATION_SHIFT 0 +#define HDMI_CONNECTION (1 << 16) +#define DP_CONNECTION (1 << 17) + +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */ +# define MAX_CHANNELS(x) (((x) & 0x7) << 0) +/* max channels minus one. 7 = 8 channels */ +# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) +# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) +# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ +/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO + * bit0 = 32 kHz + * bit1 = 44.1 kHz + * bit2 = 48 kHz + * bit3 = 88.2 kHz + * bit4 = 96 kHz + * bit5 = 176.4 kHz + * bit6 = 192 kHz + */ +#define AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL 0x54 +# define AUDIO_ENABLED (1 << 31) + +#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56 +#define PORT_CONNECTIVITY_MASK (3 << 30) +#define PORT_CONNECTIVITY_SHIFT 30 + #define DC_LB_MEMORY_SPLIT 0x6b0c #define DC_LB_MEMORY_CONFIG(x) ((x) << 20) @@ -755,6 +803,17 @@ /* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */ #define CRTC_STATUS_FRAME_COUNT 0x6e98 +#define AFMT_AUDIO_SRC_CONTROL 0x713c +#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) +/* AFMT_AUDIO_SRC_SELECT + * 0 = stream0 + * 1 = stream1 + * 2 = stream2 + * 3 = stream3 + * 4 = stream4 + * 5 = stream5 + */ + #define GRBM_CNTL 0x8000 #define GRBM_READ_TIMEOUT(x) ((x) << 0) -- cgit v1.2.3 From 473359bc28e193031a76d99f71e8b6c4808719a6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 9 Aug 2013 11:18:39 -0400 Subject: drm/radeon: restructure cg/pg on cik (v2) - use new cg/pg flags for finer grained clock and powergating control - restructure the cg/pg code so it can be called from other components such as dpm v2: fix build breakage from rebase Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 165 +++++++++++++++++++++-------------- drivers/gpu/drm/radeon/cikd.h | 1 + drivers/gpu/drm/radeon/radeon_asic.c | 73 +++++++++++++++- 3 files changed, 170 insertions(+), 69 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 2b6049d55233..b7859fe3df80 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5062,7 +5062,7 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) cik_enable_gui_idle_interrupt(rdev, enable); - if (enable) { + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { tmp = cik_halt_rlc(rdev); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); @@ -5092,11 +5092,15 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) { u32 data, orig, tmp = 0; - if (enable) { - orig = data = RREG32(CP_MEM_SLP_CNTL); - data |= CP_MEM_LS_EN; - if (orig != data) - WREG32(CP_MEM_SLP_CNTL, data); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { + if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { + if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { + orig = data = RREG32(CP_MEM_SLP_CNTL); + data |= CP_MEM_LS_EN; + if (orig != data) + WREG32(CP_MEM_SLP_CNTL, data); + } + } orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); data &= 0xfffffffd; @@ -5113,17 +5117,21 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) cik_update_rlc(rdev, tmp); - orig = data = RREG32(CGTS_SM_CTRL_REG); - data &= ~SM_MODE_MASK; - data |= SM_MODE(0x2); - data |= SM_MODE_ENABLE; - data &= ~CGTS_OVERRIDE; - data &= ~CGTS_LS_OVERRIDE; - data &= ~ON_MONITOR_ADD_MASK; - data |= ON_MONITOR_ADD_EN; - data |= ON_MONITOR_ADD(0x96); - if (orig != data) - WREG32(CGTS_SM_CTRL_REG, data); + if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { + orig = data = RREG32(CGTS_SM_CTRL_REG); + data &= ~SM_MODE_MASK; + data |= SM_MODE(0x2); + data |= SM_MODE_ENABLE; + data &= ~CGTS_OVERRIDE; + if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) && + (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS)) + data &= ~CGTS_LS_OVERRIDE; + data &= ~ON_MONITOR_ADD_MASK; + data |= ON_MONITOR_ADD_EN; + data |= ON_MONITOR_ADD(0x96); + if (orig != data) + WREG32(CGTS_SM_CTRL_REG, data); + } } else { orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); data |= 0x00000002; @@ -5180,7 +5188,7 @@ static void cik_enable_mc_ls(struct radeon_device *rdev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable) + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) data |= MC_LS_ENABLE; else data &= ~MC_LS_ENABLE; @@ -5197,7 +5205,7 @@ static void cik_enable_mc_mgcg(struct radeon_device *rdev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable) + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) data |= MC_CG_ENABLE; else data &= ~MC_CG_ENABLE; @@ -5211,7 +5219,7 @@ static void cik_enable_sdma_mgcg(struct radeon_device *rdev, { u32 orig, data; - if (enable) { + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); } else { @@ -5232,7 +5240,7 @@ static void cik_enable_sdma_mgls(struct radeon_device *rdev, { u32 orig, data; - if (enable) { + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) { orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); data |= 0x100; if (orig != data) @@ -5260,7 +5268,7 @@ static void cik_enable_uvd_mgcg(struct radeon_device *rdev, { u32 orig, data; - if (enable) { + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); data = 0xfff; WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); @@ -5281,6 +5289,24 @@ static void cik_enable_uvd_mgcg(struct radeon_device *rdev, } } +static void cik_enable_bif_mgls(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; + + orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) + data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | + REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; + else + data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | + REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); + + if (orig != data) + WREG32_PCIE_PORT(PCIE_CNTL2, data); +} + static void cik_enable_hdp_mgcg(struct radeon_device *rdev, bool enable) { @@ -5288,7 +5314,7 @@ static void cik_enable_hdp_mgcg(struct radeon_device *rdev, orig = data = RREG32(HDP_HOST_PATH_CNTL); - if (enable) + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) data &= ~CLOCK_GATING_DIS; else data |= CLOCK_GATING_DIS; @@ -5304,7 +5330,7 @@ static void cik_enable_hdp_ls(struct radeon_device *rdev, orig = data = RREG32(HDP_MEM_POWER_LS); - if (enable) + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) data |= HDP_LS_ENABLE; else data &= ~HDP_LS_ENABLE; @@ -5339,6 +5365,10 @@ void cik_update_cg(struct radeon_device *rdev, cik_enable_sdma_mgls(rdev, enable); } + if (block & RADEON_CG_BLOCK_BIF) { + cik_enable_bif_mgls(rdev, enable); + } + if (block & RADEON_CG_BLOCK_UVD) { if (rdev->has_uvd) cik_enable_uvd_mgcg(rdev, enable); @@ -5360,17 +5390,29 @@ static void cik_init_cg(struct radeon_device *rdev) cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | RADEON_CG_BLOCK_UVD | RADEON_CG_BLOCK_HDP), true); } +static void cik_fini_cg(struct radeon_device *rdev) +{ + cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); + + cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); +} + static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, bool enable) { u32 data, orig; orig = data = RREG32(RLC_PG_CNTL); - if (enable) + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; else data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; @@ -5384,7 +5426,7 @@ static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, u32 data, orig; orig = data = RREG32(RLC_PG_CNTL); - if (enable) + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; else data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; @@ -5397,7 +5439,7 @@ static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) u32 data, orig; orig = data = RREG32(RLC_PG_CNTL); - if (enable) + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) data &= ~DISABLE_CP_PG; else data |= DISABLE_CP_PG; @@ -5410,7 +5452,7 @@ static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) u32 data, orig; orig = data = RREG32(RLC_PG_CNTL); - if (enable) + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS)) data &= ~DISABLE_GDS_PG; else data |= DISABLE_GDS_PG; @@ -5465,7 +5507,7 @@ static void cik_enable_gfx_cgpg(struct radeon_device *rdev, { u32 data, orig; - if (enable) { + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) { orig = data = RREG32(RLC_PG_CNTL); data |= GFX_PG_ENABLE; if (orig != data) @@ -5552,7 +5594,7 @@ static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev, u32 data, orig; orig = data = RREG32(RLC_PG_CNTL); - if (enable) + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG)) data |= STATIC_PER_CU_PG_ENABLE; else data &= ~STATIC_PER_CU_PG_ENABLE; @@ -5566,7 +5608,7 @@ static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, u32 data, orig; orig = data = RREG32(RLC_PG_CNTL); - if (enable) + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG)) data |= DYN_PER_CU_PG_ENABLE; else data &= ~DYN_PER_CU_PG_ENABLE; @@ -5628,52 +5670,37 @@ static void cik_init_gfx_cgpg(struct radeon_device *rdev) static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) { - bool has_pg = false; - bool has_dyn_mgpg = false; - bool has_static_mgpg = false; - - /* only APUs have PG */ - if (rdev->flags & RADEON_IS_IGP) { - has_pg = true; - has_static_mgpg = true; - if (rdev->family == CHIP_KAVERI) - has_dyn_mgpg = true; - } - - if (has_pg) { - cik_enable_gfx_cgpg(rdev, enable); - if (enable) { - cik_enable_gfx_static_mgpg(rdev, has_static_mgpg); - cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg); - } else { - cik_enable_gfx_static_mgpg(rdev, false); - cik_enable_gfx_dynamic_mgpg(rdev, false); - } - } - + cik_enable_gfx_cgpg(rdev, enable); + cik_enable_gfx_static_mgpg(rdev, enable); + cik_enable_gfx_dynamic_mgpg(rdev, enable); } -void cik_init_pg(struct radeon_device *rdev) +static void cik_init_pg(struct radeon_device *rdev) { - bool has_pg = false; - - /* only APUs have PG */ - if (rdev->flags & RADEON_IS_IGP) { - /* XXX disable this for now */ - /* has_pg = true; */ - } - - if (has_pg) { + if (rdev->pg_flags) { cik_enable_sck_slowdown_on_pu(rdev, true); cik_enable_sck_slowdown_on_pd(rdev, true); - cik_init_gfx_cgpg(rdev); - cik_enable_cp_pg(rdev, true); - cik_enable_gds_pg(rdev, true); + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) { + cik_init_gfx_cgpg(rdev); + cik_enable_cp_pg(rdev, true); + cik_enable_gds_pg(rdev, true); + } cik_init_ao_cu_mask(rdev); cik_update_gfx_pg(rdev, true); } } +static void cik_fini_pg(struct radeon_device *rdev) +{ + if (rdev->pg_flags) { + cik_update_gfx_pg(rdev, false); + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) { + cik_enable_cp_pg(rdev, false); + cik_enable_gds_pg(rdev, false); + } + } +} + /* * Interrupts * Starting with r6xx, interrupts are handled via a ring buffer. @@ -7059,6 +7086,8 @@ int cik_suspend(struct radeon_device *rdev) cik_sdma_enable(rdev, false); uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); + cik_fini_pg(rdev); + cik_fini_cg(rdev); cik_irq_suspend(rdev); radeon_wb_disable(rdev); cik_pcie_gart_disable(rdev); @@ -7214,6 +7243,8 @@ void cik_fini(struct radeon_device *rdev) { cik_cp_fini(rdev); cik_sdma_fini(rdev); + cik_fini_pg(rdev); + cik_fini_cg(rdev); cik_irq_fini(rdev); sumo_rlc_fini(rdev); cik_mec_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 259b81c7cdd8..6a92f491cb91 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -285,6 +285,7 @@ #define PCIE_CNTL2 0x1001001c /* PCIE */ # define SLV_MEM_LS_EN (1 << 16) +# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17) # define MST_MEM_LS_EN (1 << 18) # define REPLAY_MEM_LS_EN (1 << 19) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 78e9bbc8446b..630853b96841 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2438,15 +2438,84 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->asic = &ci_asic; rdev->num_crtc = 6; rdev->has_uvd = true; + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CGTS_LS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_MC_LS | + RADEON_CG_SUPPORT_MC_MGCG | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_SDMA_LS | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; break; case CHIP_KAVERI: case CHIP_KABINI: rdev->asic = &kv_asic; /* set num crtcs */ - if (rdev->family == CHIP_KAVERI) + if (rdev->family == CHIP_KAVERI) { rdev->num_crtc = 4; - else + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CGTS_LS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_SDMA_LS | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + /*RADEON_PG_SUPPORT_GFX_CG | + RADEON_PG_SUPPORT_GFX_SMG | + RADEON_PG_SUPPORT_GFX_DMG | + RADEON_PG_SUPPORT_UVD | + RADEON_PG_SUPPORT_VCE | + RADEON_PG_SUPPORT_CP | + RADEON_PG_SUPPORT_GDS | + RADEON_PG_SUPPORT_RLC_SMU_HS | + RADEON_PG_SUPPORT_ACP | + RADEON_PG_SUPPORT_SAMU;*/ + } else { rdev->num_crtc = 2; + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CGTS_LS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_SDMA_LS | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + /*RADEON_PG_SUPPORT_GFX_CG | + RADEON_PG_SUPPORT_GFX_SMG | + RADEON_PG_SUPPORT_UVD | + RADEON_PG_SUPPORT_VCE | + RADEON_PG_SUPPORT_CP | + RADEON_PG_SUPPORT_GDS | + RADEON_PG_SUPPORT_RLC_SMU_HS | + RADEON_PG_SUPPORT_SAMU;*/ + } rdev->has_uvd = true; break; default: -- cgit v1.2.3 From ddc76ff6c78ecb189102bdc3bd9d14de5b750a6f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 12 Aug 2013 17:25:26 -0400 Subject: drm/radeon: fixes for gfx clockgating on CIK Clockgating requires signalling between the CP and the RLC to work properly. Resetting the CP block in the CP resume code messed up the internal coordination between the blocks. Removing the reset allows gfx clockgating to work properly. However, when gfx clock gating is enabled, there is a strange interaction with dpm which causes the chip to stay in the high performance level all the time, so leave gfx clockgating disabled for now. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 15 +++++---------- drivers/gpu/drm/radeon/radeon_asic.c | 6 +++--- 2 files changed, 8 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b7859fe3df80..1f088800295d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3974,13 +3974,6 @@ static int cik_cp_resume(struct radeon_device *rdev) { int r; - /* Reset all cp blocks */ - WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); - RREG32(GRBM_SOFT_RESET); - mdelay(15); - WREG32(GRBM_SOFT_RESET, 0); - RREG32(GRBM_SOFT_RESET); - r = cik_cp_load_microcode(rdev); if (r) return r; @@ -5060,9 +5053,9 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) orig = data = RREG32(RLC_CGCG_CGLS_CTRL); - cik_enable_gui_idle_interrupt(rdev, enable); - if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { + cik_enable_gui_idle_interrupt(rdev, true); + tmp = cik_halt_rlc(rdev); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); @@ -5075,6 +5068,8 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) data |= CGCG_EN | CGLS_EN; } else { + cik_enable_gui_idle_interrupt(rdev, false); + RREG32(CB_CGTT_SCLK_CTRL); RREG32(CB_CGTT_SCLK_CTRL); RREG32(CB_CGTT_SCLK_CTRL); @@ -5383,7 +5378,7 @@ void cik_update_cg(struct radeon_device *rdev, static void cik_init_cg(struct radeon_device *rdev) { - cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */ + cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true); if (rdev->has_uvd) si_init_uvd_internal_cg(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 630853b96841..6152169d011f 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2439,7 +2439,7 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->num_crtc = 6; rdev->has_uvd = true; rdev->cg_flags = - RADEON_CG_SUPPORT_GFX_MGCG | + /*RADEON_CG_SUPPORT_GFX_MGCG |*/ RADEON_CG_SUPPORT_GFX_MGLS | /*RADEON_CG_SUPPORT_GFX_CGCG |*/ RADEON_CG_SUPPORT_GFX_CGLS | @@ -2464,7 +2464,7 @@ int radeon_asic_init(struct radeon_device *rdev) if (rdev->family == CHIP_KAVERI) { rdev->num_crtc = 4; rdev->cg_flags = - RADEON_CG_SUPPORT_GFX_MGCG | + /*RADEON_CG_SUPPORT_GFX_MGCG |*/ RADEON_CG_SUPPORT_GFX_MGLS | /*RADEON_CG_SUPPORT_GFX_CGCG |*/ RADEON_CG_SUPPORT_GFX_CGLS | @@ -2492,7 +2492,7 @@ int radeon_asic_init(struct radeon_device *rdev) } else { rdev->num_crtc = 2; rdev->cg_flags = - RADEON_CG_SUPPORT_GFX_MGCG | + /*RADEON_CG_SUPPORT_GFX_MGCG |*/ RADEON_CG_SUPPORT_GFX_MGLS | /*RADEON_CG_SUPPORT_GFX_CGCG |*/ RADEON_CG_SUPPORT_GFX_CGLS | -- cgit v1.2.3 From a0f38609c9870fe0e3d5c10b1e6926a5750d0a7a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 22 Aug 2013 11:57:46 -0400 Subject: drm/radeon/cik: properly set up the clearstate buffer for pg (v2) The format of the clearstate buffer used for pg (powergating) changed between NI and SI. This formats it properly for what the hardware expects on SI+. v2: fix addresses Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 93 +++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/evergreen.c | 13 +++++- 2 files changed, 103 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 1f088800295d..582f8e4f36d4 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5622,7 +5622,7 @@ static void cik_init_gfx_cgpg(struct radeon_device *rdev) if (rdev->rlc.cs_data) { WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); - WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr); + WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); } else { WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); @@ -5670,6 +5670,97 @@ static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) cik_enable_gfx_dynamic_mgpg(rdev, enable); } +u32 cik_get_csb_size(struct radeon_device *rdev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (rdev->rlc.cs_data == NULL) + return 0; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + /* pa_sc_raster_config/pa_sc_raster_config1 */ + count += 4; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (rdev->rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); + buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE; + + buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1); + buffer[count++] = 0x80000000; + buffer[count++] = 0x80000000; + + for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count); + buffer[count++] = ext->reg_index - 0xa000; + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = ext->extent[i]; + } else { + return; + } + } + } + + buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START; + switch (rdev->family) { + case CHIP_BONAIRE: + buffer[count++] = 0x16000012; + buffer[count++] = 0x00000000; + break; + case CHIP_KAVERI: + buffer[count++] = 0x00000000; /* XXX */ + buffer[count++] = 0x00000000; + break; + case CHIP_KABINI: + buffer[count++] = 0x00000000; /* XXX */ + buffer[count++] = 0x00000000; + break; + default: + buffer[count++] = 0x00000000; + buffer[count++] = 0x00000000; + break; + } + + buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); + buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE; + + buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0); + buffer[count++] = 0; +} + static void cik_init_pg(struct radeon_device *rdev) { if (rdev->pg_flags) { diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 536908109001..2ca9f13f2c79 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -144,6 +144,8 @@ void cik_init_cp_pg_table(struct radeon_device *rdev); extern u32 si_get_csb_size(struct radeon_device *rdev); extern void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer); +extern u32 cik_get_csb_size(struct radeon_device *rdev); +extern void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer); static const u32 evergreen_golden_registers[] = { @@ -3903,6 +3905,9 @@ int sumo_rlc_init(struct radeon_device *rdev) src_ptr = rdev->rlc.reg_list; dws = rdev->rlc.reg_list_size; + if (rdev->family >= CHIP_BONAIRE) { + dws += (5 * 16) + 48 + 48 + 64; + } cs_data = rdev->rlc.cs_data; if (src_ptr) { @@ -3966,7 +3971,9 @@ int sumo_rlc_init(struct radeon_device *rdev) if (cs_data) { /* clear state block */ - if (rdev->family >= CHIP_TAHITI) { + if (rdev->family >= CHIP_BONAIRE) { + rdev->rlc.clear_state_size = dws = cik_get_csb_size(rdev); + } else if (rdev->family >= CHIP_TAHITI) { rdev->rlc.clear_state_size = si_get_csb_size(rdev); dws = rdev->rlc.clear_state_size + (256 / 4); } else { @@ -4014,7 +4021,9 @@ int sumo_rlc_init(struct radeon_device *rdev) } /* set up the cs buffer */ dst_ptr = rdev->rlc.cs_ptr; - if (rdev->family >= CHIP_TAHITI) { + if (rdev->family >= CHIP_BONAIRE) { + cik_get_csb_buffer(rdev, dst_ptr); + } else if (rdev->family >= CHIP_TAHITI) { reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + 256; dst_ptr[0] = upper_32_bits(reg_list_mc_addr); dst_ptr[1] = lower_32_bits(reg_list_mc_addr); -- cgit v1.2.3 From bc01a8c7a24169f8b111b7dda6f5d8e7088309af Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Aug 2013 11:39:27 -0400 Subject: drm/radeon: update line buffer allocation for dce8 We need to allocate line buffer to each display when setting up the watermarks. Failure to do so can lead to a blank screen. This fixes blank screen problems on dce8 asics. Based on an initial fix from: Jay Cornwall Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 29 ++++++++++++++++++++++------- drivers/gpu/drm/radeon/cikd.h | 4 ++++ 2 files changed, 26 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 582f8e4f36d4..1942571496ea 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7367,8 +7367,8 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev, struct radeon_crtc *radeon_crtc, struct drm_display_mode *mode) { - u32 tmp; - + u32 tmp, buffer_alloc, i; + u32 pipe_offset = radeon_crtc->crtc_id * 0x20; /* * Line Buffer Setup * There are 6 line buffers, one for each display controllers. @@ -7378,22 +7378,37 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev, * them using the stereo blender. */ if (radeon_crtc->base.enabled && mode) { - if (mode->crtc_hdisplay < 1920) + if (mode->crtc_hdisplay < 1920) { tmp = 1; - else if (mode->crtc_hdisplay < 2560) + buffer_alloc = 2; + } else if (mode->crtc_hdisplay < 2560) { tmp = 2; - else if (mode->crtc_hdisplay < 4096) + buffer_alloc = 2; + } else if (mode->crtc_hdisplay < 4096) { tmp = 0; - else { + buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; + } else { DRM_DEBUG_KMS("Mode too big for LB!\n"); tmp = 0; + buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; } - } else + } else { tmp = 1; + buffer_alloc = 0; + } WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset, LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0)); + WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset, + DMIF_BUFFERS_ALLOCATED(buffer_alloc)); + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) & + DMIF_BUFFERS_ALLOCATED_COMPLETED) + break; + udelay(1); + } + if (radeon_crtc->base.enabled && mode) { switch (tmp) { case 0: diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 6a92f491cb91..203d2a09a1f5 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -393,6 +393,10 @@ #define DMIF_ADDR_CALC 0xC00 +#define PIPE0_DMIF_BUFFER_CONTROL 0x0ca0 +# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) +# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) + #define SRBM_GFX_CNTL 0xE44 #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) -- cgit v1.2.3 From b2e4c70a9747ecb618d563b004ba746869dde5aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Jun 2013 15:18:26 -0400 Subject: drm/radeon: fill in gpu_init for berlin GPU cores This fills in the GPU specific details for berlin GPU cores so that the driver will work with them. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 1942571496ea..e336a31230ea 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -2708,7 +2708,46 @@ static void cik_gpu_init(struct radeon_device *rdev) gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_KAVERI: - /* TODO */ + rdev->config.cik.max_shader_engines = 1; + rdev->config.cik.max_tile_pipes = 4; + if ((rdev->pdev->device == 0x1304) || + (rdev->pdev->device == 0x1305) || + (rdev->pdev->device == 0x130C) || + (rdev->pdev->device == 0x130F) || + (rdev->pdev->device == 0x1310) || + (rdev->pdev->device == 0x1311) || + (rdev->pdev->device == 0x131C)) { + rdev->config.cik.max_cu_per_sh = 8; + rdev->config.cik.max_backends_per_se = 2; + } else if ((rdev->pdev->device == 0x1309) || + (rdev->pdev->device == 0x130A) || + (rdev->pdev->device == 0x130D) || + (rdev->pdev->device == 0x1313)) { + rdev->config.cik.max_cu_per_sh = 6; + rdev->config.cik.max_backends_per_se = 2; + } else if ((rdev->pdev->device == 0x1306) || + (rdev->pdev->device == 0x1307) || + (rdev->pdev->device == 0x130B) || + (rdev->pdev->device == 0x130E) || + (rdev->pdev->device == 0x1315) || + (rdev->pdev->device == 0x131B)) { + rdev->config.cik.max_cu_per_sh = 4; + rdev->config.cik.max_backends_per_se = 1; + } else { + rdev->config.cik.max_cu_per_sh = 3; + rdev->config.cik.max_backends_per_se = 1; + } + rdev->config.cik.max_sh_per_se = 1; + rdev->config.cik.max_texture_channel_caches = 4; + rdev->config.cik.max_gprs = 256; + rdev->config.cik.max_gs_threads = 16; + rdev->config.cik.max_hw_contexts = 8; + + rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; + rdev->config.cik.sc_prim_fifo_size_backend = 0x100; + rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; + rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_KABINI: default: -- cgit v1.2.3 From 2ce529dac71ae7753981a587932d074fdb248608 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Aug 2013 18:12:59 -0400 Subject: drm/radeon: split out radeon_uvd_resume from uvd_v4_2_resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For powergating, we just need to re-init the registers, there is no need to restore the uvd BOs. This just adds needless work when powergating uvd for playback while the system is on. We only need to restore the uvd BOs on an actual resume from suspend or when the driver loads. This fixes multi-stream UVD playback on KB systems. Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/radeon/cik.c | 13 ++++++++----- drivers/gpu/drm/radeon/uvd_v4_2.c | 5 ----- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e336a31230ea..79124f81c00e 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7051,12 +7051,15 @@ static int cik_startup(struct radeon_device *rdev) return r; } - r = uvd_v4_2_resume(rdev); + r = radeon_uvd_resume(rdev); if (!r) { - r = radeon_fence_driver_start_ring(rdev, - R600_RING_TYPE_UVD_INDEX); - if (r) - dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + r = uvd_v4_2_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, + R600_RING_TYPE_UVD_INDEX); + if (r) + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + } } if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; diff --git a/drivers/gpu/drm/radeon/uvd_v4_2.c b/drivers/gpu/drm/radeon/uvd_v4_2.c index d7e480786098..d04d5073eef2 100644 --- a/drivers/gpu/drm/radeon/uvd_v4_2.c +++ b/drivers/gpu/drm/radeon/uvd_v4_2.c @@ -39,11 +39,6 @@ int uvd_v4_2_resume(struct radeon_device *rdev) { uint64_t addr; uint32_t size; - int r; - - r = radeon_uvd_resume(rdev); - if (r) - return r; /* programm the VCPU memory controller bits 0-27 */ addr = rdev->uvd.gpu_addr >> 3; -- cgit v1.2.3 From e5903d399a7b0e5c14673c1206f4aeec2859c730 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 30 Aug 2013 08:58:20 -0400 Subject: drm/radeon: fix init ordering for r600+ The vram scratch buffer needs to be initialized before the mc is programmed otherwise we program 0 as the GPU address of the default GPU fault page. In most cases we put vram at zero anyway and reserve a page for the legacy vga buffer so in practice this shouldn't cause any problems, but better to make it correct. Was changed in: 6fab3febf6d949b0a12b1e4e73db38e4a177a79e Reported-by: FrankR Huang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 9 +++++---- drivers/gpu/drm/radeon/evergreen.c | 9 +++++---- drivers/gpu/drm/radeon/ni.c | 9 +++++---- drivers/gpu/drm/radeon/r600.c | 9 +++++---- drivers/gpu/drm/radeon/rv770.c | 9 +++++---- drivers/gpu/drm/radeon/si.c | 9 +++++---- 6 files changed, 30 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 79124f81c00e..148c539684bb 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6951,6 +6951,11 @@ static int cik_startup(struct radeon_device *rdev) /* enable aspm */ cik_program_aspm(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + cik_mc_program(rdev); if (rdev->flags & RADEON_IS_IGP) { @@ -6980,10 +6985,6 @@ static int cik_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - r = cik_pcie_gart_enable(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 183213689478..6398c1f76fb8 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5053,6 +5053,11 @@ static int evergreen_startup(struct radeon_device *rdev) /* enable aspm */ evergreen_program_aspm(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + evergreen_mc_program(rdev); if (ASIC_IS_DCE5(rdev)) { @@ -5078,10 +5083,6 @@ static int evergreen_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - if (rdev->flags & RADEON_IS_AGP) { evergreen_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 69499fff06b0..d60049efd7ac 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1863,6 +1863,11 @@ static int cayman_startup(struct radeon_device *rdev) /* enable aspm */ evergreen_program_aspm(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + evergreen_mc_program(rdev); if (rdev->flags & RADEON_IS_IGP) { @@ -1889,10 +1894,6 @@ static int cayman_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - r = cayman_pcie_gart_enable(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 087cff444ba2..b72d4d717a72 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2698,6 +2698,11 @@ static int r600_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ r600_pcie_gen2_enable(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + r600_mc_program(rdev); if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { @@ -2708,10 +2713,6 @@ static int r600_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - if (rdev->flags & RADEON_IS_AGP) { r600_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index b811296462a3..9f5846743c9e 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1658,6 +1658,11 @@ static int rv770_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ rv770_pcie_gen2_enable(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + rv770_mc_program(rdev); if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { @@ -1668,10 +1673,6 @@ static int rv770_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - if (rdev->flags & RADEON_IS_AGP) { rv770_agp_enable(rdev); } else { diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 89393ed593fa..fe8bca686900 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6343,6 +6343,11 @@ static int si_startup(struct radeon_device *rdev) /* enable aspm */ si_program_aspm(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + si_mc_program(rdev); if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || @@ -6360,10 +6365,6 @@ static int si_startup(struct radeon_device *rdev) return r; } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - r = si_pcie_gart_enable(rdev); if (r) return r; -- cgit v1.2.3 From 6a3808b8233eb91b57c230cf1161ac116a189ffd Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 30 Aug 2013 11:10:33 +0200 Subject: drm/radeon: enable UVD interrupts on CIK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same as on evergreen. Signed-off-by: Christian König Reported-by: FrankR Huang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/radeon/cik.c') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 148c539684bb..a77b593185fb 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6733,6 +6733,10 @@ restart_ih: break; } break; + case 124: /* UVD */ + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); + break; case 146: case 147: addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); -- cgit v1.2.3