diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
126 files changed, 11974 insertions, 1397 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 210d57a4afc8..403ec3db29df 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -63,12 +63,13 @@ amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o -amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o +amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o \ + uvd_v3_1.o amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ - arct_reg_init.o navi12_reg_init.o mxgpu_nv.o + arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o # add DF block amdgpu-y += \ @@ -80,7 +81,7 @@ amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ - gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o + gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o # add UMC block amdgpu-y += \ @@ -129,7 +130,8 @@ amdgpu-y += \ sdma_v2_4.o \ sdma_v3_0.o \ sdma_v4_0.o \ - sdma_v5_0.o + sdma_v5_0.o \ + sdma_v5_2.o # add MES block amdgpu-y += \ @@ -154,15 +156,18 @@ amdgpu-y += \ vcn_v1_0.o \ vcn_v2_0.o \ vcn_v2_5.o \ + vcn_v3_0.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ - jpeg_v2_5.o + jpeg_v2_5.o \ + jpeg_v3_0.o # add ATHUB block amdgpu-y += \ athub_v1_0.o \ - athub_v2_0.o + athub_v2_0.o \ + athub_v2_1.o # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o @@ -172,12 +177,13 @@ AMDKFD_PATH := ../amdkfd include $(FULL_AMD_PATH)/amdkfd/Makefile amdgpu-y += $(AMDKFD_FILES) amdgpu-y += \ - amdgpu_amdkfd_fence.o \ - amdgpu_amdkfd_gpuvm.o \ - amdgpu_amdkfd_gfx_v8.o \ - amdgpu_amdkfd_gfx_v9.o \ - amdgpu_amdkfd_arcturus.o \ - amdgpu_amdkfd_gfx_v10.o + amdgpu_amdkfd_fence.o \ + amdgpu_amdkfd_gpuvm.o \ + amdgpu_amdkfd_gfx_v8.o \ + amdgpu_amdkfd_gfx_v9.o \ + amdgpu_amdkfd_arcturus.o \ + amdgpu_amdkfd_gfx_v10.o \ + amdgpu_amdkfd_gfx_v10_3.o ifneq ($(CONFIG_DRM_AMDGPU_CIK),) amdgpu-y += amdgpu_amdkfd_gfx_v7.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cd913986863e..327a0daf4a1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -186,11 +186,14 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; #ifdef CONFIG_HSA_AMD extern int sched_policy; +extern bool debug_evictions; #else static const int sched_policy = KFD_SCHED_POLICY_HWS; +static const bool debug_evictions; /* = false */ #endif extern int amdgpu_tmz; +extern int amdgpu_reset_method; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -652,10 +655,6 @@ struct amdgpu_fw_vram_usage { u64 size; struct amdgpu_bo *reserved_bo; void *va; - - /* GDDR6 training support flag. - */ - bool mem_train_support; }; /* @@ -990,9 +989,12 @@ struct amdgpu_device { /* Chip product information */ char product_number[16]; char product_name[32]; - char serial[16]; + char serial[20]; struct amdgpu_autodump autodump; + + atomic_t throttling_logging_enabled; + struct ratelimit_state throttling_logging_rs; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1009,10 +1011,10 @@ int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, uint32_t *buf, size_t size, bool write); -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags); -void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, +uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); +void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1031,8 +1033,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define AMDGPU_REGS_NO_KIQ (1<<1) -#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) -#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) +#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) #define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) #define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) @@ -1040,9 +1042,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) -#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0) -#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0)) -#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) +#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) +#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) @@ -1079,7 +1081,16 @@ int emu_soc_asic_init(struct amdgpu_device *adev); tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) -#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false)) + +#define WREG32_SMC_P(_Reg, _Val, _Mask) \ + do { \ + u32 tmp = RREG32_SMC(_Reg); \ + tmp &= (_Mask); \ + tmp |= ((_Val) & ~(_Mask)); \ + WREG32_SMC(_Reg, tmp); \ + } while (0) + +#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_mm_rreg((adev), (reg), false)) #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg)) #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 956cbbda4793..913c8f0513bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -64,7 +64,9 @@ struct amdgpu_atif { struct amdgpu_atif_notifications notifications; struct amdgpu_atif_functions functions; struct amdgpu_atif_notification_cfg notification_cfg; - struct amdgpu_encoder *encoder_for_bl; +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) + struct backlight_device *bd; +#endif struct amdgpu_dm_backlight_caps backlight_caps; }; @@ -444,45 +446,21 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count); - if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) && - !amdgpu_device_has_dc_support(adev)) { - struct amdgpu_encoder *enc = atif->encoder_for_bl; - - if (enc) { - struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; - - DRM_DEBUG_DRIVER("Changing brightness to %d\n", - req.backlight_level); - - amdgpu_display_backlight_set_level(adev, enc, req.backlight_level); - -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - backlight_force_update(dig->bl_dev, - BACKLIGHT_UPDATE_HOTKEY); -#endif - } - } -#if defined(CONFIG_DRM_AMD_DC) + if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) { #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) && - amdgpu_device_has_dc_support(adev)) { - struct amdgpu_display_manager *dm = &adev->dm; - struct backlight_device *bd = dm->backlight_dev; - - if (bd) { + if (atif->bd) { DRM_DEBUG_DRIVER("Changing brightness to %d\n", req.backlight_level); - /* * XXX backlight_device_set_brightness() is * hardwired to post BACKLIGHT_UPDATE_SYSFS. * It probably should accept 'reason' parameter. */ - backlight_device_set_brightness(bd, req.backlight_level); + backlight_device_set_brightness(atif->bd, req.backlight_level); } - } -#endif #endif + } + if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { if (adev->flags & AMD_IS_PX) { pm_runtime_get_sync(adev->ddev->dev); @@ -829,23 +807,32 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) adev->atif = atif; if (atif->notifications.brightness_change) { - struct drm_encoder *tmp; - - /* Find the encoder controlling the brightness */ - list_for_each_entry(tmp, &adev->ddev->mode_config.encoder_list, - head) { - struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp); - - if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) && - enc->enc_priv) { - struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; - if (dig->bl_dev) { - atif->encoder_for_bl = enc; - break; +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) + if (amdgpu_device_has_dc_support(adev)) { +#if defined(CONFIG_DRM_AMD_DC) + struct amdgpu_display_manager *dm = &adev->dm; + atif->bd = dm->backlight_dev; +#endif + } else { + struct drm_encoder *tmp; + + /* Find the encoder controlling the brightness */ + list_for_each_entry(tmp, &adev->ddev->mode_config.encoder_list, + head) { + struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp); + + if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) && + enc->enc_priv) { + struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; + if (dig->bl_dev) { + atif->bd = dig->bl_dev; + break; + } } } } } +#endif if (atif->functions.sbios_requests && !atif->functions.system_params) { /* XXX check this workraround, if sbios request function is diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e2b4d3fc601d..478f67498a17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -31,8 +31,6 @@ #include "amdgpu_xgmi.h" #include <uapi/linux/kfd_ioctl.h> -static const unsigned int compute_vmid_bitmap = 0xFF00; - /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables */ @@ -113,7 +111,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = compute_vmid_bitmap, + .compute_vmid_bitmap = + ((1 << AMDGPU_NUM_VMID) - 1) - + ((1 << adev->vm_manager.first_kfd_vmid) - 1), .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, .gpuvm_size = min(adev->vm_manager.max_pfn @@ -638,10 +638,8 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { - if (adev->kfd.dev) { - if ((1 << vmid) & compute_vmid_bitmap) - return true; - } + if (adev->kfd.dev) + return vmid >= adev->vm_manager.first_kfd_vmid; return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c new file mode 100644 index 000000000000..7e59e473a190 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -0,0 +1,834 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <linux/mmu_context.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "gc/gc_10_3_0_offset.h" +#include "gc/gc_10_3_0_sh_mask.h" +#include "navi10_enum.h" +#include "oss/osssys_5_0_0_offset.h" +#include "oss/osssys_5_0_0_sh_mask.h" +#include "soc15_common.h" +#include "v10_structs.h" +#include "nv.h" +#include "nvd.h" +#include "gfxhub_v2_1.h" + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES, + SAVE_WAVES +}; + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint64_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; + + return 1ull << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +/* ATC is defeatured on Sienna_Cichlid */ +static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT; + + /* Mapping vmid to pasid also for IH block */ + pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n", + vmid, pasid); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, value); + + return 0; +} + +static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; + + switch (engine_id) { + default: + dev_warn(adev->dev, + "Invalid sdma engine id (%d), using engine id 0\n", + engine_id); + /* fall through */ + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, sdma_rlc_reg_offset); + + return sdma_rlc_reg_offset; +} + +static inline struct v10_compute_mqd *get_mqd(void *mqd) +{ + return (struct v10_compute_mqd *)mqd; +} + +static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v10_sdma_mqd *)mqd; +} + +static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_compute_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uint64_t)wptr)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v10_compute_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + +static int hqd_dump_v10_3(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v10_compute_mqd *m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue pipe %d queue %d preemption failed\n", + pipe_id, queue_id); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + + +static int address_watch_disable_v10_3(struct kgd_dev *kgd) +{ + return 0; +} + +static int address_watch_execute_v10_3(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int wave_control_execute_v10_3(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SA_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t address_watch_get_offset_v10_3(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* SDMA is on gfxhub as well for Navi1* series */ + gfxhub_v2_1_setup_vm_pt_regs(adev, vmid, page_table_base); +} + +#if 0 +uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, + uint32_t trap_debug_wave_launch_mode, + uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + uint32_t orig_wave_cntl_value; + uint32_t orig_stall_vmid; + + mutex_lock(&adev->grbm_idx_mutex); + + orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC, + 0, + mmSPI_GDBG_WAVE_CNTL)); + orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value, + SPI_GDBG_WAVE_CNTL, + STALL_VMID); + + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); + + data = 0; + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd, + uint32_t trap_override, + uint32_t trap_mask) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); + + data = 0; + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, + EXCP_EN, trap_mask); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, + REPLACE, trap_override); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd, + uint8_t wave_launch_mode, + uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + bool is_stall_mode; + bool is_mode_set; + + is_stall_mode = (wave_launch_mode == 4); + is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4); + + mutex_lock(&adev->grbm_idx_mutex); + + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + VMID_MASK, is_mode_set ? 1 << vmid : 0); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + MODE, is_mode_set ? wave_launch_mode : 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, + STALL_VMID, is_stall_mode ? 1 << vmid : 0); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, + STALL_RA, is_stall_mode ? 1 : 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values + * The values read are: + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. + * gws_wait_time -- Wait Count for Global Wave Syncs. + * que_sleep_wait_time -- Wait Count for Dequeue Retry. + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. + */ +void get_iq_wait_times_v10_3(struct kgd_dev *kgd, + uint32_t *wait_times) + +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); +} + +void build_grace_period_packet_info_v10_3(struct kgd_dev *kgd, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data) +{ + *reg_data = wait_times; + + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + grace_period); + + *reg_offset = mmCP_IQ_WAIT_TIME2; +} +#endif + +const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { + .program_sh_mem_settings = program_sh_mem_settings_v10_3, + .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3, + .init_interrupts = init_interrupts_v10_3, + .hqd_load = hqd_load_v10_3, + .hiq_mqd_load = hiq_mqd_load_v10_3, + .hqd_sdma_load = hqd_sdma_load_v10_3, + .hqd_dump = hqd_dump_v10_3, + .hqd_sdma_dump = hqd_sdma_dump_v10_3, + .hqd_is_occupied = hqd_is_occupied_v10_3, + .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3, + .hqd_destroy = hqd_destroy_v10_3, + .hqd_sdma_destroy = hqd_sdma_destroy_v10_3, + .address_watch_disable = address_watch_disable_v10_3, + .address_watch_execute = address_watch_execute_v10_3, + .wave_control_execute = wave_control_execute_v10_3, + .address_watch_get_offset = address_watch_get_offset_v10_3, + .get_atc_vmid_pasid_mapping_info = NULL, + .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +#if 0 + .enable_debug_trap = enable_debug_trap_v10_3, + .disable_debug_trap = disable_debug_trap_v10_3, + .set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3, + .set_wave_launch_mode = set_wave_launch_mode_v10_3, + .get_iq_wait_times = get_iq_wait_times_v10_3, + .build_grace_period_packet_info = build_grace_period_packet_info_v10_3, +#endif +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 55d2e870fdda..a7a30e39aa1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -395,7 +395,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(sync, vm->last_update, false); + return amdgpu_sync_fence(sync, vm->last_update); } static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) @@ -785,7 +785,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(sync, bo_va->last_pt_update, false); + amdgpu_sync_fence(sync, bo_va->last_pt_update); return 0; } @@ -804,7 +804,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); + return amdgpu_sync_fence(sync, bo_va->last_pt_update); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -2102,7 +2102,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index fdd52d86a4d7..29f767e026e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -2022,11 +2022,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) if (adev->is_atom_fw) { amdgpu_atomfirmware_scratch_regs_init(adev); amdgpu_atomfirmware_allocate_fb_scratch(adev); - ret = amdgpu_atomfirmware_get_mem_train_info(adev); - if (ret) { - DRM_ERROR("Failed to get mem train fb location.\n"); - return ret; - } } else { amdgpu_atombios_scratch_regs_init(adev); amdgpu_atombios_allocate_fb_scratch(adev); @@ -2041,3 +2036,20 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) return 0; } +int amdgpu_atombios_get_data_table(struct amdgpu_device *adev, + uint32_t table, + uint16_t *size, + uint8_t *frev, + uint8_t *crev, + uint8_t **addr) +{ + uint16_t data_start; + + if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, table, + size, frev, crev, &data_start)) + return -EINVAL; + + *addr = (uint8_t *)adev->mode_info.atom_context->bios + data_start; + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index fd8f18074f7a..1321ec09c734 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -216,6 +216,13 @@ int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev, u8 voltage_type, u8 *svd_gpio_id, u8 *svc_gpio_id); +int amdgpu_atombios_get_data_table(struct amdgpu_device *adev, + uint32_t table, + uint16_t *size, + uint8_t *frev, + uint8_t *crev, + uint8_t **addr); + void amdgpu_atombios_fini(struct amdgpu_device *adev); int amdgpu_atombios_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 58f9d8c3a17a..1279053324f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -111,6 +111,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) union igp_info { struct atom_integrated_system_info_v1_11 v11; + struct atom_integrated_system_info_v1_12 v12; }; union umc_info { @@ -120,11 +121,13 @@ union umc_info { union vram_info { struct atom_vram_info_header_v2_3 v23; struct atom_vram_info_header_v2_4 v24; + struct atom_vram_info_header_v2_5 v25; }; union vram_module { struct atom_vram_module_v9 v9; struct atom_vram_module_v10 v10; + struct atom_vram_module_v11 v11; }; static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, @@ -212,6 +215,15 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; + case 12: + mem_channel_number = igp_info->v12.umachannelnumber; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v12.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: return -EINVAL; } @@ -260,6 +272,26 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_vendor) *vram_vendor = mem_vendor; break; + case 5: + if (module_id > vram_info->v25.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v25.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v11.vram_module_size); + i++; + } + mem_type = vram_module->v11.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v11.channel_num; + mem_channel_width = vram_module->v11.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v11.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; default: return -EINVAL; } @@ -303,6 +335,9 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) union firmware_info { struct atom_firmware_info_v3_1 v31; + struct atom_firmware_info_v3_2 v32; + struct atom_firmware_info_v3_3 v33; + struct atom_firmware_info_v3_4 v34; }; /* @@ -491,7 +526,7 @@ static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev) return false; } -static int gddr6_mem_train_support(struct amdgpu_device *adev) +int amdgpu_mem_train_support(struct amdgpu_device *adev) { int ret; uint32_t major, minor, revision, hw_v; @@ -507,6 +542,7 @@ static int gddr6_mem_train_support(struct amdgpu_device *adev) switch (hw_v) { case HW_REV(11, 0, 0): case HW_REV(11, 0, 5): + case HW_REV(11, 0, 7): ret = 1; break; default: @@ -525,46 +561,37 @@ static int gddr6_mem_train_support(struct amdgpu_device *adev) return ret; } -int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev) +int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) { struct atom_context *ctx = adev->mode_info.atom_context; + union firmware_info *firmware_info; int index; - uint8_t frev, crev; - uint16_t data_offset, size; - int ret; - - adev->fw_vram_usage.mem_train_support = false; + u16 data_offset, size; + u8 frev, crev; + int fw_reserved_fb_size; - if (adev->asic_type != CHIP_NAVI10 && - adev->asic_type != CHIP_NAVI14) - return 0; + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); - if (amdgpu_sriov_vf(adev)) + if (!amdgpu_atom_parse_data_header(ctx, index, &size, + &frev, &crev, &data_offset)) + /* fail to parse data_header */ return 0; - ret = gddr6_mem_train_support(adev); - if (ret == -1) - return -EINVAL; - else if (ret == 0) - return 0; + firmware_info = (union firmware_info *)(ctx->bios + data_offset); - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - vram_usagebyfirmware); - ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, - &data_offset); - if (ret == 0) { - DRM_ERROR("parse data header failed.\n"); + if (frev !=3) return -EINVAL; - } - DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x," - " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset); - /* only support 2.1+ */ - if (((uint16_t)frev << 8 | crev) < 0x0201) { - DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev); - return -EINVAL; + switch (crev) { + case 4: + fw_reserved_fb_size = + (firmware_info->v34.fw_reserved_size_in_kb << 10); + break; + default: + fw_reserved_fb_size = 0; + break; } - adev->fw_vram_usage.mem_train_support = true; - return 0; + return fw_reserved_fb_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 434fe2fa0089..9f0d4356e8df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -31,10 +31,11 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, int *vram_width, int *vram_type, int *vram_vendor); -int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); +int amdgpu_mem_train_support(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 8955c4fed8cb..93160a849af4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -717,8 +717,10 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force) if (!drm_kms_helper_is_poll_worker()) { r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(connector->dev->dev); return connector_status_disconnected; + } } if (encoder) { @@ -855,8 +857,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) if (!drm_kms_helper_is_poll_worker()) { r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(connector->dev->dev); return connector_status_disconnected; + } } encoder = amdgpu_connector_best_single_encoder(connector); @@ -978,8 +982,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) if (!drm_kms_helper_is_poll_worker()) { r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(connector->dev->dev); return connector_status_disconnected; + } } if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) { @@ -1329,8 +1335,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) if (!drm_kms_helper_is_poll_worker()) { r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(connector->dev->dev); return connector_status_disconnected; + } } if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 65b67c82a4b9..ecd051976bce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -992,7 +992,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(&p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence); dma_fence_put(fence); if (r) return r; @@ -1014,7 +1014,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(&p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence); dma_fence_put(fence); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index d33cb344be69..193ffdb957b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -223,12 +223,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, *pos &= (1UL << 22) - 1; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || @@ -332,12 +336,16 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } while (size) { uint32_t value; @@ -387,12 +395,16 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } while (size) { uint32_t value; @@ -443,12 +455,16 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } while (size) { uint32_t value; @@ -498,12 +514,16 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } while (size) { uint32_t value; @@ -554,12 +574,16 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } while (size) { uint32_t value; @@ -609,12 +633,16 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } while (size) { uint32_t value; @@ -764,12 +792,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, valuesize = sizeof(values); r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); @@ -842,12 +874,16 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, simd = (*pos & GENMASK_ULL(44, 37)) >> 37; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_virt_enable_access_debugfs(adev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); @@ -937,11 +973,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) - return r; + goto err; r = amdgpu_virt_enable_access_debugfs(adev); if (r < 0) - return r; + goto err; /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); @@ -967,7 +1003,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, value = data[result >> 2]; r = put_user(value, (uint32_t *)buf); if (r) { - result = r; + amdgpu_virt_disable_access_debugfs(adev); goto err; } @@ -976,10 +1012,14 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, size -= 4; } -err: kfree(data); amdgpu_virt_disable_access_debugfs(adev); return result; + +err: + pm_runtime_put_autosuspend(adev->ddev->dev); + kfree(data); + return r; } /** @@ -1003,8 +1043,10 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } while (size) { uint32_t value; @@ -1031,6 +1073,57 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu } +/** + * amdgpu_debugfs_regs_gfxoff_status - read gfxoff status + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + */ +static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + + while (size) { + uint32_t value; + + r = amdgpu_get_gfx_off_status(adev, &value); + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return r; + } + + r = put_user(value, (uint32_t *)buf); + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return r; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return result; +} + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, @@ -1081,7 +1174,9 @@ static const struct file_operations amdgpu_debugfs_gpr_fops = { static const struct file_operations amdgpu_debugfs_gfxoff_fops = { .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_read, .write = amdgpu_debugfs_gfxoff_write, + .llseek = default_llseek }; static const struct file_operations *debugfs_regs[] = { @@ -1140,8 +1235,10 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) int r = 0, i; r = pm_runtime_get_sync(dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } /* Avoid accidently unparking the sched thread during GPU reset */ mutex_lock(&adev->lock_reset); @@ -1197,8 +1294,10 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) int r; r = pm_runtime_get_sync(dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); @@ -1216,8 +1315,10 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) int r; r = pm_runtime_get_sync(dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); @@ -1295,27 +1396,37 @@ static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring) { struct amdgpu_job *job; - struct drm_sched_job *s_job; + struct drm_sched_job *s_job, *tmp; uint32_t preempt_seq; struct dma_fence *fence, **ptr; struct amdgpu_fence_driver *drv = &ring->fence_drv; struct drm_gpu_scheduler *sched = &ring->sched; + bool preempted = true; if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) return; preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2)); - if (preempt_seq <= atomic_read(&drv->last_seq)) - return; + if (preempt_seq <= atomic_read(&drv->last_seq)) { + preempted = false; + goto no_preempt; + } preempt_seq &= drv->num_fences_mask; ptr = &drv->fences[preempt_seq]; fence = rcu_dereference_protected(*ptr, 1); +no_preempt: spin_lock(&sched->job_list_lock); - list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { + if (dma_fence_is_signaled(&s_job->s_fence->finished)) { + /* remove job from ring_mirror_list */ + list_del_init(&s_job->node); + sched->ops->free_job(s_job); + continue; + } job = to_amdgpu_job(s_job); - if (job->fence == fence) + if (preempted && job->fence == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } @@ -1407,14 +1518,16 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) return -EINVAL; ret = pm_runtime_get_sync(adev->ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { - ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq, true); + ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq); if (ret || val > max_freq || val < min_freq) return -EINVAL; - ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val, true); + ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val); } else { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a93200fa3098..4204cda680f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,6 +80,8 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -112,6 +114,8 @@ const char *amdgpu_asic_name[] = { "NAVI10", "NAVI14", "NAVI12", + "SIENNA_CICHLID", + "NAVY_FLOUNDER", "LAST", }; @@ -299,10 +303,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, } /* - * device register access helper functions. + * MMIO register access helper functions. */ /** - * amdgpu_device_rreg - read a register + * amdgpu_mm_rreg - read a memory mapped IO register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -310,8 +314,8 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, * * Returns the 32 bit value from the offset specified. */ -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags) +uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags) { uint32_t ret; @@ -320,9 +324,15 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, if ((reg * 4) < adev->rmmio_size) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); - else - ret = adev->pcie_rreg(adev, (reg * 4)); - trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); + else { + unsigned long flags; + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } + trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret); return ret; } @@ -368,19 +378,24 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) BUG(); } -void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg, - uint32_t v, uint32_t acc_flags) +void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { - trace_amdgpu_device_wreg(adev->pdev->device, reg, v); + trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); if ((reg * 4) < adev->rmmio_size) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - else - adev->pcie_wreg(adev, (reg * 4), v); + else { + unsigned long flags; + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } } /** - * amdgpu_device_wreg - write to a register + * amdgpu_mm_wreg - write to a memory mapped IO register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -389,13 +404,13 @@ void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_ * * Writes the value specified to the offset specified. */ -void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags) +void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_kiq_wreg(adev, reg, v); - amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); + amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); } /* @@ -414,7 +429,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v); } - amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); + amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); } /** @@ -907,6 +922,11 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* skip if the bios has already enabled large BAR */ + if (adev->gmc.real_vram_size && + (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) + return 0; + /* Check if the root BUS has 64bit memory resources */ root = adev->pdev->bus; while (root->parent) @@ -1159,6 +1179,16 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_vm_fragment_size = -1; } + if (amdgpu_sched_hw_submission < 2) { + dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n", + amdgpu_sched_hw_submission); + amdgpu_sched_hw_submission = 2; + } else if (!is_power_of_2(amdgpu_sched_hw_submission)) { + dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n", + amdgpu_sched_hw_submission); + amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission); + } + amdgpu_device_check_smu_prv_buffer_size(adev); amdgpu_device_check_vm_size(adev); @@ -1527,22 +1557,25 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; + char fw_name[40]; int err; const struct gpu_info_firmware_header_v1_0 *hdr; adev->firmware.gpu_info_fw = NULL; + if (adev->discovery_bin) { + amdgpu_discovery_get_gfx_info(adev); + + /* + * FIXME: The bounding box is still needed by Navi12, so + * temporarily read it from gpu_info firmware. Should be droped + * when DAL no longer needs it. + */ + if (adev->asic_type != CHIP_NAVI12) + return 0; + } + switch (adev->asic_type) { - case CHIP_TOPAZ: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - case CHIP_CARRIZO: - case CHIP_STONEY: #ifdef CONFIG_DRM_AMDGPU_SI case CHIP_VERDE: case CHIP_TAHITI: @@ -1557,6 +1590,15 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_KABINI: case CHIP_MULLINS: #endif + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_CARRIZO: + case CHIP_STONEY: case CHIP_VEGA20: default: return 0; @@ -1589,6 +1631,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; + case CHIP_SIENNA_CICHLID: + chip_name = "sienna_cichlid"; + break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1617,10 +1665,11 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) { - amdgpu_discovery_get_gfx_info(adev); + /* + * Should be droped when DAL no longer needs it. + */ + if (adev->asic_type == CHIP_NAVI12) goto parse_soc_bounding_box; - } adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); @@ -1653,7 +1702,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) parse_soc_bounding_box: /* * soc bounding box info is not integrated in disocovery table, - * we always need to parse it from gpu info firmware. + * we always need to parse it from gpu info firmware if needed. */ if (hdr->version_minor == 2) { const struct gpu_info_firmware_v1_2 *gpu_info_fw = @@ -1689,25 +1738,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) amdgpu_device_enable_virtual_display(adev); - switch (adev->asic_type) { - case CHIP_TOPAZ: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - case CHIP_CARRIZO: - case CHIP_STONEY: - if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) - adev->family = AMDGPU_FAMILY_CZ; - else - adev->family = AMDGPU_FAMILY_VI; - - r = vi_set_ip_blocks(adev); + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_request_full_gpu(adev, true); if (r) return r; - break; + } + + switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_SI case CHIP_VERDE: case CHIP_TAHITI: @@ -1726,24 +1763,41 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: - if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII)) - adev->family = AMDGPU_FAMILY_CI; - else + if (adev->flags & AMD_IS_APU) adev->family = AMDGPU_FAMILY_KV; + else + adev->family = AMDGPU_FAMILY_CI; r = cik_set_ip_blocks(adev); if (r) return r; break; #endif + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_CARRIZO: + case CHIP_STONEY: + if (adev->flags & AMD_IS_APU) + adev->family = AMDGPU_FAMILY_CZ; + else + adev->family = AMDGPU_FAMILY_VI; + + r = vi_set_ip_blocks(adev); + if (r) + return r; + break; case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: case CHIP_ARCTURUS: case CHIP_RENOIR: - if (adev->asic_type == CHIP_RAVEN || - adev->asic_type == CHIP_RENOIR) + if (adev->flags & AMD_IS_APU) adev->family = AMDGPU_FAMILY_RV; else adev->family = AMDGPU_FAMILY_AI; @@ -1755,6 +1809,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->family = AMDGPU_FAMILY_NV; r = nv_set_ip_blocks(adev); @@ -1768,31 +1824,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) amdgpu_amdkfd_device_probe(adev); - if (amdgpu_sriov_vf(adev)) { - /* handle vbios stuff prior full access mode for new handshake */ - if (adev->virt.req_init_data_ver == 1) { - if (!amdgpu_get_bios(adev)) { - DRM_ERROR("failed to get vbios\n"); - return -EINVAL; - } - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; - } - } - } - - /* we need to send REQ_GPU here for legacy handshaker otherwise the vbios - * will not be prepared by host for this VF */ - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) { - r = amdgpu_virt_request_full_gpu(adev, true); - if (r) - return r; - } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -1824,10 +1855,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; - /* skip vbios handling for new handshake */ - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1) - continue; - /* Read BIOS */ if (!amdgpu_get_bios(adev)) return -EINVAL; @@ -1954,12 +1981,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) { - r = amdgpu_virt_request_full_gpu(adev, true); - if (r) - return -EAGAIN; - } - for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -2308,6 +2329,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) + amdgpu_virt_release_ras_err_handler_data(adev); + amdgpu_ras_pre_fini(adev); if (adev->gmc.xgmi.num_physical_nodes > 1) @@ -2438,18 +2462,21 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; + /* displays are handled separately */ - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { - /* XXX handle errors */ - r = adev->ip_blocks[i].version->funcs->suspend(adev); - /* XXX handle errors */ - if (r) { - DRM_ERROR("suspend of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - adev->ip_blocks[i].status.hw = false; + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) + continue; + + /* XXX handle errors */ + r = adev->ip_blocks[i].version->funcs->suspend(adev); + /* XXX handle errors */ + if (r) { + DRM_ERROR("suspend of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; } + + adev->ip_blocks[i].status.hw = false; } return 0; @@ -2547,6 +2574,9 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_IH, }; + for (i = 0; i < adev->num_ip_blocks; i++) + adev->ip_blocks[i].status.hw = false; + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { int j; struct amdgpu_ip_block *block; @@ -2554,7 +2584,6 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) for (j = 0; j < adev->num_ip_blocks; j++) { block = &adev->ip_blocks[j]; - block->status.hw = false; if (block->version->type != ip_order[i] || !block->status.valid) continue; @@ -2779,6 +2808,10 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_NAVI12: case CHIP_RENOIR: #endif +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: +#endif return amdgpu_dc != 0; #endif default: @@ -3036,6 +3069,17 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0; + atomic_set(&adev->throttling_logging_enabled, 1); + /* + * If throttling continues, logging will be performed every minute + * to avoid log flooding. "-1" is subtracted since the thermal + * throttling interrupt comes every second. Thus, the total logging + * interval is 59 seconds(retelimited printk interval) + 1(waiting + * for throttling interrupt) = 60 seconds. + */ + ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); + ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); + /* Registers mapping */ /* TODO: block userspace mapping of io register */ if (adev->asic_type >= CHIP_BONAIRE) { @@ -3274,6 +3318,9 @@ fence_driver_init: queue_delayed_work(system_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); + if (amdgpu_sriov_vf(adev)) + flush_delayed_work(&adev->delayed_init_work); + r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); if (r) { dev_err(adev->dev, "Could not create amdgpu device attr\n"); @@ -3330,10 +3377,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_pm_sysfs_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_device_ip_fini(adev); - if (adev->firmware.gpu_info_fw) { - release_firmware(adev->firmware.gpu_info_fw); - adev->firmware.gpu_info_fw = NULL; - } + release_firmware(adev->firmware.gpu_info_fw); + adev->firmware.gpu_info_fw = NULL; adev->accel_working = false; /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) @@ -3365,7 +3410,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); - if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + if (adev->discovery_bin) amdgpu_discovery_fini(adev); } @@ -3377,7 +3422,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) * amdgpu_device_suspend - initiate device suspend * * @dev: drm dev pointer - * @suspend: suspend state * @fbcon : notify the fbdev of suspend * * Puts the hw in the suspend state (all asics). @@ -3474,7 +3518,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) * amdgpu_device_resume - initiate device resume * * @dev: drm dev pointer - * @resume: resume state * @fbcon : notify the fbdev of resume * * Bring the hw back to operating state (all asics). @@ -3905,6 +3948,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: break; default: goto disabled; @@ -4209,18 +4253,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; - bool in_ras_intr = amdgpu_ras_intr_triggered(); - bool use_baco = - (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? - true : false; + bool need_emergency_restart = false; bool audio_suspended = false; + /** + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + /* * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { - + if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); ksys_sync_helper(); @@ -4228,7 +4273,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } dev_info(adev->dev, "GPU %s begin!\n", - (in_ras_intr && !use_baco) ? "jobs stop":"reset"); + need_emergency_restart ? "jobs stop":"reset"); /* * Here we trylock to avoid chain of resets executing from @@ -4300,7 +4345,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_fbdev_set_suspend(tmp_adev, 1); /* disable ras on ALL IPs */ - if (!(in_ras_intr && !use_baco) && + if (!need_emergency_restart && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -4312,12 +4357,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_stop(&ring->sched, job ? &job->base : NULL); - if (in_ras_intr && !use_baco) + if (need_emergency_restart) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } - if (in_ras_intr && !use_baco) + if (need_emergency_restart) goto skip_sched_resume; /* @@ -4394,7 +4439,7 @@ skip_hw_reset: skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ - if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) + if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); if (audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b5d6274952a5..a50ff2306504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -32,7 +32,7 @@ #define mmMM_DATA 0x1 #define HW_ID_MAX 300 -const char *hw_id_names[HW_ID_MAX] = { +static const char *hw_id_names[HW_ID_MAX] = { [MP1_HWID] = "MP1", [MP2_HWID] = "MP2", [THM_HWID] = "THM", @@ -133,7 +133,7 @@ static int hw_id_map[MAX_HWIP] = { static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) { uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - uint64_t pos = vram_size - adev->discovery_tmr_size; + uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, adev->discovery_tmr_size, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index d50d597c45ed..8f6183801cb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,7 +24,8 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ -#define DISCOVERY_TMR_SIZE (64 << 10) +#define DISCOVERY_TMR_SIZE (4 << 10) +#define DISCOVERY_TMR_OFFSET (64 << 10) void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c56438a6c9f2..d76172965199 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -282,7 +282,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, ret = pm_runtime_get_sync(dev->dev); if (ret < 0) - return ret; + goto out; ret = drm_crtc_helper_set_config(set, ctx); @@ -297,7 +297,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, take the current one */ if (active && !adev->have_disp_power_ref) { adev->have_disp_power_ref = true; - return ret; + goto out; } /* if we have no active crtcs, then drop the power ref we got before */ @@ -306,6 +306,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, adev->have_disp_power_ref = false; } +out: /* drop the power reference we got coming in here */ pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 3fa18003d4d6..89e6ad30396f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -53,6 +53,7 @@ struct amdgpu_doorbell_index { uint32_t gfx_ring0; uint32_t gfx_ring1; uint32_t sdma_engine[8]; + uint32_t mes_ring; uint32_t ih; union { struct { @@ -177,9 +178,12 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A, AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B, AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C, + AMDGPU_NAVI10_DOORBELL_MES_RING = 0x090, /* SDMA:256~335*/ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100, AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A, + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2 = 0x114, + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3 = 0x11E, /* IH: 376~391 */ AMDGPU_NAVI10_DOORBELL_IH = 0x178, /* MMSCH: 392~407 @@ -191,8 +195,13 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A, AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B, + AMDGPU_NAVI10_DOORBELL64_VCN8_9 = 0x18C, + AMDGPU_NAVI10_DOORBELL64_VCNa_b = 0x18D, + AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E, + AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F, + AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0, - AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCN6_7, + AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f, AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F, AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index d2a105e3bf7c..2082c0acd216 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -911,8 +911,7 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? &clk_freq : NULL); if (ret) return 0; return clk_freq * 100; @@ -929,8 +928,7 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? &clk_freq : NULL); if (ret) return 0; return clk_freq * 100; @@ -1141,6 +1139,26 @@ int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) return 0; } +bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) + return smu_mode1_reset_is_support(smu); + + return false; +} + +int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) + return smu_mode1_reset(smu); + + return -EOPNOTSUPP; +} + int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en) @@ -1162,7 +1180,7 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, { int ret = 0; - if (is_support_sw_smu_xgmi(adev)) + if (is_support_sw_smu(adev)) ret = smu_set_xgmi_pstate(&adev->smu, pstate); else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_xgmi_pstate) @@ -1197,4 +1215,4 @@ int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en) return smu_allow_xgmi_power_down(smu, en); return 0; -}
\ No newline at end of file +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 6a8aae70a0e6..aa27fe65cdfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -425,6 +425,7 @@ struct amdgpu_pm { u32 default_sclk; u32 default_mclk; struct amdgpu_i2c_chan *i2c_bus; + bool bus_locked; /* internal thermal controller on rv6xx+ */ enum amdgpu_int_thermal_type int_thermal_type; struct device *int_hwmon_dev; @@ -529,6 +530,9 @@ int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev); +int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); + int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5c4263335cba..81a79760ca61 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -87,9 +87,10 @@ * - 3.36.0 - Allow reading more status registers on si/cik * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC + * - 3.39.0 - DMABUF implicit sync does a full pipeline sync */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 38 +#define KMS_DRIVER_MINOR 39 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -148,6 +149,7 @@ int amdgpu_mes = 0; int amdgpu_noretry; int amdgpu_force_asic_type = -1; int amdgpu_tmz = 0; +int amdgpu_reset_method = -1; /* auto */ struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -705,6 +707,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = int queue_preemption_timeout_ms = 9000; module_param(queue_preemption_timeout_ms, int, 0644); MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)"); + +/** + * DOC: debug_evictions(bool) + * Enable extra debug messages to help determine the cause of evictions + */ +bool debug_evictions; +module_param(debug_evictions, bool, 0644); +MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)"); #endif /** @@ -748,6 +758,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); +/** + * DOC: reset_method (int) + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) + */ +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); +module_param_named(reset_method, amdgpu_reset_method, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1111,7 +1128,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); - amdgpu_driver_load_kms(dev, ent->driver_data); + ret = amdgpu_driver_load_kms(dev, ent->driver_data); + if (ret) + goto err_pci; retry_init: ret = drm_dev_register(dev, ent->driver_data); @@ -1167,7 +1186,8 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) * unfortunately we can't detect certain * hypervisors so just do this all the time. */ - adev->mp1_state = PP_MP1_STATE_UNLOAD; + if (!amdgpu_passthrough(adev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; amdgpu_device_ip_suspend(adev); adev->mp1_state = PP_MP1_STATE_NONE; } @@ -1373,11 +1393,12 @@ long amdgpu_drm_ioctl(struct file *filp, dev = file_priv->minor->dev; ret = pm_runtime_get_sync(dev->dev); if (ret < 0) - return ret; + goto out; ret = drm_ioctl(filp, cmd, arg); pm_runtime_mark_last_busy(dev->dev); +out: pm_runtime_put_autosuspend(dev->dev); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d878fe7fee51..58d4c219178a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -416,15 +416,16 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index; } amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); - amdgpu_irq_get(adev, irq_src, irq_type); + + if (irq_src) + amdgpu_irq_get(adev, irq_src, irq_type); ring->fence_drv.irq_src = irq_src; ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; - DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr " - "0x%016llx, cpu addr 0x%p\n", ring->name, - ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); + DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n", + ring->name, ring->fence_drv.gpu_addr); return 0; } @@ -448,8 +449,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, if (!adev) return -EINVAL; - /* Check that num_hw_submission is a power of two */ - if ((num_hw_submission & (num_hw_submission - 1)) != 0) + if (!is_power_of_2(num_hw_submission)) return -EINVAL; ring->fence_drv.cpu_addr = NULL; @@ -467,8 +467,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, if (!ring->fence_drv.fences) return -ENOMEM; - /* No need to setup the GPU scheduler for KIQ ring */ - if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { + /* No need to setup the GPU scheduler for rings that don't need it */ + if (!ring->no_scheduler) { switch (ring->funcs->type) { case AMDGPU_RING_TYPE_GFX: timeout = adev->gfx_timeout; @@ -537,9 +537,11 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) /* no need to trigger GPU reset as we are unloading */ amdgpu_fence_driver_force_completion(ring); } - amdgpu_irq_put(adev, ring->fence_drv.irq_src, - ring->fence_drv.irq_type); - drm_sched_fini(&ring->sched); + if (ring->fence_drv.irq_src) + amdgpu_irq_put(adev, ring->fence_drv.irq_src, + ring->fence_drv.irq_type); + if (!ring->no_scheduler) + drm_sched_fini(&ring->sched); del_timer_sync(&ring->fence_drv.fallback_timer); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) dma_fence_put(ring->fence_drv.fences[j]); @@ -574,8 +576,9 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) } /* disable the interrupt */ - amdgpu_irq_put(adev, ring->fence_drv.irq_src, - ring->fence_drv.irq_type); + if (ring->fence_drv.irq_src) + amdgpu_irq_put(adev, ring->fence_drv.irq_src, + ring->fence_drv.irq_type); } } @@ -601,8 +604,9 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev) continue; /* enable the interrupt */ - amdgpu_irq_get(adev, ring->fence_drv.irq_src, - ring->fence_drv.irq_type); + if (ring->fence_drv.irq_src) + amdgpu_irq_get(adev, ring->fence_drv.irq_src, + ring->fence_drv.irq_type); } } @@ -749,8 +753,10 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) int r; r = pm_runtime_get_sync(dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(dev->dev); return 0; + } seq_printf(m, "gpu recover\n"); amdgpu_device_gpu_recover(adev, NULL); @@ -775,8 +781,10 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) if (amdgpu_sriov_vf(adev)) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1); - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, + ARRAY_SIZE(amdgpu_debugfs_fence_list_sriov)); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, + ARRAY_SIZE(amdgpu_debugfs_fence_list)); #else return 0; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 815c072ac4da..e811fecc540f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -26,12 +26,13 @@ #include "amdgpu_i2c.h" #include "smu_v11_0_i2c.h" #include "atom.h" +#include "amdgpu_fru_eeprom.h" #define I2C_PRODUCT_INFO_ADDR 0xAC #define I2C_PRODUCT_INFO_ADDR_SIZE 0x2 #define I2C_PRODUCT_INFO_OFFSET 0xC0 -bool is_fru_eeprom_supported(struct amdgpu_device *adev) +static bool is_fru_eeprom_supported(struct amdgpu_device *adev) { /* TODO: Gaming SKUs don't have the FRU EEPROM. * Use this hack to address hangs on modprobe on gaming SKUs @@ -47,7 +48,7 @@ bool is_fru_eeprom_supported(struct amdgpu_device *adev) return false; } -int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, +static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, unsigned char *buff) { int ret, size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h index 968115c97e33..f29a8611d69b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h @@ -21,8 +21,8 @@ * */ -#ifndef __AMDGPU_PRODINFO_H__ -#define __AMDGPU_PRODINFO_H__ +#ifndef __AMDGPU_FRU_EEPROM_H__ +#define __AMDGPU_FRU_EEPROM_H__ int amdgpu_fru_get_product_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b2dc320e7305..6b1eb9d045ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -930,7 +930,8 @@ static const struct drm_info_list amdgpu_debugfs_gem_list[] = { int amdgpu_debugfs_gem_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, + ARRAY_SIZE(amdgpu_debugfs_gem_list)); #endif return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index d612033a23ac..78d37f92c7be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -578,6 +578,20 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } +int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) +{ + + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = smu_get_status_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index d43c11671a38..1e7a2b0997c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -134,6 +134,7 @@ struct gb_addr_config { uint8_t num_banks; uint8_t num_se; uint8_t num_rb_per_se; + uint8_t num_pkrs; }; struct amdgpu_gfx_config { @@ -377,6 +378,7 @@ void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); +int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev); void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index acabb57aa8af..34cbd6f6a56b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -357,6 +357,9 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->funcs->vmhub; + if (ring == &adev->mes.ring) + continue; + inv_eng = ffs(vm_inv_engs[vmhub]); if (!inv_eng) { dev_err(adev->dev, "no VM inv eng for ring %s\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 2bd9423c1dab..acdb61cfa24c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -83,6 +83,15 @@ struct amdgpu_vmhub { uint32_t vm_context0_cntl; uint32_t vm_l2_pro_fault_status; uint32_t vm_l2_pro_fault_cntl; + + /* + * store the register distances between two continuous context domain + * and invalidation engine. + */ + uint32_t ctx_distance; + uint32_t ctx_addr_distance; /* include LO32/HI32 */ + uint32_t eng_distance; + uint32_t eng_addr_distance; /* include LO32/HI32 */ }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index bab8feed46da..697bc2c6fdb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -232,8 +232,6 @@ err_out: * amdgpu_gtt_mgr_del - free ranges * * @man: TTM memory type manager - * @tbo: TTM BO we need this range for - * @place: placement flags and restrictions * @mem: TTM memory object * * Free the allocated GTT again. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index b91853fd66d3..dcd492170598 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -178,7 +178,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && - ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; @@ -468,7 +468,8 @@ static const struct drm_info_list amdgpu_debugfs_sa_list[] = { int amdgpu_debugfs_sa_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, + ARRAY_SIZE(amdgpu_debugfs_sa_list)); #else return 0; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index fe92dcd94d4a..7521f4ab55de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, int r; if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) - return amdgpu_sync_fence(sync, ring->vmid_wait, false); + return amdgpu_sync_fence(sync, ring->vmid_wait); fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); if (!fences) @@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_sync_fence(sync, &array->base, false); + r = amdgpu_sync_fence(sync, &array->base); dma_fence_put(ring->vmid_wait); ring->vmid_wait = &array->base; return r; @@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(sync, tmp, false); + r = amdgpu_sync_fence(sync, tmp); return r; } needs_flush = true; @@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence); if (r) return r; @@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Good, we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence); if (r) return r; @@ -435,7 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id = idle; /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(&id->active, fence, false); + r = amdgpu_sync_fence(&id->active, fence); if (r) goto error; @@ -574,6 +574,9 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&id_mgr->ids_lru); atomic_set(&id_mgr->reserved_vmid_num, 0); + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { amdgpu_vmid_reset(adev, i, j); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 47207188c569..937029ad5271 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,7 +37,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) memset(&ti, 0, sizeof(struct amdgpu_task_info)); - if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { + if (amdgpu_gpu_recovery && + amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { DRM_ERROR("ring %s timeout, but soft recovered\n", s_job->sched->name); return; @@ -183,16 +184,13 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; struct dma_fence *fence; - bool explicit = false; int r; - fence = amdgpu_sync_get_fence(&job->sync, &explicit); - if (fence && explicit) { - if (drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(&job->sched_sync, fence, false); - if (r) - DRM_ERROR("Error adding fence (%d)\n", r); - } + fence = amdgpu_sync_get_fence(&job->sync); + if (fence && drm_sched_dependency_optimized(fence, s_entity)) { + r = amdgpu_sync_fence(&job->sched_sync, fence); + if (r) + DRM_ERROR("Error adding fence (%d)\n", r); } while (fence == NULL && vm && !job->vmid) { @@ -202,7 +200,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - fence = amdgpu_sync_get_fence(&job->sync, NULL); + fence = amdgpu_sync_get_fence(&job->sync); } return fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index d31d65e6b039..8996cb4ed57a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -37,6 +37,8 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work); int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) { INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler); + mutex_init(&adev->jpeg.jpeg_pg_lock); + atomic_set(&adev->jpeg.total_submission_cnt, 0); return 0; } @@ -54,6 +56,8 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec); } + mutex_destroy(&adev->jpeg.jpeg_pg_lock); + return 0; } @@ -83,7 +87,7 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec); } - if (fences == 0) + if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, AMD_PG_STATE_GATE); else @@ -93,15 +97,19 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work); - if (set_clocks) - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + atomic_inc(&adev->jpeg.total_submission_cnt); + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + mutex_lock(&adev->jpeg.jpeg_pg_lock); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, AMD_PG_STATE_UNGATE); + mutex_unlock(&adev->jpeg.jpeg_pg_lock); } void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring) { + atomic_dec(&ring->adev->jpeg.total_submission_cnt); schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 5131a0a1bc8a..55fbff2be761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -46,6 +46,8 @@ struct amdgpu_jpeg { unsigned harvest_config; struct delayed_work idle_work; enum amd_powergating_state cur_state; + struct mutex jpeg_pg_lock; + atomic_t total_submission_cnt; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index fff9c013f337..55ff071217a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -167,18 +167,33 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) } if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) /* enable runpm by default for boco */ - adev->runpm = true; - else if (amdgpu_device_supports_baco(dev) && - (amdgpu_runtime_pm != 0) && - (adev->asic_type >= CHIP_TOPAZ) && - (adev->asic_type != CHIP_VEGA10) && - (adev->asic_type != CHIP_VEGA20) && - (adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */ - adev->runpm = true; - else if (amdgpu_device_supports_baco(dev) && - (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 on CI */ + (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ adev->runpm = true; + } else if (amdgpu_device_supports_baco(dev) && + (amdgpu_runtime_pm != 0)) { + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: +#endif + case CHIP_VEGA20: + case CHIP_ARCTURUS: + case CHIP_SIENNA_CICHLID: + /* enable runpm if runpm=1 */ + if (amdgpu_runtime_pm > 0) + adev->runpm = true; + break; + case CHIP_VEGA10: + /* turn runpm on if noretry=0 */ + if (!amdgpu_noretry) + adev->runpm = true; + break; + default: + /* enable runpm on VI+ */ + adev->runpm = true; + break; + } + } /* Call ACPI methods: require modeset init * but failure is not fatal @@ -695,9 +710,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return n ? -EFAULT : 0; } case AMDGPU_INFO_DEV_INFO: { - struct drm_amdgpu_info_device dev_info = {}; + struct drm_amdgpu_info_device dev_info; uint64_t vm_size; + memset(&dev_info, 0, sizeof(dev_info)); dev_info.device_id = dev->pdev->device; dev_info.chip_rev = adev->rev_id; dev_info.external_rev = adev->external_rev_id; @@ -994,7 +1010,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) r = pm_runtime_get_sync(dev->dev); if (r < 0) - return r; + goto pm_put; fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); if (unlikely(!fpriv)) { @@ -1045,6 +1061,7 @@ error_pasid: out_suspend: pm_runtime_mark_last_busy(dev->dev); +pm_put: pm_runtime_put_autosuspend(dev->dev); return r; @@ -1343,8 +1360,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) fw_info.feature, fw_info.ver); /* MEC2 */ - if (adev->asic_type == CHIP_KAVERI || - (adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) { + if (adev->gfx.mec2_fw) { query_fw.index = 1; ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 78fe49033543..7334982ea702 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -24,10 +24,32 @@ #ifndef __AMDGPU_MES_H__ #define __AMDGPU_MES_H__ +#define AMDGPU_MES_MAX_COMPUTE_PIPES 8 +#define AMDGPU_MES_MAX_GFX_PIPES 2 +#define AMDGPU_MES_MAX_SDMA_PIPES 2 + +enum amdgpu_mes_priority_level { + AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1, + AMDGPU_MES_PRIORITY_LEVEL_MEDIUM = 2, + AMDGPU_MES_PRIORITY_LEVEL_HIGH = 3, + AMDGPU_MES_PRIORITY_LEVEL_REALTIME = 4, + AMDGPU_MES_PRIORITY_NUM_LEVELS +}; + struct amdgpu_mes_funcs; struct amdgpu_mes { - struct amdgpu_adev *adev; + struct amdgpu_device *adev; + + uint32_t total_max_queue; + uint32_t doorbell_id_offset; + uint32_t max_doorbell_slices; + + uint64_t default_process_quantum; + uint64_t default_gang_quantum; + + struct amdgpu_ring ring; const struct firmware *fw; @@ -45,8 +67,27 @@ struct amdgpu_mes { uint32_t data_fw_version; uint64_t data_start_addr; + /* eop gpu obj */ + struct amdgpu_bo *eop_gpu_obj; + uint64_t eop_gpu_addr; + + void *mqd_backup; + + uint32_t vmid_mask_gfxhub; + uint32_t vmid_mask_mmhub; + uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; + uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; + uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; + uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; + uint32_t sch_ctx_offs; + uint64_t sch_ctx_gpu_addr; + uint64_t *sch_ctx_ptr; + uint32_t query_status_fence_offs; + uint64_t query_status_fence_gpu_addr; + uint64_t *query_status_fence_ptr; + /* ip specific functions */ - struct amdgpu_mes_funcs *funcs; + const struct amdgpu_mes_funcs *funcs; }; struct mes_add_queue_input { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 16596a9ccabe..e4dbf14320b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -167,8 +167,10 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) @@ -212,8 +214,10 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, return -EINVAL; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); @@ -307,8 +311,10 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); @@ -369,8 +375,10 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, } ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) current_level = smu_get_performance_level(&adev->smu); @@ -449,8 +457,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { ret = smu_get_power_num_states(&adev->smu, &data); @@ -491,8 +501,10 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { pm = smu_get_current_power_state(smu); @@ -567,8 +579,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, state = data.states[idx]; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && @@ -608,8 +622,10 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { size = smu_sys_get_pp_table(&adev->smu, (void **)&table); @@ -650,8 +666,10 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); @@ -778,8 +796,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; while (isspace(*++tmp_str)); - while (tmp_str[0]) { - sub_str = strsep(&tmp_str, delimiter); + while ((sub_str = strsep(&tmp_str, delimiter)) != NULL) { ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); if (ret) return -EINVAL; @@ -790,8 +807,10 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, } ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, @@ -847,8 +866,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); @@ -905,8 +926,10 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); @@ -942,8 +965,10 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_sys_get_pp_feature_mask(&adev->smu, buf); @@ -1001,8 +1026,10 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); @@ -1039,8 +1066,7 @@ static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask) memcpy(buf_cpy, buf, bytes); buf_cpy[bytes] = '\0'; tmp = buf_cpy; - while (tmp[0]) { - sub_str = strsep(&tmp, delimiter); + while ((sub_str = strsep(&tmp, delimiter)) != NULL) { if (strlen(sub_str)) { ret = kstrtol(sub_str, 0, &level); if (ret) @@ -1071,11 +1097,13 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, return ret; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); @@ -1101,8 +1129,10 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); @@ -1135,11 +1165,13 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, return ret; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); @@ -1165,8 +1197,10 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); @@ -1199,11 +1233,13 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, return ret; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); else @@ -1231,8 +1267,10 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); @@ -1265,11 +1303,13 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, return ret; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); else @@ -1297,8 +1337,10 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); @@ -1331,11 +1373,13 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, return ret; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); else @@ -1363,8 +1407,10 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); @@ -1397,11 +1443,13 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, return ret; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); else @@ -1429,8 +1477,10 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); @@ -1462,8 +1512,10 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, return -EINVAL; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); @@ -1498,8 +1550,10 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); @@ -1531,8 +1585,10 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, return -EINVAL; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); @@ -1587,8 +1643,10 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) size = smu_get_power_profile_mode(&adev->smu, buf); @@ -1609,7 +1667,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, const char *buf, size_t count) { - int ret = 0xff; + int ret; struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t parameter_size = 0; @@ -1637,8 +1695,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, i++; memcpy(buf_cpy, buf, count-i); tmp_str = buf_cpy; - while (tmp_str[0]) { - sub_str = strsep(&tmp_str, delimiter); + while ((sub_str = strsep(&tmp_str, delimiter)) != NULL) { ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); if (ret) return -EINVAL; @@ -1650,8 +1707,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, parameter[parameter_size] = profile_mode; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } if (is_support_sw_smu(adev)) ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); @@ -1687,8 +1746,10 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, return -EPERM; r = pm_runtime_get_sync(ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(ddev->dev); return r; + } /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, @@ -1723,8 +1784,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, return -EPERM; r = pm_runtime_get_sync(ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(ddev->dev); return r; + } /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, @@ -1770,8 +1833,10 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, return -ENODATA; ret = pm_runtime_get_sync(ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); return ret; + } amdgpu_asic_get_pcie_usage(adev, &count0, &count1); @@ -1808,9 +1873,76 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, return 0; } +/** + * DOC: thermal_throttling_logging + * + * Thermal throttling pulls down the clock frequency and thus the performance. + * It's an useful mechanism to protect the chip from overheating. Since it + * impacts performance, the user controls whether it is enabled and if so, + * the log frequency. + * + * Reading back the file shows you the status(enabled or disabled) and + * the interval(in seconds) between each thermal logging. + * + * Writing an integer to the file, sets a new logging interval, in seconds. + * The value should be between 1 and 3600. If the value is less than 1, + * thermal logging is disabled. Values greater than 3600 are ignored. + */ +static ssize_t amdgpu_get_thermal_throttling_logging(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%s: thermal throttling logging %s, with interval %d seconds\n", + adev->ddev->unique, + atomic_read(&adev->throttling_logging_enabled) ? "enabled" : "disabled", + adev->throttling_logging_rs.interval / HZ + 1); +} + +static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + long throttling_logging_interval; + unsigned long flags; + int ret = 0; + + ret = kstrtol(buf, 0, &throttling_logging_interval); + if (ret) + return ret; + + if (throttling_logging_interval > 3600) + return -EINVAL; + + if (throttling_logging_interval > 0) { + raw_spin_lock_irqsave(&adev->throttling_logging_rs.lock, flags); + /* + * Reset the ratelimit timer internals. + * This can effectively restart the timer. + */ + adev->throttling_logging_rs.interval = + (throttling_logging_interval - 1) * HZ; + adev->throttling_logging_rs.begin = 0; + adev->throttling_logging_rs.printed = 0; + adev->throttling_logging_rs.missed = 0; + raw_spin_unlock_irqrestore(&adev->throttling_logging_rs.lock, flags); + + atomic_set(&adev->throttling_logging_enabled, 1); + } else { + atomic_set(&adev->throttling_logging_enabled, 0); + } + + return count; +} + static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), - AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC), @@ -1830,6 +1962,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC), }; static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, @@ -1872,7 +2005,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (adev->flags & AMD_IS_APU) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(unique_id)) { - if (!adev->unique_id) + if (asic_type != CHIP_VEGA10 && + asic_type != CHIP_VEGA20 && + asic_type != CHIP_ARCTURUS) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_features)) { if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10) @@ -2003,8 +2138,10 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } switch (channel) { case PP_TEMP_JUNCTION: @@ -2134,8 +2271,10 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(adev->ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); @@ -2172,8 +2311,10 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, return err; ret = pm_runtime_get_sync(adev->ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); @@ -2220,8 +2361,10 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); @@ -2272,8 +2415,10 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); @@ -2305,8 +2450,10 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); @@ -2337,8 +2484,10 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); @@ -2365,8 +2514,10 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); @@ -2392,8 +2543,10 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); @@ -2424,8 +2577,10 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); @@ -2473,8 +2628,10 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, return -EPERM; ret = pm_runtime_get_sync(adev->ddev->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return ret; + } if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); @@ -2519,8 +2676,10 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, return -EINVAL; err = pm_runtime_get_sync(adev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); @@ -2551,8 +2710,10 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, @@ -2590,8 +2751,10 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, return -EINVAL; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, @@ -2626,8 +2789,10 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, @@ -2665,11 +2830,13 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } if (is_support_sw_smu(adev)) { - smu_get_power_limit(&adev->smu, &limit, true, true); + smu_get_power_limit(&adev->smu, &limit, true); size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); @@ -2697,11 +2864,13 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } if (is_support_sw_smu(adev)) { - smu_get_power_limit(&adev->smu, &limit, false, true); + smu_get_power_limit(&adev->smu, &limit, false); size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); @@ -2740,8 +2909,10 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, err = pm_runtime_get_sync(adev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } if (is_support_sw_smu(adev)) err = smu_set_power_limit(&adev->smu, value); @@ -2771,8 +2942,10 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, @@ -2784,7 +2957,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", sclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev, @@ -2806,8 +2979,10 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, @@ -2819,7 +2994,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", mclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, @@ -3037,6 +3212,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; + /* Skip crit temp on APU */ + if ((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ) && + (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) + return 0; + /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || @@ -3380,21 +3561,34 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); - if (ret) - DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", - enable ? "enable" : "disable", ret); - - /* enable/disable Low Memory PState for UVD (4k videos) */ - if (adev->asic_type == CHIP_STONEY && - adev->uvd.decode_image_width >= WIDTH_4K) { - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.uvd_active = true; + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; + } else { + adev->pm.dpm.uvd_active = false; + } + mutex_unlock(&adev->pm.mutex); - if (hwmgr && hwmgr->hwmgr_func && - hwmgr->hwmgr_func->update_nbdpm_pstate) - hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, - !enable, - true); + amdgpu_pm_compute_clocks(adev); + } else { + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); + if (ret) + DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", + enable ? "enable" : "disable", ret); + + /* enable/disable Low Memory PState for UVD (4k videos) */ + if (adev->asic_type == CHIP_STONEY && + adev->uvd.decode_image_width >= WIDTH_4K) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + + if (hwmgr && hwmgr->hwmgr_func && + hwmgr->hwmgr_func->update_nbdpm_pstate) + hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, + !enable, + true); + } } } @@ -3402,10 +3596,24 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); - if (ret) - DRM_ERROR("Dpm %s vce failed, ret = %d. \n", - enable ? "enable" : "disable", ret); + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; + } else { + adev->pm.dpm.vce_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_pm_compute_clocks(adev); + } else { + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); + if (ret) + DRM_ERROR("Dpm %s vce failed, ret = %d. \n", + enable ? "enable" : "disable", ret); + } } void amdgpu_pm_print_power_states(struct amdgpu_device *adev) @@ -3669,8 +3877,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) return -EPERM; r = pm_runtime_get_sync(dev->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(dev->dev); return r; + } amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7301fdcfb8bc..7fe564275457 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -98,6 +98,8 @@ static int psp_early_init(void *handle) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -115,6 +117,44 @@ static int psp_early_init(void *handle) return 0; } +static void psp_memory_training_fini(struct psp_context *psp) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; + kfree(ctx->sys_cache); + ctx->sys_cache = NULL; +} + +static int psp_memory_training_init(struct psp_context *psp) +{ + int ret; + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + + if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { + DRM_DEBUG("memory training is not supported!\n"); + return 0; + } + + ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); + if (ctx->sys_cache == NULL) { + DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); + ret = -ENOMEM; + goto Err_out; + } + + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); + ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; + return 0; + +Err_out: + psp_memory_training_fini(psp); + return ret; +} + static int psp_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -127,7 +167,7 @@ static int psp_sw_init(void *handle) return ret; } - ret = psp_mem_training_init(psp); + ret = psp_memory_training_init(psp); if (ret) { DRM_ERROR("Failed to initialize memory training!\n"); return ret; @@ -152,11 +192,15 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - psp_mem_training_fini(&adev->psp); - release_firmware(adev->psp.sos_fw); - adev->psp.sos_fw = NULL; - release_firmware(adev->psp.asd_fw); - adev->psp.asd_fw = NULL; + psp_memory_training_fini(&adev->psp); + if (adev->psp.sos_fw) { + release_firmware(adev->psp.sos_fw); + adev->psp.sos_fw = NULL; + } + if (adev->psp.asd_fw) { + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + } if (adev->psp.ta_fw) { release_firmware(adev->psp.ta_fw); adev->psp.ta_fw = NULL; @@ -231,8 +275,9 @@ psp_cmd_submit_buf(struct psp_context *psp, amdgpu_asic_invalidate_hdp(psp->adev, NULL); } - /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command in SRIOV */ - skip_unsupport = (psp->cmd_buf_mem->resp.status == 0xffff000a) && amdgpu_sriov_vf(psp->adev); + /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */ + skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED || + psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev); /* In some cases, psp response status is not 0 even there is no * problem while the command is submitted. Some version of PSP FW @@ -350,11 +395,48 @@ static int psp_tmr_init(struct psp_context *psp) return ret; } +static int psp_clear_vf_fw(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + kfree(cmd); + + return ret; +} + +static bool psp_skip_tmr(struct psp_context *psp) +{ + switch (psp->adev->asic_type) { + case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + return true; + default: + return false; + } +} + static int psp_tmr_load(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR. + * Already set up by host driver. + */ + if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp)) + return 0; + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -372,6 +454,52 @@ static int psp_tmr_load(struct psp_context *psp) return ret; } +static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd) +{ + if (amdgpu_sriov_vf(psp->adev)) + cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR; + else + cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR; +} + +static int psp_tmr_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_tmr_unload_cmd_buf(psp, cmd); + DRM_INFO("free PSP TMR buffer\n"); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_tmr_terminate(struct psp_context *psp) +{ + int ret; + void *tmr_buf; + void **pptr; + + ret = psp_tmr_unload(psp); + if (ret) + return ret; + + /* free TMR memory buffer */ + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + + return 0; +} + static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t asd_mc, uint32_t size) { @@ -394,7 +522,8 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. * TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev)) + if (amdgpu_sriov_vf(psp->adev) || + (psp->adev->asic_type == CHIP_NAVY_FLOUNDER)) return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); @@ -523,7 +652,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; } -int psp_ta_invoke(struct psp_context *psp, +static int psp_ta_invoke(struct psp_context *psp, uint32_t ta_cmd_id, uint32_t session_id) { @@ -1316,6 +1445,14 @@ static int psp_hw_start(struct psp_context *psp) } } + if (psp->spl_bin_size) { + ret = psp_bootloader_load_spl(psp); + if (ret) { + DRM_ERROR("PSP load spl failed!\n"); + return ret; + } + } + ret = psp_bootloader_load_sysdrv(psp); if (ret) { DRM_ERROR("PSP load sysdrv failed!\n"); @@ -1335,6 +1472,12 @@ static int psp_hw_start(struct psp_context *psp) return ret; } + ret = psp_clear_vf_fw(psp); + if (ret) { + DRM_ERROR("PSP clear vf fw!\n"); + return ret; + } + ret = psp_tmr_init(psp); if (ret) { DRM_ERROR("PSP tmr init failed!\n"); @@ -1389,6 +1532,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_SDMA7: *type = GFX_FW_TYPE_SDMA7; break; + case AMDGPU_UCODE_ID_CP_MES: + *type = GFX_FW_TYPE_CP_MES; + break; + case AMDGPU_UCODE_ID_CP_MES_DATA: + *type = GFX_FW_TYPE_MES_STACK; + break; case AMDGPU_UCODE_ID_CP_CE: *type = GFX_FW_TYPE_CP_CE; break; @@ -1638,6 +1787,16 @@ static int psp_np_fw_load(struct psp_context *psp) if (fw_load_skip_check(psp, ucode)) continue; + if (psp->autoload_supported && + (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) && + (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 || + ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 || + ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3)) + /* PSP only receive one SDMA fw for sienna_cichlid, + * as all four sdma fw are same */ + continue; + psp_print_fw_hdr(psp, ucode); ret = psp_execute_np_fw_load(psp, ucode); @@ -1779,8 +1938,7 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - void *tmr_buf; - void **pptr; + int ret; if (psp->adev->psp.ta_fw) { psp_ras_terminate(psp); @@ -1789,11 +1947,15 @@ static int psp_hw_fini(void *handle) } psp_asd_unload(psp); + ret = psp_clear_vf_fw(psp); + if (ret) { + DRM_ERROR("PSP clear vf fw!\n"); + return ret; + } + psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -1840,6 +2002,18 @@ static int psp_suspend(void *handle) } } + ret = psp_asd_unload(psp); + if (ret) { + DRM_ERROR("Failed to unload asd\n"); + return ret; + } + + ret = psp_tmr_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate tmr\n"); + return ret; + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); @@ -2054,6 +2228,7 @@ int psp_init_sos_microcode(struct psp_context *psp, const struct psp_firmware_header_v1_0 *sos_hdr; const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; + const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; int err = 0; if (!chip_name) { @@ -2098,6 +2273,18 @@ int psp_init_sos_microcode(struct psp_context *psp, adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); } + if (sos_hdr->header.header_version_minor == 3) { + sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; + adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.toc_size_bytes); + adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_3->v1_1.toc_offset_bytes); + adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_size_bytes); + adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_offset_bytes); + adev->psp.spl_bin_size = le32_to_cpu(sos_hdr_v1_3->spl_size_bytes); + adev->psp.spl_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_3->spl_offset_bytes); + } break; default: dev_err(adev->dev, @@ -2116,6 +2303,107 @@ out: return err; } +int parse_ta_bin_descriptor(struct psp_context *psp, + const struct ta_fw_bin_desc *desc, + const struct ta_firmware_header_v2_0 *ta_hdr) +{ + uint8_t *ucode_start_addr = NULL; + + if (!psp || !desc || !ta_hdr) + return -EINVAL; + + ucode_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(desc->offset_bytes) + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + switch (desc->fw_type) { + case TA_FW_TYPE_PSP_ASD: + psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_feature_version = le32_to_cpu(desc->fw_version); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_XGMI: + psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_xgmi_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_RAS: + psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_ras_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_HDCP: + psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_hdcp_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_DTM: + psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_dtm_start_addr = ucode_start_addr; + break; + default: + dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type); + break; + } + + return 0; +} + +int psp_init_ta_microcode(struct psp_context *psp, + const char *chip_name) +{ + struct amdgpu_device *adev = psp->adev; + char fw_name[30]; + const struct ta_firmware_header_v2_0 *ta_hdr; + int err = 0; + int ta_index = 0; + + if (!chip_name) { + dev_err(adev->dev, "invalid chip name for ta microcode\n"); + return -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out; + + ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data; + + if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) { + dev_err(adev->dev, "unsupported TA header version\n"); + err = -EINVAL; + goto out; + } + + if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_TA_PACKAGING) { + dev_err(adev->dev, "packed TA count exceeds maximum limit\n"); + err = -EINVAL; + goto out; + } + + for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) { + err = parse_ta_bin_descriptor(psp, + &ta_hdr->ta_fw_bin[ta_index], + ta_hdr); + if (err) + goto out; + } + + return 0; +out: + dev_err(adev->dev, "fail to initialize ta microcode\n"); + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + return err; +} + static int psp_set_clockgating_state(void *handle, enum amd_clockgating_state state) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 2a56ad996d83..623888bf30cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -50,6 +50,7 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, + PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, }; enum psp_ring_type @@ -84,6 +85,7 @@ struct psp_funcs { int (*init_microcode)(struct psp_context *psp); int (*bootloader_load_kdb)(struct psp_context *psp); + int (*bootloader_load_spl)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -95,8 +97,6 @@ struct psp_funcs enum psp_ring_type ring_type); bool (*smu_reload_quirk)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp); - int (*mem_training_init)(struct psp_context *psp); - void (*mem_training_fini)(struct psp_context *psp); int (*mem_training)(struct psp_context *psp, uint32_t ops); uint32_t (*ring_get_wptr)(struct psp_context *psp); void (*ring_set_wptr)(struct psp_context *psp, uint32_t value); @@ -224,10 +224,12 @@ struct psp_context uint32_t sos_bin_size; uint32_t toc_bin_size; uint32_t kdb_bin_size; + uint32_t spl_bin_size; uint8_t *sys_start_addr; uint8_t *sos_start_addr; uint8_t *toc_start_addr; uint8_t *kdb_start_addr; + uint8_t *spl_start_addr; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -298,6 +300,8 @@ struct amdgpu_psp_funcs { ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0) #define psp_bootloader_load_kdb(psp) \ ((psp)->funcs->bootloader_load_kdb ? (psp)->funcs->bootloader_load_kdb((psp)) : 0) +#define psp_bootloader_load_spl(psp) \ + ((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0) #define psp_bootloader_load_sysdrv(psp) \ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ @@ -306,10 +310,6 @@ struct amdgpu_psp_funcs { ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) #define psp_mode1_reset(psp) \ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) -#define psp_mem_training_init(psp) \ - ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0) -#define psp_mem_training_fini(psp) \ - ((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0) #define psp_mem_training(psp, ops) \ ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0) @@ -371,4 +371,6 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name); int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name); +int psp_init_ta_microcode(struct psp_context *psp, + const char *chip_name); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 50fe08bf2f72..bcce4c0be462 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -86,7 +86,7 @@ void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready) amdgpu_ras_get_context(adev)->error_query_ready = ready; } -bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev) +static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev) { if (adev && amdgpu_ras_get_context(adev)) return amdgpu_ras_get_context(adev)->error_query_ready; @@ -318,6 +318,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * case 2: if ((data.inject.address >= adev->gmc.mc_vram_size) || (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + dev_warn(adev->dev, "RAS WARN: input address " + "0x%llx is invalid.", + data.inject.address); ret = -EINVAL; break; } @@ -502,7 +505,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, } /* obj end */ -void amdgpu_ras_parse_status_code(struct amdgpu_device* adev, +static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev, const char* invoke_type, const char* block_name, enum ta_ras_status ret) @@ -812,7 +815,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, } /* Trigger XGMI/WAFL error */ -int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, +static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, struct ta_ras_trigger_error_input *block_info) { int ret; @@ -1615,7 +1618,7 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) data = con->eh_data; save_count = data->count - control->num_recs; /* only new entries are saved */ - if (save_count > 0) + if (save_count > 0) { if (amdgpu_ras_eeprom_process_recods(control, &data->bps[control->num_recs], true, @@ -1624,6 +1627,9 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) return -EIO; } + dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); + } + return 0; } @@ -1914,9 +1920,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_check_supported(adev, &con->hw_supported, &con->supported); if (!con->hw_supported) { - amdgpu_ras_set_context(adev, NULL); - kfree(con); - return 0; + r = 0; + goto err_out; } con->features = 0; @@ -1927,29 +1932,29 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (adev->nbio.funcs->init_ras_controller_interrupt) { r = adev->nbio.funcs->init_ras_controller_interrupt(adev); if (r) - return r; + goto err_out; } if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); if (r) - return r; + goto err_out; } - amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK; - - if (amdgpu_ras_fs_init(adev)) - goto fs_out; + if (amdgpu_ras_fs_init(adev)) { + r = -EINVAL; + goto err_out; + } dev_info(adev->dev, "RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", con->hw_supported, con->supported); return 0; -fs_out: +err_out: amdgpu_ras_set_context(adev, NULL); kfree(con); - return -EINVAL; + return r; } /* helper function to handle common stuff in ip late init phase */ @@ -2129,3 +2134,14 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) amdgpu_ras_reset_gpu(adev); } } + +bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_VEGA20 && + adev->pm.fw_version <= 0x283400) { + return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) && + amdgpu_ras_intr_triggered(); + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e7df5d8429f8..b2667342cf67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -633,4 +633,5 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready); +bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 7e8647a05df7..9e7d640920fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -47,7 +47,6 @@ struct amdgpu_ras_eeprom_control { uint32_t next_addr; unsigned int num_recs; struct mutex tbl_mutex; - bool bus_locked; uint32_t tbl_byte_sum; uint16_t i2c_address; // 8-bit represented address }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index be218754629a..da871d84b742 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -62,7 +62,8 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC, AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC, AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG, - AMDGPU_RING_TYPE_KIQ + AMDGPU_RING_TYPE_KIQ, + AMDGPU_RING_TYPE_MES }; enum amdgpu_ib_pool_type { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index b87ca171986a..8ea6c49529e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -35,7 +35,6 @@ struct amdgpu_sync_entry { struct hlist_node node; struct dma_fence *fence; - bool explicit; }; static struct kmem_cache *amdgpu_sync_slab; @@ -129,8 +128,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep, * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. */ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) { struct amdgpu_sync_entry *e; @@ -139,10 +137,6 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, continue; amdgpu_sync_keep_later(&e->fence, f); - - /* Preserve eplicit flag to not loose pipe line sync */ - e->explicit |= explicit; - return true; } return false; @@ -153,27 +147,23 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, * * @sync: sync object to add fence to * @f: fence to sync to - * @explicit: if this is an explicit dependency * * Add the fence to the sync object. */ -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit) +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) { struct amdgpu_sync_entry *e; if (!f) return 0; - if (amdgpu_sync_add_later(sync, f, explicit)) + if (amdgpu_sync_add_later(sync, f)) return 0; e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); if (!e) return -ENOMEM; - e->explicit = explicit; - hash_add(sync->fences, &e->node, f->context); e->fence = dma_fence_get(f); return 0; @@ -194,7 +184,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) return 0; amdgpu_sync_keep_later(&sync->last_vm_update, fence); - return amdgpu_sync_fence(sync, fence, false); + return amdgpu_sync_fence(sync, fence); } /** @@ -221,7 +211,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, /* always sync to the exclusive fence */ f = dma_resv_get_excl(resv); - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); flist = dma_resv_get_list(resv); if (!flist || r) @@ -237,7 +227,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, /* Always sync to moves, no matter what */ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) { - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); if (r) break; } @@ -275,7 +265,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, continue; } - r = amdgpu_sync_fence(sync, f, false); + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, + "Adding eviction fence to sync obj"); + r = amdgpu_sync_fence(sync, f); if (r) break; } @@ -330,11 +322,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, * amdgpu_sync_get_fence - get the next fence from the sync object * * @sync: sync object to use - * @explicit: true if the next fence is explicit * * Get and removes the next fence from the sync object not signaled yet. */ -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit) +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; @@ -343,8 +334,6 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit hash_for_each_safe(sync->fences, i, tmp, e, node) { f = e->fence; - if (explicit) - *explicit = e->explicit; hash_del(&e->node); kmem_cache_free(amdgpu_sync_slab, e); @@ -376,7 +365,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) hash_for_each_safe(source->fences, i, tmp, e, node) { f = e->fence; if (!dma_fence_is_signaled(f)) { - r = amdgpu_sync_fence(clone, f, e->explicit); + r = amdgpu_sync_fence(clone, f); if (r) return r; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index cfbe5788b8b9..7c0fe20c470d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -47,16 +47,14 @@ struct amdgpu_sync { }; void amdgpu_sync_create(struct amdgpu_sync *sync); -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit); +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, - bool *explicit); +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 5da20fc166d9..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -35,7 +35,7 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) -TRACE_EVENT(amdgpu_device_rreg, +TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( @@ -54,7 +54,7 @@ TRACE_EVENT(amdgpu_device_rreg, (unsigned long)__entry->value) ); -TRACE_EVENT(amdgpu_device_wreg, +TRACE_EVENT(amdgpu_mm_wreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 462402fcd1a4..95f144788b14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -58,6 +58,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" #include "amdgpu_ras.h" +#include "amdgpu_atomfirmware.h" #include "bif/bif_4_1_d.h" #define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 @@ -1063,7 +1064,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) #endif } -int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, +static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct ttm_buffer_object *tbo, uint64_t flags) { @@ -1241,7 +1242,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and * ttm_tt_destroy(). */ -static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) +static void amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1252,14 +1253,13 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) amdgpu_ttm_tt_unpin_userptr(ttm); if (gtt->offset == AMDGPU_BO_INVALID_OFFSET) - return 0; + return; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); if (r) DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", gtt->ttm.ttm.num_pages, gtt->offset); - return r; } static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) @@ -1773,54 +1773,86 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) return 0; } -static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size) +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) { - if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) ) - vram_size -= SZ_1M; + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; - return ALIGN(vram_size, SZ_1M); + memset(ctx, 0, sizeof(*ctx)); + + ctx->c2p_train_data_offset = + ALIGN((adev->gmc.mc_vram_size - adev->discovery_tmr_size - SZ_1M), SZ_1M); + ctx->p2c_train_data_offset = + (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); + ctx->train_data_size = + GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; + + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); } -/** - * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training - * - * @adev: amdgpu_device pointer - * - * create bo vram reservation from memory training. +/* + * reserve TMR memory at the top of VRAM which holds + * IP Discovery data and is protected by PSP. */ -static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) +static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) { int ret; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; + bool mem_train_support = false; - memset(ctx, 0, sizeof(*ctx)); - if (!adev->fw_vram_usage.mem_train_support) { - DRM_DEBUG("memory training does not support!\n"); - return 0; + if (!amdgpu_sriov_vf(adev)) { + ret = amdgpu_mem_train_support(adev); + if (ret == 1) + mem_train_support = true; + else if (ret == -1) + return -EINVAL; + else + DRM_DEBUG("memory training does not support!\n"); } - ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size); - ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); - ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; - - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); - - ret = amdgpu_bo_create_kernel_at(adev, + /* + * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all + * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc) + * + * Otherwise, fallback to legacy approach to check and reserve tmr block for ip + * discovery data and G6 memory training data respectively + */ + adev->discovery_tmr_size = + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); + if (!adev->discovery_tmr_size) + adev->discovery_tmr_size = DISCOVERY_TMR_OFFSET; + + if (mem_train_support) { + /* reserve vram for mem train according to TMR location */ + amdgpu_ttm_training_data_block_init(adev); + ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, AMDGPU_GEM_DOMAIN_VRAM, &ctx->c2p_bo, NULL); + if (ret) { + DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + amdgpu_ttm_training_reserve_vram_fini(adev); + return ret; + } + ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; + } + + ret = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - adev->discovery_tmr_size, + adev->discovery_tmr_size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->discovery_memory, + NULL); if (ret) { - DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); - amdgpu_ttm_training_reserve_vram_fini(adev); + DRM_ERROR("alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); return ret; } - ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; return 0; } @@ -1887,11 +1919,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } /* - *The reserved vram for memory training must be pinned to the specified - *place on the VRAM, so reserve it early. + * only NAVI10 and onwards ASIC support for IP discovery. + * If IP discovery enabled, a block of memory should be + * reserved for IP discovey. */ - if (!amdgpu_sriov_vf(adev)) { - r = amdgpu_ttm_training_reserve_vram_init(adev); + if (adev->discovery_bin) { + r = amdgpu_ttm_reserve_tmr(adev); if (r) return r; } @@ -1907,21 +1940,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) if (r) return r; - /* - * reserve TMR memory at the top of VRAM which holds - * IP Discovery data and is protected by PSP. - */ - if (adev->discovery_tmr_size > 0) { - r = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->discovery_tmr_size, - adev->discovery_tmr_size, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->discovery_memory, - NULL); - if (r) - return r; - } - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 65bb25e31d45..183743c5fb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -279,6 +279,30 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr) DRM_DEBUG("kdb_size_bytes: %u\n", le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); } + if (version_minor == 3) { + const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 = + container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0); + const struct psp_firmware_header_v1_3 *psp_hdr_v1_3 = + container_of(psp_hdr_v1_1, struct psp_firmware_header_v1_3, v1_1); + DRM_DEBUG("toc_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_3->v1_1.toc_header_version)); + DRM_DEBUG("toc_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_3->v1_1.toc_offset_bytes)); + DRM_DEBUG("toc_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_3->v1_1.toc_size_bytes)); + DRM_DEBUG("kdb_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_header_version)); + DRM_DEBUG("kdb_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_offset_bytes)); + DRM_DEBUG("kdb_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_size_bytes)); + DRM_DEBUG("spl_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_3->spl_header_version)); + DRM_DEBUG("spl_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_3->spl_offset_bytes)); + DRM_DEBUG("spl_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_3->spl_size_bytes)); + } } else { DRM_ERROR("Unknown PSP ucode version: %u.%u\n", version_major, version_minor); @@ -365,11 +389,12 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; - default: DRM_ERROR("Unknown firmware load type\n"); } @@ -448,6 +473,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, const struct gfx_firmware_header_v1_0 *cp_hdr = NULL; const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL; const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL; + const struct mes_firmware_header_v1_0 *mes_hdr = NULL; if (NULL == ucode->fw) return 0; @@ -462,12 +488,15 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data; dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data; + mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP || (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES_DATA && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM && @@ -527,6 +556,16 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes; memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm, ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES) { + ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)), + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA) { + ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)), + ucode->ucode_size); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index b0e656409c03..12a8bc8fca0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -99,6 +99,14 @@ struct psp_firmware_header_v1_2 { uint32_t kdb_size_bytes; }; +/* version_major=1, version_minor=3 */ +struct psp_firmware_header_v1_3 { + struct psp_firmware_header_v1_1 v1_1; + uint32_t spl_header_version; + uint32_t spl_offset_bytes; + uint32_t spl_size_bytes; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -116,6 +124,29 @@ struct ta_firmware_header_v1_0 { uint32_t ta_dtm_size_bytes; }; +enum ta_fw_type { + TA_FW_TYPE_UNKOWN, + TA_FW_TYPE_PSP_ASD, + TA_FW_TYPE_PSP_XGMI, + TA_FW_TYPE_PSP_RAS, + TA_FW_TYPE_PSP_HDCP, + TA_FW_TYPE_PSP_DTM, +}; + +struct ta_fw_bin_desc { + uint32_t fw_type; + uint32_t fw_version; + uint32_t offset_bytes; + uint32_t size_bytes; +}; + +/* version_major=2, version_minor=0 */ +struct ta_firmware_header_v2_0 { + struct common_firmware_header header; + uint32_t ta_fw_bin_count; + struct ta_fw_bin_desc ta_fw_bin[]; +}; + /* version_major=1, version_minor=0 */ struct gfx_firmware_header_v1_0 { struct common_firmware_header header; @@ -266,7 +297,9 @@ union amdgpu_firmware_header { struct smc_firmware_header_v2_0 smc_v2_0; struct psp_firmware_header_v1_0 psp; struct psp_firmware_header_v1_1 psp_v1_1; + struct psp_firmware_header_v1_3 psp_v1_3; struct ta_firmware_header_v1_0 ta; + struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; @@ -279,6 +312,8 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; +#define UCODE_MAX_TA_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct ta_fw_bin_desc)) + /* * fw loading support */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 5100ebe8858d..f8bebf18ee36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -54,6 +54,12 @@ #define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) /* Firmware Names */ +#ifdef CONFIG_DRM_AMDGPU_SI +#define FIRMWARE_TAHITI "amdgpu/tahiti_uvd.bin" +#define FIRMWARE_VERDE "amdgpu/verde_uvd.bin" +#define FIRMWARE_PITCAIRN "amdgpu/pitcairn_uvd.bin" +#define FIRMWARE_OLAND "amdgpu/oland_uvd.bin" +#endif #ifdef CONFIG_DRM_AMDGPU_CIK #define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin" #define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin" @@ -100,6 +106,12 @@ struct amdgpu_uvd_cs_ctx { unsigned *buf_sizes; }; +#ifdef CONFIG_DRM_AMDGPU_SI +MODULE_FIRMWARE(FIRMWARE_TAHITI); +MODULE_FIRMWARE(FIRMWARE_VERDE); +MODULE_FIRMWARE(FIRMWARE_PITCAIRN); +MODULE_FIRMWARE(FIRMWARE_OLAND); +#endif #ifdef CONFIG_DRM_AMDGPU_CIK MODULE_FIRMWARE(FIRMWARE_BONAIRE); MODULE_FIRMWARE(FIRMWARE_KABINI); @@ -133,6 +145,20 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + fw_name = FIRMWARE_TAHITI; + break; + case CHIP_VERDE: + fw_name = FIRMWARE_VERDE; + break; + case CHIP_PITCAIRN: + fw_name = FIRMWARE_PITCAIRN; + break; + case CHIP_OLAND: + fw_name = FIRMWARE_OLAND; + break; +#endif #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_BONAIRE: fw_name = FIRMWARE_BONAIRE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2badbc0355f2..495c3d7bb2b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -42,6 +42,8 @@ #define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" #define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" #define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" +#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin" +#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -51,12 +53,14 @@ MODULE_FIRMWARE(FIRMWARE_RENOIR); MODULE_FIRMWARE(FIRMWARE_NAVI10); MODULE_FIRMWARE(FIRMWARE_NAVI14); MODULE_FIRMWARE(FIRMWARE_NAVI12); +MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID); +MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_sw_init(struct amdgpu_device *adev) { - unsigned long bo_size, fw_shared_bo_size; + unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; @@ -107,6 +111,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case CHIP_SIENNA_CICHLID: + fw_name = FIRMWARE_SIENNA_CICHLID; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_NAVY_FLOUNDER: + fw_name = FIRMWARE_NAVY_FLOUNDER; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -160,6 +176,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) @@ -173,6 +190,11 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } + adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr + + bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr + + bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + if (adev->vcn.indirect_sram) { r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo, @@ -182,17 +204,6 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } } - - r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].fw_shared_bo, - &adev->vcn.inst[i].fw_shared_gpu_addr, &adev->vcn.inst[i].fw_shared_cpu_addr); - if (r) { - dev_err(adev->dev, "VCN %d (%d) failed to allocate firmware shared bo\n", i, r); - return r; - } - - fw_shared_bo_size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo); - adev->vcn.inst[i].saved_shm_bo = kvmalloc(fw_shared_bo_size, GFP_KERNEL); } return 0; @@ -208,11 +219,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << j)) continue; - kvfree(adev->vcn.inst[j].saved_shm_bo); - amdgpu_bo_free_kernel(&adev->vcn.inst[j].fw_shared_bo, - &adev->vcn.inst[j].fw_shared_gpu_addr, - (void **)&adev->vcn.inst[j].fw_shared_cpu_addr); - if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo, &adev->vcn.inst[j].dpg_sram_gpu_addr, @@ -258,17 +264,6 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) return -ENOMEM; memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); - - if (adev->vcn.inst[i].fw_shared_bo == NULL) - return 0; - - if (!adev->vcn.inst[i].saved_shm_bo) - return -ENOMEM; - - size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo); - ptr = adev->vcn.inst[i].fw_shared_cpu_addr; - - memcpy_fromio(adev->vcn.inst[i].saved_shm_bo, ptr, size); } return 0; } @@ -306,17 +301,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) } memset_io(ptr, 0, size); } - - if (adev->vcn.inst[i].fw_shared_bo == NULL) - return -EINVAL; - - size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo); - ptr = adev->vcn.inst[i].fw_shared_cpu_addr; - - if (adev->vcn.inst[i].saved_shm_bo != NULL) - memcpy_toio(ptr, adev->vcn.inst[i].saved_shm_bo, size); - else - memset_io(ptr, 0, size); } return 0; } @@ -413,6 +397,10 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; + /* VCN in SRIOV does not support direct register read/write */ + if (amdgpu_sriov_vf(adev)) + return 0; + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 90aa12b22725..7a9b804bc988 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -53,7 +53,9 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 +#define VCN1_VID_SOC_ADDRESS_3_0 0x48200 #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 @@ -65,7 +67,7 @@ /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) -#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \ +#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel) \ ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ UVD_DPG_LMA_CTL__MASK_EN_MASK | \ @@ -75,7 +77,7 @@ RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \ +#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel) \ do { \ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ @@ -86,30 +88,40 @@ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ } while (0) -#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \ +#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ uint32_t internal_reg_offset, addr; \ - bool video_range, aon_range; \ + bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ addr <<= 2; \ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \ + video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) && \ + ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600))))); \ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \ + aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) && \ + ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600))))); \ if (video_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \ (VCN_VID_IP_ADDRESS_2_0)); \ else if (aon_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \ (VCN_AON_IP_ADDRESS_2_0)); \ + else if (video1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) + \ + (VCN_VID_IP_ADDRESS_2_0)); \ + else if (aon1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) + \ + (VCN_AON_IP_ADDRESS_2_0)); \ else \ internal_reg_offset = (0xFFFFF & addr); \ \ internal_reg_offset >>= 2; \ }) -#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \ +#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en) \ ({ \ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ @@ -118,7 +130,7 @@ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \ +#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ do { \ if (!indirect) { \ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ @@ -142,6 +154,7 @@ enum fw_queue_mode { enum engine_status_constants { UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0, + UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, UVD_STATUS__UVD_BUSY = 0x00000004, GB_ADDR_CONFIG_DEFAULT = 0x26010011, @@ -186,7 +199,6 @@ struct amdgpu_vcn_inst { struct amdgpu_irq_src irq; struct amdgpu_vcn_reg external; struct amdgpu_bo *dpg_sram_bo; - struct amdgpu_bo *fw_shared_bo; struct dpg_pause_state pause_state; void *dpg_sram_cpu_addr; uint64_t dpg_sram_gpu_addr; @@ -194,7 +206,6 @@ struct amdgpu_vcn_inst { atomic_t dpg_enc_submission_cnt; void *fw_shared_cpu_addr; uint64_t fw_shared_gpu_addr; - void *saved_shm_bo; }; struct amdgpu_vcn { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f3b38c9e04ca..1203c20491e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -26,6 +26,10 @@ #include <drm/drm_drv.h> #include "amdgpu.h" +#include "amdgpu_ras.h" +#include "vi.h" +#include "soc15.h" +#include "nv.h" bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { @@ -255,12 +259,171 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, return ret; } +static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data; + /* GPU will be marked bad on host if bp count more then 10, + * so alloc 512 is enough. + */ + unsigned int align_space = 512; + void *bps = NULL; + struct amdgpu_bo **bps_bo = NULL; + + *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL); + if (!*data) + return -ENOMEM; + + bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL); + bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL); + + if (!bps || !bps_bo) { + kfree(bps); + kfree(bps_bo); + kfree(*data); + return -ENOMEM; + } + + (*data)->bps = bps; + (*data)->bps_bo = bps_bo; + (*data)->count = 0; + (*data)->last_reserved = 0; + + virt->ras_init_done = true; + + return 0; +} + +static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + struct amdgpu_bo *bo; + int i; + + if (!data) + return; + + for (i = data->last_reserved - 1; i >= 0; i--) { + bo = data->bps_bo[i]; + amdgpu_bo_free_kernel(&bo, NULL, NULL); + data->bps_bo[i] = bo; + data->last_reserved = i; + } +} + +void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + + virt->ras_init_done = false; + + if (!data) + return; + + amdgpu_virt_ras_release_bp(adev); + + kfree(data->bps); + kfree(data->bps_bo); + kfree(data); + virt->virt_eh_data = NULL; +} + +static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev, + struct eeprom_table_record *bps, int pages) +{ + struct amdgpu_virt *virt = &adev->virt; + struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + + if (!data) + return; + + memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); + data->count += pages; +} + +static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + struct amdgpu_bo *bo = NULL; + uint64_t bp; + int i; + + if (!data) + return; + + for (i = data->last_reserved; i < data->count; i++) { + bp = data->bps[i].retired_page; + + /* There are two cases of reserve error should be ignored: + * 1) a ras bad page has been allocated (used by someone); + * 2) a ras bad page has been reserved (duplicate error injection + * for one page); + */ + if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL)) + DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); + + data->bps_bo[i] = bo; + data->last_reserved = i + 1; + bo = NULL; + } +} + +static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev, + uint64_t retired_page) +{ + struct amdgpu_virt *virt = &adev->virt; + struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + int i; + + if (!data) + return true; + + for (i = 0; i < data->count; i++) + if (retired_page == data->bps[i].retired_page) + return true; + + return false; +} + +static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev, + uint64_t bp_block_offset, uint32_t bp_block_size) +{ + struct eeprom_table_record bp; + uint64_t retired_page; + uint32_t bp_idx, bp_cnt; + + if (bp_block_size) { + bp_cnt = bp_block_size / sizeof(uint64_t); + for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) { + retired_page = *(uint64_t *)(adev->fw_vram_usage.va + + bp_block_offset + bp_idx * sizeof(uint64_t)); + bp.retired_page = retired_page; + + if (amdgpu_virt_ras_check_bad_page(adev, retired_page)) + continue; + + amdgpu_virt_ras_add_bps(adev, &bp, 1); + + amdgpu_virt_ras_reserve_bps(adev); + } + } +} + void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) { uint32_t pf2vf_size = 0; uint32_t checksum = 0; uint32_t checkval; char *str; + uint64_t bp_block_offset = 0; + uint32_t bp_block_size = 0; + struct amdgim_pf2vf_info_v2 *pf2vf_v2 = NULL; adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; @@ -275,6 +438,19 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) /* pf2vf message must be in 4K */ if (pf2vf_size > 0 && pf2vf_size < 4096) { + if (adev->virt.fw_reserve.p_pf2vf->version == 2) { + pf2vf_v2 = (struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf; + bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_L & 0xFFFFFFFF) | + ((((uint64_t)pf2vf_v2->bp_block_offset_H) << 32) & 0xFFFFFFFF00000000); + bp_block_size = pf2vf_v2->bp_block_size; + + if (bp_block_size && !adev->virt.ras_init_done) + amdgpu_virt_init_ras_err_handler_data(adev); + + if (adev->virt.ras_init_done) + amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); + } + checkval = amdgpu_virt_fw_reserve_get_checksum( adev->virt.fw_reserve.p_pf2vf, pf2vf_size, adev->virt.fw_reserve.checksum_key, checksum); @@ -321,6 +497,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) case CHIP_VEGA20: case CHIP_NAVI10: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: case CHIP_ARCTURUS: reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER); break; @@ -339,14 +516,39 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } + + /* we have the ability to check now */ + if (amdgpu_sriov_vf(adev)) { + switch (adev->asic_type) { + case CHIP_TONGA: + case CHIP_FIJI: + vi_set_virt_ops(adev); + break; + case CHIP_VEGA10: + case CHIP_VEGA20: + case CHIP_ARCTURUS: + soc15_set_virt_ops(adev); + break; + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + nv_set_virt_ops(adev); + /* try send GPU_INIT_DATA request to host */ + amdgpu_virt_request_init_data(adev); + break; + default: /* other chip doesn't support SRIOV */ + DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type); + break; + } + } } -bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev) +static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev) { return amdgpu_sriov_is_debug(adev) ? true : false; } -bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev) +static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev) { return amdgpu_sriov_is_normal(adev) ? true : false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b90e822cebd7..f826945989c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -143,19 +143,27 @@ struct amdgim_pf2vf_info_v2 { uint32_t vce_enc_max_pixels_count; /* 16x16 pixels/sec, codec independent */ uint32_t vce_enc_max_bandwidth; + /* Bad pages block position in BYTE */ + uint32_t bp_block_offset_L; + uint32_t bp_block_offset_H; + /* Bad pages block size in BYTE */ + uint32_t bp_block_size; /* MEC FW position in kb from the start of VF visible frame buffer */ - uint64_t mecfw_kboffset; + uint32_t mecfw_kboffset_L; + uint32_t mecfw_kboffset_H; /* MEC FW size in KB */ uint32_t mecfw_ksize; /* UVD FW position in kb from the start of VF visible frame buffer */ - uint64_t uvdfw_kboffset; + uint32_t uvdfw_kboffset_L; + uint32_t uvdfw_kboffset_H; /* UVD FW size in KB */ uint32_t uvdfw_ksize; /* VCE FW position in kb from the start of VF visible frame buffer */ - uint64_t vcefw_kboffset; + uint32_t vcefw_kboffset_L; + uint32_t vcefw_kboffset_H; /* VCE FW size in KB */ uint32_t vcefw_ksize; - uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)]; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (18 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 0)]; } __aligned(4); @@ -254,6 +262,17 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; } \ } while (0) +struct amdgpu_virt_ras_err_handler_data { + /* point to bad page records array */ + struct eeprom_table_record *bps; + /* point to reserved bo array */ + struct amdgpu_bo **bps_bo; + /* the count of entries */ + int count; + /* last reserved entry's index + 1 */ + int last_reserved; +}; + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -272,6 +291,8 @@ struct amdgpu_virt { uint32_t reg_access_mode; int req_init_data_ver; bool tdr_debug; + struct amdgpu_virt_ras_err_handler_data *virt_eh_data; + bool ras_init_done; }; #define amdgpu_sriov_enabled(adev) \ @@ -323,6 +344,7 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, unsigned int key, unsigned int chksum); +void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); void amdgpu_detect_virtualization(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 920a0553e172..8bc2253939be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2208,7 +2208,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo))) + (bo && offset + size > amdgpu_bo_size(bo)) || + (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2273,7 +2274,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo))) + (bo && offset + size > amdgpu_bo_size(bo)) || + (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; /* Allocate all the needed memory */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c8e68d7890bf..770025a5e500 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -324,6 +324,7 @@ struct amdgpu_vm { struct amdgpu_vm_manager { /* Handling of VMIDs */ struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS]; + unsigned int first_kfd_vmid; /* Handling of VM fences */ u64 fence_context; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index e06412f5066e..189d46ea603b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -208,7 +208,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, int r; /* Wait for PD/PT moves to be completed */ - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false); + r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index ff70ae00e602..7574be6cd7a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -422,8 +422,6 @@ error: * amdgpu_vram_mgr_del - free ranges * * @man: TTM memory type manager - * @tbo: TTM BO we need this range for - * @place: placement flags and restrictions * @mem: TTM memory object * * Free the allocated VRAM again. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 91837a991319..e3a3755cb999 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -144,11 +144,6 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = { SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)}, }; -void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) -{ - return &hive->device_list; -} - /** * DOC: AMDGPU XGMI Support * diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c new file mode 100644 index 000000000000..939eca63b094 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -0,0 +1,101 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "athub_v2_1.h" + +#include "athub/athub_2_1_0_offset.h" +#include "athub/athub_2_1_0_sh_mask.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +static void +athub_v2_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void +athub_v2_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if(def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v2_1_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + athub_v2_1_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_v2_1_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h new file mode 100644 index 000000000000..5e6824c0f591 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __ATHUB_V2_1_H__ +#define __ATHUB_V2_1_H__ + +int athub_v2_1_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fe306d0f73f7..c2c67ab68a43 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1326,6 +1326,14 @@ cik_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; + if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 5f3f6ebfb387..55982c0064b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -54,8 +54,6 @@ #define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 #define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 -#define AMDGPU_NUM_OF_VMIDS 8 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index ec61532e2f83..01ce52266966 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2342,7 +2342,7 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) /* XXX need to determine what plls are available on each DCE11 part */ pll_in_use = amdgpu_pll_get_use_mask(crtc); - if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) { + if (adev->flags & AMD_IS_APU) { if (!(pll_in_use & (1 << ATOM_PPLL1))) return ATOM_PPLL1; if (!(pll_in_use & (1 << ATOM_PPLL0))) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index a7b8292cefee..1ab261836983 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -560,7 +560,7 @@ static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, uint64_t config) { - uint32_t lo_base_addr, hi_base_addr; + uint32_t lo_base_addr = 0, hi_base_addr = 0; df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, &hi_base_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index bd5dd4f64311..65997ffaed45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -55,6 +55,7 @@ * 2. Async ring */ #define GFX10_NUM_GFX_RINGS_NV1X 1 +#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1 #define GFX10_MEC_HPD_SIZE 2048 #define F32_CE_PROGRAM_RAM_SIZE 65536 @@ -62,6 +63,54 @@ #define mmCGTT_GS_NGG_CLK_CTRL 0x5087 #define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SPI_RA0_CLK_CTRL 0x507a +#define mmCGTT_SPI_RA0_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SPI_RA1_CLK_CTRL 0x507b +#define mmCGTT_SPI_RA1_CLK_CTRL_BASE_IDX 1 + +#define GB_ADDR_CONFIG__NUM_PKRS__SHIFT 0x8 +#define GB_ADDR_CONFIG__NUM_PKRS_MASK 0x00000700L + +#define mmCP_MEC_CNTL_Sienna_Cichlid 0x0f55 +#define mmCP_MEC_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmRLC_SAFE_MODE_Sienna_Cichlid 0x4ca0 +#define mmRLC_SAFE_MODE_Sienna_Cichlid_BASE_IDX 1 +#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1 +#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1 +#define mmSPI_CONFIG_CNTL_Sienna_Cichlid 0x11ec +#define mmSPI_CONFIG_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid 0x0fc1 +#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0 +#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid 0x0fc2 +#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0 +#define mmVGT_TF_RING_SIZE_Sienna_Cichlid 0x0fc3 +#define mmVGT_TF_RING_SIZE_Sienna_Cichlid_BASE_IDX 0 +#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid 0x0fc4 +#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid_BASE_IDX 0 +#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid 0x0fc5 +#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid_BASE_IDX 0 +#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid 0x0fc6 +#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid_BASE_IDX 0 +#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid__SHIFT 0x1a +#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid_MASK 0x04000000L +#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid_MASK 0x00000FFCL +#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid__SHIFT 0x2 +#define CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK 0x00000FFCL +#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 +#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 + +#define mmCP_HYP_PFP_UCODE_ADDR 0x5814 +#define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1 +#define mmCP_HYP_PFP_UCODE_DATA 0x5815 +#define mmCP_HYP_PFP_UCODE_DATA_BASE_IDX 1 +#define mmCP_HYP_CE_UCODE_ADDR 0x5818 +#define mmCP_HYP_CE_UCODE_ADDR_BASE_IDX 1 +#define mmCP_HYP_CE_UCODE_DATA 0x5819 +#define mmCP_HYP_CE_UCODE_DATA_BASE_IDX 1 +#define mmCP_HYP_ME_UCODE_ADDR 0x5816 +#define mmCP_HYP_ME_UCODE_ADDR_BASE_IDX 1 +#define mmCP_HYP_ME_UCODE_DATA 0x5817 +#define mmCP_HYP_ME_UCODE_DATA_BASE_IDX 1 MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); @@ -89,6 +138,20 @@ MODULE_FIRMWARE("amdgpu/navi12_mec.bin"); MODULE_FIRMWARE("amdgpu/navi12_mec2.bin"); MODULE_FIRMWARE("amdgpu/navi12_rlc.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_ce.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_pfp.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_me.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -3013,6 +3076,93 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000) }; +static const struct soc15_reg_golden golden_settings_gc_10_3[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x10f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = +{ + /* Pending on emulation bring up */ +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_START_PHASE, 0x000000ff, 0x00000004), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3193,6 +3343,20 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_rlc_spm_10_1_2_nv12, (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12)); break; + case CHIP_SIENNA_CICHLID: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_sienna_cichlid, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid)); + break; + case CHIP_NAVY_FLOUNDER: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_2, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_2)); + break; + default: break; } @@ -3373,6 +3537,10 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) (adev->gfx.mec_feature_version >= 27)) adev->gfx.cp_fw_write_wait = true; break; + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + adev->gfx.cp_fw_write_wait = true; + break; default: break; } @@ -3463,6 +3631,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; + case CHIP_SIENNA_CICHLID: + chip_name = "sienna_cichlid"; + break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: BUG(); } @@ -3992,6 +4166,17 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); break; + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + adev->gfx.config.gb_addr_config_fields.num_pkrs = + 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); + break; default: BUG(); break; @@ -4104,6 +4289,15 @@ static int gfx_v10_0_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; break; + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 4; + break; default: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; @@ -4273,12 +4467,6 @@ static int gfx_v10_0_sw_fini(void *handle) return 0; } - -static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev) -{ - /* TODO */ -} - static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) { @@ -4355,6 +4543,12 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade uint32_t num_packer_per_sc; uint32_t pa_sc_tile_steering_override; + /* for ASICs that integrates GFX v10.3 + * pa_sc_tile_steering_override should be set to 0 */ + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + return 0; + /* init num_sc */ num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se * adev->gfx.config.num_sc_per_sh; @@ -4380,8 +4574,6 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade } #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) { @@ -4397,7 +4589,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -4408,7 +4600,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); @@ -4523,8 +4715,6 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); - gfx_v10_0_tiling_mode_table_init(adev); - gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); gfx_v10_0_get_tcc_info(adev); @@ -4558,7 +4748,12 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + u32 tmp; + + if (amdgpu_sriov_vf(adev)) + return; + + tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); @@ -4577,12 +4772,19 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, - adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, - adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); - + if (adev->asic_type == CHIP_NAVI12) { + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + } else { + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + } return 0; } @@ -5190,7 +5392,12 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); - WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); + + if (adev->asic_type == CHIP_NAVI12) { + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); + } else { + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + } for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) @@ -5271,6 +5478,14 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, 0); + + for (i = 0; i < pfp_hdr->jt_size; i++) + WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_DATA, + le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); + + WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); + return 0; } @@ -5340,6 +5555,14 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, 0); + + for (i = 0; i < ce_hdr->jt_size; i++) + WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_DATA, + le32_to_cpup(fw_data + ce_hdr->jt_offset + i)); + + WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); + return 0; } @@ -5409,6 +5632,14 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, 0); + + for (i = 0; i < me_hdr->jt_size; i++) + WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_DATA, + le32_to_cpup(fw_data + me_hdr->jt_offset + i)); + + WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); + return 0; } @@ -5549,12 +5780,25 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, DOORBELL_EN, 0); } WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); - tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, - DOORBELL_RANGE_LOWER, ring->doorbell_index); - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK); + break; + default: + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, ring->doorbell_index); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, - CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + break; + } } static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) @@ -5669,11 +5913,29 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) { if (enable) { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); + break; + default: + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + break; + } } else { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, - (CP_MEC_CNTL__MEC_ME1_HALT_MASK | - CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + break; + default: + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + break; + } adev->gfx.kiq.ring.sched.ready = false; } udelay(50); @@ -5755,12 +6017,25 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* tell RLC which is KIQ queue */ - tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); - tmp &= 0xffffff00; - tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); + break; + default: + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + break; + } } static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) @@ -6446,18 +6721,34 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) /* check if mmVGT_ESGS_RING_SIZE_UMD * has been remapped to mmVGT_ESGS_RING_SIZE */ - data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); - - WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); - - WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid); + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0); + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); + + if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD , data); + return true; + } else { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data); + return false; + } + break; + default: + data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); - if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) { - WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); - return true; - } else { - WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); - return false; + if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); + return true; + } else { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); + return false; + } + break; } } @@ -6469,59 +6760,120 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) * index will auto-inc after each data writting */ WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); - /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ - data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << - GRBM_CAM_DATA__CAM_ADDR__SHIFT) | - (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) << - GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); - - /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */ - data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) << - GRBM_CAM_DATA__CAM_ADDR__SHIFT) | - (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) << - GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); - - /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */ - data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) << - GRBM_CAM_DATA__CAM_ADDR__SHIFT) | - (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) << - GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); - - /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */ - data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) << - GRBM_CAM_DATA__CAM_ADDR__SHIFT) | - (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) << - GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); - - /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */ - data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) << - GRBM_CAM_DATA__CAM_ADDR__SHIFT) | - (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) << - GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); - - /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */ - data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) << - GRBM_CAM_DATA__CAM_ADDR__SHIFT) | - (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) << - GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); - WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_Sienna_Cichlid) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_Sienna_Cichlid) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_Sienna_Cichlid) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */ + data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_Sienna_Cichlid) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + break; + default: + /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */ + data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + break; + } - /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */ - data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) << - GRBM_CAM_DATA__CAM_ADDR__SHIFT) | - (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) << - GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); } @@ -6602,6 +6954,7 @@ static int gfx_v10_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -6616,6 +6969,11 @@ static int gfx_v10_0_hw_fini(void *handle) DRM_ERROR("KCQ disable failed\n"); if (amdgpu_sriov_vf(adev)) { gfx_v10_0_cp_gfx_enable(adev, false); + /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + return 0; } gfx_v10_0_cp_enable(adev, false); @@ -6693,10 +7051,23 @@ static int gfx_v10_0_soft_reset(void *handle) /* GRBM_STATUS2 */ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); - if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_RLC, - 1); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, + SOFT_RESET_RLC, + 1); + break; + default: + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, + SOFT_RESET_RLC, + 1); + break; + } if (grbm_soft_reset) { /* stop the rlc */ @@ -6774,7 +7145,19 @@ static int gfx_v10_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; + break; + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; + break; + default: + break; + } adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; @@ -6819,13 +7202,31 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) data = RLC_SAFE_MODE__CMD_MASK; data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); - /* wait for RLC_SAFE_MODE */ - for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) - break; - udelay(1); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid), + RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } + break; + default: + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), + RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } + break; } } @@ -6834,7 +7235,15 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) uint32_t data; data = RLC_SAFE_MODE__CMD_MASK; - WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); + break; + default: + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); + break; + } } static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -7118,6 +7527,9 @@ static int gfx_v10_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: + case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: amdgpu_gfx_off_ctrl(adev, enable); break; default: @@ -7138,6 +7550,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -7153,12 +7567,12 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) int data; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -7167,17 +7581,17 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; @@ -7355,14 +7769,9 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; - /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */ - if (adev->pdev->device == 0x50) - int_sel = false; - /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | @@ -7513,12 +7922,17 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); return -ENOMEM; + } /* assert preemption condition */ amdgpu_ring_set_preempt_cond_exec(ring, false); @@ -7529,6 +7943,8 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) ++ring->trail_seq); amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + /* poll the trailing fence */ for (i = 0; i < adev->usec_timeout; i++) { if (ring->trail_seq == @@ -8236,6 +8652,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 0cc011f9190d..04eaf3a8fddb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1850,8 +1850,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) * */ #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -1869,7 +1867,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -1882,7 +1880,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); WREG32(amdgpu_gds_reg_offset[i].gws, 0); @@ -3039,7 +3037,7 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, mqd->cp_hqd_active = 1; } -int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) { uint32_t tmp; uint32_t mqd_reg; @@ -5209,7 +5207,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->lds_size = 64; } -const struct amdgpu_ip_block_version gfx_v7_0_ip_block = +static const struct amdgpu_ip_block_version gfx_v7_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 7, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1d4128227ffd..33f1c4a46ebe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3686,8 +3686,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) * */ #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -3710,7 +3708,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__PRIVATE_ATC_MASK; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { vi_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -3723,7 +3721,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); WREG32(amdgpu_gds_reg_offset[i].gws, 0); @@ -4589,7 +4587,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) return 0; } -int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, +static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd) { uint32_t mqd_reg; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 711e9dd19705..cb9d60a4e05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -722,7 +722,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, }; -void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) +static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) { static void *scratch_reg0; static void *scratch_reg1; @@ -1890,7 +1890,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return r; } - if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { + if (adev->flags & AMD_IS_APU) { /* TODO: double check the cp_table_size for RV */ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ r = amdgpu_gfx_rlc_init_cpt(adev); @@ -1960,7 +1960,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) fw_data = (const __le32 *) (adev->gfx.mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, @@ -2384,7 +2384,7 @@ static int gfx_v9_0_sw_fini(void *handle) gfx_v9_0_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { + if (adev->flags & AMD_IS_APU) { amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, &adev->gfx.rlc.cp_table_gpu_addr, (void **)&adev->gfx.rlc.cp_table_ptr); @@ -2463,8 +2463,6 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) } #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -2484,7 +2482,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); @@ -2495,7 +2493,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); @@ -2856,8 +2854,8 @@ static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); - - pwr_10_0_gfxip_control_over_cgpg(adev, true); + if (adev->asic_type != CHIP_RENOIR) + pwr_10_0_gfxip_control_over_cgpg(adev, true); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 6682b843bafe..529e46386a50 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -38,15 +38,15 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ - int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -207,6 +207,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -245,25 +246,31 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -299,12 +306,14 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); @@ -360,7 +369,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, CRASH_ON_NO_RETRY_FAULT, 1); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, CRASH_ON_RETRY_FAULT, 1); - } + } WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } @@ -386,4 +395,11 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index cc866c367939..394e6f56948a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -49,15 +49,15 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ - int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -181,6 +181,10 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp); + + tmp = mmGCVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp); } static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev) @@ -214,6 +218,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; int i; uint32_t tmp; @@ -243,25 +248,31 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -283,12 +294,14 @@ int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); @@ -369,4 +382,12 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ - + mmGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c new file mode 100644 index 000000000000..fa0bca3e1f73 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -0,0 +1,428 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "gfxhub_v2_1.h" + +#include "gc/gc_10_3_0_offset.h" +#include "gc/gc_10_3_0_sh_mask.h" +#include "gc/gc_10_3_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE); + + base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; +} + +void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); +} + +static void gfxhub_v2_1_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v2_1_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + + /* Disable AGP. */ + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + + +static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v2_1_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp); + + tmp = mmGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp); + + tmp = mmGCVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp); + + tmp = mmGCVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp); +} + +static void gfxhub_v2_1_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp); +} + +static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + +} + +static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. */ + gfxhub_v2_1_init_gart_aperture_regs(adev); + gfxhub_v2_1_init_system_aperture_regs(adev); + gfxhub_v2_1_init_tlb_regs(adev); + gfxhub_v2_1_init_cache_regs(adev); + + gfxhub_v2_1_enable_system_domain(adev); + gfxhub_v2_1_disable_identity_aperture(adev); + gfxhub_v2_1_setup_vmid_config(adev); + gfxhub_v2_1_program_invalidation(adev); + + return 0; +} + +void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0); +} + +/** + * gfxhub_v2_1_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +void gfxhub_v2_1_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ - + mmGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; +} + +int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) +{ + u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL); + u32 max_region = + REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + u32 max_num_physical_nodes = 0; + u32 max_physical_node_id = 0; + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + max_num_physical_nodes = 4; + max_physical_node_id = 3; + break; + default: + return -EINVAL; + } + + /* PF_MAX_REGION=0 means xgmi is disabled */ + if (max_region) { + adev->gmc.xgmi.num_physical_nodes = max_region + 1; + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) + return -EINVAL; + + adev->gmc.xgmi.physical_node_id = + REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) + return -EINVAL; + + adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( + RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE), + GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h new file mode 100644 index 000000000000..3452a4e9a3da --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h @@ -0,0 +1,39 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V2_1_H__ +#define __GFXHUB_V2_1_H__ + +u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev); +int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev); +void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev); +void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void gfxhub_v2_1_init(struct amdgpu_device *adev); +u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev); +void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); + +int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index ba2b7ac0c02d..ec90c62078d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -45,10 +45,10 @@ #include "nbio_v2_3.h" #include "gfxhub_v2_0.h" +#include "gfxhub_v2_1.h" #include "mmhub_v2_0.h" #include "athub_v2_0.h" -/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/ -#define AMDGPU_NUM_OF_VMIDS 8 +#include "athub_v2_1.h" #if 0 static const struct soc15_reg_golden golden_settings_navi10_hdp[] = @@ -86,7 +86,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -95,7 +95,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -105,7 +105,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -114,7 +114,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -283,7 +283,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -293,18 +294,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ if (vmhub == AMDGPU_GFXHUB_0) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng); tmp &= 1 << vmid; if (tmp) break; @@ -318,7 +320,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); spin_unlock(&adev->gmc.invalidate_lock); @@ -348,6 +351,24 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->nbio.funcs->hdp_flush(adev, NULL); + /* For SRIOV run time, driver shouldn't access the register through MMIO + * Directly use kiq to do the vm invalidation instead + */ + if (adev->gfx.kiq.ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + const unsigned eng = 17; + u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; + + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid); + return; + } + mutex_lock(&adev->mman.gtt_window_lock); if (vmhub == AMDGPU_MMHUB_0) { @@ -484,16 +505,21 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, - hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), lower_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ @@ -502,7 +528,8 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, * add semaphore release after invalidation, * write with 0 means semaphore release */ - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); return pd_addr; } @@ -666,13 +693,28 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - base = gfxhub_v2_0_get_fb_location(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + base = gfxhub_v2_1_get_fb_location(adev); + else + base = gfxhub_v2_0_get_fb_location(adev); + + /* add the xgmi offset of the physical node */ + base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc); /* base offset of vram pages */ - adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev); + else + adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); + + /* add the xgmi offset of the physical node */ + adev->vm_manager.vram_base_offset += + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; } /** @@ -712,6 +754,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -780,24 +824,34 @@ static int gmc_v10_0_sw_init(void *handle) int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfxhub_v2_0_init(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + gfxhub_v2_1_init(adev); + else + gfxhub_v2_0_init(adev); + mmhub_v2_0_init(adev); spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - if (!amdgpu_emu_mode) - adev->gmc.vram_width = vram_width; - else + if (adev->asic_type == CHIP_SIENNA_CICHLID && amdgpu_emu_mode == 1) { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6; adev->gmc.vram_width = 1 * 128; /* numchan * chansize */ + } else { + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + adev->gmc.vram_width = vram_width; + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + } - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -836,6 +890,12 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + if (adev->gmc.xgmi.supported) { + r = gfxhub_v2_1_get_xgmi_info(adev); + if (r) + return r; + } + r = gmc_v10_0_mc_init(adev); if (r) return r; @@ -857,8 +917,7 @@ static int gmc_v10_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); @@ -896,6 +955,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: break; default: break; @@ -922,7 +983,11 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - r = gfxhub_v2_0_gart_enable(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + r = gfxhub_v2_1_gart_enable(adev); + else + r = gfxhub_v2_0_gart_enable(adev); if (r) return r; @@ -943,7 +1008,11 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; - gfxhub_v2_0_set_fault_enable_default(adev, value); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + gfxhub_v2_1_set_fault_enable_default(adev, value); + else + gfxhub_v2_0_set_fault_enable_default(adev, value); mmhub_v2_0_set_fault_enable_default(adev, value); gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); @@ -981,7 +1050,11 @@ static int gmc_v10_0_hw_init(void *handle) */ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) { - gfxhub_v2_0_gart_disable(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + gfxhub_v2_1_gart_disable(adev); + else + gfxhub_v2_0_gart_disable(adev); mmhub_v2_0_gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } @@ -1052,7 +1125,11 @@ static int gmc_v10_0_set_clockgating_state(void *handle, if (r) return r; - return athub_v2_0_set_clockgating(adev, state); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + return athub_v2_1_set_clockgating(adev, state); + else + return athub_v2_0_set_clockgating(adev, state); } static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) @@ -1061,7 +1138,11 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) mmhub_v2_0_get_clockgating(adev, flags); - athub_v2_0_get_clockgating(adev, flags); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + athub_v2_1_get_clockgating(adev, flags); + else + athub_v2_0_get_clockgating(adev, flags); } static int gmc_v10_0_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a75e472b4a81..538e7ee35cdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -878,7 +878,7 @@ static int gmc_v6_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index bcd4baecfe11..e18296dc1386 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1052,7 +1052,7 @@ static int gmc_v7_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 26976e50e2a2..a9e722b8a458 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1177,7 +1177,7 @@ static int gmc_v8_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 11e93a82131d..6e4f3ff4810f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -68,9 +68,6 @@ #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L -/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ -#define AMDGPU_NUM_OF_VMIDS 8 - static const u32 golden_settings_vega10_hdp[] = { 0xf64, 0x0fffffff, 0x00000000, @@ -505,11 +502,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && !adev->in_gpu_reset) { - uint32_t req = hub->vm_inv_eng0_req + eng; - uint32_t ack = hub->vm_inv_eng0_ack + eng; + uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid); return; } @@ -526,7 +523,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -537,7 +535,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to @@ -545,10 +544,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * GRBM interface. */ if (vmhub == AMDGPU_GFXHUB_0) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + RREG32_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng); if (tmp & (1 << vmid)) break; udelay(1); @@ -564,7 +565,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); spin_unlock(&adev->gmc.invalidate_lock); @@ -679,16 +681,21 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, - hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), lower_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ @@ -697,7 +704,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, * add semaphore release after invalidation, * write with 0 means semaphore release */ - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); return pd_addr; } @@ -1248,12 +1256,15 @@ static int gmc_v9_0_sw_init(void *handle) /* * number of VMs * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 + * amdgpu graphics/compute will use VMIDs 1..n-1 + * amdkfd will use VMIDs n..15 + * + * The first KFD VMID is 8 for GPUs with graphics, 3 for + * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs + * for video processing. */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = + adev->asic_type == CHIP_ARCTURUS ? 3 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index b10c95cad9a2..bc300283b6ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "vcn_v1_0.h" +#include "jpeg_v1_0.h" #include "vcn/vcn_1_0_offset.h" #include "vcn/vcn_1_0_sh_mask.h" @@ -376,7 +377,7 @@ static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index e67d09cb1b03..94caf5204c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -26,6 +26,7 @@ #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "vcn/vcn_2_0_0_offset.h" #include "vcn/vcn_2_0_0_sh_mask.h" @@ -229,9 +230,9 @@ static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev) data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); - SOC15_WAIT_ON_RREG(JPEG, 0, + r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); @@ -260,9 +261,9 @@ static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev) data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); - SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS, + r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS, (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); if (r) { DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); @@ -628,7 +629,7 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); @@ -676,10 +677,10 @@ static bool jpeg_v2_0_is_idle(void *handle) static int jpeg_v2_0_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret = 0; + int ret; - SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret); + ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 713c32560445..7a51c615d22d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -449,20 +449,20 @@ static bool jpeg_v2_5_is_idle(void *handle) static int jpeg_v2_5_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, ret = 0; + int i, ret; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS, + ret = SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret); + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); if (ret) return ret; } - return ret; + return 0; } static int jpeg_v2_5_set_clockgating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c new file mode 100644 index 000000000000..c41e5590a701 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -0,0 +1,612 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v2_0.h" + +#include "vcn/vcn_3_0_0_offset.h" +#include "vcn/vcn_3_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v3_0_set_powergating_state(void *handle, + enum amd_powergating_state state); + +/** + * jpeg_v3_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v3_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); + + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + return -ENOENT; + + adev->jpeg.num_jpeg_inst = 1; + + jpeg_v3_0_set_dec_ring_funcs(adev); + jpeg_v3_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v3_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v3_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = &adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v3_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v3_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v3_0_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v3_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + int r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + DRM_INFO("JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v3_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v3_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + + ring = &adev->jpeg.inst->ring_dec; + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) + jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + return 0; +} + +/** + * jpeg_v3_0_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v3_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v3_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v3_0_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v3_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v3_0_hw_init(adev); + + return r; +} + +static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device* adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data); + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL); + data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK + | JPEG_CGC_CTRL__JMCIF_MODE_MASK + | JPEG_CGC_CTRL__JRBBM_MODE_MASK); + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data); +} + +static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device* adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data); +} + +static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + uint32_t data = 0; + int r = 0; + + data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); + + r = SOC15_WAIT_ON_RREG(JPEG, 0, + mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + if (r) { + DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); + return r; + } + } + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + return 0; +} + +static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device* adev) +{ + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + uint32_t data = 0; + int r = 0; + + data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); + + r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS, + (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + if (r) { + DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); + return r; + } + } + + return 0; +} + +/** + * jpeg_v3_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v3_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + /* disable power gating */ + r = jpeg_v3_0_disable_static_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v3_0_disable_clock_gating(adev); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, 0, mmJPEG_ENC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v3_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v3_0_stop(struct amdgpu_device *adev) +{ + int r; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v3_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v3_0_enable_static_power_gating(adev); + if (r) + return r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v3_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v3_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v3_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v3_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 1; + + ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + + return ret; +} + +static int jpeg_v3_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + if (ret) + return ret; + + return ret; +} + +static int jpeg_v3_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + if (!jpeg_v3_0_is_idle(handle)) + return -EBUSY; + jpeg_v3_0_enable_clock_gating(adev); + } else { + jpeg_v3_0_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v3_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if(state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v3_0_stop(adev); + else + ret = jpeg_v3_0_start(adev); + + if(!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v3_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { + .name = "jpeg_v3_0", + .early_init = jpeg_v3_0_early_init, + .late_init = NULL, + .sw_init = jpeg_v3_0_sw_init, + .sw_fini = jpeg_v3_0_sw_fini, + .hw_init = jpeg_v3_0_hw_init, + .hw_fini = jpeg_v3_0_hw_fini, + .suspend = jpeg_v3_0_suspend, + .resume = jpeg_v3_0_resume, + .is_idle = jpeg_v3_0_is_idle, + .wait_for_idle = jpeg_v3_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v3_0_set_clockgating_state, + .set_powergating_state = jpeg_v3_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = jpeg_v3_0_dec_ring_get_rptr, + .get_wptr = jpeg_v3_0_dec_ring_get_wptr, + .set_wptr = jpeg_v3_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v3_0_dec_ring_emit_vm_flush */ + 18 + 18 + /* jpeg_v3_0_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v3_0_dec_ring_emit_ib */ + .emit_ib = jpeg_v2_0_dec_ring_emit_ib, + .emit_fence = jpeg_v2_0_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v2_0_dec_ring_nop, + .insert_start = jpeg_v2_0_dec_ring_insert_start, + .insert_end = jpeg_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec.funcs = &jpeg_v3_0_dec_ring_vm_funcs; + DRM_INFO("JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = { + .set = jpeg_v3_0_set_interrupt_state, + .process = jpeg_v3_0_process_interrupt, +}; + +static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v3_0_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v3_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &jpeg_v3_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h new file mode 100644 index 000000000000..ce775a0c742f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V3_0_H__ +#define __JPEG_V3_0_H__ + +extern const struct amdgpu_ip_block_version jpeg_v3_0_ip_block; + +#endif /* __JPEG_V3_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h b/drivers/gpu/drm/amd/amdgpu/mes_api_def.h new file mode 100644 index 000000000000..3f4fca5fd1da --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_api_def.h @@ -0,0 +1,443 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MES_API_DEF_H__ +#define __MES_API_DEF_H__ + +#pragma pack(push, 4) + +#define MES_API_VERSION 1 + +/* Driver submits one API(cmd) as a single Frame and this command size is same + * for all API to ease the debugging and parsing of ring buffer. + */ +enum { API_FRAME_SIZE_IN_DWORDS = 64 }; + +/* To avoid command in scheduler context to be overwritten whenenver mutilple + * interrupts come in, this creates another queue. + */ +enum { API_NUMBER_OF_COMMAND_MAX = 32 }; + +enum MES_API_TYPE { + MES_API_TYPE_SCHEDULER = 1, + MES_API_TYPE_MAX +}; + +enum MES_SCH_API_OPCODE { + MES_SCH_API_SET_HW_RSRC = 0, + MES_SCH_API_SET_SCHEDULING_CONFIG = 1, /* agreegated db, quantums, etc */ + MES_SCH_API_ADD_QUEUE = 2, + MES_SCH_API_REMOVE_QUEUE = 3, + MES_SCH_API_PERFORM_YIELD = 4, + MES_SCH_API_SET_GANG_PRIORITY_LEVEL = 5, + MES_SCH_API_SUSPEND = 6, + MES_SCH_API_RESUME = 7, + MES_SCH_API_RESET = 8, + MES_SCH_API_SET_LOG_BUFFER = 9, + MES_SCH_API_CHANGE_GANG_PRORITY = 10, + MES_SCH_API_QUERY_SCHEDULER_STATUS = 11, + MES_SCH_API_PROGRAM_GDS = 12, + MES_SCH_API_SET_DEBUG_VMID = 13, + MES_SCH_API_MISC = 14, + MES_SCH_API_MAX = 0xFF +}; + +union MES_API_HEADER { + struct { + uint32_t type : 4; /* 0 - Invalid; 1 - Scheduling; 2 - TBD */ + uint32_t opcode : 8; + uint32_t dwsize : 8; /* including header */ + uint32_t reserved : 12; + }; + + uint32_t u32All; +}; + +enum MES_AMD_PRIORITY_LEVEL { + AMD_PRIORITY_LEVEL_LOW = 0, + AMD_PRIORITY_LEVEL_NORMAL = 1, + AMD_PRIORITY_LEVEL_MEDIUM = 2, + AMD_PRIORITY_LEVEL_HIGH = 3, + AMD_PRIORITY_LEVEL_REALTIME = 4, + AMD_PRIORITY_NUM_LEVELS +}; + +enum MES_QUEUE_TYPE { + MES_QUEUE_TYPE_GFX, + MES_QUEUE_TYPE_COMPUTE, + MES_QUEUE_TYPE_SDMA, + MES_QUEUE_TYPE_MAX, +}; + +struct MES_API_STATUS { + uint64_t api_completion_fence_addr; + uint64_t api_completion_fence_value; +}; + +enum { MAX_COMPUTE_PIPES = 8 }; +enum { MAX_GFX_PIPES = 2 }; +enum { MAX_SDMA_PIPES = 2 }; + +enum { MAX_COMPUTE_HQD_PER_PIPE = 8 }; +enum { MAX_GFX_HQD_PER_PIPE = 8 }; +enum { MAX_SDMA_HQD_PER_PIPE = 10 }; + +enum { MAX_QUEUES_IN_A_GANG = 8 }; + +enum VM_HUB_TYPE { + VM_HUB_TYPE_GC = 0, + VM_HUB_TYPE_MM = 1, + VM_HUB_TYPE_MAX, +}; + +enum { VMID_INVALID = 0xffff }; + +enum { MAX_VMID_GCHUB = 16 }; +enum { MAX_VMID_MMHUB = 16 }; + +enum MES_LOG_OPERATION { + MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0 +}; + +enum MES_LOG_CONTEXT_STATE { + MES_LOG_CONTEXT_STATE_IDLE = 0, + MES_LOG_CONTEXT_STATE_RUNNING = 1, + MES_LOG_CONTEXT_STATE_READY = 2, + MES_LOG_CONTEXT_STATE_READY_STANDBY = 3, +}; + +struct MES_LOG_CONTEXT_STATE_CHANGE { + void *h_context; + enum MES_LOG_CONTEXT_STATE new_context_state; +}; + +struct MES_LOG_ENTRY_HEADER { + uint32_t first_free_entry_index; + uint32_t wraparound_count; + uint64_t number_of_entries; + uint64_t reserved[2]; +}; + +struct MES_LOG_ENTRY_DATA { + uint64_t gpu_time_stamp; + uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */ + uint32_t reserved_operation_type_bits; + union { + struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change; + uint64_t reserved_operation_data[2]; + }; +}; + +struct MES_LOG_BUFFER { + struct MES_LOG_ENTRY_HEADER header; + struct MES_LOG_ENTRY_DATA entries[1]; +}; + +union MESAPI_SET_HW_RESOURCES { + struct { + union MES_API_HEADER header; + uint32_t vmid_mask_mmhub; + uint32_t vmid_mask_gfxhub; + uint32_t gds_size; + uint32_t paging_vmid; + uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES]; + uint32_t gfx_hqd_mask[MAX_GFX_PIPES]; + uint32_t sdma_hqd_mask[MAX_SDMA_PIPES]; + uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS]; + uint64_t g_sch_ctx_gpu_mc_ptr; + uint64_t query_status_fence_gpu_mc_ptr; + struct MES_API_STATUS api_status; + union { + struct { + uint32_t disable_reset : 1; + uint32_t reserved : 31; + }; + uint32_t uint32_t_all; + }; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__ADD_QUEUE { + struct { + union MES_API_HEADER header; + uint32_t process_id; + uint64_t page_table_base_addr; + uint64_t process_va_start; + uint64_t process_va_end; + uint64_t process_quantum; + uint64_t process_context_addr; + uint64_t gang_quantum; + uint64_t gang_context_addr; + uint32_t inprocess_gang_priority; + enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level; + uint32_t doorbell_offset; + uint64_t mqd_addr; + uint64_t wptr_addr; + enum MES_QUEUE_TYPE queue_type; + uint32_t gds_base; + uint32_t gds_size; + uint32_t gws_base; + uint32_t gws_size; + uint32_t oa_mask; + + struct { + uint32_t paging : 1; + uint32_t debug_vmid : 4; + uint32_t program_gds : 1; + uint32_t is_gang_suspended : 1; + uint32_t is_tmz_queue : 1; + uint32_t reserved : 24; + }; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__REMOVE_QUEUE { + struct { + union MES_API_HEADER header; + uint32_t doorbell_offset; + uint64_t gang_context_addr; + + struct { + uint32_t unmap_legacy_gfx_queue : 1; + uint32_t reserved : 31; + }; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__SET_SCHEDULING_CONFIG { + struct { + union MES_API_HEADER header; + /* Grace period when preempting another priority band for this + * priority band. The value for idle priority band is ignored, + * as it never preempts other bands. + */ + uint64_t grace_period_other_levels[AMD_PRIORITY_NUM_LEVELS]; + /* Default quantum for scheduling across processes within + * a priority band. + */ + uint64_t process_quantum_for_level[AMD_PRIORITY_NUM_LEVELS]; + /* Default grace period for processes that preempt each other + * within a priority band. + */ + uint64_t process_grace_period_same_level[AMD_PRIORITY_NUM_LEVELS]; + /* For normal level this field specifies the target GPU + * percentage in situations when it's starved by the high level. + * Valid values are between 0 and 50, with the default being 10. + */ + uint32_t normal_yield_percent; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__PERFORM_YIELD { + struct { + union MES_API_HEADER header; + uint32_t dummy; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__CHANGE_GANG_PRIORITY_LEVEL { + struct { + union MES_API_HEADER header; + uint32_t inprocess_gang_priority; + enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level; + uint64_t gang_quantum; + uint64_t gang_context_addr; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__SUSPEND { + struct { + union MES_API_HEADER header; + /* false - suspend all gangs; true - specific gang */ + struct { + uint32_t suspend_all_gangs : 1; + uint32_t reserved : 31; + }; + /* gang_context_addr is valid only if suspend_all = false */ + uint64_t gang_context_addr; + + uint64_t suspend_fence_addr; + uint32_t suspend_fence_value; + + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__RESUME { + struct { + union MES_API_HEADER header; + /* false - resume all gangs; true - specified gang */ + struct { + uint32_t resume_all_gangs : 1; + uint32_t reserved : 31; + }; + /* valid only if resume_all_gangs = false */ + uint64_t gang_context_addr; + + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__RESET { + struct { + union MES_API_HEADER header; + + struct { + uint32_t reset_queue : 1; + uint32_t reserved : 31; + }; + + uint64_t gang_context_addr; + uint32_t doorbell_offset; /* valid only if reset_queue = true */ + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__SET_LOGGING_BUFFER { + struct { + union MES_API_HEADER header; + /* There are separate log buffers for each queue type */ + enum MES_QUEUE_TYPE log_type; + /* Log buffer GPU Address */ + uint64_t logging_buffer_addr; + /* number of entries in the log buffer */ + uint32_t number_of_entries; + /* Entry index at which CPU interrupt needs to be signalled */ + uint32_t interrupt_entry; + + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__QUERY_MES_STATUS { + struct { + union MES_API_HEADER header; + bool mes_healthy; /* 0 - not healthy, 1 - healthy */ + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__PROGRAM_GDS { + struct { + union MES_API_HEADER header; + uint64_t process_context_addr; + uint32_t gds_base; + uint32_t gds_size; + uint32_t gws_base; + uint32_t gws_size; + uint32_t oa_mask; + struct MES_API_STATUS api_status; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +union MESAPI__SET_DEBUG_VMID { + struct { + union MES_API_HEADER header; + struct MES_API_STATUS api_status; + union { + struct { + uint32_t use_gds : 1; + uint32_t reserved : 31; + } flags; + uint32_t u32All; + }; + uint32_t reserved; + uint32_t debug_vmid; + uint64_t process_context_addr; + uint64_t page_table_base_addr; + uint64_t process_va_start; + uint64_t process_va_end; + uint32_t gds_base; + uint32_t gds_size; + uint32_t gws_base; + uint32_t gws_size; + uint32_t oa_mask; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +enum MESAPI_MISC_OPCODE { + MESAPI_MISC__MODIFY_REG, + MESAPI_MISC__MAX, +}; + +enum MODIFY_REG_SUBCODE { + MODIFY_REG__OVERWRITE, + MODIFY_REG__RMW_OR, + MODIFY_REG__RMW_AND, + MODIFY_REG__MAX, +}; + +enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 }; + +union MESAPI__MISC { + struct { + union MES_API_HEADER header; + enum MESAPI_MISC_OPCODE opcode; + struct MES_API_STATUS api_status; + + union { + struct { + enum MODIFY_REG_SUBCODE subcode; + uint32_t reg_offset; + uint32_t reg_value; + } modify_reg; + uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS]; + }; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + +#pragma pack(pop) +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 29fab7984855..4b746584a797 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -28,19 +28,165 @@ #include "nv.h" #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" +#include "v10_structs.h" +#include "mes_api_def.h" + +#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820 +#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1 MODULE_FIRMWARE("amdgpu/navi10_mes.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin"); + +static int mes_v10_1_hw_fini(void *handle); + +#define MES_EOP_SIZE 2048 + +static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); + } +} + +static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; +} + +static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring) +{ + u64 wptr; + + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *) + &ring->adev->wb.wb[ring->wptr_offs]); + else + BUG(); + return wptr; +} + +static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = { + .type = AMDGPU_RING_TYPE_MES, + .align_mask = 1, + .nop = 0, + .support_64bit_ptrs = true, + .get_rptr = mes_v10_1_ring_get_rptr, + .get_wptr = mes_v10_1_ring_get_wptr, + .set_wptr = mes_v10_1_ring_set_wptr, + .insert_nop = amdgpu_ring_insert_nop, +}; + +static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, + void *pkt, int size) +{ + int ndw = size / 4; + signed long r; + union MESAPI__ADD_QUEUE *x_pkt = pkt; + struct amdgpu_device *adev = mes->adev; + struct amdgpu_ring *ring = &mes->ring; + + BUG_ON(size % 4 != 0); + + if (amdgpu_ring_alloc(ring, ndw)) + return -ENOMEM; + + amdgpu_ring_write_multiple(ring, pkt, ndw); + amdgpu_ring_commit(ring); + + DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); + + r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, + adev->usec_timeout); + if (r < 1) { + DRM_ERROR("MES failed to response msg=%d\n", + x_pkt->header.opcode); + return -ETIMEDOUT; + } + + return 0; +} + +static int convert_to_mes_queue_type(int queue_type) +{ + if (queue_type == AMDGPU_RING_TYPE_GFX) + return MES_QUEUE_TYPE_GFX; + else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) + return MES_QUEUE_TYPE_COMPUTE; + else if (queue_type == AMDGPU_RING_TYPE_SDMA) + return MES_QUEUE_TYPE_SDMA; + else + BUG(); + return -1; +} static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { - return 0; + struct amdgpu_device *adev = mes->adev; + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.process_id = input->process_id; + mes_add_queue_pkt.page_table_base_addr = + input->page_table_base_addr - adev->gmc.vram_start; + mes_add_queue_pkt.process_va_start = input->process_va_start; + mes_add_queue_pkt.process_va_end = input->process_va_end; + mes_add_queue_pkt.process_quantum = input->process_quantum; + mes_add_queue_pkt.process_context_addr = input->process_context_addr; + mes_add_queue_pkt.gang_quantum = input->gang_quantum; + mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; + mes_add_queue_pkt.inprocess_gang_priority = + input->inprocess_gang_priority; + mes_add_queue_pkt.gang_global_priority_level = + input->gang_global_priority_level; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.paging = input->paging; + + mes_add_queue_pkt.api_status.api_completion_fence_addr = + mes->ring.fence_drv.gpu_addr; + mes_add_queue_pkt.api_status.api_completion_fence_value = + ++mes->ring.fence_drv.sync_seq; + + return mes_v10_1_submit_pkt_and_poll_completion(mes, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt)); } static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, struct mes_remove_queue_input *input) { - return 0; + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; + + mes_remove_queue_pkt.api_status.api_completion_fence_addr = + mes->ring.fence_drv.gpu_addr; + mes_remove_queue_pkt.api_status.api_completion_fence_value = + ++mes->ring.fence_drv.sync_seq; + + return mes_v10_1_submit_pkt_and_poll_completion(mes, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); } static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, @@ -55,6 +201,68 @@ static int mes_v10_1_resume_gang(struct amdgpu_mes *mes, return 0; } +static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes) +{ + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_status_pkt.api_status.api_completion_fence_addr = + mes->ring.fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = + ++mes->ring.fence_drv.sync_seq; + + return mes_v10_1_submit_pkt_and_poll_completion(mes, + &mes_status_pkt, sizeof(mes_status_pkt)); +} + +static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) +{ + int i; + struct amdgpu_device *adev = mes->adev; + union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; + + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.agreegated_doorbells[i] = + mes->agreegated_doorbells[i]; + + mes_set_hw_res_pkt.api_status.api_completion_fence_addr = + mes->ring.fence_drv.gpu_addr; + mes_set_hw_res_pkt.api_status.api_completion_fence_value = + ++mes->ring.fence_drv.sync_seq; + + return mes_v10_1_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt)); +} + static const struct amdgpu_mes_funcs mes_v10_1_funcs = { .add_hw_queue = mes_v10_1_add_hw_queue, .remove_hw_queue = mes_v10_1_remove_hw_queue, @@ -68,11 +276,15 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev) char fw_name[30]; int err; const struct mes_firmware_header_v1_0 *mes_hdr; + struct amdgpu_firmware_info *info; switch (adev->asic_type) { case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_SIENNA_CICHLID: + chip_name = "sienna_cichlid"; + break; default: BUG(); } @@ -100,6 +312,22 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev) le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MES; + info->fw = adev->mes.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes), + PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA; + info->fw = adev->mes.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes), + PAGE_SIZE); + } + return 0; } @@ -267,18 +495,389 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF); /* invalidate ICACHE */ - data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid); + break; + default: + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + break; + } data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); - WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data); + break; + default: + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + break; + } /* prime the ICACHE. */ - data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid); + break; + default: + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + break; + } data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); - WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data); + break; + default: + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + break; + } + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + return 0; +} + +static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev) +{ + int r; + u32 *eop; + + r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.eop_gpu_obj, + &adev->mes.eop_gpu_addr, + (void **)&eop); + if (r) { + dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); + return r; + } + + memset(eop, 0, adev->mes.eop_gpu_obj->tbo.mem.size); + + amdgpu_bo_kunmap(adev->mes.eop_gpu_obj); + amdgpu_bo_unreserve(adev->mes.eop_gpu_obj); + + return 0; +} + +static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); + if (r) { + dev_err(adev->dev, + "(%d) mes sch_ctx_offs wb alloc failed\n", r); + return r; + } + adev->mes.sch_ctx_gpu_addr = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); + adev->mes.sch_ctx_ptr = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + + r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", r); + return r; + } + adev->mes.query_status_fence_gpu_addr = + adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); + adev->mes.query_status_fence_ptr = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; + + return 0; +} + +static int mes_v10_1_mqd_init(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + + eop_base_addr = ring->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(MES_EOP_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + else + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + tmp = 0; + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + mqd->cp_hqd_persistent_state = tmp; + + /* set MIN_IB_AVAIL_SIZE */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + mqd->cp_hqd_ib_control = tmp; + + /* activate the queue */ + mqd->cp_hqd_active = 1; + return 0; +} + +static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring) +{ + struct v10_compute_mqd *mqd = ring->mqd_ptr; + struct amdgpu_device *adev = ring->adev; + uint32_t data = 0; + + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, 3, 0, 0, 0); + + /* set CP_HQD_VMID.VMID = 0. */ + data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID); + data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); + WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data); + + /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ + data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data); + + /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); + + /* set CP_MQD_CONTROL.VMID=0 */ + data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); + data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); + WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0); + + /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); + + /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* set CP_HQD_PQ_CONTROL */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); + + /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* set CP_HQD_PQ_DOORBELL_CONTROL */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ + WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); + + /* set CP_HQD_ACTIVE.ACTIVE=1 */ + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); +} + +#if 0 +static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + int r; + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues) + return -EINVAL; + + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + return r; + } + + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("kfq enable failed\n"); + kiq_ring->sched.ready = false; + } + return r; +} +#endif + +static int mes_v10_1_queue_init(struct amdgpu_device *adev) +{ + int r; + + r = mes_v10_1_mqd_init(&adev->mes.ring); + if (r) + return r; + +#if 0 + r = mes_v10_1_kiq_enable_queue(adev); + if (r) + return r; +#else + mes_v10_1_queue_init_register(&adev->mes.ring); +#endif + + return 0; +} + +static int mes_v10_1_ring_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r; + + ring = &adev->mes.ring; + + ring->funcs = &mes_v10_1_ring_funcs; + + ring->me = 3; + ring->pipe = 0; + ring->queue = 0; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = adev->doorbell_index.mes_ring << 1; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr; + ring->no_scheduler = true; + sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT); + if (r) + return r; + + return 0; +} + +static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev) +{ + int r, mqd_size = sizeof(struct v10_compute_mqd); + struct amdgpu_ring *ring = &adev->mes.ring; + + if (ring->mqd_obj) + return 0; + + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->mes.mqd_backup) + dev_warn(adev->dev, + "no memory to create MQD backup for ring %s\n", + ring->name); return 0; } @@ -288,10 +887,29 @@ static int mes_v10_1_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->mes.adev = adev; + adev->mes.funcs = &mes_v10_1_funcs; + r = mes_v10_1_init_microcode(adev); if (r) return r; + r = mes_v10_1_allocate_eop_buf(adev); + if (r) + return r; + + r = mes_v10_1_mqd_sw_init(adev); + if (r) + return r; + + r = mes_v10_1_ring_init(adev); + if (r) + return r; + + r = mes_v10_1_allocate_mem_slots(adev); + if (r) + return r; + return 0; } @@ -299,6 +917,19 @@ static int mes_v10_1_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + + kfree(adev->mes.mqd_backup); + + amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, + &adev->mes.ring.mqd_gpu_addr, + &adev->mes.ring.mqd_ptr); + + amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj, + &adev->mes.eop_gpu_addr, + NULL); + mes_v10_1_free_microcode(adev); return 0; @@ -315,14 +946,29 @@ static int mes_v10_1_hw_init(void *handle) DRM_ERROR("failed to MES fw, r=%d\n", r); return r; } - } else { - DRM_ERROR("only support direct fw loading on MES\n"); - return -EINVAL; } mes_v10_1_enable(adev, true); + r = mes_v10_1_queue_init(adev); + if (r) + goto failure; + + r = mes_v10_1_set_hw_resources(&adev->mes); + if (r) + goto failure; + + r = mes_v10_1_query_sched_status(&adev->mes); + if (r) { + DRM_ERROR("MES is busy\n"); + goto failure; + } + return 0; + +failure: + mes_v10_1_hw_fini(adev); + return r; } static int mes_v10_1_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 405767208a4d..dffcb93ecee5 100755 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -54,15 +54,15 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ - int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -230,6 +230,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -268,25 +269,31 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -333,12 +340,14 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); @@ -429,6 +438,12 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index fb3f228458e5..757fa8e83f5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -31,18 +31,23 @@ #include "soc15_common.h" +#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d +#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0 +#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070 +#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0 + void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */ - int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -164,6 +169,10 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp); + + tmp = mmMMVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); } static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) @@ -200,6 +209,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; int i; uint32_t tmp; @@ -230,25 +240,31 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -270,12 +286,14 @@ int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); @@ -356,6 +374,13 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ - + mmMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -363,9 +388,17 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad { uint32_t def, data, def1, data1; - def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); - - def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); + break; + default: + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + break; + } if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; @@ -388,11 +421,21 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } - if (def != data) - WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); - - if (def1 != data1) - WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); + if (def1 != data1) + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1); + break; + default: + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + if (def1 != data1) + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); + break; + } } static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, @@ -400,15 +443,32 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade { uint32_t def, data; - def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); + break; + default: + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + break; + } if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; else data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; - if (def != data) - WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + if (def != data) { + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); + break; + default: + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + break; + } + } } int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, @@ -421,6 +481,8 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: mmhub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, @@ -440,9 +502,17 @@ void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); - - data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); + break; + default: + data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + break; + } /* AMD_CG_SUPPORT_MC_MGCG */ if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) && diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c0e3efcb09bf..9979f54fef57 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -57,20 +57,16 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { - /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to - * mmVML2VC0_VM_CONTEXT1_* - */ - int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, lower_32_bits(value)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, upper_32_bits(value)); } @@ -301,6 +297,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; uint32_t tmp; int i; @@ -335,21 +332,25 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, - tmp); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, lower_32_bits(adev->vm_manager.max_pfn - 1)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, upper_32_bits(adev->vm_manager.max_pfn - 1)); } } @@ -357,16 +358,19 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, int hubid) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->eng_addr_distance, 0x1f); } } @@ -395,6 +399,7 @@ int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i, j; @@ -404,7 +409,7 @@ void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, j * MMHUB_INSTANCE_REGISTER_OFFSET + - i, 0); + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, @@ -534,6 +539,15 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + + hub[i]->ctx_distance = mmVML2VC0_VM_CONTEXT1_CNTL - + mmVML2VC0_VM_CONTEXT0_CNTL; + hub[i]->ctx_addr_distance = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub[i]->eng_distance = mmVML2VC0_VM_INVALIDATE_ENG1_REQ - + mmVML2VC0_VM_INVALIDATE_ENG0_REQ; + hub[i]->eng_addr_distance = mmVML2VC0_VM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h new file mode 100644 index 000000000000..3e4e858a6965 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h @@ -0,0 +1,130 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V3_0_H__ +#define __MMSCH_V3_0_H__ + +#include "amdgpu_vcn.h" + +#define MMSCH_VERSION_MAJOR 3 +#define MMSCH_VERSION_MINOR 0 +#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) + +enum mmsch_v3_0_command_type { + MMSCH_COMMAND__DIRECT_REG_WRITE = 0, + MMSCH_COMMAND__DIRECT_REG_POLLING = 2, + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3, + MMSCH_COMMAND__INDIRECT_REG_WRITE = 8, + MMSCH_COMMAND__END = 0xf +}; + +struct mmsch_v3_0_table_info { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v3_0_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES]; +}; + +struct mmsch_v3_0_cmd_direct_reg_header { + uint32_t reg_offset : 28; + uint32_t command_type : 4; +}; + +struct mmsch_v3_0_cmd_indirect_reg_header { + uint32_t reg_offset : 20; + uint32_t reg_idx_space : 8; + uint32_t command_type : 4; +}; + +struct mmsch_v3_0_cmd_direct_write { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t reg_value; +}; + +struct mmsch_v3_0_cmd_direct_read_modify_write { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t write_data; + uint32_t mask_value; +}; + +struct mmsch_v3_0_cmd_direct_polling { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t mask_value; + uint32_t wait_value; +}; + +struct mmsch_v3_0_cmd_end { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; +}; + +struct mmsch_v3_0_cmd_indirect_write { + struct mmsch_v3_0_cmd_indirect_reg_header cmd_header; + uint32_t reg_value; +}; + +#define MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_read_modify_write); \ + size_dw = size / 4; \ + direct_rd_mod_wt.cmd_header.reg_offset = reg; \ + direct_rd_mod_wt.mask_value = mask; \ + direct_rd_mod_wt.write_data = data; \ + memcpy((void *)table_loc, &direct_rd_mod_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_DIRECT_WT(reg, value) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_write); \ + size_dw = size / 4; \ + direct_wt.cmd_header.reg_offset = reg; \ + direct_wt.reg_value = value; \ + memcpy((void *)table_loc, &direct_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_polling); \ + size_dw = size / 4; \ + direct_poll.cmd_header.reg_offset = reg; \ + direct_poll.mask_value = mask; \ + direct_poll.wait_value = wait; \ + memcpy((void *)table_loc, &direct_poll, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_END() { \ + size = sizeof(struct mmsch_v3_0_cmd_end); \ + size_dw = size / 4; \ + memcpy((void *)table_loc, &end, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index f97857ed3c7e..350f1bf063c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -34,9 +34,54 @@ #define MAX_REARM_RETRY 10 +#define mmIH_CHICKEN_Sienna_Cichlid 0x018d +#define mmIH_CHICKEN_Sienna_Cichlid_BASE_IDX 0 + static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev); /** + * force_update_wptr_for_self_int - Force update the wptr for self interrupt + * + * @adev: amdgpu_device pointer + * @threshold: threshold to trigger the wptr reporting + * @timeout: timeout to trigger the wptr reporting + * @enabled: Enable/disable timeout flush mechanism + * + * threshold input range: 0 ~ 15, default 0, + * real_threshold = 2^threshold + * timeout input range: 0 ~ 20, default 8, + * real_timeout = (2^timeout) * 1024 / (socclk_freq) + * + * Force update wptr for self interrupt ( >= SIENNA_CICHLID). + */ +static void +force_update_wptr_for_self_int(struct amdgpu_device *adev, + u32 threshold, u32 timeout, bool enabled) +{ + u32 ih_cntl, ih_rb_cntl; + + if (adev->asic_type < CHIP_SIENNA_CICHLID) + return; + + ih_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_CNTL2); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); + + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout); + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, + RB_USED_INT_THRESHOLD, threshold); + + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, + RB_USED_INT_THRESHOLD, threshold); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + WREG32_SOC15(OSSSYS, 0, mmIH_CNTL2, ih_cntl); +} + +/** * navi10_ih_enable_interrupts - Enable the interrupt ring buffer * * @adev: amdgpu_device pointer @@ -265,10 +310,21 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) { if (ih->use_bus_addr) { - ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); - ih_chicken = REG_SET_FIELD(ih_chicken, - IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid, ih_chicken); + break; + default: + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + break; + } } } @@ -357,6 +413,8 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) /* enable interrupts */ navi10_ih_enable_interrupts(adev); + /* enable wptr force update for self int */ + force_update_wptr_for_self_int(adev, 0, 8, true); return 0; } @@ -370,6 +428,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) */ static void navi10_ih_irq_disable(struct amdgpu_device *adev) { + force_update_wptr_for_self_int(adev, 0, 8, false); navi10_ih_disable_interrupts(adev); /* Wait and acknowledge irq */ diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index cbcf04578b99..7429f30398b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -33,6 +33,13 @@ #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 +#define mmBIF_SDMA2_DOORBELL_RANGE 0x01d6 +#define mmBIF_SDMA2_DOORBELL_RANGE_BASE_IDX 2 +#define mmBIF_SDMA3_DOORBELL_RANGE 0x01d7 +#define mmBIF_SDMA3_DOORBELL_RANGE_BASE_IDX 2 + +#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8 +#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) { @@ -81,7 +88,9 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan int doorbell_size) { u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + instance == 1 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) : + instance == 2 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA2_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA3_DOORBELL_RANGE); u32 doorbell_range = RREG32(reg); @@ -103,7 +112,8 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index, int instance) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + u32 reg = instance ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); u32 doorbell_range = RREG32(reg); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 6655dd2009b6..da8024c2826e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -41,6 +41,7 @@ #include "hdp/hdp_5_0_0_offset.h" #include "hdp/hdp_5_0_0_sh_mask.h" #include "smuio/smuio_11_0_0_offset.h" +#include "mp/mp_11_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -52,8 +53,11 @@ #include "navi10_ih.h" #include "gfx_v10_0.h" #include "sdma_v5_0.h" +#include "sdma_v5_2.h" #include "vcn_v2_0.h" #include "jpeg_v2_0.h" +#include "vcn_v3_0.h" +#include "jpeg_v3_0.h" #include "dce_virtual.h" #include "mes_v10_1.h" #include "mxgpu_nv.h" @@ -93,6 +97,49 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u64 r; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* read low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + + /* read high 32 bit*/ + WREG32(address, reg + 4); + (void)RREG32(address); + r |= ((u64)RREG32(data) << 32); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* write low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, (u32)(v & 0xffffffffULL)); + (void)RREG32(data); + + /* write high 32 bit */ + WREG32(address, reg + 4); + (void)RREG32(address); + WREG32(data, (u32)(v >> 32)); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -188,10 +235,8 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)}, { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)}, { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)}, -#if 0 /* TODO: will set it when SDMA header is available */ { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)}, { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)}, -#endif { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)}, @@ -256,31 +301,6 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num, return -EINVAL; } -#if 0 -static void nv_gpu_pci_config_reset(struct amdgpu_device *adev) -{ - u32 i; - - dev_info(adev->dev, "GPU pci config reset\n"); - - /* disable BM */ - pci_clear_master(adev->pdev); - /* reset */ - amdgpu_pci_config_reset(adev); - - udelay(100); - - /* wait for asic to come out of reset */ - for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = nbio_v2_3_get_memsize(adev); - if (memsize != 0xffffffff) - break; - udelay(1); - } - -} -#endif - static int nv_asic_mode1_reset(struct amdgpu_device *adev) { u32 i; @@ -288,17 +308,21 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) amdgpu_atombios_scratch_regs_engine_hung(adev, true); - dev_info(adev->dev, "GPU mode1 reset\n"); - /* disable BM */ pci_clear_master(adev->pdev); pci_save_state(adev->pdev); - ret = psp_gpu_reset(adev); + if (amdgpu_dpm_is_mode1_reset_supported(adev)) { + dev_info(adev->dev, "GPU smu mode1 reset\n"); + ret = amdgpu_dpm_mode1_reset(adev); + } else { + dev_info(adev->dev, "GPU psp mode1 reset\n"); + ret = psp_gpu_reset(adev); + } + if (ret) dev_err(adev->dev, "GPU mode1 reset failed\n"); - pci_restore_state(adev->pdev); /* wait for asic to come out of reset */ @@ -330,36 +354,41 @@ nv_asic_reset_method(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; - if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu)) - return AMD_RESET_METHOD_BACO; - else + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: return AMD_RESET_METHOD_MODE1; + default: + if (smu_baco_is_support(smu)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; + } } static int nv_asic_reset(struct amdgpu_device *adev) { - - /* FIXME: it doesn't work since vega10 */ -#if 0 - amdgpu_atombios_scratch_regs_engine_hung(adev, true); - - nv_gpu_pci_config_reset(adev); - - amdgpu_atombios_scratch_regs_engine_hung(adev, false); -#endif int ret = 0; struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + dev_info(adev->dev, "GPU BACO reset\n"); + ret = smu_baco_enter(smu); if (ret) return ret; ret = smu_baco_exit(smu); if (ret) return ret; - } else { + } else ret = nv_asic_mode1_reset(adev); - } return ret; } @@ -442,6 +471,10 @@ legacy_init: case CHIP_NAVI12: navi12_reg_base_init(adev); break; + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + sienna_cichlid_reg_base_init(adev); + break; default: return -EINVAL; } @@ -449,6 +482,11 @@ legacy_init: return 0; } +void nv_set_virt_ops(struct amdgpu_device *adev) +{ + adev->virt.ops = &xgpu_nv_virt_ops; +} + int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; @@ -456,11 +494,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v2_3_funcs; adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - if (amdgpu_sriov_vf(adev)) { - adev->virt.ops = &xgpu_nv_virt_ops; - /* try send GPU_INIT_DATA request to host */ - amdgpu_virt_request_init_data(adev); - } + if (adev->asic_type == CHIP_SIENNA_CICHLID) + adev->gmc.xgmi.supported = true; /* Set IP register base before any HW register access */ r = nv_reg_base_init(adev); @@ -515,6 +550,53 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); break; + case CHIP_SIENNA_CICHLID: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + + if (adev->enable_mes) + amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); + break; + case CHIP_NAVY_FLOUNDER: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + break; default: return -EINVAL; } @@ -550,7 +632,6 @@ static bool nv_need_full_reset(struct amdgpu_device *adev) static bool nv_need_reset_on_init(struct amdgpu_device *adev) { -#if 0 u32 sol_reg; if (adev->flags & AMD_IS_APU) @@ -562,8 +643,7 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev) sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); if (sol_reg) return true; -#endif - /* TODO: re-enable it when mode1 reset is functional */ + return false; } @@ -592,8 +672,11 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev) adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0; adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1; + adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING; adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0; adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2; + adev->doorbell_index.sdma_engine[3] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3; adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH; adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1; adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3; @@ -638,6 +721,8 @@ static int nv_common_early_init(void *handle) adev->smc_wreg = NULL; adev->pcie_rreg = &nv_pcie_rreg; adev->pcie_wreg = &nv_pcie_wreg; + adev->pcie_rreg64 = &nv_pcie_rreg64; + adev->pcie_wreg64 = &nv_pcie_wreg64; /* TODO: will add them during VCN v2 implementation */ adev->uvd_ctx_rreg = NULL; @@ -723,6 +808,48 @@ static int nv_common_early_init(void *handle) adev->rev_id = 0; adev->external_rev_id = adev->rev_id + 0xa; break; + case CHIP_SIENNA_CICHLID: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_MC_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_ATHUB | + AMD_PG_SUPPORT_MMHUB; + if (amdgpu_sriov_vf(adev)) { + /* hypervisor control CG and PG enablement */ + adev->cg_flags = 0; + adev->pg_flags = 0; + } + adev->external_rev_id = adev->rev_id + 0x28; + break; + case CHIP_NAVY_FLOUNDER: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_IH_CG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_ATHUB | + AMD_PG_SUPPORT_MMHUB; + adev->external_rev_id = adev->rev_id + 0x32; + break; + default: /* FIXME: not supported yet */ return -EINVAL; @@ -889,6 +1016,16 @@ static void nv_update_hdp_mem_power_gating(struct amdgpu_device *adev, RC_MEM_POWER_DS_EN, enable); } + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to + * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + } + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); /* restore IPH & RC clock override after clock/power mode changing */ @@ -938,6 +1075,8 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 82e6cb432f3d..aeef50a6a54b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -28,8 +28,10 @@ void nv_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); +void nv_set_virt_ops(struct amdgpu_device *adev); int nv_set_ip_blocks(struct amdgpu_device *adev); int navi10_reg_base_init(struct amdgpu_device *adev); int navi14_reg_base_init(struct amdgpu_device *adev); int navi12_reg_base_init(struct amdgpu_device *adev); +int sienna_cichlid_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index a44fd6060d5b..cbc04a5c0fe1 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -110,6 +110,7 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ + GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */ @@ -365,4 +366,11 @@ struct psp_gfx_rb_frame /* total 64 bytes */ }; +#define PSP_ERR_UNKNOWN_COMMAND 0x00000100 + +enum tee_error_code { + TEE_SUCCESS = 0x00000000, + TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A, +}; + #endif /* _PSP_TEE_GFX_IF_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 1de89cc3c355..d488d250805d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -55,6 +55,10 @@ MODULE_FIRMWARE("amdgpu/navi12_ta.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_asd.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -95,6 +99,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_ARCTURUS: chip_name = "arcturus"; break; + case CHIP_SIENNA_CICHLID: + chip_name = "sienna_cichlid"; + break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: BUG(); } @@ -103,9 +113,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) if (err) return err; - err = psp_init_asd_microcode(psp, chip_name); - if (err) - return err; + if (adev->asic_type != CHIP_SIENNA_CICHLID && + adev->asic_type != CHIP_NAVY_FLOUNDER) { + err = psp_init_asd_microcode(psp, chip_name); + if (err) + return err; + } switch (adev->asic_type) { case CHIP_VEGA20: @@ -165,6 +178,13 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); } break; + case CHIP_SIENNA_CICHLID: + err = psp_init_ta_microcode(&adev->psp, chip_name); + if (err) + return err; + break; + case CHIP_NAVY_FLOUNDER: + break; default: BUG(); } @@ -243,6 +263,39 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) return ret; } +static int psp_v11_0_bootloader_load_spl(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check tOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v11_0_is_sos_alive(psp)) + return 0; + + ret = psp_v11_0_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP SPL binary to memory */ + memcpy(psp->fw_pri_buf, psp->spl_start_addr, psp->spl_bin_size); + + /* Provide the PSP SPL to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = PSP_BL__LOAD_TOS_SPL_TABLE; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + ret = psp_v11_0_wait_for_bootloader(psp); + + return ret; +} + static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) { int ret; @@ -354,7 +407,9 @@ static int psp_v11_0_ring_init(struct psp_context *psp, struct psp_ring *ring; struct amdgpu_device *adev = psp->adev; - if (!amdgpu_sriov_vf(adev)) + if ((!amdgpu_sriov_vf(adev)) && + (adev->asic_type != CHIP_SIENNA_CICHLID) && + (adev->asic_type != CHIP_NAVY_FLOUNDER)) psp_v11_0_reroute_ih(psp); ring = &psp->km_ring; @@ -555,44 +610,6 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) return ret; } -static void psp_v11_0_memory_training_fini(struct psp_context *psp) -{ - struct psp_memory_training_context *ctx = &psp->mem_train_ctx; - - ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; - kfree(ctx->sys_cache); - ctx->sys_cache = NULL; -} - -static int psp_v11_0_memory_training_init(struct psp_context *psp) -{ - int ret; - struct psp_memory_training_context *ctx = &psp->mem_train_ctx; - - if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { - DRM_DEBUG("memory training is not supported!\n"); - return 0; - } - - ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); - if (ctx->sys_cache == NULL) { - DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); - ret = -ENOMEM; - goto Err_out; - } - - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); - ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; - return 0; - -Err_out: - psp_v11_0_memory_training_fini(psp); - return ret; -} - /* * save and restore proces */ @@ -813,6 +830,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb, + .bootloader_load_spl = psp_v11_0_bootloader_load_spl, .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv, .bootloader_load_sos = psp_v11_0_bootloader_load_sos, .ring_init = psp_v11_0_ring_init, @@ -820,8 +838,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_stop = psp_v11_0_ring_stop, .ring_destroy = psp_v11_0_ring_destroy, .mode1_reset = psp_v11_0_mode1_reset, - .mem_training_init = psp_v11_0_memory_training_init, - .mem_training_fini = psp_v11_0_memory_training_fini, .mem_training = psp_v11_0_memory_training, .ring_get_wptr = psp_v11_0_ring_get_wptr, .ring_set_wptr = psp_v11_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_common.h b/drivers/gpu/drm/amd/amdgpu/sdma_common.h new file mode 100644 index 000000000000..8629ef7e8ad9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_common.h @@ -0,0 +1,42 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SDMA_COMMON_H__ +#define __SDMA_COMMON_H__ + +enum sdma_utcl2_cache_read_policy { + CACHE_READ_POLICY_L2__LRU = 0x00000000, + CACHE_READ_POLICY_L2__STREAM = 0x00000001, + CACHE_READ_POLICY_L2__NOA = 0x00000002, + CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA, +}; + +enum sdma_utcl2_cache_write_policy { + CACHE_WRITE_POLICY_L2__LRU = 0x00000000, + CACHE_WRITE_POLICY_L2__STREAM = 0x00000001, + CACHE_WRITE_POLICY_L2__NOA = 0x00000002, + CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003, + CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS, +}; + +#endif /* __SDMA_COMMON_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 33501c6c7189..856c50386c86 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -505,6 +505,36 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) } } +static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev) +{ + int i; + + /* + * The only chips with SDMAv4 and ULV are VG10 and VG20. + * Server SKUs take a different hysteresis setting from other SKUs. + */ + switch (adev->asic_type) { + case CHIP_VEGA10: + if (adev->pdev->device == 0x6860) + break; + return; + case CHIP_VEGA20: + if (adev->pdev->device == 0x66a1) + break; + return; + default: + return; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + uint32_t temp; + + temp = RREG32_SDMA(i, mmSDMA0_ULV_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_ULV_CNTL, HYSTERESIS, 0x0); + WREG32_SDMA(i, mmSDMA0_ULV_CNTL, temp); + } +} + static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) { int err = 0; @@ -529,8 +559,8 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) int i; for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->sdma.instance[i].fw != NULL) - release_firmware(adev->sdma.instance[i].fw); + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; /* arcturus shares the same FW memory across all SDMA isntances */ @@ -1774,7 +1804,7 @@ static int sdma_v4_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) + if (adev->flags & AMD_IS_APU) adev->sdma.num_instances = 1; else if (adev->asic_type == CHIP_ARCTURUS) adev->sdma.num_instances = 8; @@ -1813,6 +1843,8 @@ static int sdma_v4_0_late_init(void *handle) .cb = sdma_v4_0_process_ras_data_cb, }; + sdma_v4_0_setup_ulv(adev); + if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count) adev->sdma.funcs->reset_ras_error_count(adev); @@ -1912,9 +1944,7 @@ static int sdma_v4_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) || - (adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset)) + if (adev->flags & AMD_IS_APU) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); if (!amdgpu_sriov_vf(adev)) @@ -1941,9 +1971,7 @@ static int sdma_v4_0_hw_fini(void *handle) sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); - if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs - && adev->powerplay.pp_funcs->set_powergating_by_smu) || - adev->asic_type == CHIP_RENOIR) + if (adev->flags & AMD_IS_APU) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); return 0; @@ -2202,6 +2230,7 @@ static int sdma_v4_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: sdma_v4_1_update_power_gating(adev, state == AMD_PG_STATE_GATE ? true : false); break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b544baf306f2..e2232dd12d8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -40,6 +40,7 @@ #include "soc15.h" #include "navi10_sdma_pkt_open.h" #include "nbio_v2_3.h" +#include "sdma_common.h" #include "sdma_v5_0.h" MODULE_FIRMWARE("amdgpu/navi10_sdma.bin"); @@ -314,30 +315,20 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - wptr = &local_wptr; - lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); + wptr = wptr << 32; + wptr |= RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); } - return *wptr; + return wptr >> 2; } /** @@ -484,7 +475,6 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | @@ -507,8 +497,7 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, upper_32_bits(seq)); } - /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */ - if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { + if (flags & AMDGPU_FENCE_FLAG_INT) { /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); @@ -886,10 +875,6 @@ static int sdma_v5_0_start(struct amdgpu_device *adev) r = sdma_v5_0_load_microcode(adev); if (r) return r; - - /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */ - if (amdgpu_emu_mode == 1 && adev->pdev->device == 0x4d) - msleep(1000); } /* unhalt the MEs */ @@ -1298,8 +1283,12 @@ static int sdma_v5_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h index d5a94e3d181c..d4e3c2e696f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h @@ -24,21 +24,6 @@ #ifndef __SDMA_V5_0_H__ #define __SDMA_V5_0_H__ -enum sdma_v5_0_utcl2_cache_read_policy { - CACHE_READ_POLICY_L2__LRU = 0x00000000, - CACHE_READ_POLICY_L2__STREAM = 0x00000001, - CACHE_READ_POLICY_L2__NOA = 0x00000002, - CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA, -}; - -enum sdma_v5_0_utcl2_cache_write_policy { - CACHE_WRITE_POLICY_L2__LRU = 0x00000000, - CACHE_WRITE_POLICY_L2__STREAM = 0x00000001, - CACHE_WRITE_POLICY_L2__NOA = 0x00000002, - CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003, - CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS, -}; - extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c new file mode 100644 index 000000000000..46a9617fee5f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -0,0 +1,1776 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_trace.h" + +#include "gc/gc_10_3_0_offset.h" +#include "gc/gc_10_3_0_sh_mask.h" +#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h" +#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h" +#include "ivsrcid/sdma2/irqsrcs_sdma2_5_0.h" +#include "ivsrcid/sdma3/irqsrcs_sdma3_5_0.h" + +#include "soc15_common.h" +#include "soc15.h" +#include "navi10_sdma_pkt_open.h" +#include "nbio_v2_3.h" +#include "sdma_common.h" +#include "sdma_v5_2.h" + +MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin"); + +#define SDMA1_REG_OFFSET 0x600 +#define SDMA3_REG_OFFSET 0x400 +#define SDMA0_HYP_DEC_REG_START 0x5880 +#define SDMA0_HYP_DEC_REG_END 0x5893 +#define SDMA1_HYP_DEC_REG_OFFSET 0x20 + +static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); +static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); +static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); +static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev); + +static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) +{ + u32 base; + + if (internal_offset >= SDMA0_HYP_DEC_REG_START && + internal_offset <= SDMA0_HYP_DEC_REG_END) { + base = adev->reg_offset[GC_HWIP][0][1]; + if (instance != 0) + internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance; + } else { + if (instance < 2) { + base = adev->reg_offset[GC_HWIP][0][0]; + if (instance == 1) + internal_offset += SDMA1_REG_OFFSET; + } else { + base = adev->reg_offset[GC_HWIP][0][2]; + if (instance == 3) + internal_offset += SDMA3_REG_OFFSET; + } + } + + return base + internal_offset; +} + +static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + break; + default: + break; + } +} + +static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ + int err = 0; + const struct sdma_firmware_header_v1_0 *hdr; + + err = amdgpu_ucode_validate(sdma_inst->fw); + if (err) + return err; + + hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + + if (sdma_inst->feature_version >= 20) + sdma_inst->burst_nop = true; + + return 0; +} + +static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + + if (adev->asic_type == CHIP_SIENNA_CICHLID) + break; + } + + memset((void*)adev->sdma.instance, 0, + sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} + +/** + * sdma_v5_2_init_microcode - load ucode images from disk + * + * @adev: amdgpu_device pointer + * + * Use the firmware interface to load the ucode images into + * the driver (not loaded into hw). + * Returns 0 on success, error on failure. + */ + +// emulation only, won't work on real chip +// navi10 real chip need to use PSP to load firmware +static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[40]; + int err = 0, i; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + chip_name = "sienna_cichlid"; + break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + + err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]); + if (err) + goto out; + + for (i = 1; i < adev->sdma.num_instances; i++) { + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) { + memcpy((void*)&adev->sdma.instance[i], + (void*)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); + } else { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]); + if (err) + goto out; + } + } + + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + for (i = 0; i < adev->sdma.num_instances; i++) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; + info->fw = adev->sdma.instance[i].fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + } + +out: + if (err) { + DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name); + sdma_v5_2_destroy_inst_ctx(adev); + } + return err; +} + +static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 1); + ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ + amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + + return ret; +} + +static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring, + unsigned offset) +{ + unsigned cur; + + BUG_ON(offset > ring->buf_mask); + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur > offset) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; +} + +/** + * sdma_v5_2_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (NAVI10+). + */ +static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring) +{ + u64 *rptr; + + /* XXX check if swapping is necessary on BE */ + rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); + + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); + return ((*rptr) >> 2); +} + +/** + * sdma_v5_2_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (NAVI10+). + */ +static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); + } else { + wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); + wptr = wptr << 32; + wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); + } + + return wptr >> 2; +} + +/** + * sdma_v5_2_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware (NAVI10+). + */ +static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + DRM_DEBUG("Setting write pointer\n"); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); + adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } +} + +static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->funcs->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->funcs->nop); +} + +/** + * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine + * + * @ring: amdgpu ring pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring. + */ +static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); + + /* An IB packet must end on a 8 DW boundary--the next dword + * must be on a 8-dword boundary. Our IB packet below is 6 + * dwords long, thus add x number of NOPs, such that, in + * modular arithmetic, + * wptr + 6 + x = 8k, k >= 0, which in C is, + * (wptr + 6 + x) % 8 = 0. + * The expression below, is a solution of x. + */ + sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); + amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); +} + +/** + * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring + * + * @ring: amdgpu ring pointer + * + * Emit an hdp flush packet on the requested DMA ring. + */ +static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask = 0; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ +} + +/** + * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring + * + * @ring: amdgpu ring pointer + * @fence: amdgpu fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed. + */ +static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + /* write the fence */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + /* optionally write high bits as well */ + if (write64bit) { + addr += 4; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* generate an interrupt */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); + } +} + + +/** + * sdma_v5_2_gfx_stop - stop the gfx async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the gfx async dma ring buffers. + */ +static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring; + struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring; + u32 rb_cntl, ib_cntl; + int i; + + if ((adev->mman.buffer_funcs_ring == sdma0) || + (adev->mman.buffer_funcs_ring == sdma1) || + (adev->mman.buffer_funcs_ring == sdma2) || + (adev->mman.buffer_funcs_ring == sdma3)) + amdgpu_ttm_set_buffer_funcs_status(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + } + + sdma0->sched.ready = false; + sdma1->sched.ready = false; + sdma2->sched.ready = false; + sdma3->sched.ready = false; +} + +/** + * sdma_v5_2_rlc_stop - stop the compute async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the compute async dma queues. + */ +static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev) +{ + /* XXX todo */ +} + +/** + * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch. + */ +static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl, phase_quantum = 0; + int i; + + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, enable ? 1 : 0); + if (enable && amdgpu_sdma_phase_quantum) { + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + } + +} + +/** + * sdma_v5_2_enable - stop the async dma engines + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. + * + * Halt or unhalt the async dma engines. + */ +static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + if (enable == false) { + sdma_v5_2_gfx_stop(adev); + sdma_v5_2_rlc_stop(adev); + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + } +} + +/** + * sdma_v5_2_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 rb_cntl, ib_cntl; + u32 rb_bufsz; + u32 wb_offset; + u32 doorbell; + u32 doorbell_offset; + u32 temp; + u32 wptr_poll_cntl; + u64 wptr_gpu_addr; + int i, r; + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + wb_offset = (ring->rptr_offs * 4); + + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, + mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), + wptr_poll_cntl); + + /* set the wb address whether it's enabled or not */ + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } + + doorbell = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); + } + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + + if (amdgpu_sriov_vf(adev)) + sdma_v5_2_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + + /* set utc l1 enable flag always to 1 */ + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + + /* enable MCBP */ + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | + 0x01000000); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + + ring->sched.ready = true; + + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v5_2_ctx_switch_enable(adev, true); + sdma_v5_2_enable(adev, true); + } + + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + return r; + } + + if (adev->mman.buffer_funcs_ring == ring) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + + return 0; +} + +/** + * sdma_v5_2_rlc_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the compute DMA queues and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v5_2_rlc_resume(struct amdgpu_device *adev) +{ + return 0; +} + +/** + * sdma_v5_2_load_microcode - load the sDMA ME ucode + * + * @adev: amdgpu_device pointer + * + * Loads the sDMA0/1/2/3 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) +{ + const struct sdma_firmware_header_v1_0 *hdr; + const __le32 *fw_data; + u32 fw_size; + int i, j; + + /* halt the MEs */ + sdma_v5_2_enable(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (!adev->sdma.instance[i].fw) + return -EINVAL; + + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; + amdgpu_ucode_print_sdma_hdr(&hdr->header); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + fw_data = (const __le32 *) + (adev->sdma.instance[i].fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); + + for (j = 0; j < fw_size; j++) { + if (amdgpu_emu_mode == 1 && j % 500 == 0) + msleep(1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + } + + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + } + + return 0; +} + +/** + * sdma_v5_2_start - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the DMA engines and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v5_2_start(struct amdgpu_device *adev) +{ + int r = 0; + + if (amdgpu_sriov_vf(adev)) { + sdma_v5_2_ctx_switch_enable(adev, false); + sdma_v5_2_enable(adev, false); + + /* set RB registers */ + r = sdma_v5_2_gfx_resume(adev); + return r; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = sdma_v5_2_load_microcode(adev); + if (r) + return r; + + /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */ + if (amdgpu_emu_mode == 1) + msleep(1000); + } + + /* unhalt the MEs */ + sdma_v5_2_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v5_2_ctx_switch_enable(adev, true); + + /* start the gfx rings and rlc compute queues */ + r = sdma_v5_2_gfx_resume(adev); + if (r) + return r; + r = sdma_v5_2_rlc_resume(adev); + + return r; +} + +/** + * sdma_v5_2_ring_test_ring - simple async dma engine test + * + * @ring: amdgpu_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. + * Returns 0 for success, error for failure. + */ +static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + unsigned i; + unsigned index; + int r; + u32 tmp; + u64 gpu_addr; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + + r = amdgpu_ring_alloc(ring, 5); + if (r) { + DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + amdgpu_device_wb_free(adev, index); + return r; + } + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); + amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + amdgpu_device_wb_free(adev, index); + + return r; +} + +/** + * sdma_v5_2_ring_test_ib - test an IB on the DMA engine + * + * @ring: amdgpu_ring structure holding ring information + * + * Test a simple IB in the DMA ring. + * Returns 0 on success, error on failure. + */ +static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + long r; + u32 tmp = 0; + u64 gpu_addr; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; + } + + ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib.ptr[1] = lower_32_bits(gpu_addr); + ib.ptr[2] = upper_32_bits(gpu_addr); + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); + ib.ptr[4] = 0xDEADBEEF; + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err1; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; + goto err1; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err1; + } + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; + +err1: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err0: + amdgpu_device_wb_free(adev, index); + return r; +} + + +/** + * sdma_v5_2_vm_copy_pte - update PTEs by copying them from the GART + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @src: src addr to copy from + * @count: number of page entries to update + * + * Update PTEs by copying them from the GART using sDMA. + */ +static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib, + uint64_t pe, uint64_t src, + unsigned count) +{ + unsigned bytes = count * 8; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = bytes - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + +} + +/** + * sdma_v5_2_vm_write_pte - update PTEs by writing them manually + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update PTEs by writing them manually using sDMA. + */ +static void sdma_v5_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, + uint64_t value, unsigned count, + uint32_t incr) +{ + unsigned ndw = count * 2; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw - 1; + for (; ndw > 0; ndw -= 2) { + ib->ptr[ib->length_dw++] = lower_32_bits(value); + ib->ptr[ib->length_dw++] = upper_32_bits(value); + value += incr; + } +} + +/** + * sdma_v5_2_vm_set_pte_pde - update the page tables using sDMA + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA. + */ +static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ +} + +/** + * sdma_v5_2_ring_pad_ib - pad the IB + * + * @ib: indirect buffer to fill with padding + * + * Pad the IB with NOPs to a boundary multiple of 8. + */ +static void sdma_v5_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + u32 pad_count; + int i; + + pad_count = (-ib->length_dw) & 0x7; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); +} + + +/** + * sdma_v5_2_ring_emit_pipeline_sync - sync the pipeline + * + * @ring: amdgpu_ring pointer + * + * Make sure all previous operations are completed (CIK). + */ +static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + /* wait for idle */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); /* reference */ + amdgpu_ring_write(ring, 0xffffffff); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ +} + + +/** + * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA + * + * @ring: amdgpu_ring pointer + * @vm: amdgpu_vm pointer + * + * Update the page table base and flush the VM TLB + * using sDMA. + */ +static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} + +static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} + +static void sdma_v5_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); +} + +static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + +static int sdma_v5_2_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + adev->sdma.num_instances = 4; + break; + case CHIP_NAVY_FLOUNDER: + adev->sdma.num_instances = 2; + break; + default: + break; + } + + sdma_v5_2_set_ring_funcs(adev); + sdma_v5_2_set_buffer_funcs(adev); + sdma_v5_2_set_vm_pte_funcs(adev); + sdma_v5_2_set_irq_funcs(adev); + + return 0; +} + +static unsigned sdma_v5_2_seq_to_irq_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SOC15_IH_CLIENTID_SDMA0; + case 1: + return SOC15_IH_CLIENTID_SDMA1; + case 2: + return SOC15_IH_CLIENTID_SDMA2; + case 3: + return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid; + default: + break; + } + return -EINVAL; +} + +static unsigned sdma_v5_2_seq_to_trap_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SDMA0_5_0__SRCID__SDMA_TRAP; + case 1: + return SDMA1_5_0__SRCID__SDMA_TRAP; + case 2: + return SDMA2_5_0__SRCID__SDMA_TRAP; + case 3: + return SDMA3_5_0__SRCID__SDMA_TRAP; + default: + break; + } + return -EINVAL; +} + +static int sdma_v5_2_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* SDMA trap event */ + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i), + sdma_v5_2_seq_to_trap_id(i), + &adev->sdma.trap_irq); + if (r) + return r; + } + + r = sdma_v5_2_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load sdma firmware!\n"); + return r; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->me = i; + + DRM_INFO("use_doorbell being set to: [%s]\n", + ring->use_doorbell?"true":"false"); + + ring->doorbell_index = + (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset + + sprintf(ring->name, "sdma%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i, + AMDGPU_RING_PRIO_DEFAULT); + if (r) + return r; + } + + return r; +} + +static int sdma_v5_2_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + + sdma_v5_2_destroy_inst_ctx(adev); + + return 0; +} + +static int sdma_v5_2_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + sdma_v5_2_init_golden_registers(adev); + + r = sdma_v5_2_start(adev); + + return r; +} + +static int sdma_v5_2_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + sdma_v5_2_ctx_switch_enable(adev, false); + sdma_v5_2_enable(adev, false); + + return 0; +} + +static int sdma_v5_2_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v5_2_hw_fini(adev); +} + +static int sdma_v5_2_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v5_2_hw_init(adev); +} + +static bool sdma_v5_2_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + u32 tmp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); + + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) + return false; + } + + return true; +} + +static int sdma_v5_2_wait_for_idle(void *handle) +{ + unsigned i; + u32 sdma0, sdma1, sdma2, sdma3; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v5_2_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); + sdma2 = RREG32(sdma_v5_2_get_reg_offset(adev, 2, mmSDMA0_STATUS_REG)); + sdma3 = RREG32(sdma_v5_2_get_reg_offset(adev, 3, mmSDMA0_STATUS_REG)); + + if (sdma0 & sdma1 & sdma2 & sdma3 & SDMA0_STATUS_REG__IDLE_MASK) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int sdma_v5_2_soft_reset(void *handle) +{ + /* todo */ + + return 0; +} + +static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + u32 index = 0; + u64 sdma_gfx_preempt; + + amdgpu_sdma_get_index_from_ring(ring, &index); + sdma_gfx_preempt = + sdma_v5_2_get_reg_offset(adev, index, mmSDMA0_GFX_PREEMPT); + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* emit the trailing fence */ + ring->trail_seq += 1; + amdgpu_ring_alloc(ring, 10); + sdma_v5_2_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, 0); + amdgpu_ring_commit(ring); + + /* assert IB preemption */ + WREG32(sdma_gfx_preempt, 1); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to be preempted\n", ring->idx); + } + + /* deassert IB preemption */ + WREG32(sdma_gfx_preempt, 0); + + /* deassert the preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_cntl; + + u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL); + + sdma_cntl = RREG32(reg_offset); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(reg_offset, sdma_cntl); + + return 0; +} + +static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: SDMA trap\n"); + switch (entry->client_id) { + case SOC15_IH_CLIENTID_SDMA0: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[0].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + case SOC15_IH_CLIENTID_SDMA1: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[1].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + case SOC15_IH_CLIENTID_SDMA2: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[2].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + case SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[3].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + } + return 0; +} + +static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + return 0; +} + +static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { + /* Enable sdma clock gating */ + def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK); + if (def != data) + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); + } else { + /* Disable sdma clock gating */ + def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK); + if (def != data) + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); + } + } +} + +static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { + /* Enable sdma mem light sleep */ + def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data); + + } else { + /* Disable sdma mem light sleep */ + def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data); + + } + } +} + +static int sdma_v5_2_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + sdma_v5_2_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + sdma_v5_2_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +static int sdma_v5_2_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_SDMA_LS */ + data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL)); + if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK) + *flags |= AMD_CG_SUPPORT_SDMA_LS; +} + +const struct amd_ip_funcs sdma_v5_2_ip_funcs = { + .name = "sdma_v5_2", + .early_init = sdma_v5_2_early_init, + .late_init = NULL, + .sw_init = sdma_v5_2_sw_init, + .sw_fini = sdma_v5_2_sw_fini, + .hw_init = sdma_v5_2_hw_init, + .hw_fini = sdma_v5_2_hw_fini, + .suspend = sdma_v5_2_suspend, + .resume = sdma_v5_2_resume, + .is_idle = sdma_v5_2_is_idle, + .wait_for_idle = sdma_v5_2_wait_for_idle, + .soft_reset = sdma_v5_2_soft_reset, + .set_clockgating_state = sdma_v5_2_set_clockgating_state, + .set_powergating_state = sdma_v5_2_set_powergating_state, + .get_clockgating_state = sdma_v5_2_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = sdma_v5_2_ring_get_rptr, + .get_wptr = sdma_v5_2_ring_get_wptr, + .set_wptr = sdma_v5_2_ring_set_wptr, + .emit_frame_size = + 5 + /* sdma_v5_2_ring_init_cond_exec */ + 6 + /* sdma_v5_2_ring_emit_hdp_flush */ + 3 + /* hdp_invalidate */ + 6 + /* sdma_v5_2_ring_emit_pipeline_sync */ + /* sdma_v5_2_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */ + .emit_ib = sdma_v5_2_ring_emit_ib, + .emit_fence = sdma_v5_2_ring_emit_fence, + .emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v5_2_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush, + .test_ring = sdma_v5_2_ring_test_ring, + .test_ib = sdma_v5_2_ring_test_ib, + .insert_nop = sdma_v5_2_ring_insert_nop, + .pad_ib = sdma_v5_2_ring_pad_ib, + .emit_wreg = sdma_v5_2_ring_emit_wreg, + .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, + .init_cond_exec = sdma_v5_2_ring_init_cond_exec, + .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec, + .preempt_ib = sdma_v5_2_ring_preempt_ib, +}; + +static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].ring.funcs = &sdma_v5_2_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } +} + +static const struct amdgpu_irq_src_funcs sdma_v5_2_trap_irq_funcs = { + .set = sdma_v5_2_set_trap_irq_state, + .process = sdma_v5_2_process_trap_irq, +}; + +static const struct amdgpu_irq_src_funcs sdma_v5_2_illegal_inst_irq_funcs = { + .process = sdma_v5_2_process_illegal_inst_irq, +}; + +static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + + adev->sdma.num_instances; + adev->sdma.trap_irq.funcs = &sdma_v5_2_trap_irq_funcs; + adev->sdma.illegal_inst_irq.funcs = &sdma_v5_2_illegal_inst_irq_funcs; +} + +/** + * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Copy GPU buffers using the DMA engine. + * Used by the amdgpu ttm implementation to move pages if + * registered as the asic copy callback. + */ +static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib, + uint64_t src_offset, + uint64_t dst_offset, + uint32_t byte_count, + bool tmz) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + ib->ptr[ib->length_dw++] = byte_count - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); +} + +/** + * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_data: value to write to buffer + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Fill GPU buffers using the DMA engine. + */ +static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib, + uint32_t src_data, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count - 1; +} + +static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = { + .copy_max_bytes = 0x400000, + .copy_num_dw = 7, + .emit_copy_buffer = sdma_v5_2_emit_copy_buffer, + + .fill_max_bytes = 0x400000, + .fill_num_dw = 5, + .emit_fill_buffer = sdma_v5_2_emit_fill_buffer, +}; + +static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev) +{ + if (adev->mman.buffer_funcs == NULL) { + adev->mman.buffer_funcs = &sdma_v5_2_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; + } +} + +static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v5_2_vm_copy_pte, + .write_pte = sdma_v5_2_vm_write_pte, + .set_pte_pde = sdma_v5_2_vm_set_pte_pde, +}; + +static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev) +{ + unsigned i; + + if (adev->vm_manager.vm_pte_funcs == NULL) { + adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; + } + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; + } +} + +const struct amdgpu_ip_block_version sdma_v5_2_ip_block = { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 5, + .minor = 2, + .rev = 0, + .funcs = &sdma_v5_2_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h new file mode 100644 index 000000000000..b70414fef2a1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SDMA_V5_2_H__ +#define __SDMA_V5_2_H__ + +extern const struct amd_ip_funcs sdma_v5_2_ip_funcs; +extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block; + +#endif /* __SDMA_V5_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 153db3f763bc..1b449291f068 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -41,9 +41,11 @@ #include "si_dma.h" #include "dce_v6_0.h" #include "si.h" +#include "uvd_v3_1.h" #include "dce_virtual.h" #include "gca/gfx_6_0_d.h" #include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" #include "gmc/gmc_6_0_d.h" #include "dce/dce_6_0_d.h" #include "uvd/uvd_4_0_d.h" @@ -973,6 +975,28 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } +static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); + r = RREG32(mmUVD_CTX_DATA); + spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + return r; +} + +static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); + WREG32(mmUVD_CTX_DATA, (v)); + spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); +} + static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {GRBM_STATUS}, {mmGRBM_STATUS2}, @@ -1205,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev) static enum amd_reset_method si_asic_reset_method(struct amdgpu_device *adev) { + if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY && + amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + return AMD_RESET_METHOD_LEGACY; } @@ -1243,12 +1272,6 @@ static u32 si_get_xclk(struct amdgpu_device *adev) return reference_clock; } -//xxx:not implemented -static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) -{ - return 0; -} - static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) { @@ -1404,6 +1427,358 @@ static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev) return (nak_r + nak_g); } +static int si_uvd_send_upll_ctlreq(struct amdgpu_device *adev, + unsigned cg_upll_func_cntl) +{ + unsigned i; + + /* Make sure UPLL_CTLREQ is deasserted */ + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); + + mdelay(10); + + /* Assert UPLL_CTLREQ */ + WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* Wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + + if ((RREG32(cg_upll_func_cntl) & mask) == mask) + break; + mdelay(10); + } + + /* Deassert UPLL_CTLREQ */ + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); + + if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) { + DRM_ERROR("Timeout setting UVD clocks!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static unsigned si_uvd_calc_upll_post_div(unsigned vco_freq, + unsigned target_freq, + unsigned pd_min, + unsigned pd_even) +{ + unsigned post_div = vco_freq / target_freq; + + /* Adjust to post divider minimum value */ + if (post_div < pd_min) + post_div = pd_min; + + /* We alway need a frequency less than or equal the target */ + if ((vco_freq / post_div) > target_freq) + post_div += 1; + + /* Post dividers above a certain value must be even */ + if (post_div > pd_even && post_div % 2) + post_div += 1; + + return post_div; +} + +/** + * si_calc_upll_dividers - calc UPLL clock dividers + * + * @adev: amdgpu_device pointer + * @vclk: wanted VCLK + * @dclk: wanted DCLK + * @vco_min: minimum VCO frequency + * @vco_max: maximum VCO frequency + * @fb_factor: factor to multiply vco freq with + * @fb_mask: limit and bitmask for feedback divider + * @pd_min: post divider minimum + * @pd_max: post divider maximum + * @pd_even: post divider must be even above this value + * @optimal_fb_div: resulting feedback divider + * @optimal_vclk_div: resulting vclk post divider + * @optimal_dclk_div: resulting dclk post divider + * + * Calculate dividers for UVDs UPLL (except APUs). + * Returns zero on success; -EINVAL on error. + */ +static int si_calc_upll_dividers(struct amdgpu_device *adev, + unsigned vclk, unsigned dclk, + unsigned vco_min, unsigned vco_max, + unsigned fb_factor, unsigned fb_mask, + unsigned pd_min, unsigned pd_max, + unsigned pd_even, + unsigned *optimal_fb_div, + unsigned *optimal_vclk_div, + unsigned *optimal_dclk_div) +{ + unsigned vco_freq, ref_freq = adev->clock.spll.reference_freq; + + /* Start off with something large */ + unsigned optimal_score = ~0; + + /* Loop through vco from low to high */ + vco_min = max(max(vco_min, vclk), dclk); + for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { + uint64_t fb_div = (uint64_t)vco_freq * fb_factor; + unsigned vclk_div, dclk_div, score; + + do_div(fb_div, ref_freq); + + /* fb div out of range ? */ + if (fb_div > fb_mask) + break; /* It can oly get worse */ + + fb_div &= fb_mask; + + /* Calc vclk divider with current vco freq */ + vclk_div = si_uvd_calc_upll_post_div(vco_freq, vclk, + pd_min, pd_even); + if (vclk_div > pd_max) + break; /* vco is too big, it has to stop */ + + /* Calc dclk divider with current vco freq */ + dclk_div = si_uvd_calc_upll_post_div(vco_freq, dclk, + pd_min, pd_even); + if (dclk_div > pd_max) + break; /* vco is too big, it has to stop */ + + /* Calc score with current vco freq */ + score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); + + /* Determine if this vco setting is better than current optimal settings */ + if (score < optimal_score) { + *optimal_fb_div = fb_div; + *optimal_vclk_div = vclk_div; + *optimal_dclk_div = dclk_div; + optimal_score = score; + if (optimal_score == 0) + break; /* It can't get better than this */ + } + } + + /* Did we found a valid setup ? */ + if (optimal_score == ~0) + return -EINVAL; + + return 0; +} + +static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) +{ + unsigned fb_div = 0, vclk_div = 0, dclk_div = 0; + int r; + + /* Bypass vclk and dclk with bclk */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + /* Put PLL in bypass mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK); + + if (!vclk || !dclk) { + /* Keep the Bypass mode */ + return 0; + } + + r = si_calc_upll_dividers(adev, vclk, dclk, 125000, 250000, + 16384, 0x03FFFFFF, 0, 128, 5, + &fb_div, &vclk_div, &dclk_div); + if (r) + return r; + + /* Set RESET_ANTI_MUX to 0 */ + WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK); + + /* Set VCO_MODE to 1 */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK); + + /* Disable sleep mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK); + + /* Deassert UPLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(1); + + r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* Assert UPLL_RESET again */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK); + + /* Disable spread spectrum. */ + WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK); + + /* Set feedback divider */ + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK); + + /* Set ref divider to 0 */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK); + + if (fb_div < 307200) + WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9); + else + WREG32_P(CG_UPLL_FUNC_CNTL_4, + UPLL_SPARE_ISPARE9, + ~UPLL_SPARE_ISPARE9); + + /* Set PDIV_A and PDIV_B */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div), + ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK)); + + /* Give the PLL some time to settle */ + mdelay(15); + + /* Deassert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(15); + + /* Switch from bypass mode to normal mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK); + + r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* Switch VCLK and DCLK selection */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + mdelay(100); + + return 0; +} + +static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev) +{ + unsigned i; + + /* Make sure VCEPLL_CTLREQ is deasserted */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + mdelay(10); + + /* Assert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* Wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + + if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask) + break; + mdelay(10); + } + + /* Deassert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) { + DRM_ERROR("Timeout setting UVD clocks!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) +{ + unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0; + int r; + + /* Bypass evclk and ecclk with bclk */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + /* Put PLL in bypass mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK, + ~VCEPLL_BYPASS_EN_MASK); + + if (!evclk || !ecclk) { + /* Keep the Bypass mode, put PLL to sleep */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + return 0; + } + + r = si_calc_upll_dividers(adev, evclk, ecclk, 125000, 250000, + 16384, 0x03FFFFFF, 0, 128, 5, + &fb_div, &evclk_div, &ecclk_div); + if (r) + return r; + + /* Set RESET_ANTI_MUX to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK); + + /* Set VCO_MODE to 1 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK, + ~VCEPLL_VCO_MODE_MASK); + + /* Toggle VCEPLL_SLEEP to 1 then back to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK); + + /* Deassert VCEPLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(1); + + r = si_vce_send_vcepll_ctlreq(adev); + if (r) + return r; + + /* Assert VCEPLL_RESET again */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK); + + /* Disable spread spectrum. */ + WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK); + + /* Set feedback divider */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, + VCEPLL_FB_DIV(fb_div), + ~VCEPLL_FB_DIV_MASK); + + /* Set ref divider to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK); + + /* Set PDIV_A and PDIV_B */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div), + ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK)); + + /* Give the PLL some time to settle */ + mdelay(15); + + /* Deassert PLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(15); + + /* Switch from bypass mode to normal mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK); + + r = si_vce_send_vcepll_ctlreq(adev); + if (r) + return r; + + /* Switch VCLK and DCLK selection */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + mdelay(100); + + return 0; +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -1414,7 +1789,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .set_vga_state = &si_vga_set_state, .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, - .set_vce_clocks = NULL, + .set_vce_clocks = &si_set_vce_clocks, .get_pcie_lanes = &si_get_pcie_lanes, .set_pcie_lanes = &si_set_pcie_lanes, .get_config_memsize = &si_get_config_memsize, @@ -1443,8 +1818,8 @@ static int si_common_early_init(void *handle) adev->pcie_wreg = &si_pcie_wreg; adev->pciep_rreg = &si_pciep_rreg; adev->pciep_wreg = &si_pciep_wreg; - adev->uvd_ctx_rreg = NULL; - adev->uvd_ctx_wreg = NULL; + adev->uvd_ctx_rreg = si_uvd_ctx_rreg; + adev->uvd_ctx_wreg = si_uvd_ctx_wreg; adev->didt_rreg = NULL; adev->didt_wreg = NULL; @@ -2173,7 +2548,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block); - /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ + amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; case CHIP_OLAND: @@ -2187,8 +2562,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block); - - /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ + amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; case CHIP_HAINAN: diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index c00ba4b23c9a..ea914b256ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -6953,6 +6953,24 @@ static int si_power_control_set_level(struct amdgpu_device *adev) return 0; } +static void si_set_vce_clock(struct amdgpu_device *adev, + struct amdgpu_ps *new_rps, + struct amdgpu_ps *old_rps) +{ + if ((old_rps->evclk != new_rps->evclk) || + (old_rps->ecclk != new_rps->ecclk)) { + /* Turn the clocks on when encoding, off otherwise */ + if (new_rps->evclk || new_rps->ecclk) { + /* Place holder for future VCE1.0 porting to amdgpu + vce_v1_0_enable_mgcg(adev, false, false);*/ + } else { + /* Place holder for future VCE1.0 porting to amdgpu + vce_v1_0_enable_mgcg(adev, true, false); + amdgpu_asic_set_vce_clocks(adev, new_rps->evclk, new_rps->ecclk);*/ + } + } +} + static int si_dpm_set_power_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7029,6 +7047,7 @@ static int si_dpm_set_power_state(void *handle) return ret; } ni_set_uvd_clock_after_set_eng_clock(adev, new_ps, old_ps); + si_set_vce_clock(adev, new_ps, old_ps); if (eg_pi->pcie_performance_request) si_notify_link_speed_change_after_state_change(adev, new_ps, old_ps); ret = si_set_power_state_conditionally_enable_ulv(adev, new_ps); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.h b/drivers/gpu/drm/amd/amdgpu/si_dpm.h index 6b7d292b919f..bc0be6818e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.h @@ -781,7 +781,7 @@ struct NISLANDS_SMC_SWSTATE uint8_t levelCount; uint8_t padding2; uint8_t padding3; - NISLANDS_SMC_HW_PERFORMANCE_LEVEL levels[1]; + NISLANDS_SMC_HW_PERFORMANCE_LEVEL levels[]; }; typedef struct NISLANDS_SMC_SWSTATE NISLANDS_SMC_SWSTATE; diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index 790ba46eaebb..4e935baa7b91 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -121,7 +121,6 @@ #define CURSOR_UPDATE_LOCK (1 << 16) #define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) -#define AMDGPU_NUM_OF_VMIDS 8 #define SI_CRTC0_REGISTER_OFFSET 0 #define SI_CRTC1_REGISTER_OFFSET 0x300 #define SI_CRTC2_REGISTER_OFFSET 0x2600 diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 88ae27a5a03d..621727d7fd18 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -27,6 +27,8 @@ #include "amdgpu_ih.h" #include "sid.h" #include "si_ih.h" +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 7cf12adb3915..9a39cbfe6db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -47,8 +47,7 @@ #define SI_MAX_LDS_NUM 0xFFFF #define SI_MAX_TCC 16 #define SI_MAX_TCC_MASK 0xFFFF - -#define AMDGPU_NUM_OF_VMIDS 8 +#define SI_MAX_CTLACKS_ASSERTION_WAIT 100 /* SMC IND accessor regs */ #define SMC_IND_INDEX_0 0x80 @@ -1646,9 +1645,10 @@ /* * PM4 */ -#define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ - (((reg) >> 2) & 0xFFFF) | \ - ((n) & 0x3FFF) << 16) +#define PACKET_TYPE0 0 +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + ((reg) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) #define CP_PACKET2 0x80000000 #define PACKET2_PAD_SHIFT 0 #define PACKET2_PAD_MASK (0x3fffffff << 0) @@ -2340,11 +2340,6 @@ # define NI_INPUT_GAMMA_XVYCC_222 3 # define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4) -#define IH_RB_WPTR__RB_OVERFLOW_MASK 0x1 -#define IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK 0x80000000 -#define SRBM_STATUS__IH_BUSY_MASK 0x20000 -#define SRBM_SOFT_RESET__SOFT_RESET_IH_MASK 0x400 - #define BLACKOUT_MODE_MASK 0x00000007 #define VGA_RENDER_CONTROL 0xC0 #define R_000300_VGA_RENDER_CONTROL 0xC0 @@ -2431,18 +2426,6 @@ #define MC_SEQ_MISC0__MT__HBM 0x60000000 #define MC_SEQ_MISC0__MT__DDR3 0xB0000000 -#define SRBM_STATUS__MCB_BUSY_MASK 0x200 -#define SRBM_STATUS__MCB_BUSY__SHIFT 0x9 -#define SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK 0x400 -#define SRBM_STATUS__MCB_NON_DISPLAY_BUSY__SHIFT 0xa -#define SRBM_STATUS__MCC_BUSY_MASK 0x800 -#define SRBM_STATUS__MCC_BUSY__SHIFT 0xb -#define SRBM_STATUS__MCD_BUSY_MASK 0x1000 -#define SRBM_STATUS__MCD_BUSY__SHIFT 0xc -#define SRBM_STATUS__VMC_BUSY_MASK 0x100 -#define SRBM_STATUS__VMC_BUSY__SHIFT 0x8 - - #define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000 #define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000 #define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000 @@ -2467,8 +2450,6 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) -#define CC_DRM_ID_STRAPS__ATI_REV_ID_MASK 0xf0000000 -#define CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT 0x1c #define PCIE_PORT_INDEX 0xe #define PCIE_PORT_DATA 0xf #define EVERGREEN_PIF_PHY0_INDEX 0x8 @@ -2478,4 +2459,36 @@ #define MC_VM_FB_OFFSET 0x81a +/* Discrete VCE clocks */ +#define CG_VCEPLL_FUNC_CNTL 0xc0030600 +#define VCEPLL_RESET_MASK 0x00000001 +#define VCEPLL_SLEEP_MASK 0x00000002 +#define VCEPLL_BYPASS_EN_MASK 0x00000004 +#define VCEPLL_CTLREQ_MASK 0x00000008 +#define VCEPLL_VCO_MODE_MASK 0x00000600 +#define VCEPLL_REF_DIV_MASK 0x003F0000 +#define VCEPLL_CTLACK_MASK 0x40000000 +#define VCEPLL_CTLACK2_MASK 0x80000000 + +#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601 +#define VCEPLL_PDIV_A(x) ((x) << 0) +#define VCEPLL_PDIV_A_MASK 0x0000007F +#define VCEPLL_PDIV_B(x) ((x) << 8) +#define VCEPLL_PDIV_B_MASK 0x00007F00 +#define EVCLK_SRC_SEL(x) ((x) << 20) +#define EVCLK_SRC_SEL_MASK 0x01F00000 +#define ECCLK_SRC_SEL(x) ((x) << 25) +#define ECCLK_SRC_SEL_MASK 0x3E000000 + +#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602 +#define VCEPLL_FB_DIV(x) ((x) << 0) +#define VCEPLL_FB_DIV_MASK 0x01FFFFFF + +#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603 + +#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604 +#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606 +#define VCEPLL_SSEN_MASK 0x00000001 + + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c new file mode 100644 index 000000000000..5ee69f70c49b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c @@ -0,0 +1,54 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "sienna_cichlid_ip_offset.h" + +int sienna_cichlid_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 9bffbab35041..d55bf64770c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -32,7 +32,6 @@ #include "amdgpu_amdkfd.h" #include <linux/i2c.h> #include <linux/pci.h> -#include "amdgpu_ras.h" /* error codes */ #define I2C_OK 0 @@ -537,12 +536,12 @@ Fail: return false; } -/***************************** EEPROM I2C GLUE ****************************/ +/***************************** I2C GLUE ****************************/ -static uint32_t smu_v11_0_i2c_eeprom_read_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) +static uint32_t smu_v11_0_i2c_read_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) { uint32_t ret = 0; @@ -562,10 +561,10 @@ Fail: return ret; } -static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) +static uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) { uint32_t ret; @@ -592,14 +591,13 @@ static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control, static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) { struct amdgpu_device *adev = to_amdgpu_device(i2c); - struct amdgpu_ras_eeprom_control *control = &adev->psp.ras.ras->eeprom_control; if (!smu_v11_0_i2c_bus_lock(i2c)) { DRM_ERROR("Failed to lock the bus from SMU"); return; } - control->bus_locked = true; + adev->pm.bus_locked = true; } static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) @@ -611,14 +609,13 @@ static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) { struct amdgpu_device *adev = to_amdgpu_device(i2c); - struct amdgpu_ras_eeprom_control *control = &adev->psp.ras.ras->eeprom_control; if (!smu_v11_0_i2c_bus_unlock(i2c)) { DRM_ERROR("Failed to unlock the bus from SMU"); return; } - control->bus_locked = false; + adev->pm.bus_locked = false; } static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { @@ -627,14 +624,13 @@ static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { .unlock_bus = unlock_bus, }; -static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, +static int smu_v11_0_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num) { int i, ret; struct amdgpu_device *adev = to_amdgpu_device(i2c_adap); - struct amdgpu_ras_eeprom_control *control = &adev->psp.ras.ras->eeprom_control; - if (!control->bus_locked) { + if (!adev->pm.bus_locked) { DRM_ERROR("I2C bus unlocked, stopping transaction!"); return -EIO; } @@ -643,13 +639,13 @@ static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, for (i = 0; i < num; i++) { if (msgs[i].flags & I2C_M_RD) - ret = smu_v11_0_i2c_eeprom_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - msgs[i].buf, msgs[i].len); + ret = smu_v11_0_i2c_read_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); else - ret = smu_v11_0_i2c_eeprom_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - msgs[i].buf, msgs[i].len); + ret = smu_v11_0_i2c_write_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); if (ret != I2C_OK) { num = -EIO; @@ -661,18 +657,18 @@ static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, return num; } -static u32 smu_v11_0_i2c_eeprom_i2c_func(struct i2c_adapter *adap) +static u32 smu_v11_0_i2c_func(struct i2c_adapter *adap) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } -static const struct i2c_algorithm smu_v11_0_i2c_eeprom_i2c_algo = { - .master_xfer = smu_v11_0_i2c_eeprom_i2c_xfer, - .functionality = smu_v11_0_i2c_eeprom_i2c_func, +static const struct i2c_algorithm smu_v11_0_i2c_algo = { + .master_xfer = smu_v11_0_i2c_xfer, + .functionality = smu_v11_0_i2c_func, }; -int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control) +int smu_v11_0_i2c_control_init(struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; @@ -680,8 +676,8 @@ int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control) control->owner = THIS_MODULE; control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; - control->algo = &smu_v11_0_i2c_eeprom_i2c_algo; - snprintf(control->name, sizeof(control->name), "AMDGPU EEPROM"); + control->algo = &smu_v11_0_i2c_algo; + snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; res = i2c_add_adapter(control); @@ -691,7 +687,7 @@ int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control) return res; } -void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control) +void smu_v11_0_i2c_control_fini(struct i2c_adapter *control) { i2c_del_adapter(control); } @@ -719,9 +715,9 @@ bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control) smu_v11_0_i2c_init(control); /* Write 0xde to address 0x0000 on the EEPROM */ - ret = smu_v11_0_i2c_eeprom_write_data(control, I2C_TARGET_ADDR, data, 6); + ret = smu_v11_0_i2c_write_data(control, I2C_TARGET_ADDR, data, 6); - ret = smu_v11_0_i2c_eeprom_read_data(control, I2C_TARGET_ADDR, data, 6); + ret = smu_v11_0_i2c_read_data(control, I2C_TARGET_ADDR, data, 6); smu_v11_0_i2c_fini(control); diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h index 973f28d68e70..44467c05f642 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h @@ -28,7 +28,7 @@ struct i2c_adapter; -int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control); -void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control); +int smu_v11_0_i2c_control_init(struct i2c_adapter *control); +void smu_v11_0_i2c_control_fini(struct i2c_adapter *control); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c7c9e07962b9..84d811b6e48b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -415,7 +415,8 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { en = &soc15_allowed_read_registers[i]; - if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + if (adev->reg_offset[en->hwip][en->inst] && + reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset)) continue; @@ -531,6 +532,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev) bool baco_reset = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_RAVEN: case CHIP_RENOIR: @@ -668,16 +678,29 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) return adev->nbio.funcs->get_rev_id(adev); } -int soc15_set_ip_blocks(struct amdgpu_device *adev) +static void soc15_reg_base_init(struct amdgpu_device *adev) { + int r; + /* Set IP register base before any HW register access */ switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: - case CHIP_RENOIR: vega10_reg_base_init(adev); break; + case CHIP_RENOIR: + /* It's safe to do ip discovery here for Renior, + * it doesn't support SRIOV. */ + if (amdgpu_discovery) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + DRM_WARN("failed to init reg base from ip discovery table, " + "fallback to legacy init method\n"); + vega10_reg_base_init(adev); + } + } + break; case CHIP_VEGA20: vega20_reg_base_init(adev); break; @@ -685,8 +708,26 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) arct_reg_base_init(adev); break; default: - return -EINVAL; + DRM_ERROR("Unsupported asic type: %d!\n", adev->asic_type); + break; } +} + +void soc15_set_virt_ops(struct amdgpu_device *adev) +{ + adev->virt.ops = &xgpu_ai_virt_ops; + + /* init soc15 reg base early enough so we can + * request request full access for sriov before + * set_ip_blocks. */ + soc15_reg_base_init(adev); +} + +int soc15_set_ip_blocks(struct amdgpu_device *adev) +{ + /* for bare metal case */ + if (!amdgpu_sriov_vf(adev)) + soc15_reg_base_init(adev); if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; @@ -710,9 +751,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->rev_id = soc15_get_rev_id(adev); - if (amdgpu_sriov_vf(adev)) - adev->virt.ops = &xgpu_ai_virt_ops; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index b03f950c486c..8f38f047265b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -90,6 +90,7 @@ struct soc15_ras_field_entry { void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); +void soc15_set_virt_ops(struct amdgpu_device *adev); int soc15_set_ip_blocks(struct amdgpu_device *adev); void soc15_program_register_sequence(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 56d02aa690a7..a5c00ab8b021 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -50,18 +50,19 @@ #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) -#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \ +#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \ +({ int ret = 0; \ do { \ - uint32_t old_ = 0; \ + uint32_t old_ = 0; \ uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ uint32_t loop = adev->usec_timeout; \ ret = 0; \ while ((tmp_ & (mask)) != (expected_value)) { \ if (old_ != tmp_) { \ loop = adev->usec_timeout; \ - old_ = tmp_; \ - } else \ - udelay(1); \ + old_ = tmp_; \ + } else \ + udelay(1); \ tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ loop--; \ if (!loop) { \ @@ -71,7 +72,9 @@ break; \ } \ } \ - } while (0) + } while (0); \ + ret; \ +}) #define WREG32_RLC(reg, value) \ do { \ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c new file mode 100644 index 000000000000..7cf4b11a65c5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -0,0 +1,793 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Sonny Jiang <sonny.jiang@amd.com> + */ + +#include <linux/firmware.h> + +#include "amdgpu.h" +#include "amdgpu_uvd.h" +#include "sid.h" + +#include "uvd/uvd_3_1_d.h" +#include "uvd/uvd_3_1_sh_mask.h" + +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" + +/** + * uvd_v3_1_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t uvd_v3_1_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32(mmUVD_RBC_RB_RPTR); +} + +/** + * uvd_v3_1_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t uvd_v3_1_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32(mmUVD_RBC_RB_WPTR); +} + +/** + * uvd_v3_1_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void uvd_v3_1_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); +} + +/** + * uvd_v3_1_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer + */ +static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); + amdgpu_ring_write(ring, ib->gpu_addr); + amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE, 0)); + amdgpu_ring_write(ring, ib->length_dw); +} + +/** + * uvd_v3_1_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +static void uvd_v3_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); + amdgpu_ring_write(ring, addr & 0xffffffff); + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); + amdgpu_ring_write(ring, 2); +} + +/** + * uvd_v3_1_ring_test_ring - register write test + * + * @ring: amdgpu_ring pointer + * + * Test if we can successfully write to the context register + */ +static int uvd_v3_1_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) + return r; + + amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(mmUVD_CONTEXT_ID); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + +static void uvd_v3_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); + amdgpu_ring_write(ring, 0); + } +} + +static const struct amdgpu_ring_funcs uvd_v3_1_ring_funcs = { + .type = AMDGPU_RING_TYPE_UVD, + .align_mask = 0xf, + .support_64bit_ptrs = false, + .no_user_fence = true, + .get_rptr = uvd_v3_1_ring_get_rptr, + .get_wptr = uvd_v3_1_ring_get_wptr, + .set_wptr = uvd_v3_1_ring_set_wptr, + .parse_cs = amdgpu_uvd_ring_parse_cs, + .emit_frame_size = + 14, /* uvd_v3_1_ring_emit_fence x1 no user fence */ + .emit_ib_size = 4, /* uvd_v3_1_ring_emit_ib */ + .emit_ib = uvd_v3_1_ring_emit_ib, + .emit_fence = uvd_v3_1_ring_emit_fence, + .test_ring = uvd_v3_1_ring_test_ring, + .test_ib = amdgpu_uvd_ring_test_ib, + .insert_nop = uvd_v3_1_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_uvd_ring_begin_use, + .end_use = amdgpu_uvd_ring_end_use, +}; + +static void uvd_v3_1_set_ring_funcs(struct amdgpu_device *adev) +{ + adev->uvd.inst->ring.funcs = &uvd_v3_1_ring_funcs; +} + +static void uvd_v3_1_set_dcm(struct amdgpu_device *adev, + bool sw_mode) +{ + u32 tmp, tmp2; + + WREG32_FIELD(UVD_CGC_GATE, REGS, 0); + + tmp = RREG32(mmUVD_CGC_CTRL); + tmp &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); + tmp |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK | + (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) | + (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT); + + if (sw_mode) { + tmp &= ~0x7ffff800; + tmp2 = UVD_CGC_CTRL2__DYN_OCLK_RAMP_EN_MASK | + UVD_CGC_CTRL2__DYN_RCLK_RAMP_EN_MASK | + (7 << UVD_CGC_CTRL2__GATER_DIV_ID__SHIFT); + } else { + tmp |= 0x7ffff800; + tmp2 = 0; + } + + WREG32(mmUVD_CGC_CTRL, tmp); + WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2); +} + +/** + * uvd_v3_1_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the UVD memory controller know it's offsets + */ +static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) +{ + uint64_t addr; + uint32_t size; + + /* programm the VCPU memory controller bits 0-27 */ + addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; + size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; + WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); + WREG32(mmUVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = AMDGPU_UVD_HEAP_SIZE >> 3; + WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr); + WREG32(mmUVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = (AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3; + WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr); + WREG32(mmUVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF; + WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF; + WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); +} + +/** + * uvd_v3_1_fw_validate - FW validation operation + * + * @adev: amdgpu_device pointer + * + * Initialate and check UVD validation. + */ +static int uvd_v3_1_fw_validate(struct amdgpu_device *adev) +{ + void *ptr; + uint32_t ucode_len, i; + uint32_t keysel; + + ptr = adev->uvd.inst[0].cpu_addr; + ptr += 192 + 16; + memcpy(&ucode_len, ptr, 4); + ptr += ucode_len; + memcpy(&keysel, ptr, 4); + + WREG32(mmUVD_FW_START, keysel); + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__DONE_MASK) + break; + } + + if (i == 10) + return -ETIMEDOUT; + + if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__PASS_MASK)) + return -EINVAL; + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__BUSY_MASK)) + break; + } + + if (i == 10) + return -ETIMEDOUT; + + return 0; +} + +/** + * uvd_v3_1_start - start UVD block + * + * @adev: amdgpu_device pointer + * + * Setup and start the UVD block + */ +static int uvd_v3_1_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->uvd.inst->ring; + uint32_t rb_bufsz; + int i, j, r; + u32 tmp; + /* disable byte swapping */ + u32 lmi_swap_cntl = 0; + u32 mp_swap_cntl = 0; + + /* set uvd busy */ + WREG32_P(mmUVD_STATUS, 1<<2, ~(1<<2)); + + uvd_v3_1_set_dcm(adev, true); + WREG32(mmUVD_CGC_GATE, 0); + + /* take UVD block out of reset */ + WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK); + mdelay(5); + + /* enable VCPU clock */ + WREG32(mmUVD_VCPU_CNTL, 1 << 9); + + /* disable interupt */ + WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1)); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; + mp_swap_cntl = 0; +#endif + WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl); + + /* initialize UVD memory controller */ + WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | + (1 << 21) | (1 << 9) | (1 << 20)); + + tmp = RREG32(mmUVD_MPC_CNTL); + WREG32(mmUVD_MPC_CNTL, tmp | 0x10); + + WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040); + WREG32(mmUVD_MPC_SET_MUXA1, 0x0); + WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040); + WREG32(mmUVD_MPC_SET_MUXB1, 0x0); + WREG32(mmUVD_MPC_SET_ALU, 0); + WREG32(mmUVD_MPC_SET_MUX, 0x88); + + tmp = RREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL); + WREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL, tmp & (~0x10)); + + /* enable UMC */ + WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8)); + + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK); + + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + mdelay(10); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); + WREG32_P(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("UVD not responding, giving up!!!\n"); + return r; + } + + /* enable interupt */ + WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1)); + + WREG32_P(mmUVD_STATUS, 0, ~(1<<2)); + + /* force RBC into idle state */ + WREG32(mmUVD_RBC_RB_CNTL, 0x11010101); + + /* Set the write pointer delay */ + WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* programm the 4GB memory segment for rptr and ring buffer */ + WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | + (0x7 << 16) | (0x1 << 31)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(mmUVD_RBC_RB_RPTR, 0x0); + + ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + + /* set the ring address */ + WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr); + + /* Set ring buffer size */ + rb_bufsz = order_base_2(ring->ring_size); + rb_bufsz = (0x1 << 8) | rb_bufsz; + WREG32_P(mmUVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f); + + return 0; +} + +/** + * uvd_v3_1_stop - stop UVD block + * + * @adev: amdgpu_device pointer + * + * stop the UVD block + */ +static void uvd_v3_1_stop(struct amdgpu_device *adev) +{ + uint32_t i, j; + uint32_t status; + + WREG32(mmUVD_RBC_RB_CNTL, 0x11010101); + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_STATUS); + if (status & 2) + break; + mdelay(1); + } + if (status & 2) + break; + } + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_LMI_STATUS); + if (status & 0xf) + break; + mdelay(1); + } + if (status & 0xf) + break; + } + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_LMI_STATUS); + if (status & 0x240) + break; + mdelay(1); + } + if (status & 0x240) + break; + } + + WREG32_P(0x3D49, 0, ~(1 << 2)); + + WREG32_P(mmUVD_VCPU_CNTL, 0, ~(1 << 9)); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + + WREG32(mmUVD_STATUS, 0); + + uvd_v3_1_set_dcm(adev, false); +} + +static int uvd_v3_1_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int uvd_v3_1_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: UVD TRAP\n"); + amdgpu_fence_process(&adev->uvd.inst->ring); + return 0; +} + + +static const struct amdgpu_irq_src_funcs uvd_v3_1_irq_funcs = { + .set = uvd_v3_1_set_interrupt_state, + .process = uvd_v3_1_process_interrupt, +}; + +static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->uvd.inst->irq.num_types = 1; + adev->uvd.inst->irq.funcs = &uvd_v3_1_irq_funcs; +} + + +static int uvd_v3_1_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->uvd.num_uvd_inst = 1; + + uvd_v3_1_set_ring_funcs(adev); + uvd_v3_1_set_irq_funcs(adev); + + return 0; +} + +static int uvd_v3_1_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + /* UVD TRAP */ + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); + if (r) + return r; + + r = amdgpu_uvd_sw_init(adev); + if (r) + return r; + + ring = &adev->uvd.inst->ring; + sprintf(ring->name, "uvd"); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT); + if (r) + return r; + + r = amdgpu_uvd_resume(adev); + if (r) + return r; + + r = amdgpu_uvd_entity_init(adev); + + return r; +} + +static int uvd_v3_1_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_uvd_suspend(adev); + if (r) + return r; + + return amdgpu_uvd_sw_fini(adev); +} + +static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev, + bool enable) +{ + u32 orig, data; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) { + data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL); + data |= 0x3fff; + WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data); + + orig = data = RREG32(mmUVD_CGC_CTRL); + data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + if (orig != data) + WREG32(mmUVD_CGC_CTRL, data); + } else { + data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL); + data &= ~0x3fff; + WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data); + + orig = data = RREG32(mmUVD_CGC_CTRL); + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + if (orig != data) + WREG32(mmUVD_CGC_CTRL, data); + } +} + +/** + * uvd_v3_1_hw_init - start and test UVD block + * + * @adev: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int uvd_v3_1_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; + uint32_t tmp; + int r; + + uvd_v3_1_mc_resume(adev); + + r = uvd_v3_1_fw_validate(adev); + if (r) { + DRM_ERROR("amdgpu: UVD Firmware validate fail (%d).\n", r); + return r; + } + + uvd_v3_1_enable_mgcg(adev, true); + amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); + + uvd_v3_1_start(adev); + + r = amdgpu_ring_test_helper(ring); + if (r) { + DRM_ERROR("amdgpu: UVD ring test fail (%d).\n", r); + goto done; + } + + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); + goto done; + } + + tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); + amdgpu_ring_write(ring, 3); + + amdgpu_ring_commit(ring); + +done: + if (!r) + DRM_INFO("UVD initialized successfully.\n"); + + return r; +} + +/** + * uvd_v3_1_hw_fini - stop the hardware block + * + * @adev: amdgpu_device pointer + * + * Stop the UVD block, mark ring as not ready any more + */ +static int uvd_v3_1_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (RREG32(mmUVD_STATUS) != 0) + uvd_v3_1_stop(adev); + + return 0; +} + +static int uvd_v3_1_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = uvd_v3_1_hw_fini(adev); + if (r) + return r; + + return amdgpu_uvd_suspend(adev); +} + +static int uvd_v3_1_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_uvd_resume(adev); + if (r) + return r; + + return uvd_v3_1_hw_init(adev); +} + +static bool uvd_v3_1_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); +} + +static int uvd_v3_1_wait_for_idle(void *handle) +{ + unsigned i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK)) + return 0; + } + return -ETIMEDOUT; +} + +static int uvd_v3_1_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + uvd_v3_1_stop(adev); + + WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK, + ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK); + mdelay(5); + + return uvd_v3_1_start(adev); +} + +static int uvd_v3_1_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int uvd_v3_1_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static const struct amd_ip_funcs uvd_v3_1_ip_funcs = { + .name = "uvd_v3_1", + .early_init = uvd_v3_1_early_init, + .late_init = NULL, + .sw_init = uvd_v3_1_sw_init, + .sw_fini = uvd_v3_1_sw_fini, + .hw_init = uvd_v3_1_hw_init, + .hw_fini = uvd_v3_1_hw_fini, + .suspend = uvd_v3_1_suspend, + .resume = uvd_v3_1_resume, + .is_idle = uvd_v3_1_is_idle, + .wait_for_idle = uvd_v3_1_wait_for_idle, + .soft_reset = uvd_v3_1_soft_reset, + .set_clockgating_state = uvd_v3_1_set_clockgating_state, + .set_powergating_state = uvd_v3_1_set_powergating_state, +}; + +const struct amdgpu_ip_block_version uvd_v3_1_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 3, + .minor = 1, + .rev = 0, + .funcs = &uvd_v3_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h new file mode 100644 index 000000000000..8c2f9b207574 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __UVD_V3_1_H__ +#define __UVD_V3_1_H__ + +extern const struct amdgpu_ip_block_version uvd_v3_1_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 7a55457e6f9e..e07e3fae99b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1375,7 +1375,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1417,7 +1417,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index a0fb119240f4..37fa163393fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -991,7 +991,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 1ad79155ed00..927c330fad21 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -38,6 +38,7 @@ #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" #include "jpeg_v1_0.h" +#include "vcn_v1_0.h" #define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab #define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1 @@ -360,68 +361,68 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0, 0xFFFFFFFF, 0); offset = 0; } else { - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); offset = size; - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); } - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0); /* cache window 1: stack */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, 0xFFFFFFFF, 0); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, 0xFFFFFFFF, 0); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG, adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); } @@ -636,9 +637,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s reg_data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; reg_data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; reg_data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); - WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); /* enable sw clock gating control */ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -667,22 +668,21 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__SCPU_MODE_MASK); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel); } static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) { uint32_t data = 0; - int ret; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT @@ -698,7 +698,7 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF); } else { data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT @@ -712,7 +712,7 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF); } /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS , UVDU_PWR_STATUS are 0 (power on) */ @@ -728,7 +728,6 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) { uint32_t data = 0; - int ret; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { /* Before power off, this indicator has to be turned on */ @@ -763,7 +762,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT); - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF); } } @@ -972,14 +971,14 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0); /* disable interupt */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN, 0, UVD_MASTINT_EN__VCPU_EN_MASK, 0); /* initialize VCN memory controller */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL, (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | @@ -993,48 +992,48 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) /* swap (8 in 32) RB and IB */ lmi_swap_cntl = 0xa; #endif - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_CNTL, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_CNTL, 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXA0, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXA0, ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXB0, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXB0, ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUX, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUX, ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0); vcn_v1_0_mc_resume_dpg_mode(adev); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0); /* boot up the VCPU */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0); /* enable UMC */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL2, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL2, 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0xFFFFFFFF, 0); /* enable master interrupt */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN, UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0); vcn_v1_0_clock_gating_dpg_mode(adev, 1); /* setup mmUVD_LMI_CTRL */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL, (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | @@ -1046,11 +1045,11 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) tmp = adev->gfx.config.gb_addr_config; /* setup VCN global tiling registers */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); /* enable System Interrupt for JRBC */ - WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SYS_INT_EN, + WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SYS_INT_EN, UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, 0xFFFFFFFF, 1); /* force RBC into idle state */ @@ -1112,15 +1111,15 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) */ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) { - int ret_code, tmp; + int tmp; - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | UVD_LMI_STATUS__READ_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp); /* put VCPU into reset */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), @@ -1129,7 +1128,7 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp); /* disable VCPU clock */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, @@ -1153,30 +1152,29 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev) { - int ret_code = 0; uint32_t tmp; /* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* wait for read ptr to be equal to write ptr */ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, @@ -1219,9 +1217,9 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, ret_code = 0; if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { /* pause DPG non-jpeg */ @@ -1229,7 +1227,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); /* Restore */ ring = &adev->vcn.inst->ring_enc[0]; @@ -1251,7 +1249,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } } else { /* unpause dpg non-jpeg, no need to wait */ @@ -1275,9 +1273,9 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, ret_code = 0; if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { /* Make sure JPRG Snoop is disabled before sending the pause */ @@ -1290,7 +1288,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); /* Restore */ ring = &adev->jpeg.inst->ring_dec; @@ -1312,7 +1310,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } } else { /* unpause dpg jpeg, no need to wait */ @@ -1335,10 +1333,10 @@ static bool vcn_v1_0_is_idle(void *handle) static int vcn_v1_0_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret = 0; + int ret; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, - UVD_STATUS__IDLE, ret); + ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); return ret; } @@ -1541,7 +1539,7 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1681,7 +1679,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 90ed773695ea..23a9eb5b2c8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -30,6 +30,7 @@ #include "amdgpu_pm.h" #include "amdgpu_psp.h" #include "mmsch_v2_0.h" +#include "vcn_v2_0.h" #include "vcn/vcn_2_0_0_offset.h" #include "vcn/vcn_2_0_0_sh_mask.h" @@ -382,91 +383,91 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } @@ -615,19 +616,19 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__SCPU_MODE_MASK); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } @@ -696,7 +697,6 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev) static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) { uint32_t data = 0; - int ret; if (amdgpu_sriov_vf(adev)) return; @@ -715,7 +715,7 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF, ret); + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF); } else { data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT @@ -728,7 +728,7 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT); WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF, ret); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF); } /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS, @@ -746,7 +746,6 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) { uint32_t data = 0; - int ret; if (amdgpu_sriov_vf(adev)) return; @@ -782,7 +781,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT); - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF, ret); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF); } } @@ -810,11 +809,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ @@ -826,28 +825,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | @@ -855,22 +854,22 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) vcn_v2_0_mc_resume_dpg_mode(adev, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* release VCPU reset to boot */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); /* enable LMI MC and UMC channels */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_CTRL2), 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect); /* enable master interrupt */ - WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); @@ -1098,25 +1097,24 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) { - int ret_code = 0; uint32_t tmp; /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* wait for read ptr to be equal to write ptr */ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, @@ -1138,7 +1136,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) } /* wait for uvd idle */ - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); if (r) return r; @@ -1146,7 +1144,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) UVD_LMI_STATUS__READ_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r); + r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp); if (r) return r; @@ -1157,7 +1155,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r); + r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp); if (r) return r; @@ -1208,9 +1206,8 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; @@ -1221,7 +1218,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, /* wait for ACK */ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); /* Stall DPG before WPTR/RPTR reset */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), @@ -1258,7 +1255,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } } else { /* unpause dpg, no need to wait */ @@ -1281,10 +1278,10 @@ static bool vcn_v2_0_is_idle(void *handle) static int vcn_v2_0_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret = 0; + int ret; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, - UVD_STATUS__IDLE, ret); + ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); return ret; } @@ -1508,7 +1505,7 @@ void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1663,7 +1660,8 @@ void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 3c6eafb62ee6..e99bef6e2354 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -451,91 +451,91 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); offset = size; - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0), AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } @@ -549,7 +549,6 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) { uint32_t data; - int ret = 0; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -589,7 +588,7 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); - SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF); data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK @@ -689,19 +688,19 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__MMSCH_MODE_MASK); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } @@ -792,11 +791,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ @@ -808,28 +807,28 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | @@ -837,26 +836,26 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* enable LMI MC and UMC channels */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); /* unblock VCPU register access */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* enable master interrupt */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); @@ -1302,25 +1301,24 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { - int ret_code = 0; uint32_t tmp; /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* wait for read ptr to be equal to write ptr */ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR); - SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2); - SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; - SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF); SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, @@ -1343,7 +1341,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) } /* wait for vcn idle */ - SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); if (r) return r; @@ -1351,7 +1349,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) UVD_LMI_STATUS__READ_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); if (r) return r; @@ -1362,7 +1360,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); if (r) return r; @@ -1412,8 +1410,8 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; @@ -1425,7 +1423,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, /* wait for ACK */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); /* Stall DPG before WPTR/RPTR reset */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), @@ -1458,13 +1456,13 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } } else { reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data); SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } @@ -1701,8 +1699,8 @@ static int vcn_v2_5_wait_for_idle(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, - UVD_STATUS__IDLE, ret); + ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); if (ret) return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c new file mode 100644 index 000000000000..63e5547cfb16 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -0,0 +1,1981 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "vcn_v2_0.h" +#include "mmsch_v3_0.h" + +#include "vcn/vcn_3_0_0_offset.h" +#include "vcn/vcn_3_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 +#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f +#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 +#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11 +#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29 +#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66 +#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d + +#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 +#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c + +#define VCN_INSTANCES_SIENNA_CICHLID 2 + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +static int amdgpu_ucode_id_vcns[] = { + AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1 +}; + +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); +static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v3_0_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); + +static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring); +static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v3_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v3_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) { + adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; + adev->vcn.harvest_config = 0; + adev->vcn.num_enc_rings = 1; + + } else { + if (adev->asic_type == CHIP_SIENNA_CICHLID) { + u32 harvest; + int i; + + adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->vcn.harvest_config |= 1 << i; + } + + if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | + AMDGPU_VCN_HARVEST_VCN1)) + /* both instances are harvested, disable the block */ + return -ENOENT; + } else + adev->vcn.num_vcn_inst = 1; + + adev->vcn.num_enc_rings = 2; + } + + vcn_v3_0_set_dec_ring_funcs(adev); + vcn_v3_0_set_enc_ring_funcs(adev); + vcn_v3_0_set_irq_funcs(adev); + + return 0; +} + +/** + * vcn_v3_0_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v3_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, j, r; + int vcn_doorbell_index = 0; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->vcn.num_vcn_inst == VCN_INSTANCES_SIENNA_CICHLID) { + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } + DRM_INFO("PSP loading VCN firmware\n"); + } + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + if (amdgpu_sriov_vf(adev)) { + vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1; + /* get DWORD offset */ + vcn_doorbell_index = vcn_doorbell_index << 1; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP); + + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[i].irq); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_dec; + ring->use_doorbell = true; + if (amdgpu_sriov_vf(adev)) { + ring->doorbell_index = vcn_doorbell_index; + /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ + vcn_doorbell_index++; + } else { + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; + } + if (i != 0) + ring->no_scheduler = true; + sprintf(ring->name, "vcn_dec_%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_DEFAULT); + if (r) + return r; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + /* VCN ENC TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + j + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->use_doorbell = true; + if (amdgpu_sriov_vf(adev)) { + ring->doorbell_index = vcn_doorbell_index; + /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ + vcn_doorbell_index++; + } else { + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; + } + if (i != 1) + ring->no_scheduler = true; + sprintf(ring->name, "vcn_enc_%d.%d", i, j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_DEFAULT); + if (r) + return r; + } + } + + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + + return 0; +} + +/** + * vcn_v3_0_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v3_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v3_0_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v3_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j, r; + + if (amdgpu_sriov_vf(adev)) { + r = vcn_v3_0_start_sriov(adev); + if (r) + goto done; + + /* initialize VCN dec and enc ring buffers */ + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_dec; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_dec_ring_set_wptr(ring); + ring->sched.ready = true; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_enc_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_dec; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + } + } + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v3_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v3_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_dec; + + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) { + vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } + ring->sched.ready = false; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->sched.ready = false; + } + } + + return 0; +} + +/** + * vcn_v3_0_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v3_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v3_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v3_0_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v3_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v3_0_hw_init(adev); + + return r; +} + +/** + * vcn_v3_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); +} + +static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); +} + +static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data = 0; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF); + } else { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT); + WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, 0, 0x3F3FFFFF); + } + + data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | + UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data); +} + +static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data); + + data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT); + WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data); + + data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIRL_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDATD_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, data, 0x3F3FFFFF); + } +} + +/** + * vcn_v3_0_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + /* VCN disable CGC */ + data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, inst, mmUVD_CGC_GATE, data); + + SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_CGC_GATE, 0, 0xFFFFFFFF); + + data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__IME_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK + | UVD_SUVD_CGC_GATE__EFC_MASK + | UVD_SUVD_CGC_GATE__SAOE_MASK + | 0x08000000 + | UVD_SUVD_CGC_GATE__FBC_PCLK_MASK + | UVD_SUVD_CGC_GATE__FBC_CCLK_MASK + | 0x40000000 + | UVD_SUVD_CGC_GATE__SMPA_MASK); + WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2); + data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK + | UVD_SUVD_CGC_GATE2__MPBE1_MASK + | 0x00000004 + | 0x00000008 + | UVD_SUVD_CGC_GATE2__MPC1_MASK); + WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data); + + data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK + | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK + | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK + | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK + | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK + | 0x00008000 + | 0x00010000 + | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK + | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK + | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK); + WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data); +} + +static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + +/** + * vcn_v3_0_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + /* enable VCN CGC */ + data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK + | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK + | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK + | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK + | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK + | 0x00008000 + | 0x00010000 + | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK + | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK + | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK); + WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data); +} + +static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable clock gating */ + vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interupt */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect); + + /* setup mmUVD_LMI_CTRL */ + tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + + /* enable LMI MC and UMC channels */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect); + + /* unblock VCPU register access */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + /* add nop to workaround PSP size check */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + if (indirect) + psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + + ring = &adev->vcn.inst[inst_idx].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0); + + ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + return 0; +} + +static int vcn_v3_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + int i, j, k, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ + r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + /* disable VCN power gating */ + vcn_v3_0_disable_static_power_gating(adev, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp); + + /*SW clock gating */ + vcn_v3_0_disable_clock_gating(adev, i); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v3_0_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0); + + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + } + + return 0; +} + +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) +{ + int i, j; + struct amdgpu_ring *ring; + uint64_t cache_addr; + uint64_t rb_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + uint32_t id; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + + struct mmsch_v3_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v3_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v3_0_cmd_direct_polling + direct_poll = { {0} }; + struct mmsch_v3_0_cmd_end end = { {0} }; + struct mmsch_v3_0_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + direct_poll.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_POLLING; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2; + for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + header.inst[i].init_status = 0; + header.inst[i].table_offset = 0; + header.inst[i].table_size = 0; + } + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + table_size = 0; + + MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + id = amdgpu_ucode_id_vcns[i]; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[id].tmr_mc_addr_lo); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[id].tmr_mc_addr_hi); + offset = 0; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET0), + 0); + } else { + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET1), + 0); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE1), + AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET2), + 0); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_VCN_CONTEXT_SIZE); + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->wptr = 0; + rb_addr = ring->gpu_addr; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_BASE_LO), + lower_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_BASE_HI), + upper_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_SIZE), + ring->ring_size / 4); + } + + ring = &adev->vcn.inst[i].ring_dec; + ring->wptr = 0; + rb_addr = ring->gpu_addr; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + lower_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + upper_32_bits(rb_addr)); + /* force RBC into idle state */ + tmp = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RBC_RB_CNTL), + tmp); + + /* add end packet */ + MMSCH_V3_0_INSERT_END(); + + /* refine header */ + header.inst[i].init_status = 1; + header.inst[i].table_offset = header.total_size; + header.inst[i].table_size = table_size; + header.total_size += table_size; + } + + /* Update init table header in memory */ + size = sizeof(struct mmsch_v3_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + /* message MMSCH (in VCN[0]) to initialize this client + * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr + * of memory descriptor location + */ + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + /* 2, update vmid of descriptor */ + tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + /* use domain0 for MM scheduler */ + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp); + + /* 3, notify mmsch about the size of this descriptor */ + size = header.total_size; + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size); + + /* 4, set resp to zero */ + WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0); + + /* 5, kick off the initialization and wait until + * MMSCH_VF_MAILBOX_RESP becomes non-zero + */ + param = 0x10000001; + WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = param + 1; + while (resp != expected) { + resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP); + if (resp == expected) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for mmMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + + return 0; +} + +static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF); + + tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF); + + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + +static int vcn_v3_0_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v3_0_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + + /* apply HW clock gating */ + vcn_v3_0_enable_clock_gating(adev, i); + + /* enable VCN power gating */ + vcn_v3_0_enable_static_power_gating(adev, i); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state) +{ + struct amdgpu_ring *ring; + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d -> %d", + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + /* Restore */ + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + + SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +/** + * vcn_v3_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v3_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v3_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = vcn_v3_0_dec_ring_get_rptr, + .get_wptr = vcn_v3_0_dec_ring_get_wptr, + .set_wptr = vcn_v3_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ + .emit_ib = vcn_v2_0_dec_ring_emit_ib, + .emit_fence = vcn_v2_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, + .test_ring = vcn_v2_0_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = vcn_v2_0_dec_ring_insert_nop, + .insert_start = vcn_v2_0_dec_ring_insert_start, + .insert_end = vcn_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v3_0_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v3_0_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR); + else + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2); +} + +/** + * vcn_v3_0_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR); + } else { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2); + } +} + +/** + * vcn_v3_0_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } + } else { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + } + } +} + +static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = vcn_v3_0_enc_ring_get_rptr, + .get_wptr = vcn_v3_0_enc_ring_get_wptr, + .set_wptr = vcn_v3_0_enc_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.me = i; + DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + } +} + +static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i, j; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[j].me = i; + } + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); + } +} + +static bool vcn_v3_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +static int vcn_v3_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +static int vcn_v3_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (enable) { + if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v3_0_enable_clock_gating(adev, i); + } else { + vcn_v3_0_disable_clock_gating(adev, i); + } + } + + return 0; +} + +static int vcn_v3_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + /* for SRIOV, guest should not control VCN Power-gating + * MMSCH FW should control Power-gating and clock-gating + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + + if(state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v3_0_stop(adev); + else + ret = vcn_v3_0_start(adev); + + if(!ret) + adev->vcn.cur_state = state; + + return ret; +} + +static int vcn_v3_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int vcn_v3_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); + break; + case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v3_0_irq_funcs = { + .set = vcn_v3_0_set_interrupt_state, + .process = vcn_v3_0_process_interrupt, +}; + +static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { + .name = "vcn_v3_0", + .early_init = vcn_v3_0_early_init, + .late_init = NULL, + .sw_init = vcn_v3_0_sw_init, + .sw_fini = vcn_v3_0_sw_fini, + .hw_init = vcn_v3_0_hw_init, + .hw_fini = vcn_v3_0_hw_fini, + .suspend = vcn_v3_0_suspend, + .resume = vcn_v3_0_resume, + .is_idle = vcn_v3_0_is_idle, + .wait_for_idle = vcn_v3_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v3_0_set_clockgating_state, + .set_powergating_state = vcn_v3_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v3_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &vcn_v3_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h new file mode 100644 index 000000000000..31683582d778 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V3_0_H__ +#define __VCN_V3_0_H__ + +extern const struct amdgpu_ip_block_version vcn_v3_0_ip_block; + +#endif /* __VCN_V3_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index af8986a55354..f6f2ed0830b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -710,6 +710,14 @@ vi_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; + if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: @@ -1705,11 +1713,13 @@ static const struct amdgpu_ip_block_version vi_common_ip_block = .funcs = &vi_common_ip_funcs, }; -int vi_set_ip_blocks(struct amdgpu_device *adev) +void vi_set_virt_ops(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev)) - adev->virt.ops = &xgpu_vi_virt_ops; + adev->virt.ops = &xgpu_vi_virt_ops; +} +int vi_set_ip_blocks(struct amdgpu_device *adev) +{ switch (adev->asic_type) { case CHIP_TOPAZ: /* topaz has no DCE, UVD, VCE */ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index defb4aaf929a..9718f98f8533 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -28,6 +28,7 @@ void vi_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); +void vi_set_virt_ops(struct amdgpu_device *adev); int vi_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 7a01e6133798..80ce42aacc0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -67,8 +67,6 @@ #define HPD4_REGISTER_OFFSET (0x18b8 - 0x1898) #define HPD5_REGISTER_OFFSET (0x18c0 - 0x1898) -#define AMDGPU_NUM_OF_VMIDS 8 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) |