diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 149 |
1 files changed, 115 insertions, 34 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index ba2b7ac0c02d..ec90c62078d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -45,10 +45,10 @@ #include "nbio_v2_3.h" #include "gfxhub_v2_0.h" +#include "gfxhub_v2_1.h" #include "mmhub_v2_0.h" #include "athub_v2_0.h" -/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/ -#define AMDGPU_NUM_OF_VMIDS 8 +#include "athub_v2_1.h" #if 0 static const struct soc15_reg_golden golden_settings_navi10_hdp[] = @@ -86,7 +86,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -95,7 +95,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -105,7 +105,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -114,7 +114,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -283,7 +283,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -293,18 +294,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ if (vmhub == AMDGPU_GFXHUB_0) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng); tmp &= 1 << vmid; if (tmp) break; @@ -318,7 +320,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); spin_unlock(&adev->gmc.invalidate_lock); @@ -348,6 +351,24 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->nbio.funcs->hdp_flush(adev, NULL); + /* For SRIOV run time, driver shouldn't access the register through MMIO + * Directly use kiq to do the vm invalidation instead + */ + if (adev->gfx.kiq.ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + const unsigned eng = 17; + u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; + + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid); + return; + } + mutex_lock(&adev->mman.gtt_window_lock); if (vmhub == AMDGPU_MMHUB_0) { @@ -484,16 +505,21 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, - hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), lower_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ @@ -502,7 +528,8 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, * add semaphore release after invalidation, * write with 0 means semaphore release */ - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); return pd_addr; } @@ -666,13 +693,28 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - base = gfxhub_v2_0_get_fb_location(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + base = gfxhub_v2_1_get_fb_location(adev); + else + base = gfxhub_v2_0_get_fb_location(adev); + + /* add the xgmi offset of the physical node */ + base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc); /* base offset of vram pages */ - adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev); + else + adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); + + /* add the xgmi offset of the physical node */ + adev->vm_manager.vram_base_offset += + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; } /** @@ -712,6 +754,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -780,24 +824,34 @@ static int gmc_v10_0_sw_init(void *handle) int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfxhub_v2_0_init(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + gfxhub_v2_1_init(adev); + else + gfxhub_v2_0_init(adev); + mmhub_v2_0_init(adev); spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - if (!amdgpu_emu_mode) - adev->gmc.vram_width = vram_width; - else + if (adev->asic_type == CHIP_SIENNA_CICHLID && amdgpu_emu_mode == 1) { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6; adev->gmc.vram_width = 1 * 128; /* numchan * chansize */ + } else { + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + adev->gmc.vram_width = vram_width; + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + } - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -836,6 +890,12 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + if (adev->gmc.xgmi.supported) { + r = gfxhub_v2_1_get_xgmi_info(adev); + if (r) + return r; + } + r = gmc_v10_0_mc_init(adev); if (r) return r; @@ -857,8 +917,7 @@ static int gmc_v10_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); @@ -896,6 +955,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: break; default: break; @@ -922,7 +983,11 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - r = gfxhub_v2_0_gart_enable(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + r = gfxhub_v2_1_gart_enable(adev); + else + r = gfxhub_v2_0_gart_enable(adev); if (r) return r; @@ -943,7 +1008,11 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; - gfxhub_v2_0_set_fault_enable_default(adev, value); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + gfxhub_v2_1_set_fault_enable_default(adev, value); + else + gfxhub_v2_0_set_fault_enable_default(adev, value); mmhub_v2_0_set_fault_enable_default(adev, value); gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); @@ -981,7 +1050,11 @@ static int gmc_v10_0_hw_init(void *handle) */ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) { - gfxhub_v2_0_gart_disable(adev); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + gfxhub_v2_1_gart_disable(adev); + else + gfxhub_v2_0_gart_disable(adev); mmhub_v2_0_gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } @@ -1052,7 +1125,11 @@ static int gmc_v10_0_set_clockgating_state(void *handle, if (r) return r; - return athub_v2_0_set_clockgating(adev, state); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + return athub_v2_1_set_clockgating(adev, state); + else + return athub_v2_0_set_clockgating(adev, state); } static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) @@ -1061,7 +1138,11 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) mmhub_v2_0_get_clockgating(adev, flags); - athub_v2_0_get_clockgating(adev, flags); + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) + athub_v2_1_get_clockgating(adev, flags); + else + athub_v2_0_get_clockgating(adev, flags); } static int gmc_v10_0_set_powergating_state(void *handle, |