diff options
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 635 |
1 files changed, 449 insertions, 186 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 02d116b01a1a..75294ec65257 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -14,6 +14,8 @@ #include <linux/genalloc.h> #include <linux/hwmon.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/iommu.h> +#include <linux/seq_file.h> /* * GOYA security scheme: @@ -89,6 +91,30 @@ #define GOYA_CB_POOL_CB_CNT 512 #define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */ +#define IS_QM_IDLE(engine, qm_glbl_sts0) \ + (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK) +#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0) +#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0) +#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0) + +#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \ + (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \ + engine##_CMDQ_IDLE_MASK) +#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \ + IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0) +#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \ + IS_CMDQ_IDLE(MME, cmdq_glbl_sts0) + +#define IS_DMA_IDLE(dma_core_sts0) \ + !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK) + +#define IS_TPC_IDLE(tpc_cfg_sts) \ + (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK) + +#define IS_MME_IDLE(mme_arch_sts) \ + (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) + + static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", "goya cq 4", "goya cpu eq" @@ -297,6 +323,11 @@ static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 }; +static int goya_mmu_clear_pgt_range(struct hl_device *hdev); +static int goya_mmu_set_dram_default_page(struct hl_device *hdev); +static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); +static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); + void goya_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -467,7 +498,7 @@ static int goya_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); - rc = hl_pci_init(hdev, 39); + rc = hl_pci_init(hdev, 48); if (rc) return rc; @@ -539,9 +570,36 @@ int goya_late_init(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; + goya_fetch_psoc_frequency(hdev); + + rc = goya_mmu_clear_pgt_range(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to clear MMU page tables range %d\n", rc); + return rc; + } + + rc = goya_mmu_set_dram_default_page(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc); + return rc; + } + + rc = goya_mmu_add_mappings_for_device_cpu(hdev); + if (rc) + return rc; + + rc = goya_init_cpu_queues(hdev); + if (rc) + return rc; + + rc = goya_test_cpu_queue(hdev); + if (rc) + return rc; + rc = goya_armcp_info_get(hdev); if (rc) { - dev_err(hdev->dev, "Failed to get armcp info\n"); + dev_err(hdev->dev, "Failed to get armcp info %d\n", rc); return rc; } @@ -553,33 +611,15 @@ int goya_late_init(struct hl_device *hdev) rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { - dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + dev_err(hdev->dev, + "Failed to enable PCI access from CPU %d\n", rc); return rc; } WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GOYA_ASYNC_EVENT_ID_INTS_REGISTER); - goya_fetch_psoc_frequency(hdev); - - rc = goya_mmu_clear_pgt_range(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); - goto disable_pci_access; - } - - rc = goya_mmu_set_dram_default_page(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to set DRAM default page\n"); - goto disable_pci_access; - } - return 0; - -disable_pci_access: - hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); - - return rc; } /* @@ -655,7 +695,10 @@ static int goya_sw_init(struct hl_device *hdev) goto free_dma_pool; } - hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1); + dev_dbg(hdev->dev, "cpu accessible memory at bus address 0x%llx\n", + hdev->cpu_accessible_dma_address); + + hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); if (!hdev->cpu_accessible_dma_pool) { dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); @@ -786,7 +829,6 @@ static void goya_init_dma_ch(struct hl_device *hdev, int dma_id) else sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007; - WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr)); WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr)); WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001); } @@ -973,9 +1015,9 @@ int goya_init_cpu_queues(struct hl_device *hdev) WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address)); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, - lower_32_bits(hdev->cpu_accessible_dma_address)); + lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, - upper_32_bits(hdev->cpu_accessible_dma_address)); + upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES); @@ -1001,7 +1043,7 @@ int goya_init_cpu_queues(struct hl_device *hdev) if (err) { dev_err(hdev->dev, - "Failed to communicate with ARM CPU (ArmCP timeout)\n"); + "Failed to setup communication with device CPU\n"); return -EIO; } @@ -2061,10 +2103,12 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) goya_disable_external_queues(hdev); goya_disable_internal_queues(hdev); - if (hard_reset) + if (hard_reset) { goya_disable_msix(hdev); - else + goya_mmu_remove_device_cpu_mappings(hdev); + } else { goya_sync_irqs(hdev); + } } /* @@ -2277,14 +2321,14 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) goya_read_device_fw_version(hdev, FW_COMP_UBOOT); goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); - if (status == CPU_BOOT_STATUS_SRAM_AVAIL) - goto out; - if (!hdev->fw_loading) { dev_info(hdev->dev, "Skip loading FW\n"); goto out; } + if (status == CPU_BOOT_STATUS_SRAM_AVAIL) + goto out; + rc = goya_push_linux_to_device(hdev); if (rc) return rc; @@ -2466,34 +2510,11 @@ static int goya_hw_init(struct hl_device *hdev) if (rc) goto disable_queues; - rc = goya_init_cpu_queues(hdev); - if (rc) { - dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", - rc); - goto disable_msix; - } - - /* - * Check if we managed to set the DMA mask to more then 32 bits. If so, - * let's try to increase it again because in Goya we set the initial - * dma mask to less then 39 bits so that the allocation of the memory - * area for the device's cpu will be under 39 bits - */ - if (hdev->dma_mask > 32) { - rc = hl_pci_set_dma_mask(hdev, 48); - if (rc) - goto disable_pci_access; - } - /* Perform read from the device to flush all MSI-X configuration */ val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); return 0; -disable_pci_access: - hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); -disable_msix: - goya_disable_msix(hdev); disable_queues: goya_disable_internal_queues(hdev); goya_disable_external_queues(hdev); @@ -2629,7 +2650,6 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) { u32 db_reg_offset, db_value; - bool invalid_queue = false; switch (hw_queue_id) { case GOYA_QUEUE_ID_DMA_0: @@ -2653,10 +2673,7 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) break; case GOYA_QUEUE_ID_CPU_PQ: - if (hdev->cpu_queues_enable) - db_reg_offset = mmCPU_IF_PF_PQ_PI; - else - invalid_queue = true; + db_reg_offset = mmCPU_IF_PF_PQ_PI; break; case GOYA_QUEUE_ID_MME: @@ -2696,12 +2713,8 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) break; default: - invalid_queue = true; - } - - if (invalid_queue) { /* Should never get here */ - dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", + dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n", hw_queue_id); return; } @@ -2808,7 +2821,6 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) dma_addr_t fence_dma_addr; struct hl_cb *cb; u32 tmp, timeout; - char buf[16] = {}; int rc; if (hdev->pldm) @@ -2816,10 +2828,9 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) else timeout = HL_DEVICE_TIMEOUT_USEC; - if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) { + if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) { dev_err_ratelimited(hdev->dev, - "Can't send KMD job on QMAN0 because %s is busy\n", - buf); + "Can't send KMD job on QMAN0 because the device is not idle\n"); return -EBUSY; } @@ -2831,16 +2842,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) return -ENOMEM; } - *fence_ptr = 0; - goya_qman0_set_security(hdev, true); - /* - * goya cs parser saves space for 2xpacket_msg_prot at end of CB. For - * synchronized kernel jobs we only need space for 1 packet_msg_prot - */ - job->job_cb_size -= sizeof(struct packet_msg_prot); - cb = job->patched_cb; fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address + @@ -2860,14 +2863,14 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) goto free_fence_ptr; } - rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr, timeout, - &tmp); + rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, + (tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout); hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0); - if ((rc) || (tmp != GOYA_QMAN0_FENCE_VAL)) { - dev_err(hdev->dev, "QMAN0 Job hasn't finished in time\n"); - rc = -ETIMEDOUT; + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); + goto free_fence_ptr; } free_fence_ptr: @@ -2941,20 +2944,19 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) goto free_pkt; } - rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr, - GOYA_TEST_QUEUE_WAIT_USEC, &tmp); + rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), + 1000, GOYA_TEST_QUEUE_WAIT_USEC); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); - if ((!rc) && (tmp == fence_val)) { - dev_info(hdev->dev, - "queue test on H/W queue %d succeeded\n", - hw_queue_id); - } else { + if (rc == -ETIMEDOUT) { dev_err(hdev->dev, "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", hw_queue_id, (unsigned long long) fence_dma_addr, tmp); - rc = -EINVAL; + rc = -EIO; + } else { + dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n", + hw_queue_id); } free_pkt: @@ -2990,12 +2992,6 @@ int goya_test_queues(struct hl_device *hdev) ret_val = -EINVAL; } - if (hdev->cpu_queues_enable) { - rc = goya_test_cpu_queue(hdev); - if (rc) - ret_val = -EINVAL; - } - return ret_val; } @@ -3028,7 +3024,13 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr, void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) { - return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); + void *vaddr; + + vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); + *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address + + VA_CPU_ACCESSIBLE_MEM_ADDR; + + return vaddr; } void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, @@ -3907,8 +3909,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) return goya_parse_cb_no_mmu(hdev, parser); } -void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr, - u32 cq_val, u32 msix_vec) +void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, + u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec) { struct packet_msg_prot *cq_pkt; u32 tmp; @@ -3939,6 +3941,11 @@ void goya_update_eq_ci(struct hl_device *hdev, u32 val) void goya_restore_phase_topology(struct hl_device *hdev) { + +} + +static void goya_clear_sm_regs(struct hl_device *hdev) +{ int i, num_of_sob_in_longs, num_of_mon_in_longs; num_of_sob_in_longs = @@ -3958,10 +3965,11 @@ void goya_restore_phase_topology(struct hl_device *hdev) } /* - * goya_debugfs_read32 - read a 32bit value from a given device address + * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped + * address. * * @hdev: pointer to hl_device structure - * @addr: address in device + * @addr: device or host mapped address * @val: returned value * * In case of DDR address that is not mapped into the default aperture that @@ -4002,6 +4010,10 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) } if (ddr_bar_addr == U64_MAX) rc = -EIO; + + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { rc = -EFAULT; } @@ -4010,10 +4022,11 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) } /* - * goya_debugfs_write32 - write a 32bit value to a given device address + * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped + * address. * * @hdev: pointer to hl_device structure - * @addr: address in device + * @addr: device or host mapped address * @val: returned value * * In case of DDR address that is not mapped into the default aperture that @@ -4054,6 +4067,10 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) } if (ddr_bar_addr == U64_MAX) rc = -EIO; + + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { rc = -EFAULT; } @@ -4086,6 +4103,47 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val) static const char *_goya_get_event_desc(u16 event_type) { switch (event_type) { + case GOYA_ASYNC_EVENT_ID_PCIE_IF: + return "PCIe_if"; + case GOYA_ASYNC_EVENT_ID_TPC0_ECC: + case GOYA_ASYNC_EVENT_ID_TPC1_ECC: + case GOYA_ASYNC_EVENT_ID_TPC2_ECC: + case GOYA_ASYNC_EVENT_ID_TPC3_ECC: + case GOYA_ASYNC_EVENT_ID_TPC4_ECC: + case GOYA_ASYNC_EVENT_ID_TPC5_ECC: + case GOYA_ASYNC_EVENT_ID_TPC6_ECC: + case GOYA_ASYNC_EVENT_ID_TPC7_ECC: + return "TPC%d_ecc"; + case GOYA_ASYNC_EVENT_ID_MME_ECC: + return "MME_ecc"; + case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT: + return "MME_ecc_ext"; + case GOYA_ASYNC_EVENT_ID_MMU_ECC: + return "MMU_ecc"; + case GOYA_ASYNC_EVENT_ID_DMA_MACRO: + return "DMA_macro"; + case GOYA_ASYNC_EVENT_ID_DMA_ECC: + return "DMA_ecc"; + case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC: + return "CPU_if_ecc"; + case GOYA_ASYNC_EVENT_ID_PSOC_MEM: + return "PSOC_mem"; + case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT: + return "PSOC_coresight"; + case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: + return "SRAM%d"; + case GOYA_ASYNC_EVENT_ID_GIC500: + return "GIC500"; + case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: + return "PLL%d"; + case GOYA_ASYNC_EVENT_ID_AXI_ECC: + return "AXI_ecc"; + case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: + return "L2_ram_ecc"; + case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: + return "PSOC_gpio_05_sw_reset"; + case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT: + return "PSOC_gpio_10_vrhot_icrit"; case GOYA_ASYNC_EVENT_ID_PCIE_DEC: return "PCIe_dec"; case GOYA_ASYNC_EVENT_ID_TPC0_DEC: @@ -4128,6 +4186,17 @@ static const char *_goya_get_event_desc(u16 event_type) return "DMA%d_qm"; case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: return "DMA%d_ch"; + case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: + return "TPC%d_bmon_spmu"; + case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: + return "DMA_bm_ch%d"; default: return "N/A"; } @@ -4138,6 +4207,25 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) u8 index; switch (event_type) { + case GOYA_ASYNC_EVENT_ID_TPC0_ECC: + case GOYA_ASYNC_EVENT_ID_TPC1_ECC: + case GOYA_ASYNC_EVENT_ID_TPC2_ECC: + case GOYA_ASYNC_EVENT_ID_TPC3_ECC: + case GOYA_ASYNC_EVENT_ID_TPC4_ECC: + case GOYA_ASYNC_EVENT_ID_TPC5_ECC: + case GOYA_ASYNC_EVENT_ID_TPC6_ECC: + case GOYA_ASYNC_EVENT_ID_TPC7_ECC: + index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3; + snprintf(desc, size, _goya_get_event_desc(event_type), index); + break; + case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: + index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0; + snprintf(desc, size, _goya_get_event_desc(event_type), index); + break; + case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: + index = event_type - GOYA_ASYNC_EVENT_ID_PLL0; + snprintf(desc, size, _goya_get_event_desc(event_type), index); + break; case GOYA_ASYNC_EVENT_ID_TPC0_DEC: case GOYA_ASYNC_EVENT_ID_TPC1_DEC: case GOYA_ASYNC_EVENT_ID_TPC2_DEC: @@ -4176,6 +4264,21 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH; snprintf(desc, size, _goya_get_event_desc(event_type), index); break; + case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: + case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: + index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10; + snprintf(desc, size, _goya_get_event_desc(event_type), index); + break; + case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: + index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0; + snprintf(desc, size, _goya_get_event_desc(event_type), index); + break; default: snprintf(desc, size, _goya_get_event_desc(event_type)); break; @@ -4226,7 +4329,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev) } } -static void goya_print_irq_info(struct hl_device *hdev, u16 event_type) +static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, + bool razwi) { char desc[20] = ""; @@ -4234,8 +4338,10 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type) dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", event_type, desc); - goya_print_razwi_info(hdev); - goya_print_mmu_error_info(hdev); + if (razwi) { + goya_print_razwi_info(hdev); + goya_print_mmu_error_info(hdev); + } } static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, @@ -4339,19 +4445,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT: case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: case GOYA_ASYNC_EVENT_ID_GIC500: - case GOYA_ASYNC_EVENT_ID_PLL0: - case GOYA_ASYNC_EVENT_ID_PLL1: - case GOYA_ASYNC_EVENT_ID_PLL3: - case GOYA_ASYNC_EVENT_ID_PLL4: - case GOYA_ASYNC_EVENT_ID_PLL5: - case GOYA_ASYNC_EVENT_ID_PLL6: + case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: case GOYA_ASYNC_EVENT_ID_AXI_ECC: case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT: - dev_err(hdev->dev, - "Received H/W interrupt %d, reset the chip\n", - event_type); + goya_print_irq_info(hdev, event_type, false); hl_device_reset(hdev, true, false); break; @@ -4382,7 +4481,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) case GOYA_ASYNC_EVENT_ID_MME_CMDQ: case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM: case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: - goya_print_irq_info(hdev, event_type); + goya_print_irq_info(hdev, event_type, true); goya_unmask_irq(hdev, event_type); break; @@ -4394,12 +4493,9 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: - case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0: - case GOYA_ASYNC_EVENT_ID_DMA_BM_CH1: - case GOYA_ASYNC_EVENT_ID_DMA_BM_CH2: - case GOYA_ASYNC_EVENT_ID_DMA_BM_CH3: - case GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: - dev_info(hdev->dev, "Received H/W interrupt %d\n", event_type); + case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: + goya_print_irq_info(hdev, event_type, false); + goya_unmask_irq(hdev, event_type); break; default: @@ -4418,36 +4514,47 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size) return goya->events_stat; } -static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size, +static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val, bool is_dram) { struct packet_lin_dma *lin_dma_pkt; struct hl_cs_job *job; u32 cb_size, ctl; struct hl_cb *cb; - int rc; + int rc, lin_dma_pkts_cnt; - cb = hl_cb_kernel_create(hdev, PAGE_SIZE); + lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G); + cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) + + sizeof(struct packet_msg_prot); + cb = hl_cb_kernel_create(hdev, cb_size); if (!cb) - return -EFAULT; + return -ENOMEM; lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address; - memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); - cb_size = sizeof(*lin_dma_pkt); - - ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) | - (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) | - (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) | - (1 << GOYA_PKT_CTL_RB_SHIFT) | - (1 << GOYA_PKT_CTL_MB_SHIFT)); - ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) << - GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; - lin_dma_pkt->ctl = cpu_to_le32(ctl); + do { + memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); + + ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) | + (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) | + (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) | + (1 << GOYA_PKT_CTL_RB_SHIFT) | + (1 << GOYA_PKT_CTL_MB_SHIFT)); + ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) << + GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; + lin_dma_pkt->ctl = cpu_to_le32(ctl); + + lin_dma_pkt->src_addr = cpu_to_le64(val); + lin_dma_pkt->dst_addr = cpu_to_le64(addr); + if (lin_dma_pkts_cnt > 1) + lin_dma_pkt->tsize = cpu_to_le32(SZ_2G); + else + lin_dma_pkt->tsize = cpu_to_le32(size); - lin_dma_pkt->src_addr = cpu_to_le64(val); - lin_dma_pkt->dst_addr = cpu_to_le64(addr); - lin_dma_pkt->tsize = cpu_to_le32(size); + size -= SZ_2G; + addr += SZ_2G; + lin_dma_pkt++; + } while (--lin_dma_pkts_cnt); job = hl_cs_allocate_job(hdev, true); if (!job) { @@ -4462,8 +4569,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size, job->user_cb_size = cb_size; job->hw_queue_id = GOYA_QUEUE_ID_DMA_0; job->patched_cb = job->user_cb; - job->job_cb_size = job->user_cb_size + - sizeof(struct packet_msg_prot) * 2; + job->job_cb_size = job->user_cb_size; hl_debugfs_add_job(hdev, job); @@ -4485,10 +4591,12 @@ release_cb: int goya_context_switch(struct hl_device *hdev, u32 asid) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 addr = prop->sram_base_address; + u64 addr = prop->sram_base_address, sob_addr; u32 size = hdev->pldm ? 0x10000 : prop->sram_size; u64 val = 0x7777777777777777ull; - int rc; + int rc, dma_id; + u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO - + mmDMA_CH_0_WR_COMP_ADDR_LO; rc = goya_memset_device_memory(hdev, addr, size, val, false); if (rc) { @@ -4496,13 +4604,27 @@ int goya_context_switch(struct hl_device *hdev, u32 asid) return rc; } + /* we need to reset registers that the user is allowed to change */ + sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007; + WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr)); + + for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) { + sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 + + (dma_id - 1) * 4; + WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id, + lower_32_bits(sob_addr)); + } + WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020); + goya_mmu_prepare(hdev, asid); + goya_clear_sm_regs(hdev); + return 0; } -int goya_mmu_clear_pgt_range(struct hl_device *hdev) +static int goya_mmu_clear_pgt_range(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct goya_device *goya = hdev->asic_specific; @@ -4516,7 +4638,7 @@ int goya_mmu_clear_pgt_range(struct hl_device *hdev) return goya_memset_device_memory(hdev, addr, size, 0, true); } -int goya_mmu_set_dram_default_page(struct hl_device *hdev) +static int goya_mmu_set_dram_default_page(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; u64 addr = hdev->asic_prop.mmu_dram_default_page_addr; @@ -4529,7 +4651,123 @@ int goya_mmu_set_dram_default_page(struct hl_device *hdev) return goya_memset_device_memory(hdev, addr, size, val, true); } -void goya_mmu_prepare(struct hl_device *hdev, u32 asid) +static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct goya_device *goya = hdev->asic_specific; + s64 off, cpu_off; + int rc; + + if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + return 0; + + for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) { + rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off, + prop->dram_base_address + off, PAGE_SIZE_2MB); + if (rc) { + dev_err(hdev->dev, "Map failed for address 0x%llx\n", + prop->dram_base_address + off); + goto unmap; + } + } + + if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { + rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, + hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB); + + if (rc) { + dev_err(hdev->dev, + "Map failed for CPU accessible memory\n"); + off -= PAGE_SIZE_2MB; + goto unmap; + } + } else { + for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) { + rc = hl_mmu_map(hdev->kernel_ctx, + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, + hdev->cpu_accessible_dma_address + cpu_off, + PAGE_SIZE_4KB); + if (rc) { + dev_err(hdev->dev, + "Map failed for CPU accessible memory\n"); + cpu_off -= PAGE_SIZE_4KB; + goto unmap_cpu; + } + } + } + + goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID); + goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID); + WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF); + WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF); + + /* Make sure configuration is flushed to device */ + RREG32(mmCPU_IF_AWUSER_OVR_EN); + + goya->device_cpu_mmu_mappings_done = true; + + return 0; + +unmap_cpu: + for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB) + if (hl_mmu_unmap(hdev->kernel_ctx, + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, + PAGE_SIZE_4KB)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap address 0x%llx\n", + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); +unmap: + for (; off >= 0 ; off -= PAGE_SIZE_2MB) + if (hl_mmu_unmap(hdev->kernel_ctx, + prop->dram_base_address + off, PAGE_SIZE_2MB)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap address 0x%llx\n", + prop->dram_base_address + off); + + return rc; +} + +void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct goya_device *goya = hdev->asic_specific; + u32 off, cpu_off; + + if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + return; + + if (!goya->device_cpu_mmu_mappings_done) + return; + + WREG32(mmCPU_IF_ARUSER_OVR_EN, 0); + WREG32(mmCPU_IF_AWUSER_OVR_EN, 0); + + if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { + if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, + PAGE_SIZE_2MB)) + dev_warn(hdev->dev, + "Failed to unmap CPU accessible memory\n"); + } else { + for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) + if (hl_mmu_unmap(hdev->kernel_ctx, + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, + PAGE_SIZE_4KB)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap address 0x%llx\n", + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); + } + + for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) + if (hl_mmu_unmap(hdev->kernel_ctx, + prop->dram_base_address + off, PAGE_SIZE_2MB)) + dev_warn_ratelimited(hdev->dev, + "Failed to unmap address 0x%llx\n", + prop->dram_base_address + off); + + goya->device_cpu_mmu_mappings_done = false; +} + +static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) { struct goya_device *goya = hdev->asic_specific; int i; @@ -4676,57 +4914,82 @@ int goya_armcp_info_get(struct hl_device *hdev) return 0; } -static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size) +static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, + struct seq_file *s) { - u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg; + const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n"; + const char *dma_fmt = "%-5d%-9s%#-14x%#x\n"; + u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts, + mme_arch_sts; + bool is_idle = true, is_eng_idle; + u64 offset; int i; + if (s) + seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n" + "--- ------- ------------ -------------\n"); + offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0; for (i = 0 ; i < DMA_MAX_NUM ; i++) { - dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset; + qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset); + dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset); + is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) && + IS_DMA_IDLE(dma_core_sts0); + is_idle &= is_eng_idle; - if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) != - DMA_QM_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "DMA%d_QM", i); + if (mask) + *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i); + if (s) + seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N", + qm_glbl_sts0, dma_core_sts0); } + if (s) + seq_puts(s, + "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n" + "--- ------- ------------ -------------- ----------\n"); + offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0; for (i = 0 ; i < TPC_MAX_NUM ; i++) { - tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset; - tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset; - tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset; - - if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) != - TPC_QM_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "TPC%d_QM", i); - - if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) != - TPC_CMDQ_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i); - - if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) != - TPC_CFG_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i); - } - - if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) != - MME_QM_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "MME_QM"); - - if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) != - MME_CMDQ_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "MME_CMDQ"); - - if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) != - MME_ARCH_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "MME_ARCH"); - - if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK) - return HL_ENG_BUSY(buf, size, "MME"); - - return true; + qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset); + cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset); + tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset); + is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) && + IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) && + IS_TPC_IDLE(tpc_cfg_sts); + is_idle &= is_eng_idle; + + if (mask) + *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i); + if (s) + seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N", + qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts); + } + + if (s) + seq_puts(s, + "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n" + "--- ------- ------------ -------------- -----------\n"); + + qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0); + cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0); + mme_arch_sts = RREG32(mmMME_ARCH_STATUS); + is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) && + IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) && + IS_MME_IDLE(mme_arch_sts); + is_idle &= is_eng_idle; + + if (mask) + *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0; + if (s) { + seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, + cmdq_glbl_sts0, mme_arch_sts); + seq_puts(s, "\n"); + } + + return is_idle; } static void goya_hw_queues_lock(struct hl_device *hdev) |