diff options
author | Maxime Ripard <maxime@cerno.tech> | 2021-05-11 13:35:52 +0200 |
---|---|---|
committer | Maxime Ripard <maxime@cerno.tech> | 2021-05-11 13:35:52 +0200 |
commit | c55b44c9386f3ee1b08752638559f19deaf6040d (patch) | |
tree | c843a21f45180387fcd9eb2625cc9d1f166a3156 /drivers/misc/habanalabs/gaudi/gaudi.c | |
parent | MAINTAINERS: Update my e-mail (diff) | |
parent | Linux 5.13-rc1 (diff) | |
download | linux-c55b44c9386f3ee1b08752638559f19deaf6040d.tar.xz linux-c55b44c9386f3ee1b08752638559f19deaf6040d.zip |
Merge drm/drm-fixes into drm-misc-fixes
Start this new release drm-misc-fixes branch
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 357 |
1 files changed, 314 insertions, 43 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 9152242778f5..b751652f80a8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -105,6 +105,36 @@ #define GAUDI_PLL_MAX 10 +/* + * this enum kept here for compatibility with old FW (in which each asic has + * unique PLL numbering + */ +enum gaudi_pll_index { + GAUDI_CPU_PLL = 0, + GAUDI_PCI_PLL, + GAUDI_SRAM_PLL, + GAUDI_HBM_PLL, + GAUDI_NIC_PLL, + GAUDI_DMA_PLL, + GAUDI_MESH_PLL, + GAUDI_MME_PLL, + GAUDI_TPC_PLL, + GAUDI_IF_PLL, +}; + +static enum pll_index gaudi_pll_map[PLL_MAX] = { + [CPU_PLL] = GAUDI_CPU_PLL, + [PCI_PLL] = GAUDI_PCI_PLL, + [SRAM_PLL] = GAUDI_SRAM_PLL, + [HBM_PLL] = GAUDI_HBM_PLL, + [NIC_PLL] = GAUDI_NIC_PLL, + [DMA_PLL] = GAUDI_DMA_PLL, + [MESH_PLL] = GAUDI_MESH_PLL, + [MME_PLL] = GAUDI_MME_PLL, + [TPC_PLL] = GAUDI_TPC_PLL, + [IF_PLL] = GAUDI_IF_PLL, +}; + static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -396,6 +426,19 @@ get_collective_mode(struct hl_device *hdev, u32 queue_id) return HL_COLLECTIVE_NOT_SUPPORTED; } +static inline void set_default_power_values(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (hdev->card_type == cpucp_card_type_pmc) { + prop->max_power_default = MAX_POWER_DEFAULT_PMC; + prop->dc_power_default = DC_POWER_DEFAULT_PMC; + } else { + prop->max_power_default = MAX_POWER_DEFAULT_PCI; + prop->dc_power_default = DC_POWER_DEFAULT_PCI; + } +} + static int gaudi_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -507,7 +550,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->num_of_events = GAUDI_EVENT_SIZE; prop->tpc_enabled_mask = TPC_ENABLED_MASK; - prop->max_power_default = MAX_POWER_DEFAULT_PCI; + set_default_power_values(hdev); prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; @@ -532,8 +575,6 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < HL_MAX_DCORES ; i++) prop->first_available_cq[i] = USHRT_MAX; - /* disable fw security for now, set it in a later stage */ - prop->fw_security_disabled = true; prop->fw_security_status_valid = false; prop->hard_reset_done_by_fw = false; @@ -588,6 +629,11 @@ static int gaudi_init_iatu(struct hl_device *hdev) struct hl_outbound_pci_region outbound_region; int rc; + if (hdev->asic_prop.iatu_done_by_fw) { + hdev->asic_funcs->set_dma_mask_from_fw(hdev); + return 0; + } + /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ inbound_region.mode = PCI_BAR_MATCH_MODE; inbound_region.bar = SRAM_BAR_ID; @@ -632,6 +678,7 @@ static int gaudi_early_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct pci_dev *pdev = hdev->pdev; + u32 fw_boot_status; int rc; rc = gaudi_get_fixed_properties(hdev); @@ -665,6 +712,23 @@ static int gaudi_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); + /* If FW security is enabled at this point it means no access to ELBI */ + if (!hdev->asic_prop.fw_security_disabled) { + hdev->asic_prop.iatu_done_by_fw = true; + goto pci_init; + } + + rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, + &fw_boot_status); + if (rc) + goto free_queue_props; + + /* Check whether FW is configuring iATU */ + if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && + (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) + hdev->asic_prop.iatu_done_by_fw = true; + +pci_init: rc = hl_pci_init(hdev); if (rc) goto free_queue_props; @@ -1588,6 +1652,9 @@ static int gaudi_sw_init(struct hl_device *hdev) hdev->asic_specific = gaudi; + /* store legacy PLL map */ + hdev->legacy_pll_map = gaudi_pll_map; + /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); @@ -1766,8 +1833,7 @@ static int gaudi_enable_msi(struct hl_device *hdev) if (gaudi->hw_cap_initialized & HW_CAP_MSI) return 0; - rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES, - PCI_IRQ_MSI); + rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); if (rc < 0) { dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); return rc; @@ -3701,7 +3767,7 @@ static int gaudi_init_cpu(struct hl_device *hdev) struct gaudi_device *gaudi = hdev->asic_specific; int rc; - if (!hdev->cpu_enable) + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) return 0; if (gaudi->hw_cap_initialized & HW_CAP_CPU) @@ -4873,7 +4939,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, parser->job_userptr_list, &userptr)) goto already_pinned; - userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); + userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); if (!userptr) return -ENOMEM; @@ -5684,18 +5750,26 @@ release_cb: static int gaudi_schedule_register_memset(struct hl_device *hdev, u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val) { - struct hl_ctx *ctx = hdev->compute_ctx; + struct hl_ctx *ctx; struct hl_pending_cb *pending_cb; struct packet_msg_long *pkt; u32 cb_size, ctl; struct hl_cb *cb; - int i; + int i, rc; + + mutex_lock(&hdev->fpriv_list_lock); + ctx = hdev->compute_ctx; /* If no compute context available or context is going down * memset registers directly */ - if (!ctx || kref_read(&ctx->refcount) == 0) - return gaudi_memset_registers(hdev, reg_base, num_regs, val); + if (!ctx || kref_read(&ctx->refcount) == 0) { + rc = gaudi_memset_registers(hdev, reg_base, num_regs, val); + mutex_unlock(&hdev->fpriv_list_lock); + return rc; + } + + mutex_unlock(&hdev->fpriv_list_lock); cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot) * 2; @@ -5911,13 +5985,16 @@ static void gaudi_restore_phase_topology(struct hl_device *hdev) } -static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) +static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, + bool user_address, u32 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -5949,6 +6026,9 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); } else { rc = -EFAULT; } @@ -5956,13 +6036,16 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) return rc; } -static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) +static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, + bool user_address, u32 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -5994,6 +6077,9 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; } else { rc = -EFAULT; } @@ -6001,13 +6087,16 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) return rc; } -static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) +static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, + bool user_address, u64 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -6043,6 +6132,9 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); } else { rc = -EFAULT; } @@ -6050,13 +6142,16 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) return rc; } -static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) +static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, + bool user_address, u64 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -6091,6 +6186,9 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; } else { rc = -EFAULT; } @@ -6098,6 +6196,164 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) return rc; } +static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, + u32 size_to_dma, dma_addr_t dma_addr) +{ + u32 err_cause, val; + u64 dma_offset; + int rc; + + dma_offset = dma_id * DMA_CORE_OFFSET; + + WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); + WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); + WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); + WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); + WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); + WREG32(mmDMA0_CORE_COMMIT + dma_offset, + (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); + + rc = hl_poll_timeout( + hdev, + mmDMA0_CORE_STS0 + dma_offset, + val, + ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), + 0, + 1000000); + + if (rc) { + dev_err(hdev->dev, + "DMA %d timed-out during reading of 0x%llx\n", + dma_id, addr); + return -EIO; + } + + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); + if (err_cause) { + dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); + + return -EIO; + } + + return 0; +} + +static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr) +{ + u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 dma_offset, qm_offset; + dma_addr_t dma_addr; + void *kernel_addr; + bool is_eng_idle; + int rc = 0, dma_id; + + kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( + hdev, SZ_2M, + &dma_addr, + GFP_KERNEL | __GFP_ZERO); + + if (!kernel_addr) + return -ENOMEM; + + mutex_lock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->disable_clock_gating(hdev); + + hdev->asic_funcs->hw_queues_lock(hdev); + + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; + dma_offset = dma_id * DMA_CORE_OFFSET; + qm_offset = dma_id * DMA_QMAN_OFFSET; + dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); + is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + + if (!is_eng_idle) { + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; + dma_offset = dma_id * DMA_CORE_OFFSET; + qm_offset = dma_id * DMA_QMAN_OFFSET; + dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); + is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + + if (!is_eng_idle) { + dev_err_ratelimited(hdev->dev, + "Can't read via DMA because it is BUSY\n"); + rc = -EAGAIN; + goto out; + } + } + + cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); + WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, + 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + + /* TODO: remove this by mapping the DMA temporary buffer to the MMU + * using the compute ctx ASID, if exists. If not, use the kernel ctx + * ASID + */ + WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); + if (err_cause) { + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); + } + + pos = 0; + size_left = size; + size_to_dma = SZ_2M; + + while (size_left > 0) { + + if (size_left < SZ_2M) + size_to_dma = size_left; + + rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, + dma_addr); + if (rc) + break; + + memcpy(blob_addr + pos, kernel_addr, size_to_dma); + + if (size_left <= SZ_2M) + break; + + pos += SZ_2M; + addr += SZ_2M; + size_left -= SZ_2M; + } + + /* TODO: remove this by mapping the DMA temporary buffer to the MMU + * using the compute ctx ASID, if exists. If not, use the kernel ctx + * ASID + */ + WREG32_AND(mmDMA0_CORE_PROT + dma_offset, + ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); + +out: + hdev->asic_funcs->hw_queues_unlock(hdev); + + hdev->asic_funcs->set_clock_gating(hdev); + + mutex_unlock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, + dma_addr); + + return rc; +} + static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -6851,7 +7107,8 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, } /* Write 1 clear errors */ - WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); + if (!hdev->stop_on_err) + WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); } arb_err_val = RREG32(arb_err_addr); @@ -7097,6 +7354,15 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, } } +static void gaudi_print_out_of_sync_info(struct hl_device *hdev, + struct cpucp_pkt_sync_err *sync_err) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; + + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", + sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); +} + static int gaudi_soft_reset_late_init(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -7371,18 +7637,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_MMU_DERR: gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_GIC500: case GAUDI_EVENT_AXI_ECC: case GAUDI_EVENT_L2_RAM_ECC: case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: gaudi_print_irq_info(hdev, event_type, false); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_HBM0_SPI_0: case GAUDI_EVENT_HBM1_SPI_0: @@ -7392,9 +7654,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, gaudi_hbm_read_interrupts(hdev, gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_HBM0_SPI_1: case GAUDI_EVENT_HBM1_SPI_1: @@ -7423,8 +7683,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, dev_err(hdev->dev, "hard reset required due to %s\n", gaudi_irq_map_table[event_type].name); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); + goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); } @@ -7446,8 +7705,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, dev_err(hdev->dev, "hard reset required due to %s\n", gaudi_irq_map_table[event_type].name); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); + goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); } @@ -7516,9 +7774,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_RAZWI_OR_ADC_SW: gaudi_print_irq_info(hdev, event_type, true); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_TPC0_BMON_SPMU: case GAUDI_EVENT_TPC1_BMON_SPMU: @@ -7552,11 +7808,28 @@ static void gaudi_handle_eqe(struct hl_device *hdev, event_type, cause); break; + case GAUDI_EVENT_DEV_RESET_REQ: + gaudi_print_irq_info(hdev, event_type, false); + goto reset_device; + + case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: + gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + goto reset_device; + default: dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", event_type); break; } + + return; + +reset_device: + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, HL_RESET_HARD); + else + hl_fw_unmask_irq(hdev, event_type); } static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, @@ -7607,7 +7880,7 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, if (rc) { dev_err_ratelimited(hdev->dev, "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); } return rc; @@ -7656,7 +7929,7 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, if (rc) { dev_err_ratelimited(hdev->dev, "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); } return rc; @@ -7714,7 +7987,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); + rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); if (rc) return rc; @@ -7724,10 +7997,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); - if (hdev->card_type == cpucp_card_type_pci) - prop->max_power_default = MAX_POWER_DEFAULT_PCI; - else if (hdev->card_type == cpucp_card_type_pmc) - prop->max_power_default = MAX_POWER_DEFAULT_PMC; + set_default_power_values(hdev); hdev->max_power = prop->max_power_default; @@ -8549,6 +8819,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .debugfs_write32 = gaudi_debugfs_write32, .debugfs_read64 = gaudi_debugfs_read64, .debugfs_write64 = gaudi_debugfs_write64, + .debugfs_read_dma = gaudi_debugfs_read_dma, .add_device_attr = gaudi_add_device_attr, .handle_eqe = gaudi_handle_eqe, .set_pll_profile = gaudi_set_pll_profile, |