diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2021-02-09 09:33:51 +0100 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2021-02-09 09:33:51 +0100 |
commit | e3e3eaab2b486294501ae1cf3f56e22f2e992afa (patch) | |
tree | 2b3c15780cdabaff50e5652919a28324b8a32771 /drivers/misc/habanalabs/gaudi | |
parent | Merge tag 'phy-for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/phy... (diff) | |
parent | habanalabs/gaudi: don't enable clock gating on DMA5 (diff) | |
download | linux-e3e3eaab2b486294501ae1cf3f56e22f2e992afa.tar.xz linux-e3e3eaab2b486294501ae1cf3f56e22f2e992afa.zip |
Merge tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next
Oded writes:
This tag contains the following changes for 5.12-rc1:
- Improve communication protocol with device CPU-CP application.
The change prevents random (rare) out-of-sync errors.
- Notify F/W to start sending events only after initialization of
the device is done. This fixes the issue where fatal events were received
but ignored.
- Fix integer handling (static analysis warning).
- Always fetch HBM ECC errors from F/W (if available).
- Minor fix in GAUDI-specific initialization code.
* tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
habanalabs/gaudi: don't enable clock gating on DMA5
habanalabs: return block size + block ID
habanalabs: update security map after init CPU Qs
habanalabs: enable F/W events after init done
habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR
habanalabs: support fetching first available user CQ
habanalabs: improve communication protocol with cpucp
habanalabs: fix integer handling issue
Diffstat (limited to 'drivers/misc/habanalabs/gaudi')
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 49 |
1 files changed, 41 insertions, 8 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 52fcaf25531a..9152242778f5 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -529,6 +529,9 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->first_available_user_msix_interrupt = USHRT_MAX; + for (i = 0 ; i < HL_MAX_DCORES ; i++) + prop->first_available_cq[i] = USHRT_MAX; + /* disable fw security for now, set it in a later stage */ prop->fw_security_disabled = true; prop->fw_security_status_valid = false; @@ -1379,8 +1382,6 @@ static int gaudi_late_init(struct hl_device *hdev) return rc; } - WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); - rc = gaudi_fetch_psoc_frequency(hdev); if (rc) { dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); @@ -3459,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device *hdev) enable = !!(hdev->clock_gating_mask & (BIT_ULL(gaudi_dma_assignment[i]))); + /* GC sends work to DMA engine through Upper CP in DMA5 so + * we need to not enable clock gating in that DMA + */ + if (i == GAUDI_HBM_DMA_4) + enable = 0; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, enable ? 
QMAN_CGM1_PWR_GATE_EN : 0); @@ -3725,6 +3732,7 @@ static int gaudi_init_cpu(struct hl_device *hdev) static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) { struct gaudi_device *gaudi = hdev->asic_specific; + struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_eq *eq; u32 status; struct hl_hw_queue *cpu_pq = @@ -3781,6 +3789,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) return -EIO; } + /* update FW application security bits */ + if (prop->fw_security_status_valid) + prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0); + gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; return 0; } @@ -4438,9 +4450,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) /* ring the doorbell */ WREG32(db_reg_offset, db_value); - if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) + if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { + /* make sure device CPU will read latest data from host */ + mb(); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE); + } } static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, @@ -7098,7 +7113,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; int err = 0; - if (!hdev->asic_prop.fw_security_disabled) { + if (hdev->asic_prop.fw_security_status_valid && + (hdev->asic_prop.fw_app_security_map & + CPU_BOOT_DEV_STS0_HBM_ECC_EN)) { if (!hbm_ecc_data) { dev_err(hdev->dev, "No FW ECC data"); return 0; @@ -7120,14 +7137,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); dev_err(hdev->dev, - "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", - device, ch, type, wr_par, rd_par, ca_par, serr, derr); + "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", + device, ch, wr_par, rd_par, ca_par, serr, derr); + dev_err(hdev->dev, + "HBM%d pc%d 
ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", + device, ch, hbm_ecc_data->first_addr, type, + hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, + hbm_ecc_data->dec_cnt); err = 1; return 0; } + if (!hdev->asic_prop.fw_security_disabled) { + dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); + return 0; + } + base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); @@ -8469,7 +8496,7 @@ static u64 gaudi_get_device_time(struct hl_device *hdev) } static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr, - u32 *block_id) + u32 *block_size, u32 *block_id) { return -EPERM; } @@ -8481,6 +8508,11 @@ static int gaudi_block_mmap(struct hl_device *hdev, return -EPERM; } +static void gaudi_enable_events_from_fw(struct hl_device *hdev) +{ + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -8562,7 +8594,8 @@ static const struct hl_asic_funcs gaudi_funcs = { .descramble_addr = hl_mmu_descramble_addr, .ack_protection_bits_errors = gaudi_ack_protection_bits_errors, .get_hw_block_id = gaudi_get_hw_block_id, - .hw_block_mmap = gaudi_block_mmap + .hw_block_mmap = gaudi_block_mmap, + .enable_events_from_fw = gaudi_enable_events_from_fw }; /** |