diff options
Diffstat (limited to 'drivers/accel/habanalabs/gaudi2/gaudi2.c')
-rw-r--r-- | drivers/accel/habanalabs/gaudi2/gaudi2.c | 155 |
1 files changed, 154 insertions, 1 deletions
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 65c720a0c64c..f1f2a58ee68c 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -7,6 +7,7 @@ #include "gaudi2P.h" #include "gaudi2_masks.h" +#include "../include/gaudi2/gaudi2_special_blocks.h" #include "../include/hw_ip/mmu/mmu_general.h" #include "../include/hw_ip/mmu/mmu_v2_0.h" #include "../include/gaudi2/gaudi2_packets.h" @@ -1683,6 +1684,30 @@ struct hbm_mc_error_causes { char cause[50]; }; +static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS; + +/* Special blocks iterator is currently used to configure security protection bits, + * and read global errors. Most HW blocks are addressable and those who aren't (N/A)- + * must be skipped. Following configurations are commonly used for both PB config + * and global error reading, since currently they both share the same settings. + * Once it changes, we must remember to use separate configurations for either one. + */ +static int gaudi2_iterator_skip_block_types[] = { + GAUDI2_BLOCK_TYPE_PLL, + GAUDI2_BLOCK_TYPE_EU_BIST, + GAUDI2_BLOCK_TYPE_HBM, + GAUDI2_BLOCK_TYPE_XFT +}; + +static struct range gaudi2_iterator_skip_block_ranges[] = { + /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */ + {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE}, + {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE}, + /* Skip all CPU blocks except for CPU_IF */ + {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE}, + {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE} +}; + static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = { {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"}, {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"}, @@ -2997,6 +3022,99 @@ static inline int gaudi2_get_non_zero_random_int(void) return rand ? rand : 1; } +static void gaudi2_special_blocks_free(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_skip_blocks_cfg *skip_special_blocks_cfg = + &prop->skip_special_blocks_cfg; + + kfree(prop->special_blocks); + kfree(skip_special_blocks_cfg->block_types); + kfree(skip_special_blocks_cfg->block_ranges); +} + +static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev) +{ + gaudi2_special_blocks_free(hdev); +} + +static bool gaudi2_special_block_skip(struct hl_device *hdev, + struct hl_special_blocks_cfg *special_blocks_cfg, + u32 blk_idx, u32 major, u32 minor, u32 sub_minor) +{ + return false; +} + +static int gaudi2_special_blocks_config(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + int i, rc; + + /* Configure Special blocks */ + prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE; + prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); + prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, + sizeof(*prop->special_blocks), GFP_KERNEL); + if (!prop->special_blocks) + return -ENOMEM; + + for (i = 0 ; i < prop->num_of_special_blocks ; i++) + memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i], + sizeof(*prop->special_blocks)); + + /* Configure when to skip Special blocks */ + memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg)); + prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip; + + if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) { + prop->skip_special_blocks_cfg.block_types = + kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types), + sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL); + if (!prop->skip_special_blocks_cfg.block_types) { + rc = -ENOMEM; + goto free_special_blocks; + } + + memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types, + sizeof(gaudi2_iterator_skip_block_types)); + + prop->skip_special_blocks_cfg.block_types_len = + ARRAY_SIZE(gaudi2_iterator_skip_block_types); + } + + if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) { + prop->skip_special_blocks_cfg.block_ranges = + kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges), + sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL); + if (!prop->skip_special_blocks_cfg.block_ranges) { + rc = -ENOMEM; + goto free_skip_special_blocks_types; + } + + for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++) + memcpy(&prop->skip_special_blocks_cfg.block_ranges[i], + &gaudi2_iterator_skip_block_ranges[i], + sizeof(struct range)); + + prop->skip_special_blocks_cfg.block_ranges_len = + ARRAY_SIZE(gaudi2_iterator_skip_block_ranges); + } + + return 0; + +free_skip_special_blocks_types: + kfree(prop->skip_special_blocks_cfg.block_types); +free_special_blocks: + kfree(prop->special_blocks); + + return rc; +} + +static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev) +{ + return gaudi2_special_blocks_config(hdev); +} + static int gaudi2_sw_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -3092,8 +3210,15 @@ static int gaudi2_sw_init(struct hl_device *hdev) hdev->asic_funcs->set_pci_memory_regions(hdev); + rc = gaudi2_special_blocks_iterator_config(hdev); + if (rc) + goto free_scratchpad_mem; + return 0; +free_scratchpad_mem: + hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address, + gaudi2->scratchpad_bus_address); free_virt_msix_db_mem: hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); free_cpu_accessible_dma_pool: @@ -3113,6 +3238,8 @@ static int gaudi2_sw_fini(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi2_device *gaudi2 = hdev->asic_specific; + gaudi2_special_blocks_iterator_free(hdev); + hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); gen_pool_destroy(hdev->cpu_accessible_dma_pool); @@ -7836,9 +7963,11 @@ static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base + NIC_QM_OFFSET, event_type); - if (extended_err_check) + if (extended_err_check) { /* check if RAZWI happened */ gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask); + hl_check_for_glbl_errors(hdev); + } return error_count; } @@ -7958,6 +8087,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask); } + hl_check_for_glbl_errors(hdev); + return error_count; } @@ -7976,6 +8107,8 @@ static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type } } + hl_check_for_glbl_errors(hdev); + WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val); return error_count; @@ -7996,6 +8129,8 @@ static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type) } } + hl_check_for_glbl_errors(hdev); + WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val); return error_count; @@ -8018,6 +8153,7 @@ static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event /* check if RAZWI happened */ gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask); + hl_check_for_glbl_errors(hdev); return error_count; } @@ -8039,6 +8175,7 @@ static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 /* check if RAZWI happened */ gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask); + hl_check_for_glbl_errors(hdev); return error_count; } @@ -8072,6 +8209,7 @@ static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event /* check if RAZWI happened */ gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask); + hl_check_for_glbl_errors(hdev); /* Write 1 clear errors */ WREG32(sts_addr, sts_clr_val); @@ -8103,6 +8241,8 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++) gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask); + hl_check_for_glbl_errors(hdev); + WREG32(sts_clr_addr, sts_clr_val); return error_count; @@ -8120,6 +8260,8 @@ static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, error_count++; } + hl_check_for_glbl_errors(hdev); + return error_count; } @@ -8146,6 +8288,7 @@ static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 e /* check if RAZWI happened on WAP0/1 */ gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask); gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask); + hl_check_for_glbl_errors(hdev); WREG32(sts_clr_addr, sts_clr_val); @@ -8170,6 +8313,8 @@ static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type, error_count++; } + hl_check_for_glbl_errors(hdev); + return error_count; } @@ -8186,6 +8331,8 @@ static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, error_count++; } + hl_check_for_glbl_errors(hdev); + return error_count; } @@ -8238,6 +8385,7 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ switch (intr_cause_data & BIT_ULL(i)) { case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: + hl_check_for_glbl_errors(hdev); break; case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); @@ -8412,6 +8560,8 @@ static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_in WREG32(cq_intr_addr, 0); } + hl_check_for_glbl_errors(hdev); + return error_count; } @@ -8466,6 +8616,7 @@ static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, is_pmmu, event_mask); + hl_check_for_glbl_errors(hdev); return error_count; } @@ -8792,6 +8943,8 @@ static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data) } } + hl_check_for_glbl_errors(hdev); + return error_count; } |