From c1e89ae455282baf69e45b81f04e095716b13cb2 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 18 Jan 2024 14:29:02 +0200 Subject: accel/habanalabs/gaudi2: check extended errors according to PCIe addr_dec interrupt info The FW interrupt info for a PCIe addr_dec event is set correctly, so check for either global errors or razwi according to the indications there. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Reviewed-by: Carl Vanderlip Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/accel/habanalabs/gaudi2/gaudi2.c') diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 04ada1fb1713..9f033a785c49 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -8983,9 +8983,6 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ u32 error_count = 0; int i; - gaudi2_print_event(hdev, event_type, true, - "intr_cause_data: %#llx", intr_cause_data); - for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { if (!(intr_cause_data & BIT_ULL(i))) continue; @@ -8994,15 +8991,16 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); error_count++; - /* - * Always check for LBW and HBW additional info as the indication itself is - * sometimes missing - */ + switch (intr_cause_data & BIT_ULL(i)) { + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: + hl_check_for_glbl_errors(hdev); + break; + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: + gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); + break; + } } - hl_check_for_glbl_errors(hdev); - gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); - return error_count; } -- cgit v1.2.3