summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/gaudi/gaudi.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c909
1 files changed, 345 insertions, 564 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 637a9d608707..00a0a7238d81 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6,12 +6,12 @@
*/
#include "gaudiP.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-#include "include/hw_ip/mmu/mmu_v1_1.h"
-#include "include/gaudi/gaudi_masks.h"
-#include "include/gaudi/gaudi_fw_if.h"
-#include "include/gaudi/gaudi_reg_map.h"
-#include "include/gaudi/gaudi_async_ids_map_extended.h"
+#include "../include/hw_ip/mmu/mmu_general.h"
+#include "../include/hw_ip/mmu/mmu_v1_1.h"
+#include "../include/gaudi/gaudi_masks.h"
+#include "../include/gaudi/gaudi_fw_if.h"
+#include "../include/gaudi/gaudi_reg_map.h"
+#include "../include/gaudi/gaudi_async_ids_map_extended.h"
#include <linux/module.h>
#include <linux/pci.h>
@@ -21,6 +21,7 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
+#include <linux/bitfield.h>
/*
* Gaudi security scheme:
@@ -74,7 +75,6 @@
#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
-#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
@@ -321,6 +321,13 @@ static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
+struct ecc_info_extract_params {
+ u64 block_address;
+ u32 num_memories;
+ bool derr;
+ bool disable_clock_gating;
+};
+
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
@@ -339,22 +346,25 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i;
- if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
- dev_err(hdev->dev,
- "Number of H/W queues must be smaller than %d\n",
- HL_MAX_QUEUES);
- return -EFAULT;
- }
+ prop->max_queues = GAUDI_QUEUE_ID_SIZE;
+ prop->hw_queues_props = kcalloc(prop->max_queues,
+ sizeof(struct hw_queue_properties),
+ GFP_KERNEL);
- for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
+ if (!prop->hw_queues_props)
+ return -ENOMEM;
+
+ for (i = 0 ; i < prop->max_queues ; i++) {
if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
prop->hw_queues_props[i].driver_only = 0;
prop->hw_queues_props[i].requires_kernel_cb = 1;
+ prop->hw_queues_props[i].supports_sync_stream = 1;
} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
prop->hw_queues_props[i].driver_only = 1;
prop->hw_queues_props[i].requires_kernel_cb = 0;
+ prop->hw_queues_props[i].supports_sync_stream = 0;
} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
prop->hw_queues_props[i].driver_only = 0;
@@ -363,14 +373,13 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
prop->hw_queues_props[i].driver_only = 0;
prop->hw_queues_props[i].requires_kernel_cb = 0;
+ prop->hw_queues_props[i].supports_sync_stream = 0;
}
}
- for (; i < HL_MAX_QUEUES; i++)
- prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
-
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
-
+ prop->sync_stream_first_sob = 0;
+ prop->sync_stream_first_mon = 0;
prop->dram_base_address = DRAM_PHYS_BASE;
prop->dram_size = GAUDI_HBM_SIZE_32GB;
prop->dram_end_address = prop->dram_base_address +
@@ -435,6 +444,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
+ prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
+
return 0;
}
@@ -457,6 +468,7 @@ static int gaudi_pci_bars_map(struct hl_device *hdev)
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
struct gaudi_device *gaudi = hdev->asic_specific;
+ struct hl_inbound_pci_region pci_region;
u64 old_addr = addr;
int rc;
@@ -464,7 +476,10 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
return old_addr;
/* Inbound Region 2 - Bar 4 - Point to HBM */
- rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
+ pci_region.mode = PCI_BAR_MATCH_MODE;
+ pci_region.bar = HBM_BAR_ID;
+ pci_region.addr = addr;
+ rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
if (rc)
return U64_MAX;
@@ -478,22 +493,43 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
static int gaudi_init_iatu(struct hl_device *hdev)
{
- int rc = 0;
+ struct hl_inbound_pci_region inbound_region;
+ struct hl_outbound_pci_region outbound_region;
+ int rc;
+
+ /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
+ inbound_region.mode = PCI_BAR_MATCH_MODE;
+ inbound_region.bar = SRAM_BAR_ID;
+ inbound_region.addr = SRAM_BASE_ADDR;
+ rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
+ if (rc)
+ goto done;
/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
- rc = hl_pci_iatu_write(hdev, 0x314,
- lower_32_bits(SPI_FLASH_BASE_ADDR));
- rc |= hl_pci_iatu_write(hdev, 0x318,
- upper_32_bits(SPI_FLASH_BASE_ADDR));
- rc |= hl_pci_iatu_write(hdev, 0x300, 0);
- /* Enable + Bar match + match enable */
- rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
+ inbound_region.mode = PCI_BAR_MATCH_MODE;
+ inbound_region.bar = CFG_BAR_ID;
+ inbound_region.addr = SPI_FLASH_BASE_ADDR;
+ rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
+ if (rc)
+ goto done;
+ /* Inbound Region 2 - Bar 4 - Point to HBM */
+ inbound_region.mode = PCI_BAR_MATCH_MODE;
+ inbound_region.bar = HBM_BAR_ID;
+ inbound_region.addr = DRAM_PHYS_BASE;
+ rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
if (rc)
- return -EIO;
+ goto done;
- return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
- HOST_PHYS_BASE, HOST_PHYS_SIZE);
+ hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+
+ /* Outbound Region 0 - Point to Host */
+ outbound_region.addr = HOST_PHYS_BASE;
+ outbound_region.size = HOST_PHYS_SIZE;
+ rc = hl_pci_set_outbound_region(hdev, &outbound_region);
+
+done:
+ return rc;
}
static int gaudi_early_init(struct hl_device *hdev)
@@ -516,7 +552,8 @@ static int gaudi_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
SRAM_BAR_ID),
SRAM_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
@@ -526,20 +563,26 @@ static int gaudi_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
CFG_BAR_ID),
CFG_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
rc = hl_pci_init(hdev);
if (rc)
- return rc;
+ goto free_queue_props;
return 0;
+
+free_queue_props:
+ kfree(hdev->asic_prop.hw_queues_props);
+ return rc;
}
static int gaudi_early_fini(struct hl_device *hdev)
{
+ kfree(hdev->asic_prop.hw_queues_props);
hl_pci_fini(hdev);
return 0;
@@ -554,11 +597,36 @@ static int gaudi_early_fini(struct hl_device *hdev)
static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 trace_freq = 0;
+ u32 pll_clk = 0;
+ u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
+ u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
+ u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
+ u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
+ u32 od = RREG32(mmPSOC_CPU_PLL_OD);
+
+ if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
+ if (div_sel == DIV_SEL_REF_CLK)
+ trace_freq = PLL_REF_CLK;
+ else
+ trace_freq = PLL_REF_CLK / (div_fctr + 1);
+ } else if (div_sel == DIV_SEL_PLL_CLK ||
+ div_sel == DIV_SEL_DIVIDED_PLL) {
+ pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
+ if (div_sel == DIV_SEL_PLL_CLK)
+ trace_freq = pll_clk;
+ else
+ trace_freq = pll_clk / (div_fctr + 1);
+ } else {
+ dev_warn(hdev->dev,
+ "Received invalid div select value: %d", div_sel);
+ }
- prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
- prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
- prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
- prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+ prop->psoc_timestamp_frequency = trace_freq;
+ prop->psoc_pci_pll_nr = nr;
+ prop->psoc_pci_pll_nf = nf;
+ prop->psoc_pci_pll_od = od;
+ prop->psoc_pci_pll_div_factor = div_fctr;
}
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
@@ -573,7 +641,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
u8 tpc_id;
int rc;
- cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
if (!cb)
return -EFAULT;
@@ -1644,8 +1712,8 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
hbm0_wr = 0x33333333;
- hbm1_wr = 0x33333333;
hbm0_rd = 0x77777777;
+ hbm1_wr = 0x55555555;
hbm1_rd = 0xDDDDDDDD;
WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
@@ -1695,125 +1763,6 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
-static void gaudi_init_rate_limiter(struct hl_device *hdev)
-{
- u32 nr, nf, od, sat, rst, timeout;
- u64 freq;
-
- nr = RREG32(mmPSOC_HBM_PLL_NR);
- nf = RREG32(mmPSOC_HBM_PLL_NF);
- od = RREG32(mmPSOC_HBM_PLL_OD);
- freq = (50 * (nf + 1)) / ((nr + 1) * (od + 1));
-
- dev_dbg(hdev->dev, "HBM frequency is %lluMHz\n", freq);
-
- /* Configuration is for five (5) DDMA channels */
- if (freq == 800) {
- sat = 4;
- rst = 11;
- timeout = 15;
- } else if (freq == 900) {
- sat = 4;
- rst = 15;
- timeout = 16;
- } else if (freq == 950) {
- sat = 4;
- rst = 15;
- timeout = 15;
- } else {
- dev_warn(hdev->dev,
- "unsupported HBM frequency %lluMHz, no rate-limiters\n",
- freq);
- return;
- }
-
- WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_0, 0x111);
- WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_1, 0x111);
- WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_0, 0x111);
- WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_1, 0x111);
- WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_0, 0x111);
- WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_1, 0x111);
- WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_0, 0x111);
- WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_1, 0x111);
-
- if (!hdev->rl_enable) {
- dev_info(hdev->dev, "Rate limiters disabled\n");
- return;
- }
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_SAT, sat);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_SAT, sat);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_SAT, sat);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_SAT, sat);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_SAT, sat);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_SAT, sat);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_SAT, sat);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_SAT, sat);
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_RST, rst);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_RST, rst);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_RST, rst);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_RST, rst);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_RST, rst);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_RST, rst);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_RST, rst);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_RST, rst);
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_EN, 1);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_EN, 1);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_EN, 1);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_EN, 1);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_EN, 1);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_EN, 1);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_EN, 1);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_EN, 1);
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_SAT, sat);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_SAT, sat);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_SAT, sat);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_SAT, sat);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_SAT, sat);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_SAT, sat);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_SAT, sat);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_SAT, sat);
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_RST, rst);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_RST, rst);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_RST, rst);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_RST, rst);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_RST, rst);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_RST, rst);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_RST, rst);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_RST, rst);
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
-
- WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_EN, 1);
- WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_EN, 1);
- WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_EN, 1);
- WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_EN, 1);
- WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_EN, 1);
- WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_EN, 1);
- WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_EN, 1);
- WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_EN, 1);
-}
-
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
u32 tpc_offset;
@@ -1823,8 +1772,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
gaudi_init_hbm_cred(hdev);
- gaudi_init_rate_limiter(hdev);
-
hdev->asic_funcs->disable_clock_gating(hdev);
for (tpc_id = 0, tpc_offset = 0;
@@ -1845,9 +1792,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
-
- /* WA for H3-2081 */
- WREG32(mmPCIE_WRAP_MAX_OUTSTAND, 0x10ff);
}
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
@@ -2649,29 +2593,16 @@ static void gaudi_disable_timestamp(struct hl_device *hdev)
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
{
- u32 wait_timeout_ms, cpu_timeout_ms;
+ u32 wait_timeout_ms;
dev_info(hdev->dev,
"Halting compute engines and disabling interrupts\n");
- if (hdev->pldm) {
+ if (hdev->pldm)
wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
- cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
- } else {
+ else
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
- cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
- }
- if (hard_reset) {
- /*
- * I don't know what is the state of the CPU so make sure it is
- * stopped in any means necessary
- */
- WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
- WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
- GAUDI_EVENT_HALT_MACHINE);
- msleep(cpu_timeout_ms);
- }
gaudi_stop_mme_qmans(hdev);
gaudi_stop_tpc_qmans(hdev);
@@ -2696,10 +2627,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
gaudi_disable_timestamp(hdev);
- if (hard_reset)
- gaudi_disable_msi(hdev);
- else
- gaudi_sync_irqs(hdev);
+ gaudi_disable_msi(hdev);
}
static int gaudi_mmu_init(struct hl_device *hdev)
@@ -2733,8 +2661,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
- VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
+ hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
WREG32(mmMMU_UP_MMU_ENABLE, 1);
WREG32(mmMMU_UP_SPI_MASK, 0xF);
@@ -2984,16 +2911,6 @@ static int gaudi_hw_init(struct hl_device *hdev)
gaudi_init_hbm_dma_qmans(hdev);
- /*
- * Before pushing u-boot/linux to device, need to set the hbm bar to
- * base address of dram
- */
- if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
- dev_err(hdev->dev,
- "failed to map HBM bar to DRAM base address\n");
- return -EIO;
- }
-
rc = gaudi_init_cpu(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize CPU\n");
@@ -3052,48 +2969,56 @@ disable_queues:
static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
{
struct gaudi_device *gaudi = hdev->asic_specific;
- u32 status, reset_timeout_ms, boot_strap = 0;
+ u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
+
+ if (!hard_reset) {
+ dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
+ return;
+ }
if (hdev->pldm) {
- if (hard_reset)
- reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
- else
- reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC;
+ reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
+ cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
} else {
reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
+ cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
}
- if (hard_reset) {
- /* Tell ASIC not to re-initialize PCIe */
- WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
+ /* Set device to handle FLR by H/W as we will put the device CPU to
+ * halt mode
+ */
+ WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
+ PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
- boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
- /* H/W bug WA:
- * rdata[31:0] = strap_read_val;
- * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
- */
- boot_strap = (((boot_strap & 0x7FE00000) << 1) |
- (boot_strap & 0x001FFFFF));
- WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
-
- /* Restart BTL/BLR upon hard-reset */
- WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
-
- WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
- 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
- dev_info(hdev->dev,
- "Issued HARD reset command, going to wait %dms\n",
- reset_timeout_ms);
- } else {
- /* Don't restart BTL/BLR upon soft-reset */
- WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0);
+ /* I don't know what is the state of the CPU so make sure it is
+ * stopped in any means necessary
+ */
+ WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+ WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
- WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST,
- 1 << PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT);
- dev_info(hdev->dev,
- "Issued SOFT reset command, going to wait %dms\n",
- reset_timeout_ms);
- }
+ msleep(cpu_timeout_ms);
+
+ /* Tell ASIC not to re-initialize PCIe */
+ WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
+
+ boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
+
+ /* H/W bug WA:
+ * rdata[31:0] = strap_read_val;
+ * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
+ */
+ boot_strap = (((boot_strap & 0x7FE00000) << 1) |
+ (boot_strap & 0x001FFFFF));
+ WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
+
+ /* Restart BTL/BLR upon hard-reset */
+ WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
+
+ WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
+ 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
+ dev_info(hdev->dev,
+ "Issued HARD reset command, going to wait %dms\n",
+ reset_timeout_ms);
/*
* After hard reset, we can't poll the BTM_FSM register because the PSOC
@@ -3107,18 +3032,6 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
"Timeout while waiting for device to reset 0x%x\n",
status);
- if (!hard_reset) {
- gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME |
- HW_CAP_TPC_MASK |
- HW_CAP_HBM_DMA);
-
- WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
- GAUDI_EVENT_SOFT_RESET);
- return;
- }
-
- /* We continue here only for hard-reset */
-
WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
@@ -3578,7 +3491,7 @@ static int gaudi_test_queues(struct hl_device *hdev)
{
int i, rc, ret_val = 0;
- for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
+ for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
rc = gaudi_test_queue(hdev, i);
if (rc)
@@ -4159,9 +4072,8 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
parser->patched_cb_size = parser->user_cb_size +
sizeof(struct packet_msg_prot) * 2;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
- parser->patched_cb_size,
- &patched_cb_handle, HL_KERNEL_ASID_ID);
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+ &patched_cb_handle, HL_KERNEL_ASID_ID, false);
if (rc) {
dev_err(hdev->dev,
@@ -4233,9 +4145,8 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
if (rc)
goto free_userptr;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
- parser->patched_cb_size,
- &patched_cb_handle, HL_KERNEL_ASID_ID);
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+ &patched_cb_handle, HL_KERNEL_ASID_ID, false);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -4364,11 +4275,11 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
{
struct packet_lin_dma *lin_dma_pkt;
struct hl_cs_job *job;
- u32 cb_size, ctl;
+ u32 cb_size, ctl, err_cause;
struct hl_cb *cb;
int rc;
- cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
if (!cb)
return -EFAULT;
@@ -4393,6 +4304,15 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
goto release_cb;
}
+ /* Verify DMA is OK */
+ err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
+ if (err_cause && !hdev->init_done) {
+ dev_dbg(hdev->dev,
+ "Clearing DMA0 engine from errors (cause 0x%x)\n",
+ err_cause);
+ WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
+ }
+
job->id = 0;
job->user_cb = cb;
job->user_cb->cs_cnt++;
@@ -4404,11 +4324,23 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
hl_debugfs_add_job(hdev, job);
rc = gaudi_send_job_on_qman0(hdev, job);
-
hl_debugfs_remove_job(hdev, job);
kfree(job);
cb->cs_cnt--;
+ /* Verify DMA is OK */
+ err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
+ if (err_cause) {
+ dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
+ rc = -EIO;
+ if (!hdev->init_done) {
+ dev_dbg(hdev->dev,
+ "Clearing DMA0 engine from errors (cause 0x%x)\n",
+ err_cause);
+ WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
+ }
+ }
+
release_cb:
hl_cb_put(cb);
hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
@@ -5254,62 +5186,76 @@ static void gaudi_print_mmu_error_info(struct hl_device *hdev)
* | |0xF4C memory wrappers 127:96 |
* +-------------------+------------------------------------------------------+
*/
-static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
- const char *block_name,
- u64 block_address, int num_memories,
- bool derr, bool disable_clock_gating)
+static int gaudi_extract_ecc_info(struct hl_device *hdev,
+ struct ecc_info_extract_params *params, u64 *ecc_address,
+ u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
struct gaudi_device *gaudi = hdev->asic_specific;
- int num_mem_regs = num_memories / 32 + ((num_memories % 32) ? 1 : 0);
+ u32 i, num_mem_regs, reg, err_bit;
+ u64 err_addr, err_word = 0;
+ int rc = 0;
+
+ num_mem_regs = params->num_memories / 32 +
+ ((params->num_memories % 32) ? 1 : 0);
- if (block_address >= CFG_BASE)
- block_address -= CFG_BASE;
+ if (params->block_address >= CFG_BASE)
+ params->block_address -= CFG_BASE;
- if (derr)
- block_address += GAUDI_ECC_DERR0_OFFSET;
+ if (params->derr)
+ err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
else
- block_address += GAUDI_ECC_SERR0_OFFSET;
+ err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
- if (disable_clock_gating) {
+ if (params->disable_clock_gating) {
mutex_lock(&gaudi->clk_gate_mutex);
hdev->asic_funcs->disable_clock_gating(hdev);
}
- switch (num_mem_regs) {
- case 1:
- dev_err(hdev->dev,
- "%s ECC indication: 0x%08x\n",
- block_name, RREG32(block_address));
- break;
- case 2:
- dev_err(hdev->dev,
- "%s ECC indication: 0x%08x 0x%08x\n",
- block_name,
- RREG32(block_address), RREG32(block_address + 4));
- break;
- case 3:
- dev_err(hdev->dev,
- "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
- block_name,
- RREG32(block_address), RREG32(block_address + 4),
- RREG32(block_address + 8));
- break;
- case 4:
- dev_err(hdev->dev,
- "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
- block_name,
- RREG32(block_address), RREG32(block_address + 4),
- RREG32(block_address + 8), RREG32(block_address + 0xc));
- break;
- default:
- break;
+ /* Set invalid wrapper index */
+ *memory_wrapper_idx = 0xFF;
+
+ /* Iterate through memory wrappers, a single bit must be set */
+ for (i = 0 ; i > num_mem_regs ; i++) {
+ err_addr += i * 4;
+ err_word = RREG32(err_addr);
+ if (err_word) {
+ err_bit = __ffs(err_word);
+ *memory_wrapper_idx = err_bit + (32 * i);
+ break;
+ }
+ }
+ if (*memory_wrapper_idx == 0xFF) {
+ dev_err(hdev->dev, "ECC error information cannot be found\n");
+ rc = -EINVAL;
+ goto enable_clk_gate;
}
- if (disable_clock_gating) {
+ WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
+ *memory_wrapper_idx);
+
+ *ecc_address =
+ RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
+ *ecc_syndrom =
+ RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
+
+ /* Clear error indication */
+ reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
+ if (params->derr)
+ reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
+ else
+ reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
+
+ WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
+
+enable_clk_gate:
+ if (params->disable_clock_gating) {
hdev->asic_funcs->set_clock_gating(hdev);
+
mutex_unlock(&gaudi->clk_gate_mutex);
}
+
+ return rc;
}
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
@@ -5362,239 +5308,99 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
}
}
-static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type)
+static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
+ struct hl_eq_ecc_data *ecc_data)
{
- u64 block_address;
- u8 index;
- int num_memories;
- char desc[32];
- bool derr;
- bool disable_clock_gating;
+ struct ecc_info_extract_params params;
+ u64 ecc_address = 0, ecc_syndrom = 0;
+ u8 index, memory_wrapper_idx = 0;
+ bool extract_info_from_fw;
+ int rc;
switch (event_type) {
- case GAUDI_EVENT_PCIE_CORE_SERR:
- snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
- block_address = mmPCIE_CORE_BASE;
- num_memories = 51;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PCIE_CORE_DERR:
- snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
- block_address = mmPCIE_CORE_BASE;
- num_memories = 51;
- derr = true;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PCIE_IF_SERR:
- snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
- block_address = mmPCIE_WRAP_BASE;
- num_memories = 11;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PCIE_IF_DERR:
- snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
- block_address = mmPCIE_WRAP_BASE;
- num_memories = 11;
- derr = true;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PCIE_PHY_SERR:
- snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
- block_address = mmPCIE_PHY_BASE;
- num_memories = 4;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PCIE_PHY_DERR:
- snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
- block_address = mmPCIE_PHY_BASE;
- num_memories = 4;
- derr = true;
- disable_clock_gating = false;
+ case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
+ case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
+ extract_info_from_fw = true;
break;
case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
index = event_type - GAUDI_EVENT_TPC0_SERR;
- block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
- num_memories = 90;
- derr = false;
- disable_clock_gating = true;
+ params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
+ params.num_memories = 90;
+ params.derr = false;
+ params.disable_clock_gating = true;
+ extract_info_from_fw = false;
break;
case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
index = event_type - GAUDI_EVENT_TPC0_DERR;
- block_address =
+ params.block_address =
mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
- num_memories = 90;
- derr = true;
- disable_clock_gating = true;
+ params.num_memories = 90;
+ params.derr = true;
+ params.disable_clock_gating = true;
+ extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_ACC_SERR:
case GAUDI_EVENT_MME1_ACC_SERR:
case GAUDI_EVENT_MME2_ACC_SERR:
case GAUDI_EVENT_MME3_ACC_SERR:
index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
- block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
- num_memories = 128;
- derr = false;
- disable_clock_gating = true;
+ params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
+ params.num_memories = 128;
+ params.derr = false;
+ params.disable_clock_gating = true;
+ extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_ACC_DERR:
case GAUDI_EVENT_MME1_ACC_DERR:
case GAUDI_EVENT_MME2_ACC_DERR:
case GAUDI_EVENT_MME3_ACC_DERR:
index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
- block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
- num_memories = 128;
- derr = true;
- disable_clock_gating = true;
+ params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
+ params.num_memories = 128;
+ params.derr = true;
+ params.disable_clock_gating = true;
+ extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_SBAB_SERR:
case GAUDI_EVENT_MME1_SBAB_SERR:
case GAUDI_EVENT_MME2_SBAB_SERR:
case GAUDI_EVENT_MME3_SBAB_SERR:
index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
- block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
- num_memories = 33;
- derr = false;
- disable_clock_gating = true;
+ params.block_address =
+ mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
+ params.num_memories = 33;
+ params.derr = false;
+ params.disable_clock_gating = true;
+ extract_info_from_fw = false;
break;
case GAUDI_EVENT_MME0_SBAB_DERR:
case GAUDI_EVENT_MME1_SBAB_DERR:
case GAUDI_EVENT_MME2_SBAB_DERR:
case GAUDI_EVENT_MME3_SBAB_DERR:
index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
- block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
- num_memories = 33;
- derr = true;
- disable_clock_gating = true;
- break;
- case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
- index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
- block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
- num_memories = 16;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
- index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
- block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
- num_memories = 16;
- derr = true;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_CPU_IF_ECC_SERR:
- block_address = mmCPU_IF_BASE;
- snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
- num_memories = 4;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_CPU_IF_ECC_DERR:
- block_address = mmCPU_IF_BASE;
- snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
- num_memories = 4;
- derr = true;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PSOC_MEM_SERR:
- block_address = mmPSOC_GLOBAL_CONF_BASE;
- snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
- num_memories = 4;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PSOC_MEM_DERR:
- block_address = mmPSOC_GLOBAL_CONF_BASE;
- snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
- num_memories = 4;
- derr = true;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
- block_address = mmPSOC_CS_TRACE_BASE;
- snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
- num_memories = 2;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
- block_address = mmPSOC_CS_TRACE_BASE;
- snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
- num_memories = 2;
- derr = true;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
- index = event_type - GAUDI_EVENT_SRAM0_SERR;
- block_address =
- mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
- num_memories = 2;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
- index = event_type - GAUDI_EVENT_SRAM0_DERR;
- block_address =
- mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
- snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
- num_memories = 2;
- derr = true;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
- index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
- block_address = mmDMA_IF_W_S_BASE +
- index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
- snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
- num_memories = 60;
- derr = false;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
- index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
- block_address = mmDMA_IF_W_S_BASE +
- index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
- snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
- derr = true;
- num_memories = 60;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
- index = event_type - GAUDI_EVENT_HBM_0_SERR;
- /* HBM Registers are at different offsets */
- block_address = mmHBM0_BASE + 0x8000 +
- index * (mmHBM1_BASE - mmHBM0_BASE);
- snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
- derr = false;
- num_memories = 64;
- disable_clock_gating = false;
- break;
- case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
- index = event_type - GAUDI_EVENT_HBM_0_SERR;
- /* HBM Registers are at different offsets */
- block_address = mmHBM0_BASE + 0x8000 +
- index * (mmHBM1_BASE - mmHBM0_BASE);
- snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
- derr = true;
- num_memories = 64;
- disable_clock_gating = false;
- break;
+ params.block_address =
+ mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
+ params.num_memories = 33;
+ params.derr = true;
+ params.disable_clock_gating = true;
default:
return;
}
- gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories,
- derr, disable_clock_gating);
+ if (extract_info_from_fw) {
+ ecc_address = le64_to_cpu(ecc_data->ecc_address);
+ ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
+ memory_wrapper_idx = ecc_data->memory_wrapper_idx;
+ } else {
+ rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
+ &ecc_syndrom, &memory_wrapper_idx);
+ if (rc)
+ return;
+ }
+
+ dev_err(hdev->dev,
+ "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
+ ecc_address, ecc_syndrom, memory_wrapper_idx);
}
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
@@ -5644,8 +5450,6 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
- gaudi_print_ecc_info(hdev, event_type);
-
if (razwi) {
gaudi_print_razwi_info(hdev);
gaudi_print_mmu_error_info(hdev);
@@ -5875,10 +5679,15 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
- fallthrough;
- case GAUDI_EVENT_GIC500:
case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
case GAUDI_EVENT_MMU_DERR:
+ gaudi_print_irq_info(hdev, event_type, true);
+ gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
+ if (hdev->hard_reset_on_fw_events)
+ hl_device_reset(hdev, true, false);
+ break;
+
+ case GAUDI_EVENT_GIC500:
case GAUDI_EVENT_AXI_ECC:
case GAUDI_EVENT_L2_RAM_ECC:
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
@@ -5974,6 +5783,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
fallthrough;
case GAUDI_EVENT_MMU_SERR:
+ gaudi_print_irq_info(hdev, event_type, true);
+ gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
+ hl_fw_unmask_irq(hdev, event_type);
+ break;
+
case GAUDI_EVENT_PCIE_DEC:
case GAUDI_EVENT_MME0_WBC_RSP:
case GAUDI_EVENT_MME0_SBAB0_RSP:
@@ -6458,47 +6272,14 @@ static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
return RREG32(mmHW_STATE);
}
-static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
-{
- return gaudi_cq_assignment[cq_idx];
-}
-
-static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
+static int gaudi_ctx_init(struct hl_ctx *ctx)
{
- struct gaudi_device *gaudi = hdev->asic_specific;
- struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
- struct hl_hw_sob *hw_sob;
- int sob, ext_idx = gaudi->ext_queue_idx++;
-
- /*
- * The external queues might not sit sequentially, hence use the
- * real external queue index for the SOB/MON base id.
- */
- hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
- hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
- hw_queue->next_sob_val = 1;
- hw_queue->curr_sob_offset = 0;
-
- for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
- hw_sob = &hw_queue->hw_sob[sob];
- hw_sob->hdev = hdev;
- hw_sob->sob_id = hw_queue->base_sob_id + sob;
- hw_sob->q_idx = q_idx;
- kref_init(&hw_sob->kref);
- }
+ return 0;
}
-static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
+static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
- struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-
- /*
- * In case we got here due to a stuck CS, the refcnt might be bigger
- * than 1 and therefore we reset it.
- */
- kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
- hw_queue->curr_sob_offset = 0;
- hw_queue->next_sob_val = 1;
+ return gaudi_cq_assignment[cq_idx];
}
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
@@ -6523,16 +6304,17 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
memset(pkt, 0, sizeof(*pkt));
- value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
- value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
+ /* Inc by 1, Mode ADD */
+ value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
+ value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
- ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
- ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
- ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
- ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
- ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
- ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
- ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
+ ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
pkt->value = cpu_to_le32(value);
pkt->ctl = cpu_to_le32(ctl);
@@ -6545,12 +6327,12 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
memset(pkt, 0, pkt_size);
- ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
- ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
- ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
- ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
- ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
- ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
+ ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
pkt->value = cpu_to_le32(value);
pkt->ctl = cpu_to_le32(ctl);
@@ -6566,18 +6348,19 @@ static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
memset(pkt, 0, pkt_size);
- value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
- value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
- value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
- value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
+ value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
+ value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
+ value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
+ 0); /* GREATER OR EQUAL*/
+ value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
- ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
- ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
- ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
- ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
- ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
- ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
- ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
+ ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
pkt->value = cpu_to_le32(value);
pkt->ctl = cpu_to_le32(ctl);
@@ -6591,15 +6374,14 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
memset(pkt, 0, pkt_size);
- cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
- cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
- cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
+ cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
+ cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
+ cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
- ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
- ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
- ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
- ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
- ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
+ ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
pkt->cfg = cpu_to_le32(cfg);
pkt->ctl = cpu_to_le32(ctl);
@@ -6798,13 +6580,12 @@ static const struct hl_asic_funcs gaudi_funcs = {
.rreg = hl_rreg,
.wreg = hl_wreg,
.halt_coresight = gaudi_halt_coresight,
+ .ctx_init = gaudi_ctx_init,
.get_clk_rate = gaudi_get_clk_rate,
.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
.read_device_fw_version = gaudi_read_device_fw_version,
.load_firmware_to_device = gaudi_load_firmware_to_device,
.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
- .ext_queue_init = gaudi_ext_queue_init,
- .ext_queue_reset = gaudi_ext_queue_reset,
.get_signal_cb_size = gaudi_get_signal_cb_size,
.get_wait_cb_size = gaudi_get_wait_cb_size,
.gen_signal_cb = gaudi_gen_signal_cb,
@@ -6817,7 +6598,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
/**
* gaudi_set_asic_funcs - set GAUDI function pointers
*
- * @*hdev: pointer to hl_device structure
+ * @hdev: pointer to hl_device structure
*
*/
void gaudi_set_asic_funcs(struct hl_device *hdev)