diff options
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r-- | drivers/misc/habanalabs/command_submission.c | 14 | ||||
-rw-r--r-- | drivers/misc/habanalabs/debugfs.c | 23 | ||||
-rw-r--r-- | drivers/misc/habanalabs/device.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/firmware_if.c | 10 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 123 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 20 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 19 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs_drv.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/hwmon.c | 19 | ||||
-rw-r--r-- | drivers/misc/habanalabs/sysfs.c | 11 |
10 files changed, 148 insertions, 95 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index b0f62cbbdc87..f3a8f113865d 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -499,11 +499,19 @@ static int validate_queue_index(struct hl_device *hdev, struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; + /* This must be checked here to prevent out-of-bounds access to + * hw_queues_props array + */ + if (chunk->queue_index >= HL_MAX_QUEUES) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; - if ((chunk->queue_index >= HL_MAX_QUEUES) || - (hw_queue_prop->type == QUEUE_TYPE_NA)) { - dev_err(hdev->dev, "Queue index %d is invalid\n", + if (hw_queue_prop->type == QUEUE_TYPE_NA) { + dev_err(hdev->dev, "Queue index %d is not applicable\n", chunk->queue_index); return -EINVAL; } diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index fc4372c18ce2..0bc036e01ee8 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -36,7 +36,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_reg = i2c_reg; rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, (long *) val); + 0, (long *) val); if (rc) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); @@ -63,7 +63,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.value = cpu_to_le64(val); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); @@ -87,7 +87,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) pkt.value = cpu_to_le64(state); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); @@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, if (*ppos) return 0; - sprintf(tmp_buf, "%d\n", hdev->clock_gating); + sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, strlen(tmp_buf) + 1); @@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u32 value; + u64 value; ssize_t rc; if (atomic_read(&hdev->in_reset)) { @@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, return 0; } - rc = kstrtouint_from_user(buf, count, 10, &value); + rc = kstrtoull_from_user(buf, count, 16, &value); if (rc) return rc; - if (value) { - hdev->clock_gating = 1; - if (hdev->asic_funcs->enable_clock_gating) - hdev->asic_funcs->enable_clock_gating(hdev); - } else { - if (hdev->asic_funcs->disable_clock_gating) - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->clock_gating = 0; - } + hdev->clock_gating_mask = value; + hdev->asic_funcs->set_clock_gating(hdev); return count; } diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 2b38a119704c..59608d1bac88 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -608,7 +608,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) hdev->in_debug = 0; if (!hdev->hard_reset_pending) - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); goto out; } diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index baf790cf4b78..d27841cb5bcb 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -61,7 +61,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); + sizeof(pkt), 0, NULL); } int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, @@ -144,7 +144,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -183,7 +183,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, - total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -204,7 +204,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(test_pkt), 0, &result); if (!rc) { if (result != ARMCP_PACKET_FENCE_VAL) @@ -248,7 +248,7 @@ int hl_fw_send_heartbeat(struct hl_device *hdev) hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(hb_pkt), 0, &result); if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) rc = -EIO; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 834470d10b46..637a9d608707 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -80,6 +80,7 @@ #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 @@ -98,6 +99,11 @@ #define GAUDI_ARB_WDT_TIMEOUT 0x1000000 +#define GAUDI_CLK_GATE_DEBUGFS_MASK (\ + BIT(GAUDI_ENGINE_ID_MME_0) |\ + BIT(GAUDI_ENGINE_ID_MME_2) |\ + GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0)) + static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -106,14 +112,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { }; static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { - [GAUDI_PCI_DMA_1] = 0, - [GAUDI_PCI_DMA_2] = 1, - [GAUDI_PCI_DMA_3] = 5, - [GAUDI_HBM_DMA_1] = 2, - [GAUDI_HBM_DMA_2] = 3, - [GAUDI_HBM_DMA_3] = 4, - [GAUDI_HBM_DMA_4] = 6, - [GAUDI_HBM_DMA_5] = 7 + [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, + [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, + [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5, + [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, + [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, + [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, + [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6, + [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7 }; static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { @@ -1819,7 +1825,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) gaudi_init_rate_limiter(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); for (tpc_id = 0, tpc_offset = 0; tpc_id < TPC_NUMBER_OF_ENGINES; @@ -2531,46 +2537,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev) WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); } -static void gaudi_enable_clock_gating(struct hl_device *hdev) +static void gaudi_set_clock_gating(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; u32 qman_offset; int i; - if (!hdev->clock_gating) - return; - - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) - return; - /* In case we are during debug session, don't enable the clock gate * as it may interfere */ if (hdev->in_debug) return; - for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_UPPER_CP_CGM_PWR_GATE_EN); } - for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_COMMON_CP_CGM_PWR_GATE_EN); } - WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME0_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) { + WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } + + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) { + WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)))) + continue; + WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmTPC0_QM_CGM_CFG + qman_offset, @@ -2663,7 +2678,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) gaudi_stop_hbm_dma_qmans(hdev); gaudi_stop_pci_dma_qmans(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); msleep(wait_timeout_ms); @@ -3003,7 +3018,7 @@ static int gaudi_hw_init(struct hl_device *hdev) gaudi_init_tpc_qmans(hdev); - gaudi_enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); gaudi_enable_timestamp(hdev); @@ -3112,7 +3127,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER); + HW_CAP_HBM_SCRAMBLER | + HW_CAP_CLK_GATE); + memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); } @@ -3463,6 +3480,9 @@ static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, return 0; } + if (!timeout) + timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } @@ -3865,6 +3885,12 @@ static int gaudi_validate_cb(struct hl_device *hdev, rc = -EPERM; break; + case PACKET_WREG_BULK: + dev_err(hdev->dev, + "User not allowed to use WREG_BULK\n"); + rc = -EPERM; + break; + case PACKET_LOAD_AND_EXE: rc = gaudi_validate_load_and_exe_pkt(hdev, parser, (struct packet_load_and_exe *) user_pkt); @@ -3880,7 +3906,6 @@ static int gaudi_validate_cb(struct hl_device *hdev, break; case PACKET_WREG_32: - case PACKET_WREG_BULK: case PACKET_MSG_LONG: case PACKET_MSG_SHORT: case PACKET_REPEAT: @@ -4521,13 +4546,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; } else { *val = RREG32(addr - CFG_BASE); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4563,13 +4593,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; } else { WREG32(addr - CFG_BASE, val); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { writel(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4605,7 +4640,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4615,6 +4654,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) *val = (((u64) val_h) << 32) | val_l; } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4651,7 +4691,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4660,6 +4704,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4881,7 +4926,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } @@ -5262,7 +5307,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev, } if (disable_clock_gating) { - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } } @@ -5749,7 +5794,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, /* Clear interrupts */ WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6265,7 +6310,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask, if (s) seq_puts(s, "\n"); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6366,7 +6411,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d icache prefetch\n", tpc_id); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -6395,7 +6440,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 1000, kernel_timeout); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); if (rc) { @@ -6736,7 +6781,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, .send_heartbeat = gaudi_send_heartbeat, - .enable_clock_gating = gaudi_enable_clock_gating, + .set_clock_gating = gaudi_set_clock_gating, .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0d2952bb58df..88460b2138d8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -88,6 +88,7 @@ #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GOYA_QMAN0_FENCE_VAL 0xD169B243 @@ -2830,6 +2831,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, return 0; } + if (!timeout) + timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } @@ -4431,8 +4435,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -4464,8 +4468,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); - rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -5028,14 +5032,14 @@ int goya_armcp_info_get(struct hl_device *hdev) return 0; } -static void goya_enable_clock_gating(struct hl_device *hdev) +static void goya_set_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static void goya_disable_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, @@ -5259,7 +5263,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, .send_heartbeat = goya_send_heartbeat, - .enable_clock_gating = goya_enable_clock_gating, + .set_clock_gating = goya_set_clock_gating, .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 1ecdcf8b763a..194d83352696 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -578,8 +578,9 @@ enum hl_pll_frequency { * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. - * @enable_clock_gating: enable clock gating for reducing power consumption. - * @disable_clock_gating: disable clock for accessing registers on HBW. + * @set_clock_gating: enable/disable clock gating per engine according to + * clock gating mask in hdev + * @disable_clock_gating: disable clock gating completely * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @soft_reset_late_init: perform certain actions needed after soft reset. @@ -587,7 +588,11 @@ enum hl_pll_frequency { * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. - * @send_cpu_message: send buffer to ArmCP. + * @send_cpu_message: send message to F/W. If the message is timedout, the + * driver will eventually reset the device. The timeout can + * be determined by the calling function or it can be 0 and + * then the timeout is the default timeout for the specific + * ASIC * @get_hw_state: retrieve the H/W state * @pci_bars_map: Map PCI BARs. * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns @@ -680,7 +685,7 @@ struct hl_asic_funcs { int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); - void (*enable_clock_gating)(struct hl_device *hdev); + void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, @@ -1398,6 +1403,9 @@ struct hl_device_idle_busy_ts { * @max_power: the max power of the device, as configured by the sysadmin. This * value is saved so in case of hard-reset, the driver will restore * this value and update the F/W after the re-initialization + * @clock_gating_mask: is clock gating enabled. bitmask that represents the + * different engines. See debugfs-driver-habanalabs for + * details. * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @cs_active_cnt: number of active command submissions on this device (active @@ -1425,7 +1433,6 @@ struct hl_device_idle_busy_ts { * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. * @mmu_huge_page_opt: is MMU huge pages optimization enabled. - * @clock_gating: is clock gating enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device * @in_debug: is device under debug. This, together with fpriv_list, enforces @@ -1493,6 +1500,7 @@ struct hl_device { atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; + u64 clock_gating_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; int cs_active_cnt; @@ -1514,7 +1522,6 @@ struct hl_device { u8 dram_default_page_mapping; u8 pmmu_huge_range; u8 init_done; - u8 clock_gating; u8 device_cpu_disabled; u8 dma_mask; u8 in_debug; diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 8652c7e5d7f1..22716da9f85f 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -232,7 +232,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->fw_loading = 1; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; - hdev->clock_gating = 1; + hdev->clock_gating_mask = ULONG_MAX; hdev->reset_pcilink = 0; hdev->axi_drain = 0; diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c index 8c6cd77e6af6..b997336fa75f 100644 --- a/drivers/misc/habanalabs/hwmon.c +++ b/drivers/misc/habanalabs/hwmon.c @@ -10,7 +10,6 @@ #include <linux/pci.h> #include <linux/hwmon.h> -#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */ #define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) int hl_build_hwmon_channel_info(struct hl_device *hdev, @@ -323,7 +322,7 @@ int hl_get_temperature(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -350,7 +349,7 @@ int hl_set_temperature(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -374,7 +373,7 @@ int hl_get_voltage(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -400,7 +399,7 @@ int hl_get_current(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -426,7 +425,7 @@ int hl_get_fan_speed(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -452,7 +451,7 @@ int hl_get_pwm_info(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -479,7 +478,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -502,7 +501,7 @@ int hl_set_voltage(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -527,7 +526,7 @@ int hl_set_current(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c index 5d78d5e1c782..70b6b1863c2e 100644 --- a/drivers/misc/habanalabs/sysfs.c +++ b/drivers/misc/habanalabs/sysfs.c @@ -9,9 +9,6 @@ #include <linux/pci.h> -#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */ -#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */ - long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct armcp_packet pkt; @@ -29,7 +26,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) pkt.pll_index = cpu_to_le32(pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, @@ -54,7 +51,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -74,7 +71,7 @@ u64 hl_get_max_power(struct hl_device *hdev) ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); @@ -96,7 +93,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value) pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); |