diff options
Diffstat (limited to 'drivers/misc/habanalabs')
27 files changed, 1644 insertions, 766 deletions
diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig index 861c81006c6d..bd01d0d940c0 100644 --- a/drivers/misc/habanalabs/Kconfig +++ b/drivers/misc/habanalabs/Kconfig @@ -10,6 +10,7 @@ config HABANA_AI select HWMON select DMA_SHARED_BUFFER select CRC32 + select FW_LOADER help Enables PCIe card driver for Habana's AI Processors (AIP) that are designed to accelerate Deep Learning inference and training workloads. diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index b35d7000c86b..a48a9e0969ed 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o include $(src)/common/Makefile habanalabs-y += $(HL_COMMON_FILES) -include $(src)/goya/Makefile -habanalabs-y += $(HL_GOYA_FILES) +include $(src)/gaudi2/Makefile +habanalabs-y += $(HL_GAUDI2_FILES) include $(src)/gaudi/Makefile habanalabs-y += $(HL_GAUDI_FILES) -include $(src)/gaudi2/Makefile -habanalabs-y += $(HL_GAUDI2_FILES) +include $(src)/goya/Makefile +habanalabs-y += $(HL_GOYA_FILES) habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index b027f66f8bd4..2b332991ac6a 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -12,20 +12,18 @@ #include <linux/slab.h> #include <linux/uaccess.h> +#define CB_VA_POOL_SIZE (4UL * SZ_1G) + static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_vm_va_block *va_block, *tmp; - dma_addr_t bus_addr; - u64 virt_addr; u32 page_size = prop->pmmu.page_size; - s32 offset; int rc; if (!hdev->supports_cb_mapping) { dev_err_ratelimited(hdev->dev, - "Cannot map CB because no VA range is allocated for CB mapping\n"); + "Mapping a CB to the device's MMU is not supported\n"); return -EINVAL; } @@ -35,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) return -EINVAL; } - INIT_LIST_HEAD(&cb->va_block_list); - - for (bus_addr = cb->bus_address; - bus_addr < cb->bus_address + cb->size; - bus_addr += page_size) { - - virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size); - if (!virt_addr) { - dev_err(hdev->dev, - "Failed to allocate device virtual address for CB\n"); - rc = -ENOMEM; - goto err_va_pool_free; - } + if (cb->is_mmu_mapped) + return 0; - va_block = kzalloc(sizeof(*va_block), GFP_KERNEL); - if (!va_block) { - rc = -ENOMEM; - gen_pool_free(ctx->cb_va_pool, virt_addr, page_size); - goto err_va_pool_free; - } + cb->roundup_size = roundup(cb->size, page_size); - va_block->start = virt_addr; - va_block->end = virt_addr + page_size - 1; - va_block->size = page_size; - list_add_tail(&va_block->node, &cb->va_block_list); + cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size); + if (!cb->virtual_addr) { + dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n"); + return -ENOMEM; } - mutex_lock(&ctx->mmu_lock); - - bus_addr = cb->bus_address; - offset = 0; - list_for_each_entry(va_block, &cb->va_block_list, node) { - rc = hl_mmu_map_page(ctx, va_block->start, bus_addr, - va_block->size, list_is_last(&va_block->node, - &cb->va_block_list)); - if (rc) { - dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", - va_block->start); - goto err_va_umap; - } - - bus_addr += va_block->size; - offset += va_block->size; + mutex_lock(&hdev->mmu_lock); + rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size); + if (rc) { + dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr); + goto err_va_umap; } - rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV); - - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); cb->is_mmu_mapped = true; - return rc; err_va_umap: - list_for_each_entry(va_block, &cb->va_block_list, node) { - if (offset <= 0) - break; - hl_mmu_unmap_page(ctx, va_block->start, va_block->size, - offset <= va_block->size); - offset -= va_block->size; - } - - rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - - mutex_unlock(&ctx->mmu_lock); - -err_va_pool_free: - list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { - gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); - list_del(&va_block->node); - kfree(va_block); - } - + mutex_unlock(&hdev->mmu_lock); + gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); return rc; } static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) { struct hl_device *hdev = ctx->hdev; - struct hl_vm_va_block *va_block, *tmp; - - mutex_lock(&ctx->mmu_lock); - - list_for_each_entry(va_block, &cb->va_block_list, node) - if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size, - list_is_last(&va_block->node, - &cb->va_block_list))) - dev_warn_ratelimited(hdev->dev, - "Failed to unmap CB's va 0x%llx\n", - va_block->start); + mutex_lock(&hdev->mmu_lock); + hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size); hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); + mutex_unlock(&hdev->mmu_lock); - mutex_unlock(&ctx->mmu_lock); - - list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { - gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); - list_del(&va_block->node); - kfree(va_block); - } + gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); } static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) @@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle) static int hl_cb_info(struct hl_mem_mgr *mmg, u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va) { - struct hl_vm_va_block *va_block; struct hl_cb *cb; int rc = 0; @@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg, } if (flags & HL_CB_FLAGS_GET_DEVICE_VA) { - va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node); - if (va_block) { - *device_va = va_block->start; + if (cb->is_mmu_mapped) { + *device_va = cb->virtual_addr; } else { dev_err(mmg->dev, "CB is not mapped to the device's MMU\n"); rc = -EINVAL; @@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx) return -ENOMEM; } - rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr, - prop->cb_va_end_addr - prop->cb_va_start_addr, -1); + ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, + CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); + if (!ctx->cb_va_pool_base) { + rc = -ENOMEM; + goto err_pool_destroy; + } + rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1); if (rc) { dev_err(hdev->dev, "Failed to add memory to VA gen pool for CB mapping\n"); - goto err_pool_destroy; + goto err_unreserve_va_block; } return 0; +err_unreserve_va_block: + hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE); err_pool_destroy: gen_pool_destroy(ctx->cb_va_pool); @@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx) return; gen_pool_destroy(ctx->cb_va_pool); + hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE); } diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 90a4574cbe2d..fa05770865c6 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -12,7 +12,9 @@ #include <linux/slab.h> #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ - HL_CS_FLAGS_COLLECTIVE_WAIT) + HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ + HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND) + #define MAX_TS_ITER_NUM 10 @@ -824,10 +826,10 @@ static void cs_timedout(struct work_struct *work) } /* Save only the first CS timeout parameters */ - rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0); + rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); if (rc) { - hdev->last_error.cs_timeout.timestamp = ktime_get(); - hdev->last_error.cs_timeout.seq = cs->sequence; + hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); + hdev->captured_err_info.cs_timeout.seq = cs->sequence; event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT | HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT; @@ -1242,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) return CS_RESERVE_SIGNALS; else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) return CS_UNRESERVE_SIGNALS; + else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) + return CS_TYPE_ENGINE_CORE; else return CS_TYPE_DEFAULT; } @@ -1253,6 +1257,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) u32 cs_type_flags, num_chunks; enum hl_device_status status; enum hl_cs_type cs_type; + bool is_sync_stream; if (!hl_device_operational(hdev, &status)) { return -EBUSY; @@ -1276,9 +1281,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) cs_type = hl_cs_get_cs_type(cs_type_flags); num_chunks = args->in.num_chunks_execute; - if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || - cs_type == CS_TYPE_COLLECTIVE_WAIT) && - !hdev->supports_sync_stream)) { + is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || + cs_type == CS_TYPE_COLLECTIVE_WAIT); + + if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { dev_err(hdev->dev, "Sync stream CS is not supported\n"); return -EINVAL; } @@ -1288,7 +1294,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); return -EINVAL; } - } else if (num_chunks != 1) { + } else if (is_sync_stream && num_chunks != 1) { dev_err(hdev->dev, "Sync stream CS mandates one chunk only, context %d\n", ctx->asid); @@ -1584,13 +1590,14 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, struct hl_device *hdev = hpriv->hdev; struct hl_ctx *ctx = hpriv->ctx; bool need_soft_reset = false; - int rc = 0, do_ctx_switch; + int rc = 0, do_ctx_switch = 0; void __user *chunks; u32 num_chunks, tmp; u16 sob_count; int ret; - do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); + if (hdev->supports_ctx_switch) + do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { mutex_lock(&hpriv->restore_phase_mutex); @@ -1661,9 +1668,10 @@ wait_again: } } - ctx->thread_ctx_switch_wait_token = 1; + if (hdev->supports_ctx_switch) + ctx->thread_ctx_switch_wait_token = 1; - } else if (!ctx->thread_ctx_switch_wait_token) { + } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { rc = hl_poll_timeout_memory(hdev, &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), 100, jiffies_to_usecs(hdev->timeout_jiffies), false); @@ -2351,6 +2359,41 @@ out: return rc; } +static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, + u32 num_engine_cores, u32 core_command) +{ + int rc; + struct hl_device *hdev = hpriv->hdev; + void __user *engine_cores_arr; + u32 *cores; + + if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { + dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); + return -EINVAL; + } + + if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { + dev_err(hdev->dev, "Engine core command is invalid\n"); + return -EINVAL; + } + + engine_cores_arr = (void __user *) (uintptr_t) engine_cores; + cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); + if (!cores) + return -ENOMEM; + + if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { + dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); + kfree(cores); + return -EFAULT; + } + + rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); + kfree(cores); + + return rc; +} + int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cs_args *args = data; @@ -2403,6 +2446,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = cs_ioctl_unreserve_signals(hpriv, args->in.encaps_sig_handle_id); break; + case CS_TYPE_ENGINE_CORE: + rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, + args->in.num_engine_cores, args->in.core_command); + break; default: rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, args->in.cs_flags, @@ -2524,7 +2571,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_com ktime_t max_ktime, first_cs_time; enum hl_cs_wait_status status; - memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr)); + memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); /* get all fences under the same lock */ rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); @@ -2826,7 +2873,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) } /* allocate array for the fences */ - fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL); + fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL); if (!fence_arr) { rc = -ENOMEM; goto free_seq_arr; diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 64439f33a19b..48d3ec8b5c82 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -291,14 +291,16 @@ static int vm_show(struct seq_file *s, void *data) if (ctx->asid != HL_KERNEL_ASID_ID && !list_empty(&ctx->hw_block_mem_list)) { seq_puts(s, "\nhw_block mappings:\n\n"); - seq_puts(s, " virtual address size HW block id\n"); - seq_puts(s, "-------------------------------------------\n"); + seq_puts(s, + " virtual address block size mapped size HW block id\n"); + seq_puts(s, + "---------------------------------------------------------------\n"); mutex_lock(&ctx->hw_block_list_lock); - list_for_each_entry(lnode, &ctx->hw_block_mem_list, - node) { + list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) { seq_printf(s, - " 0x%-14lx %-6u %-9u\n", - lnode->vaddr, lnode->size, lnode->id); + " 0x%-14lx %-6u %-6u %-9u\n", + lnode->vaddr, lnode->block_size, lnode->mapped_size, + lnode->id); } mutex_unlock(&ctx->hw_block_list_lock); } @@ -591,6 +593,7 @@ static int engines_show(struct seq_file *s, void *data) struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + struct engines_data eng_data; if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, @@ -598,7 +601,25 @@ static int engines_show(struct seq_file *s, void *data) return 0; } - hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s); + eng_data.actual_size = 0; + eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE; + eng_data.buf = vmalloc(eng_data.allocated_buf_size); + if (!eng_data.buf) + return -ENOMEM; + + hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data); + + if (eng_data.actual_size > eng_data.allocated_buf_size) { + dev_err(hdev->dev, + "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n", + eng_data.actual_size, eng_data.allocated_buf_size); + vfree(eng_data.buf); + return -ENOMEM; + } + + seq_write(s, eng_data.buf, eng_data.actual_size); + + vfree(eng_data.buf); return 0; } diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index b30aeb1c657f..233d8b46c831 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -13,6 +13,8 @@ #include <linux/pci.h> #include <linux/hwmon.h> +#include <trace/events/habanalabs.h> + #define HL_RESET_DELAY_USEC 10000 /* 10ms */ enum dma_alloc_type { @@ -26,8 +28,9 @@ enum dma_alloc_type { /* * hl_set_dram_bar- sets the bar to allow later access to address * - * @hdev: pointer to habanalabs device structure + * @hdev: pointer to habanalabs device structure. * @addr: the address the caller wants to access. + * @region: the PCI region. * * @return: the old BAR base address on success, U64_MAX for failure. * The caller should set it back to the old address after use. @@ -37,58 +40,64 @@ enum dma_alloc_type { * This function can be called also if the bar doesn't need to be set, * in that case it just won't change the base. */ -static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr) +static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 bar_base_addr; + u64 bar_base_addr, old_base; - bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); + if (is_power_of_2(prop->dram_pci_bar_size)) + bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); + else + bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * + prop->dram_pci_bar_size; - return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); -} + old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); + /* in case of success we need to update the new BAR base */ + if (old_base != U64_MAX) + region->region_base = bar_base_addr; + + return old_base; +} static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type, enum pci_region region_type) { struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; + void __iomem *acc_addr; u64 old_base = 0, rc; if (region_type == PCI_REGION_DRAM) { - old_base = hl_set_dram_bar(hdev, addr); + old_base = hl_set_dram_bar(hdev, addr, region); if (old_base == U64_MAX) return -EIO; } + acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base + + region->offset_in_bar; switch (acc_type) { case DEBUGFS_READ8: - *val = readb(hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + *val = readb(acc_addr); break; case DEBUGFS_WRITE8: - writeb(*val, hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + writeb(*val, acc_addr); break; case DEBUGFS_READ32: - *val = readl(hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + *val = readl(acc_addr); break; case DEBUGFS_WRITE32: - writel(*val, hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + writel(*val, acc_addr); break; case DEBUGFS_READ64: - *val = readq(hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + *val = readq(acc_addr); break; case DEBUGFS_WRITE64: - writeq(*val, hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + writeq(*val, acc_addr); break; } if (region_type == PCI_REGION_DRAM) { - rc = hl_set_dram_bar(hdev, old_base); + rc = hl_set_dram_bar(hdev, old_base, region); if (rc == U64_MAX) return -EIO; } @@ -97,9 +106,10 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val } static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, - gfp_t flag, enum dma_alloc_type alloc_type) + gfp_t flag, enum dma_alloc_type alloc_type, + const char *caller) { - void *ptr; + void *ptr = NULL; switch (alloc_type) { case DMA_ALLOC_COHERENT: @@ -113,11 +123,16 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t break; } + if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) + trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, + caller); + return ptr; } static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, enum dma_alloc_type alloc_type) + dma_addr_t dma_handle, enum dma_alloc_type alloc_type, + const char *caller) { switch (alloc_type) { case DMA_ALLOC_COHERENT: @@ -130,39 +145,44 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); break; } + + trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller); } -void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, - gfp_t flag) +void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, const char *caller) { - return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT); + return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller); } -void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr, - dma_addr_t dma_handle) +void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, const char *caller) { - hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT); + hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); } -void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) +void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, const char *caller) { - return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE); + return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); } -void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) +void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, + const char *caller) { - hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE); + hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); } -void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags, - dma_addr_t *dma_handle) +void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, + dma_addr_t *dma_handle, const char *caller) { - return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL); + return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller); } -void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) +void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, + const char *caller) { - hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL); + hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); } int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) @@ -267,6 +287,30 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, return 0; } +void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) +{ + va_list args; + int str_size; + + va_start(args, fmt); + /* Calculate formatted string length. Assuming each string is null terminated, hence + * increment result by 1 + */ + str_size = vsnprintf(NULL, 0, fmt, args) + 1; + va_end(args); + + if ((e->actual_size + str_size) < e->allocated_buf_size) { + va_start(args, fmt); + vsnprintf(e->buf + e->actual_size, str_size, fmt, args); + va_end(args); + } + + /* Need to update the size even when not updating destination buffer to get the exact size + * of all input strings + */ + e->actual_size += str_size; +} + enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; @@ -322,6 +366,8 @@ static void hpriv_release(struct kref *ref) hdev = hpriv->hdev; + hdev->asic_funcs->send_device_activity(hdev, false); + put_pid(hpriv->taskpid); hl_debugfs_remove_file(hpriv); @@ -673,7 +719,7 @@ static int device_early_init(struct hl_device *hdev) if (hdev->asic_prop.completion_queues_count) { hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, - sizeof(*hdev->cq_wq), + sizeof(struct workqueue_struct *), GFP_KERNEL); if (!hdev->cq_wq) { rc = -ENOMEM; @@ -1091,7 +1137,9 @@ int hl_device_resume(struct hl_device *hdev) /* 'in_reset' was set to true during suspend, now we must clear it in order * for hard reset to be performed */ + spin_lock(&hdev->reset_info.lock); hdev->reset_info.in_reset = 0; + spin_unlock(&hdev->reset_info.lock); rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); if (rc) { @@ -1518,6 +1566,13 @@ kill_processes: */ hdev->disabled = false; + /* F/W security enabled indication might be updated after hard-reset */ + if (hard_reset) { + rc = hl_fw_read_preboot_status(hdev); + if (rc) + goto out_err; + } + rc = hdev->asic_funcs->hw_init(hdev); if (rc) { dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); @@ -1556,7 +1611,7 @@ kill_processes: if (!hdev->asic_prop.fw_security_enabled) hl_fw_set_max_power(hdev); } else { - rc = hdev->asic_funcs->non_hard_reset_late_init(hdev); + rc = hdev->asic_funcs->compute_reset_late_init(hdev); if (rc) { if (reset_upon_device_release) dev_err(hdev->dev, @@ -1704,7 +1759,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) char *name; bool add_cdev_sysfs_on_err = false; - name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); + hdev->cdev_idx = hdev->id / 2; + + name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx); if (!name) { rc = -ENOMEM; goto out_disabled; @@ -1719,7 +1776,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (rc) goto out_disabled; - name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); + name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx); if (!name) { rc = -ENOMEM; goto free_dev; @@ -1806,7 +1863,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) } hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs, - sizeof(*hdev->shadow_cs_queue), GFP_KERNEL); + sizeof(struct hl_cs *), GFP_KERNEL); if (!hdev->shadow_cs_queue) { rc = -ENOMEM; goto cq_fini; @@ -1997,10 +2054,10 @@ out_disabled: if (hdev->pdev) dev_err(&hdev->pdev->dev, "Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id / 2); + hdev->cdev_idx); else pr_err("Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id / 2); + hdev->cdev_idx); return rc; } diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 608ca67527a5..2de6a9bd564d 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -15,14 +15,6 @@ #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ -struct fw_binning_conf { - u64 tpc_binning; - u32 dec_binning; - u32 hbm_binning; - u32 edma_binning; - u32 mme_redundancy; -}; - static char *extract_fw_ver_from_str(const char *fw_str) { char *str, *fw_ver, *whitespace; @@ -260,7 +252,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; struct hl_bd *sent_bd; - u32 tmp, expected_ack_val, pi; + u32 tmp, expected_ack_val, pi, opcode; int rc; pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); @@ -327,8 +319,35 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; if (rc) { - dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", - rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT); + opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT; + + if (!prop->supports_advanced_cpucp_rc) { + dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode); + goto scrub_descriptor; + } + + switch (rc) { + case cpucp_packet_invalid: + dev_err(hdev->dev, + "CPU packet %d is not supported by F/W\n", opcode); + break; + case cpucp_packet_fault: + dev_err(hdev->dev, + "F/W failed processing CPU packet %d\n", opcode); + break; + case cpucp_packet_invalid_pkt: + dev_dbg(hdev->dev, + "CPU packet %d is not supported by F/W\n", opcode); + break; + case cpucp_packet_invalid_params: + dev_err(hdev->dev, + "F/W reports invalid parameters for CPU packet %d\n", opcode); + break; + + default: + dev_err(hdev->dev, + "Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode); + } /* propagate the return code from the f/w to the callers who want to check it */ if (result) @@ -340,6 +359,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, *result = le64_to_cpu(pkt->result); } +scrub_descriptor: /* Scrub previous buffer descriptor 'ctl' field which contains the * previous PI value written during packet submission. * We must do this or else F/W can read an old value upon queue wraparound. @@ -462,6 +482,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, size); } +int hl_fw_send_device_activity(struct hl_device *hdev, bool open) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(open); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc) + dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open); + + return rc; +} + int hl_fw_send_heartbeat(struct hl_device *hdev) { struct cpucp_packet hb_pkt; @@ -581,6 +616,15 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val); /* All warnings should go here in order not to reach the unknown error validation */ + if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) { + dev_warn(hdev->dev, + "Device boot warning - EEPROM failure detected, default settings applied\n"); + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL; + } + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { dev_warn(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); @@ -1476,6 +1520,8 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev) */ prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); + prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN); + dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n", cpu_boot_dev_sts0); @@ -1514,7 +1560,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev) hdev->asic_funcs->init_firmware_preload_params(hdev); /* - * In order to determine boot method (static VS dymanic) we need to + * In order to determine boot method (static VS dynamic) we need to * read the boot caps register */ rc = hl_fw_read_preboot_caps(hdev); @@ -1781,7 +1827,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, * * @return the CRC32 result * - * NOTE: kernel's CRC32 differ's from standard CRC32 calculation. + * NOTE: kernel's CRC32 differs from standard CRC32 calculation. * in order to be aligned we need to flip the bits of both the input * initial CRC and kernel's CRC32 result. * in addition both sides use initial CRC of 0, @@ -1798,7 +1844,7 @@ static u32 hl_fw_compat_crc32(u8 *data, size_t size) * * @hdev: pointer to the habanalabs device structure * @addr: device address of memory transfer - * @size: memory transter size + * @size: memory transfer size * @region: PCI memory region * * @return 0 on success, otherwise non-zero error code @@ -1854,50 +1900,36 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev, u64 addr; int rc; - if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) { - dev_err(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n", + if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) + dev_warn(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n", fw_desc->header.magic); - return -EIO; - } - if (fw_desc->header.version != HL_COMMS_DESC_VER) { - dev_err(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n", + if (fw_desc->header.version != HL_COMMS_DESC_VER) + dev_warn(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n", fw_desc->header.version); - return -EIO; - } /* - * calc CRC32 of data without header. + * Calc CRC32 of data without header. use the size of the descriptor + * reported by firmware, without calculating it ourself, to allow adding + * more fields to the lkd_fw_comms_desc structure. * note that no alignment/stride address issues here as all structures - * are 64 bit padded + * are 64 bit padded. */ - data_size = sizeof(struct lkd_fw_comms_desc) - - sizeof(struct comms_desc_header); data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header); - - if (le16_to_cpu(fw_desc->header.size) != data_size) { - dev_err(hdev->dev, - "Invalid descriptor size 0x%x, expected size 0x%zx\n", - le16_to_cpu(fw_desc->header.size), data_size); - return -EIO; - } + data_size = le16_to_cpu(fw_desc->header.size); data_crc32 = hl_fw_compat_crc32(data_ptr, data_size); - if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) { - dev_err(hdev->dev, - "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n", - data_crc32, fw_desc->header.crc32); + dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n", + data_crc32, fw_desc->header.crc32); return -EIO; } /* find memory region to which to copy the image */ addr = le64_to_cpu(fw_desc->img_addr); region_id = hl_get_pci_memory_region(hdev, addr); - if ((region_id != PCI_REGION_SRAM) && - ((region_id != PCI_REGION_DRAM))) { - dev_err(hdev->dev, - "Invalid region to copy FW image address=%llx\n", addr); + if ((region_id != PCI_REGION_SRAM) && ((region_id != PCI_REGION_DRAM))) { + dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr); return -EIO; } @@ -1914,8 +1946,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev, fw_loader->dynamic_loader.fw_image_size, region); if (rc) { - dev_err(hdev->dev, - "invalid mem transfer request for FW image\n"); + dev_err(hdev->dev, "invalid mem transfer request for FW image\n"); return rc; } @@ -2422,18 +2453,6 @@ static int hl_fw_dynamic_send_msg(struct hl_device *hdev, msg.reset_cause = *(__u8 *) data; break; - case HL_COMMS_BINNING_CONF_TYPE: - { - struct fw_binning_conf *binning_conf = (struct fw_binning_conf *) data; - - msg.tpc_binning_conf = cpu_to_le64(binning_conf->tpc_binning); - msg.dec_binning_conf = cpu_to_le32(binning_conf->dec_binning); - msg.hbm_binning_conf = cpu_to_le32(binning_conf->hbm_binning); - msg.edma_binning_conf = cpu_to_le32(binning_conf->edma_binning); - msg.mme_redundancy_conf = cpu_to_le32(binning_conf->mme_redundancy); - break; - } - default: dev_err(hdev->dev, "Send COMMS message - invalid message type %u\n", @@ -2503,13 +2522,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, */ dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; - /* if no preboot loaded indication- wait for preboot */ - if (!(hdev->fw_loader.fw_comp_loaded & FW_TYPE_PREBOOT_CPU)) { - rc = hl_fw_wait_preboot_ready(hdev); - if (rc) - return -EIO; - } - rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE, 0, true, fw_loader->cpu_timeout); @@ -2547,7 +2559,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, /* * when testing FW load (without Linux) on PLDM we don't want to * wait until boot fit is active as it may take several hours. - * instead, we load the bootfit and let it do all initializations in + * instead, we load the bootfit and let it do all initialization in * the background. */ if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX)) @@ -2961,3 +2973,49 @@ void hl_fw_set_max_power(struct hl_device *hdev) if (rc) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); } + +static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size, + u32 nonce, u32 timeout) +{ + struct cpucp_packet pkt = {}; + dma_addr_t req_dma_addr; + void *req_cpu_addr; + int rc; + + req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr); + if (!data) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id); + return -ENOMEM; + } + + memset(data, 0, size); + + pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(req_dma_addr); + pkt.data_max_size = cpu_to_le32(size); + pkt.nonce = cpu_to_le32(nonce); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + timeout, NULL); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc); + goto out; + } + + memcpy(data, req_cpu_addr, size); + +out: + hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr); + + return rc; +} + +int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, + u32 nonce) +{ + return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info, + sizeof(struct cpucp_sec_attest_info), nonce, + HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC); +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d59bba9e55c9..58c95b13be69 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -66,6 +66,7 @@ struct hl_fpriv; #define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC 10000000 /* 10s */ #define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */ #define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */ @@ -94,7 +95,7 @@ struct hl_fpriv; #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ /** - * enum hl_mmu_page_table_locaion - mmu page table location + * enum hl_mmu_page_table_location - mmu page table location * @MMU_DR_PGT: page-table is located on device DRAM. * @MMU_HR_PGT: page-table is located on host memory. * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported. @@ -143,6 +144,25 @@ enum hl_mmu_enablement { #define HL_MAX_DCORES 8 +/* DMA alloc/free wrappers */ +#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \ + hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__) + +#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \ + hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__) + +#define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \ + hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__) + +#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \ + hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__) + +#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \ + hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__) + +#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \ + hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__) + /* * Reset Flags * @@ -208,6 +228,7 @@ enum hl_protection_levels { * struct iterate_module_ctx - HW module iterator * @fn: function to apply to each HW module instance * @data: optional internal data to the function iterator + * @rc: return code for optional use of iterator/iterator-caller */ struct iterate_module_ctx { /* @@ -217,10 +238,12 @@ struct iterate_module_ctx { * @inst: HW module instance within the block * @offset: current HW module instance offset from the 1-st HW module instance * in the 1-st block - * @data: function specific data + * @ctx: the iterator context. */ - void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, void *data); + void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, + struct iterate_module_ctx *ctx); void *data; + int rc; }; struct hl_block_glbl_sec { @@ -342,7 +365,8 @@ enum hl_cs_type { CS_TYPE_WAIT, CS_TYPE_COLLECTIVE_WAIT, CS_RESERVE_SIGNALS, - CS_UNRESERVE_SIGNALS + CS_UNRESERVE_SIGNALS, + CS_TYPE_ENGINE_CORE }; /* @@ -544,10 +568,6 @@ struct hl_hints_range { * @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned. * @dram_enabled_mask: which DRAMs are enabled. * @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned. - * @cb_va_start_addr: virtual start address of command buffers which are mapped - * to the device's MMU. - * @cb_va_end_addr: virtual end address of command buffers which are mapped to - * the device's MMU. * @dram_hints_align_mask: dram va hint addresses alignment mask which is used * for hints validity check. * @cfg_base_address: config space base address. @@ -614,6 +634,7 @@ struct hl_hints_range { * which the property supports_user_set_page_size is true * (i.e. the DRAM supports multiple page sizes), otherwise * it will shall be equal to dram_page_size. + * @num_engine_cores: number of engine cpu cores * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -658,6 +679,7 @@ struct hl_hints_range { * @set_max_power_on_device_init: true if need to set max power in F/W on device init. * @supports_user_set_page_size: true if user can set the allocation page size. * @dma_mask: the dma mask to be set for this device + * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -689,8 +711,6 @@ struct asic_fixed_properties { u64 tpc_binning_mask; u64 dram_enabled_mask; u64 dram_binning_mask; - u64 cb_va_start_addr; - u64 cb_va_end_addr; u64 dram_hints_align_mask; u64 cfg_base_address; u64 mmu_cache_mng_addr; @@ -734,6 +754,7 @@ struct asic_fixed_properties { u32 faulty_dram_cluster_map; u32 xbar_edge_enabled_mask; u32 device_mem_alloc_default_page_size; + u32 num_engine_cores; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -766,6 +787,7 @@ struct asic_fixed_properties { u8 set_max_power_on_device_init; u8 supports_user_set_page_size; u8 dma_mask; + u8 supports_advanced_cpucp_rc; }; /** @@ -797,7 +819,7 @@ struct hl_fence { * @lock: spinlock to protect fence. * @hdev: habanalabs device structure. * @hw_sob: the H/W SOB used in this signal/wait CS. - * @encaps_sig_hdl: encaps signals hanlder. + * @encaps_sig_hdl: encaps signals handler. * @cs_seq: command submission sequence number. * @type: type of the CS - signal/wait. * @sob_val: the SOB value that is used in this signal/wait CS. @@ -898,14 +920,14 @@ struct hl_mmap_mem_buf { * @buf: back pointer to the parent mappable memory buffer * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. - * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to - * the device's MMU. * @kernel_address: Holds the CB's kernel virtual address. + * @virtual_addr: Holds the CB's virtual address. * @bus_address: Holds the CB's DMA address. * @size: holds the CB's size. + * @roundup_size: holds the cb size after roundup to page size. * @cs_cnt: holds number of CS that this CB participates in. * @is_pool: true if CB was acquired from the pool, false otherwise. - * @is_internal: internaly allocated + * @is_internal: internally allocated * @is_mmu_mapped: true if the CB is mapped to the device's MMU. */ struct hl_cb { @@ -914,10 +936,11 @@ struct hl_cb { struct hl_mmap_mem_buf *buf; struct list_head debugfs_list; struct list_head pool_list; - struct list_head va_block_list; void *kernel_address; + u64 virtual_addr; dma_addr_t bus_address; u32 size; + u32 roundup_size; atomic_t cs_cnt; u8 is_pool; u8 is_internal; @@ -1113,7 +1136,7 @@ struct timestamp_reg_info { * @fence: hl fence object for interrupt completion * @cq_target_value: CQ target value * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt - * handler for taget value comparison + * handler for target value comparison */ struct hl_user_pending_interrupt { struct timestamp_reg_info ts_reg_info; @@ -1372,6 +1395,18 @@ struct fw_load_mgr { struct hl_cs; /** + * struct engines_data - asic engines data + * @buf: buffer for engines data in ascii + * @actual_size: actual size of data that was written by the driver to the allocated buffer + * @allocated_buf_size: total size of allocated buffer + */ +struct engines_data { + char *buf; + int actual_size; + u32 allocated_buf_size; +}; + +/** * struct hl_asic_funcs - ASIC specific functions that are can be called from * common code. * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W. @@ -1434,11 +1469,9 @@ struct hl_cs; * @send_heartbeat: send is-alive packet to CPU-CP and verify response. * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. - * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset + * @compute_reset_late_init: perform certain actions needed after a compute reset * @hw_queues_lock: acquire H/W queues lock. * @hw_queues_unlock: release H/W queues lock. - * @kdma_lock: acquire H/W queues lock. Relevant from GRECO ASIC - * @kdma_unlock: release H/W queues lock. Relevant from GRECO ASIC * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. * @get_monitor_dump: retrieve monitor registers dump from F/W. @@ -1498,6 +1531,8 @@ struct hl_cs; * @check_if_razwi_happened: check if there was a razwi due to RR violation. * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR + * @set_engine_cores: set a config command to enigne cores + * @send_device_activity: indication to FW about device availability */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1570,13 +1605,11 @@ struct hl_asic_funcs { int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); - bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s); - int (*non_hard_reset_late_init)(struct hl_device *hdev); + bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e); + int (*compute_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); - void (*kdma_lock)(struct hl_device *hdev, int dcore_id); - void (*kdma_unlock)(struct hl_device *hdev, int dcore_id); u32 (*get_pci_id)(struct hl_device *hdev); int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); int (*get_monitor_dump)(struct hl_device *hdev, void *data); @@ -1634,6 +1667,9 @@ struct hl_asic_funcs { int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); + int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, + u32 num_cores, u32 core_command); + int (*send_device_activity)(struct hl_device *hdev, bool open); }; @@ -1727,10 +1763,10 @@ struct hl_cs_outcome { /** * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes - * @outcome_map: index of completed CS searcheable by sequence number + * @outcome_map: index of completed CS searchable by sequence number * @used_list: list of outcome objects currently in use * @free_list: list of outcome objects currently not in use - * @nodes_pool: a static pool of preallocated outcome objects + * @nodes_pool: a static pool of pre-allocated outcome objects * @db_lock: any operation on the store must take this lock */ struct hl_cs_outcome_store { @@ -1754,12 +1790,10 @@ struct hl_cs_outcome_store { * @refcount: reference counter for the context. Context is released only when * this hits 0l. It is incremented on CS and CS_WAIT. * @cs_pending: array of hl fence objects representing pending CS. - * @outcome_store: storage data structure used to remember ouitcomes of completed + * @outcome_store: storage data structure used to remember outcomes of completed * command submissions for a long time after CS id wraparound. * @va_range: holds available virtual addresses for host and dram mappings. * @mem_hash_lock: protects the mem_hash. - * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the - * MMU hash or walking the PGT requires talking this lock. * @hw_block_list_lock: protects the HW block memory list. * @debugfs_list: node in debugfs list of contexts. * @hw_block_mem_list: list of HW block virtual mapped addresses. @@ -1767,6 +1801,7 @@ struct hl_cs_outcome_store { * @cb_va_pool: device VA pool for command buffers which are mapped to the * device's MMU. * @sig_mgr: encaps signals handle manager. + * @cb_va_pool_base: the base address for the device VA pool * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed * to user so user could inquire about CS. It is used as * index to cs_pending array. @@ -1795,13 +1830,13 @@ struct hl_ctx { struct hl_cs_outcome_store outcome_store; struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; struct mutex mem_hash_lock; - struct mutex mmu_lock; struct mutex hw_block_list_lock; struct list_head debugfs_list; struct list_head hw_block_mem_list; struct hl_cs_counters_atomic cs_counters; struct gen_pool *cb_va_pool; struct hl_encaps_signals_mgr sig_mgr; + u64 cb_va_pool_base; u64 cs_sequence; u64 *dram_default_hops; spinlock_t cs_lock; @@ -1823,7 +1858,6 @@ struct hl_ctx_mgr { }; - /* * COMMAND SUBMISSIONS */ @@ -1889,7 +1923,7 @@ struct hl_userptr { * @tdr_active: true if TDR was activated for this CS (to prevent * double TDR activation). * @aborted: true if CS was aborted due to some device error. - * @timestamp: true if a timestmap must be captured upon completion. + * @timestamp: true if a timestamp must be captured upon completion. * @staged_last: true if this is the last staged CS and needs completion. * @staged_first: true if this is the first staged CS and we need to receive * timeout for this CS. @@ -2047,14 +2081,16 @@ struct hl_vm_hash_node { * @node: node to hang on the list in context object. * @ctx: the context this node belongs to. * @vaddr: virtual address of the HW block. - * @size: size of the block. + * @block_size: size of the block. + * @mapped_size: size of the block which is mapped. May change if partial un-mappings are done. * @id: HW block id (handle). */ struct hl_vm_hw_block_list_node { struct list_head node; struct hl_ctx *ctx; unsigned long vaddr; - u32 size; + u32 block_size; + u32 mapped_size; u32 id; }; @@ -2214,7 +2250,7 @@ struct hl_info_list { /** * struct hl_debugfs_entry - debugfs dentry wrapper. - * @info_ent: dentry realted ops. + * @info_ent: dentry related ops. * @dev_entry: ASIC specific debugfs manager. */ struct hl_debugfs_entry { @@ -2492,7 +2528,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); break; \ (val) = __elbi_read; \ } else {\ - (val) = RREG32((u32)addr); \ + (val) = RREG32((u32)(addr)); \ } \ if (cond) \ break; \ @@ -2503,7 +2539,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); break; \ (val) = __elbi_read; \ } else {\ - (val) = RREG32((u32)addr); \ + (val) = RREG32((u32)(addr)); \ } \ break; \ } \ @@ -2919,7 +2955,7 @@ struct razwi_info { * struct undefined_opcode_info - info about last undefined opcode error * @timestamp: timestamp of the undefined opcode error * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ - * entiers. In case all streams array entries are + * entries. In case all streams array entries are * filled with values, it means the execution was in Lower-CP. * @cq_addr: the address of the current handled command buffer * @cq_size: the size of the current handled command buffer @@ -2946,12 +2982,12 @@ struct undefined_opcode_info { }; /** - * struct last_error_session_info - info about last session errors occurred. - * @cs_timeout: CS timeout error last information. - * @razwi: razwi last information. + * struct hl_error_info - holds information collected during an error. + * @cs_timeout: CS timeout error information. + * @razwi: razwi information. * @undef_opcode: undefined opcode information */ -struct last_error_session_info { +struct hl_error_info { struct cs_timeout_info cs_timeout; struct razwi_info razwi; struct undefined_opcode_info undef_opcode; @@ -2960,7 +2996,7 @@ struct last_error_session_info { /** * struct hl_reset_info - holds current device reset information. * @lock: lock to protect critical reset flows. - * @compute_reset_cnt: number of compte resets since the driver was loaded. + * @compute_reset_cnt: number of compute resets since the driver was loaded. * @hard_reset_cnt: number of hard resets since the driver was loaded. * @hard_reset_schedule_flags: hard reset is scheduled to after current compute reset, * here we hold the hard reset flags. @@ -2971,7 +3007,7 @@ struct last_error_session_info { * @hard_reset_pending: is there a hard reset work pending. * @curr_reset_cause: saves an enumerated reset cause when a hard reset is * triggered, and cleared after it is shared with preboot. - * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden + * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden * with a new value on next reset * @reset_trigger_repeated: set if device reset is triggered more than once with * same cause. @@ -3041,6 +3077,12 @@ struct hl_reset_info { * @asid_mutex: protects asid_bitmap. * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue. * @debug_lock: protects critical section of setting debug mode for device + * @mmu_lock: protects the MMU page tables and invalidation h/w. Although the + * page tables are per context, the invalidation h/w is per MMU. + * Therefore, we can't allow multiple contexts (we only have two, + * user and kernel) to access the invalidation h/w at the same time. + * In addition, any change to the PGT, modifying the MMU hash or + * walking the PGT requires talking this lock. * @asic_prop: ASIC specific immutable properties. * @asic_funcs: ASIC specific functions. * @asic_specific: ASIC specific information to use only from ASIC files. @@ -3049,7 +3091,7 @@ struct hl_reset_info { * @hl_chip_info: ASIC's sensors information. * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. - * @cb_pool: list of preallocated CBs. + * @cb_pool: list of pre allocated CBs. * @cb_pool_lock: protects the CB pool. * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. * @internal_cb_pool_dma_addr: internal command buffer pool dma address. @@ -3070,7 +3112,7 @@ struct hl_reset_info { * @state_dump_specs: constants and dictionaries needed to dump system state. * @multi_cs_completion: array of multi-CS completion. * @clk_throttling: holds information about current/previous clock throttling events - * @last_error: holds information about last session in which CS timeout or razwi error occurred. + * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @fw_major_version: major version of current loaded preboot. @@ -3111,7 +3153,8 @@ struct hl_reset_info { * @edma_binning: contains mask of edma engines that is received from the f/w which * indicates which edma engines are binned-out * @id: device minor. - * @id_control: minor of the control device + * @id_control: minor of the control device. + * @cdev_idx: char device index. Used for setting its name. * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit * addresses. * @is_in_dram_scrub: true if dram scrub operation is on going. @@ -3165,6 +3208,7 @@ struct hl_reset_info { * Used only for testing. * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies * that the f/w is always alive. Used only for testing. + * @supports_ctx_switch: true if a ctx switch is required upon first submission. */ struct hl_device { struct pci_dev *pdev; @@ -3204,6 +3248,7 @@ struct hl_device { struct mutex asid_mutex; struct mutex send_cpu_message_lock; struct mutex debug_lock; + struct mutex mmu_lock; struct asic_fixed_properties asic_prop; const struct hl_asic_funcs *asic_funcs; void *asic_specific; @@ -3242,7 +3287,7 @@ struct hl_device { struct multi_cs_completion multi_cs_completion[ MULTI_CS_MAX_USER_CTX]; struct hl_clk_throttle clk_throttling; - struct last_error_session_info last_error; + struct hl_error_info captured_err_info; struct hl_reset_info reset_info; @@ -3271,6 +3316,7 @@ struct hl_device { u32 edma_binning; u16 id; u16 id_control; + u16 cdev_idx; u16 cpu_pci_msb_addr; u8 is_in_dram_scrub; u8 disabled; @@ -3300,6 +3346,7 @@ struct hl_device { u8 compute_ctx_in_release; u8 supports_mmu_prefetch; u8 reset_upon_device_release; + u8 supports_ctx_switch; /* Parameters for bring-up */ u64 nic_ports_mask; @@ -3426,15 +3473,18 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, } uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); -void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, - gfp_t flag); -void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr, - dma_addr_t dma_handle); -void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle); -void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); -void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags, - dma_addr_t *dma_handle); -void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr); +void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, const char *caller); +void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, const char *caller); +void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, const char *caller); +void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, + const char *caller); +void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, + dma_addr_t *dma_handle, const char *caller); +void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, + const char *caller); int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); @@ -3513,6 +3563,7 @@ void hl_sysfs_fini(struct hl_device *hdev); int hl_hwmon_init(struct hl_device *hdev); void hl_hwmon_fini(struct hl_device *hdev); +void hl_hwmon_release_resources(struct hl_device *hdev); int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, @@ -3557,7 +3608,7 @@ void hl_hw_block_mem_init(struct hl_ctx *ctx); void hl_hw_block_mem_fini(struct hl_ctx *ctx); u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, - enum hl_va_range_type type, u32 size, u32 alignment); + enum hl_va_range_type type, u64 size, u32 alignment); int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, u64 start_addr, u64 size); int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, @@ -3674,6 +3725,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev, struct cpucp_hbm_row_info *info); int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num); int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid); +int hl_fw_send_device_activity(struct hl_device *hdev, bool open); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); @@ -3697,6 +3749,8 @@ int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *va void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value); long hl_fw_get_max_power(struct hl_device *hdev); void hl_fw_set_max_power(struct hl_device *hdev); +int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, + u32 nonce); int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value); @@ -3743,6 +3797,7 @@ struct hl_mmap_mem_buf * hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp, void *args); +__printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index f733ead605e7..112632afe7d5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -14,6 +14,9 @@ #include <linux/aer.h> #include <linux/module.h> +#define CREATE_TRACE_POINTS +#include <trace/events/habanalabs.h> + #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" @@ -27,7 +30,10 @@ static struct class *hl_class; static DEFINE_IDR(hl_devs_idr); static DEFINE_MUTEX(hl_devs_idr_lock); -static int timeout_locked = 30; +#define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */ +#define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */ + +static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED; static int reset_on_lockup = 1; static int memory_scrub; static ulong boot_error_status_mask = ULONG_MAX; @@ -55,14 +61,12 @@ MODULE_PARM_DESC(boot_error_status_mask, #define PCI_IDS_GAUDI_SEC 0x1010 #define PCI_IDS_GAUDI2 0x1020 -#define PCI_IDS_GAUDI2_SEC 0x1030 static const struct pci_device_id ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), }, - { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), }, { 0, } }; MODULE_DEVICE_TABLE(pci, ids); @@ -92,9 +96,6 @@ static enum hl_asic_type get_asic_type(u16 device) case PCI_IDS_GAUDI2: asic_type = ASIC_GAUDI2; break; - case PCI_IDS_GAUDI2_SEC: - asic_type = ASIC_GAUDI2_SEC; - break; default: asic_type = ASIC_INVALID; break; @@ -107,7 +108,6 @@ static bool is_asic_secured(enum hl_asic_type asic_type) { switch (asic_type) { case ASIC_GAUDI_SEC: - case ASIC_GAUDI2_SEC: return true; default: return false; @@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp) mutex_lock(&hdev->fpriv_list_lock); if (!hl_device_operational(hdev, &status)) { - dev_err_ratelimited(hdev->dev, + dev_dbg_ratelimited(hdev->dev, "Can't open %s because it is %s\n", dev_name(hdev->dev), hdev->status[status]); @@ -207,11 +207,13 @@ int hl_device_open(struct inode *inode, struct file *filp) list_add(&hpriv->dev_node, &hdev->fpriv_list); mutex_unlock(&hdev->fpriv_list_lock); + hdev->asic_funcs->send_device_activity(hdev, true); + hl_debugfs_add_file(hpriv); - atomic_set(&hdev->last_error.cs_timeout.write_enable, 1); - atomic_set(&hdev->last_error.razwi.write_enable, 1); - hdev->last_error.undef_opcode.write_enable = true; + atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1); + atomic_set(&hdev->captured_err_info.razwi.write_enable, 1); + hdev->captured_err_info.undef_opcode.write_enable = true; hdev->open_counter++; hdev->last_successful_open_jif = jiffies; @@ -269,7 +271,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) mutex_lock(&hdev->fpriv_ctrl_list_lock); if (!hl_device_operational(hdev, NULL)) { - dev_err_ratelimited(hdev->dev_ctrl, + dev_dbg_ratelimited(hdev->dev_ctrl, "Can't open %s because it is disabled or in reset\n", dev_name(hdev->dev_ctrl)); rc = -EPERM; @@ -314,12 +316,22 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev) hdev->boot_error_status_mask = boot_error_status_mask; } -static void fixup_device_params_per_asic(struct hl_device *hdev) +static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) { switch (hdev->asic_type) { - case ASIC_GOYA: case ASIC_GAUDI: case ASIC_GAUDI_SEC: + /* If user didn't request a different timeout than the default one, we have + * a different default timeout for Gaudi + */ + if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) + hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * + MSEC_PER_SEC); + + hdev->reset_upon_device_release = 0; + break; + + case ASIC_GOYA: hdev->reset_upon_device_release = 0; break; @@ -339,7 +351,7 @@ static int fixup_device_params(struct hl_device *hdev) hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; if (tmp_timeout) - hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000); + hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); else hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; @@ -360,7 +372,7 @@ static int fixup_device_params(struct hl_device *hdev) if (!hdev->cpu_queues_enable) hdev->heartbeat = 0; - fixup_device_params_per_asic(hdev); + fixup_device_params_per_asic(hdev, tmp_timeout); return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 6a30bd98ab5e..43afe40966e5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -14,6 +14,7 @@ #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/slab.h> +#include <linux/vmalloc.h> static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), @@ -103,6 +104,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.edma_enabled_mask = prop->edma_enabled_mask; hw_ip.server_type = prop->server_type; + hw_ip.security_enabled = prop->fw_security_enabled; return copy_to_user(out, &hw_ip, min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0; @@ -591,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.seq = hdev->last_error.cs_timeout.seq; - info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp); + info.seq = hdev->captured_err_info.cs_timeout.seq; + info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } @@ -607,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp); - info.addr = hdev->last_error.razwi.addr; - info.engine_id_1 = hdev->last_error.razwi.engine_id_1; - info.engine_id_2 = hdev->last_error.razwi.engine_id_2; - info.no_engine_id = hdev->last_error.razwi.non_engine_initiator; - info.error_type = hdev->last_error.razwi.type; + info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp); + info.addr = hdev->captured_err_info.razwi.addr; + info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1; + info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2; + info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator; + info.error_type = hdev->captured_err_info.razwi.type; return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } @@ -627,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp); - info.engine_id = hdev->last_error.undef_opcode.engine_id; - info.cq_addr = hdev->last_error.undef_opcode.cq_addr; - info.cq_size = hdev->last_error.undef_opcode.cq_size; - info.stream_id = hdev->last_error.undef_opcode.stream_id; - info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len; - memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams, + info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp); + info.engine_id = hdev->captured_err_info.undef_opcode.engine_id; + info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr; + info.cq_size = hdev->captured_err_info.undef_opcode.cq_size; + info.stream_id = hdev->captured_err_info.undef_opcode.stream_id; + info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len; + memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams, sizeof(info.cb_addr_streams)); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; @@ -660,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } +static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct cpucp_sec_attest_info *sec_attest_info; + struct hl_info_sec_attest *info; + u32 max_size = args->return_size; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL); + if (!sec_attest_info) + return -ENOMEM; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto free_sec_attest_info; + } + + rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce); + if (rc) + goto free_info; + + info->nonce = le32_to_cpu(sec_attest_info->nonce); + info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len); + info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len); + info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len); + info->pcr_num_reg = sec_attest_info->pcr_num_reg; + info->pcr_reg_len = sec_attest_info->pcr_reg_len; + info->quote_sig_len = sec_attest_info->quote_sig_len; + memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data)); + memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote)); + memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data)); + memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate)); + memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig)); + + rc = copy_to_user(out, info, + min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0; + +free_info: + kfree(info); +free_sec_attest_info: + kfree(sec_attest_info); + + return rc; +} + static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args) { int rc; @@ -697,6 +748,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args) return 0; } +static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + u32 status_buf_size = args->return_size; + struct hl_device *hdev = hpriv->hdev; + struct engines_data eng_data; + int rc; + + if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out)) + return -EINVAL; + + eng_data.actual_size = 0; + eng_data.allocated_buf_size = status_buf_size; + eng_data.buf = vmalloc(status_buf_size); + if (!eng_data.buf) + return -ENOMEM; + + hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data); + + if (eng_data.actual_size > eng_data.allocated_buf_size) { + dev_err(hdev->dev, + "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n", + eng_data.actual_size, status_buf_size); + vfree(eng_data.buf); + return -ENOMEM; + } + + args->user_buffer_actual_size = eng_data.actual_size; + rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ? + -EFAULT : 0; + + vfree(eng_data.buf); + + return rc; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -806,12 +893,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_DRAM_PENDING_ROWS: return dram_pending_rows_info(hpriv, args); + case HL_INFO_SECURED_ATTESTATION: + return sec_attest_info(hpriv, args); + case HL_INFO_REGISTER_EVENTFD: return eventfd_register(hpriv, args); case HL_INFO_UNREGISTER_EVENTFD: return eventfd_unregister(hpriv, args); + case HL_INFO_ENGINE_STATUS: + return engine_status_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -EINVAL; diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 3f15ab9d827f..d0087c0ec48c 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -826,9 +826,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, q->kernel_address = p; - q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, - sizeof(*q->shadow_queue), - GFP_KERNEL); + q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL); if (!q->shadow_queue) { dev_err(hdev->dev, "Failed to allocate shadow queue for H/W queue %d\n", diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index 57f5d2c48330..55eb0203817f 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -194,7 +194,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen curr_arr[sensors_by_type_next_index[type]++] = flags; } - channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL); + channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *), + GFP_KERNEL); if (!channels_info) { rc = -ENOMEM; goto channels_info_array_err; @@ -910,3 +911,24 @@ void hl_hwmon_fini(struct hl_device *hdev) hwmon_device_unregister(hdev->hwmon_dev); } + +void hl_hwmon_release_resources(struct hl_device *hdev) +{ + const struct hwmon_channel_info **channel_info_arr; + int i = 0; + + if (!hdev->hl_chip_info->info) + return; + + channel_info_arr = hdev->hl_chip_info->info; + + while (channel_info_arr[i]) { + kfree(channel_info_arr[i]->config); + kfree(channel_info_arr[i]); + i++; + } + + kfree(channel_info_arr); + + hdev->hl_chip_info->info = NULL; +} diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 61bc1bfe984a..ef28f3b37b93 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -457,7 +457,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev, prev = list_prev_entry(va_block, node); if (&prev->node != va_list && prev->end + 1 == va_block->start) { prev->end = va_block->end; - prev->size = prev->end - prev->start; + prev->size = prev->end - prev->start + 1; list_del(&va_block->node); kfree(va_block); va_block = prev; @@ -466,7 +466,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev, next = list_next_entry(va_block, node); if (&next->node != va_list && va_block->end + 1 == next->start) { next->start = va_block->start; - next->size = next->end - next->start; + next->size = next->end - next->start + 1; list_del(&va_block->node); kfree(va_block); } @@ -755,7 +755,7 @@ out: * - Return the start address of the virtual block. */ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, - enum hl_va_range_type type, u32 size, u32 alignment) + enum hl_va_range_type type, u64 size, u32 alignment) { return get_va_block(hdev, ctx->va_range[type], size, 0, max(alignment, ctx->va_range[type]->page_size), @@ -1210,18 +1210,18 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device goto va_block_err; } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); goto map_err; } rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, ret_vaddr, phys_pg_pack->total_size); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) goto map_err; @@ -1362,7 +1362,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, else vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); @@ -1375,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr, phys_pg_pack->total_size); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); /* * If the context is closing we don't need to check for the MMU cache @@ -1418,18 +1418,23 @@ vm_type_err: return rc; } -static int map_block(struct hl_device *hdev, u64 address, u64 *handle, - u32 *size) +static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size) { - u32 block_id = 0; + u32 block_id; int rc; + *handle = 0; + if (size) + *size = 0; + rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); + if (rc) + return rc; *handle = block_id | HL_MMAP_TYPE_BLOCK; *handle <<= PAGE_SHIFT; - return rc; + return 0; } static void hw_block_vm_close(struct vm_area_struct *vma) @@ -1437,6 +1442,13 @@ static void hw_block_vm_close(struct vm_area_struct *vma) struct hl_vm_hw_block_list_node *lnode = (struct hl_vm_hw_block_list_node *) vma->vm_private_data; struct hl_ctx *ctx = lnode->ctx; + long new_mmap_size; + + new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start); + if (new_mmap_size > 0) { + lnode->mapped_size = new_mmap_size; + return; + } mutex_lock(&ctx->hw_block_list_lock); list_del(&lnode->node); @@ -1487,23 +1499,23 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) if (!lnode) return -ENOMEM; - vma->vm_ops = &hw_block_vm_ops; - vma->vm_private_data = lnode; - - hl_ctx_get(ctx); - rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); if (rc) { - hl_ctx_put(ctx); kfree(lnode); return rc; } + hl_ctx_get(ctx); + lnode->ctx = ctx; lnode->vaddr = vma->vm_start; - lnode->size = block_size; + lnode->block_size = block_size; + lnode->mapped_size = lnode->block_size; lnode->id = block_id; + vma->vm_private_data = lnode; + vma->vm_ops = &hw_block_vm_ops; + mutex_lock(&ctx->hw_block_list_lock); list_add_tail(&lnode->node, &ctx->hw_block_mem_list); mutex_unlock(&ctx->hw_block_list_lock); @@ -2296,8 +2308,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, return -EFAULT; } - userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages), - GFP_KERNEL); + userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!userptr->pages) return -ENOMEM; @@ -2759,13 +2770,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) unmap_device_va(ctx, &args, true); } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); /* invalidate the cache once after the unmapping loop */ hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); INIT_LIST_HEAD(&free_list); diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c index 56df962d2f3c..1936d653699e 100644 --- a/drivers/misc/habanalabs/common/memory_mgr.c +++ b/drivers/misc/habanalabs/common/memory_mgr.c @@ -11,7 +11,7 @@ * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to * the buffer descriptor. * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * @handle: requested buffer handle * * Find the buffer in the store and return a pointer to its descriptor. @@ -104,7 +104,7 @@ int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf) * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the * given handle. * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * @handle: requested buffer handle * * Decrease the reference to the buffer, and release it if it was the last one. @@ -137,7 +137,7 @@ int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle) /** * hl_mmap_mem_buf_alloc - allocate a new mappable buffer * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * @behavior: behavior object describing this buffer polymorphic behavior * @gfp: gfp flags to use for the memory allocations * @args: additional args passed to behavior->alloc @@ -222,7 +222,7 @@ static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = { /** * hl_mem_mgr_mmap - map the given buffer to the user * - * @mmg: unifed memory manager + * @mmg: unified memory manager * @vma: the vma object for which mmap was closed. * @args: additional args passed to behavior->mmap * @@ -322,7 +322,7 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg) /** * hl_mem_mgr_fini - release unified memory manager * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * * Release the unified memory manager. Shall be called from an interrupt context. */ diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 60740de47b34..cf8946266615 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -9,6 +9,8 @@ #include "../habanalabs.h" +#include <trace/events/habanalabs.h> + /** * hl_mmu_get_funcs() - get MMU functions structure * @hdev: habanalabs device structure. @@ -45,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev) if (!hdev->mmu_enable) return 0; + mutex_init(&hdev->mmu_lock); + if (hdev->mmu_func[MMU_DR_PGT].init != NULL) { rc = hdev->mmu_func[MMU_DR_PGT].init(hdev); if (rc) @@ -86,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev) if (hdev->mmu_func[MMU_HR_PGT].fini != NULL) hdev->mmu_func[MMU_HR_PGT].fini(hdev); + + mutex_destroy(&hdev->mmu_lock); } /** @@ -104,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) if (!hdev->mmu_enable) return 0; - mutex_init(&ctx->mmu_lock); - if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); if (rc) @@ -149,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); - - mutex_destroy(&ctx->mmu_lock); } /* @@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu if (flush_pte) mmu_funcs->flush(ctx); + if (trace_habanalabs_mmu_unmap_enabled() && !rc) + trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte); + return rc; } @@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s if (flush_pte) mmu_funcs->flush(ctx); + trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte); + return 0; err: @@ -403,6 +410,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, dev_err(hdev->dev, "Map failed for va 0x%llx to pa 0x%llx\n", curr_va, curr_pa); + /* last mapping failed so don't try to unmap it - reduce off by page_size */ + off -= page_size; goto unmap; } } @@ -600,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) return rc; @@ -692,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work) { struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work); struct hl_ctx *ctx = pfw->ctx; + struct hl_device *hdev = ctx->hdev; - if (!hl_device_operational(ctx->hdev, NULL)) + if (!hl_device_operational(hdev, NULL)) goto put_ctx; - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); - ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, - pfw->va, pfw->size); + hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); put_ctx: /* diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 6c5271f01160..36e9814139d1 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -375,6 +375,14 @@ out: return max_size; } +static ssize_t security_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled); +} + static DEVICE_ATTR_RO(armcp_kernel_ver); static DEVICE_ATTR_RO(armcp_ver); static DEVICE_ATTR_RO(cpld_ver); @@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status); static DEVICE_ATTR_RO(thermal_ver); static DEVICE_ATTR_RO(uboot_ver); static DEVICE_ATTR_RO(fw_os_ver); +static DEVICE_ATTR_RO(security_enabled); static struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, @@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_thermal_ver.attr, &dev_attr_uboot_ver.attr, &dev_attr_fw_os_ver.attr, + &dev_attr_security_enabled.attr, NULL, }; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index cb2988e2c7a8..92560414e843 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -899,12 +899,13 @@ static int gaudi_early_fini(struct hl_device *hdev) */ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) { - struct asic_fixed_properties *prop = &hdev->asic_prop; u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + struct asic_fixed_properties *prop = &hdev->asic_prop; u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; int rc; - if (hdev->asic_prop.fw_security_enabled) { + if ((hdev->fw_components & FW_TYPE_LINUX) && + (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { struct gaudi_device *gaudi = hdev->asic_specific; if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) @@ -939,9 +940,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) else freq = pll_clk / (div_fctr + 1); } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", - div_sel); + dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel); freq = 0; } } @@ -985,9 +984,10 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev, init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); - dst_addr = (prop->sram_user_base_address & - GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> - GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; + + /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */ + dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK, + round_up(prop->sram_user_base_address, SZ_8K)); init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); @@ -1683,23 +1683,7 @@ disable_pci_access: static void gaudi_late_fini(struct hl_device *hdev) { - const struct hwmon_channel_info **channel_info_arr; - int i = 0; - - if (!hdev->hl_chip_info->info) - return; - - channel_info_arr = hdev->hl_chip_info->info; - - while (channel_info_arr[i]) { - kfree(channel_info_arr[i]->config); - kfree(channel_info_arr[i]); - i++; - } - - kfree(channel_info_arr); - - hdev->hl_chip_info->info = NULL; + hl_hwmon_release_resources(hdev); } static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) @@ -4723,7 +4707,7 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev) addr = prop->sram_user_base_address; size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; - dev_dbg(hdev->dev, "Scrubing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", + dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", addr, addr + size, val); rc = gaudi_memset_device_memory(hdev, addr, size, val); if (rc) { @@ -6911,9 +6895,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea stream, cq_ptr, size); if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { - hdev->last_error.undef_opcode.cq_addr = cq_ptr; - hdev->last_error.undef_opcode.cq_size = size; - hdev->last_error.undef_opcode.stream_id = stream; + hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; + hdev->captured_err_info.undef_opcode.cq_size = size; + hdev->captured_err_info.undef_opcode.stream_id = stream; } } @@ -6979,7 +6963,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, } if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { - struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode; + struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; u32 arr_idx = undef_opcode->cb_addr_streams_len; if (arr_idx == 0) { @@ -7063,11 +7047,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, } /* check for undefined opcode */ if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && - hdev->last_error.undef_opcode.write_enable) { - memset(&hdev->last_error.undef_opcode, 0, - sizeof(hdev->last_error.undef_opcode)); + hdev->captured_err_info.undef_opcode.write_enable) { + memset(&hdev->captured_err_info.undef_opcode, 0, + sizeof(hdev->captured_err_info.undef_opcode)); - hdev->last_error.undef_opcode.write_enable = false; + hdev->captured_err_info.undef_opcode.write_enable = false; *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; } @@ -7233,12 +7217,6 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e switch (event_type) { case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: - /* In TPC QM event, notify on TPC assertion. While there isn't - * a specific event for assertion yet, the FW generates QM event. - * The SW upper layer will inspect an internal mapped area to indicate - * if the event is a tpc assertion or tpc QM. - */ - *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; index = event_type - GAUDI_EVENT_TPC0_QM; qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; @@ -7349,18 +7327,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); /* In case it's the first razwi, save its parameters*/ - rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0); + rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0); if (rc) { - hdev->last_error.razwi.timestamp = ktime_get(); - hdev->last_error.razwi.addr = razwi_addr; - hdev->last_error.razwi.engine_id_1 = engine_id_1; - hdev->last_error.razwi.engine_id_2 = engine_id_2; + hdev->captured_err_info.razwi.timestamp = ktime_get(); + hdev->captured_err_info.razwi.addr = razwi_addr; + hdev->captured_err_info.razwi.engine_id_1 = engine_id_1; + hdev->captured_err_info.razwi.engine_id_2 = engine_id_2; /* * If first engine id holds non valid value the razwi initiator * does not have engine id */ - hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); - hdev->last_error.razwi.type = razwi_type; + hdev->captured_err_info.razwi.non_engine_initiator = + (engine_id_1 == U16_MAX); + hdev->captured_err_info.razwi.type = razwi_type; } } @@ -7427,7 +7406,7 @@ static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, event_type, desc); } -static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) +static int gaudi_compute_reset_late_init(struct hl_device *hdev) { /* GAUDI doesn't support any reset except hard-reset */ return -EPERM; @@ -7702,6 +7681,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; goto reset_device; @@ -7711,6 +7691,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: gaudi_print_irq_info(hdev, event_type, false); fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_0: @@ -7722,6 +7703,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_1: @@ -7733,6 +7715,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI_EVENT_TPC0_DEC: @@ -7743,10 +7726,17 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_TPC5_DEC: case GAUDI_EVENT_TPC6_DEC: case GAUDI_EVENT_TPC7_DEC: + /* In TPC DEC event, notify on TPC assertion. While there isn't + * a specific event for assertion yet, the FW generates TPC DEC event. + * The SW upper layer will inspect an internal mapped area to indicate + * if the event is a TPC Assertion or a "real" TPC DEC. + */ + event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; gaudi_print_irq_info(hdev, event_type, true); reset_required = gaudi_tpc_read_interrupts(hdev, tpc_dec_event_to_tpc_id(event_type), "AXI_SLV_DEC_Error"); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; if (reset_required) { dev_err(hdev->dev, "reset required due to %s\n", gaudi_irq_map_table[event_type].name); @@ -7755,6 +7745,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; } break; @@ -7770,6 +7761,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr reset_required = gaudi_tpc_read_interrupts(hdev, tpc_krn_event_to_tpc_id(event_type), "KRN_ERR"); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; if (reset_required) { dev_err(hdev->dev, "reset required due to %s\n", gaudi_irq_map_table[event_type].name); @@ -7778,6 +7770,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; } break; @@ -7806,9 +7799,25 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI_EVENT_PCIE_DEC: + case GAUDI_EVENT_CPU_AXI_SPLITTER: + case GAUDI_EVENT_PSOC_AXI_DEC: + case GAUDI_EVENT_PSOC_PRSTN_FALL: + gaudi_print_irq_info(hdev, event_type, true); + hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + break; + + case GAUDI_EVENT_MMU_PAGE_FAULT: + case GAUDI_EVENT_MMU_WR_PERM: + gaudi_print_irq_info(hdev, event_type, true); + hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + break; + case GAUDI_EVENT_MME0_WBC_RSP: case GAUDI_EVENT_MME0_SBAB0_RSP: case GAUDI_EVENT_MME1_WBC_RSP: @@ -7817,11 +7826,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_MME2_SBAB0_RSP: case GAUDI_EVENT_MME3_WBC_RSP: case GAUDI_EVENT_MME3_SBAB0_RSP: - case GAUDI_EVENT_CPU_AXI_SPLITTER: - case GAUDI_EVENT_PSOC_AXI_DEC: - case GAUDI_EVENT_PSOC_PRSTN_FALL: - case GAUDI_EVENT_MMU_PAGE_FAULT: - case GAUDI_EVENT_MMU_WR_PERM: case GAUDI_EVENT_RAZWI_OR_ADC: case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: @@ -7841,10 +7845,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_qman_err(hdev, event_type, &event_mask); hl_fw_unmask_irq(hdev, event_type); + event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); break; case GAUDI_EVENT_RAZWI_OR_ADC_SW: gaudi_print_irq_info(hdev, event_type, true); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; goto reset_device; case GAUDI_EVENT_TPC0_BMON_SPMU: @@ -7858,11 +7864,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: gaudi_print_irq_info(hdev, event_type, false); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: gaudi_print_nic_axi_irq_info(hdev, event_type, &data); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: @@ -7870,6 +7878,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_print_sm_sei_info(hdev, event_type, &eq_entry->sm_sei_data); rc = hl_state_dump(hdev); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; if (rc) dev_err(hdev->dev, "Error during system state dump %d\n", rc); @@ -7880,6 +7889,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr break; case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; gaudi_print_clk_change_info(hdev, event_type); hl_fw_unmask_irq(hdev, event_type); break; @@ -7889,20 +7899,24 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr dev_err(hdev->dev, "Received high temp H/W interrupt %d (cause %d)\n", event_type, cause); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI_EVENT_DEV_RESET_REQ: gaudi_print_irq_info(hdev, event_type, false); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_FW_ALIVE_S: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; default: @@ -8066,8 +8080,8 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) return 0; } -static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s) +static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) { struct gaudi_device *gaudi = hdev->asic_specific; const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; @@ -8079,8 +8093,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u64 offset; int i, dma_id, port; - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" "--- ------- ------------ ---------- -------------\n"); @@ -8097,14 +8111,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); - if (s) - seq_printf(s, fmt, dma_id, + if (e) + hl_engine_data_sprintf(e, fmt, dma_id, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, dma_core_sts0); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" "--- ------- ------------ ---------- ----------\n"); @@ -8119,14 +8133,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); - if (s) - seq_printf(s, fmt, i, + if (e) + hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" "--- ------- ------------ ---------- -----------\n"); @@ -8147,20 +8161,21 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); - if (s) { + if (e) { if (!is_slave) - seq_printf(s, fmt, i, + hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); else - seq_printf(s, mme_slave_fmt, i, + hl_engine_data_sprintf(e, mme_slave_fmt, i, is_eng_idle ? "Y" : "N", "-", "-", mme_arch_sts); } } - if (s) - seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" + if (e) + hl_engine_data_sprintf(e, + "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" "--- ------- ------------ ----------\n"); for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { @@ -8174,8 +8189,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); - if (s) - seq_printf(s, nic_fmt, port, + if (e) + hl_engine_data_sprintf(e, nic_fmt, port, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts); } @@ -8189,15 +8204,15 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); - if (s) - seq_printf(s, nic_fmt, port, + if (e) + hl_engine_data_sprintf(e, nic_fmt, port, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts); } } - if (s) - seq_puts(s, "\n"); + if (e) + hl_engine_data_sprintf(e, "\n"); return is_idle; } @@ -8392,13 +8407,13 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev, goto destroy_internal_cb_pool; } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) goto unreserve_internal_cb_pool; @@ -8425,13 +8440,13 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) return; - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); @@ -9148,6 +9163,11 @@ static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; } +static int gaudi_send_device_activity(struct hl_device *hdev, bool open) +{ + return 0; +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -9192,11 +9212,9 @@ static const struct hl_asic_funcs gaudi_funcs = { .send_heartbeat = gaudi_send_heartbeat, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, - .non_hard_reset_late_init = gaudi_non_hard_reset_late_init, + .compute_reset_late_init = gaudi_compute_reset_late_init, .hw_queues_lock = gaudi_hw_queues_lock, .hw_queues_unlock = gaudi_hw_queues_unlock, - .kdma_lock = NULL, - .kdma_unlock = NULL, .get_pci_id = gaudi_get_pci_id, .get_eeprom_data = gaudi_get_eeprom_data, .get_monitor_dump = gaudi_get_monitor_dump, @@ -9242,6 +9260,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .mmu_get_real_page_size = hl_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi_set_hbm_bar_base, + .send_device_activity = gaudi_send_device_activity, }; /** diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 98336a1a84b0..75c4bef7841c 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -21,7 +21,7 @@ #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */ -#define GAUDI2_RESET_TIMEOUT_MSEC 500 /* 500ms */ +#define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ #define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */ #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */ #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */ @@ -117,6 +117,12 @@ #define MMU_RANGE_INV_ASID_EN_SHIFT 1 #define MMU_RANGE_INV_ASID_SHIFT 2 +/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has + * a 2 entries FIFO, and hence it is not enabled for it. + */ +#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0) +#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0) + #define GAUDI2_MAX_STRING_LEN 64 #define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \ @@ -610,7 +616,7 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = "qman_axi_err", "wap sei (wbc axi err)", "arc sei", - "mme_cfg_unalign_addr", + "cfg access error", "qm_sw_err", "sbte_dbg_intr_0", "sbte_dbg_intr_1", @@ -1525,17 +1531,57 @@ static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] RTR_ID_X_Y(17, 11) }; +enum rtr_id { + DCORE0_RTR0, + DCORE0_RTR1, + DCORE0_RTR2, + DCORE0_RTR3, + DCORE0_RTR4, + DCORE0_RTR5, + DCORE0_RTR6, + DCORE0_RTR7, + DCORE1_RTR0, + DCORE1_RTR1, + DCORE1_RTR2, + DCORE1_RTR3, + DCORE1_RTR4, + DCORE1_RTR5, + DCORE1_RTR6, + DCORE1_RTR7, + DCORE2_RTR0, + DCORE2_RTR1, + DCORE2_RTR2, + DCORE2_RTR3, + DCORE2_RTR4, + DCORE2_RTR5, + DCORE2_RTR6, + DCORE2_RTR7, + DCORE3_RTR0, + DCORE3_RTR1, + DCORE3_RTR2, + DCORE3_RTR3, + DCORE3_RTR4, + DCORE3_RTR5, + DCORE3_RTR6, + DCORE3_RTR7, +}; + static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { - 1, 1, 2, 2, 3, 3, 14, 14, 13, 13, 12, 12, 19, 19, 18, 18, 17, - 17, 28, 28, 29, 29, 30, 30, 0 + DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, + DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, + DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, + DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, + DCORE0_RTR0 }; static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = { - 0, 0, 15, 15, 16, 16, 31, 31, 0, 0 + DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, + DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 }; static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = { - 15, 15, 15, 15, 15, 16, 16, 16, 16, 31, 31, 31 + DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, + DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 }; struct sft_info { @@ -1548,11 +1594,11 @@ static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE }; static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = { - 0, 0 + DCORE0_RTR0, DCORE0_RTR0 }; static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = { - 16, 31 + DCORE2_RTR0, DCORE3_RTR7 }; struct mme_initiators_rtr_id { @@ -1663,7 +1709,7 @@ struct gaudi2_cache_invld_params { }; struct gaudi2_tpc_idle_data { - struct seq_file *s; + struct engines_data *e; unsigned long *mask; bool *is_idle; const char *tpc_fmt; @@ -1706,6 +1752,9 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) int dcore, inst, tpc_seq; u32 offset; + /* init the return code */ + ctx->rc = 0; + for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; @@ -1715,7 +1764,12 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); - ctx->fn(hdev, dcore, inst, offset, ctx->data); + ctx->fn(hdev, dcore, inst, offset, ctx); + if (ctx->rc) { + dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", + dcore, inst); + return; + } } } @@ -1724,7 +1778,9 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) /* special check for PCI TPC (DCORE0_TPC6) */ offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); - ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx->data); + ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); + if (ctx->rc) + dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); } static bool gaudi2_host_phys_addr_valid(u64 addr) @@ -1973,6 +2029,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; } + prop->num_engine_cores = CPU_ID_MAX; prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; prop->num_of_events = GAUDI2_EVENT_SIZE; @@ -2005,9 +2062,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->server_type = HL_SERVER_TYPE_UNKNOWN; - prop->cb_va_start_addr = VA_HOST_SPACE_USER_MAPPED_CB_START; - prop->cb_va_end_addr = VA_HOST_SPACE_USER_MAPPED_CB_END; - prop->max_dec = NUMBER_OF_DEC; prop->clk_pll_index = HL_GAUDI2_MME_PLL; @@ -2477,7 +2531,6 @@ static int gaudi2_early_init(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; struct pci_dev *pdev = hdev->pdev; resource_size_t pci_bar_size; - u32 fw_boot_status; int rc; rc = gaudi2_set_fixed_properties(hdev); @@ -2505,22 +2558,14 @@ static int gaudi2_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); - /* If FW security is enabled at this point it means no access to ELBI */ - if (hdev->asic_prop.fw_security_enabled) { - hdev->asic_prop.iatu_done_by_fw = true; - goto pci_init; - } - - rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, &fw_boot_status); - if (rc) - goto free_queue_props; - - /* Check whether FW is configuring iATU */ - if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && - (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) + /* + * Only in pldm driver config iATU + */ + if (hdev->pldm) + hdev->asic_prop.iatu_done_by_fw = false; + else hdev->asic_prop.iatu_done_by_fw = true; -pci_init: rc = hl_pci_init(hdev); if (rc) goto free_queue_props; @@ -2676,6 +2721,8 @@ static int gaudi2_late_init(struct hl_device *hdev) struct gaudi2_device *gaudi2 = hdev->asic_specific; int rc; + hdev->asic_prop.supports_advanced_cpucp_rc = true; + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, gaudi2->virt_msix_db_dma_addr); if (rc) { @@ -2703,23 +2750,7 @@ disable_pci_access: static void gaudi2_late_fini(struct hl_device *hdev) { - const struct hwmon_channel_info **channel_info_arr; - int i = 0; - - if (!hdev->hl_chip_info->info) - return; - - channel_info_arr = hdev->hl_chip_info->info; - - while (channel_info_arr[i]) { - kfree(channel_info_arr[i]->config); - kfree(channel_info_arr[i]); - i++; - } - - kfree(channel_info_arr); - - hdev->hl_chip_info->info = NULL; + hl_hwmon_release_resources(hdev); } static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) @@ -2994,7 +3025,6 @@ static int gaudi2_sw_init(struct hl_device *hdev) } spin_lock_init(&gaudi2->hw_queues_lock); - spin_lock_init(&gaudi2->kdma_lock); gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, &gaudi2->scratchpad_bus_address, @@ -3551,7 +3581,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev) rc = gaudi2_dec_enable_msix(hdev); if (rc) { dev_err(hdev->dev, "Failed to enable decoder IRQ"); - goto free_completion_irq; + goto free_event_irq; } for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; @@ -3582,6 +3612,10 @@ free_user_irq: gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); +free_event_irq: + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); + free_irq(irq, cq); + free_completion_irq: irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); free_irq(irq, cq); @@ -3745,14 +3779,16 @@ static void gaudi2_stop_dec(struct hl_device *hdev) gaudi2_stop_pcie_dec(hdev); } -static void gaudi2_halt_arc(struct hl_device *hdev, u32 cpu_id) +static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) { u32 reg_base, reg_val; reg_base = gaudi2_arc_blocks_bases[cpu_id]; + if (run_mode == HL_ENGINE_CORE_RUN) + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); + else + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); - /* Halt ARC */ - reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); } @@ -3762,8 +3798,35 @@ static void gaudi2_halt_arcs(struct hl_device *hdev) for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { if (gaudi2_is_arc_enabled(hdev, arc_id)) - gaudi2_halt_arc(hdev, arc_id); + gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); + } +} + +static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) +{ + int rc; + u32 reg_base, val, ack_mask, timeout_usec = 100000; + + if (hdev->pldm) + timeout_usec *= 100; + + reg_base = gaudi2_arc_blocks_bases[cpu_id]; + if (run_mode == HL_ENGINE_CORE_RUN) + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; + else + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; + + rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, + val, ((val & ack_mask) == ack_mask), + 1000, timeout_usec); + + if (!rc) { + /* Clear */ + val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); + WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); } + + return rc; } static void gaudi2_reset_arcs(struct hl_device *hdev) @@ -3790,8 +3853,39 @@ static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) queue_id = GAUDI2_QUEUE_ID_NIC_0_0; - for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) + for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { + if (!(hdev->nic_ports_mask & BIT(i))) + continue; + gaudi2_qman_manual_flush_common(hdev, queue_id); + } +} + +static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, + u32 num_cores, u32 core_command) +{ + int i, rc; + + + for (i = 0 ; i < num_cores ; i++) { + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) + gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); + } + + for (i = 0 ; i < num_cores ; i++) { + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { + rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command); + + if (rc) { + dev_err(hdev->dev, "failed to %s arc: %d\n", + (core_command == HL_ENGINE_CORE_HALT) ? + "HALT" : "RUN", core_ids[i]); + return -1; + } + } + } + + return 0; } static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) @@ -4124,11 +4218,15 @@ static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); /* Enable the QMAN channel. - * PDMA1 QMAN configuration is different, as we do not allow user to - * access CP2/3, it is reserved for the ARC usage. + * PDMA QMAN configuration is different, as we do not allow user to + * access some of the CPs. + * PDMA0: CP2/3 are reserved for the ARC usage. + * PDMA1: CP1/2/3 are reserved for the ARC usage. */ if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); + else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) + WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); else WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); } @@ -4501,10 +4599,10 @@ struct gaudi2_tpc_init_cfg_data { }; static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, - u32 offset, void *data) + u32 offset, struct iterate_module_ctx *ctx) { struct gaudi2_device *gaudi2 = hdev->asic_specific; - struct gaudi2_tpc_init_cfg_data *cfg_data = data; + struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; u32 queue_id_base; u8 seq; @@ -4956,8 +5054,7 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) return 0; } -static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, - u32 stlb_base) +static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) { u32 status, timeout_usec; int rc; @@ -4985,7 +5082,6 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, return rc; WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); - WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, 0xF); rc = hl_poll_timeout( hdev, @@ -5042,6 +5138,8 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev) DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); } + WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); if (rc) return rc; @@ -5092,6 +5190,8 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); + WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); if (rc) return rc; @@ -5339,7 +5439,10 @@ static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms if (!driver_performs_reset) { /* set SP to indicate reset request sent to FW */ - WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); + if (dyn_regs->cpu_rst_status) + WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); + else + WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); @@ -5527,10 +5630,11 @@ static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) u64 hw_test_cap_bit = 0; switch (hw_queue_id) { - case GAUDI2_QUEUE_ID_PDMA_0_0 ... GAUDI2_QUEUE_ID_PDMA_1_1: + case GAUDI2_QUEUE_ID_PDMA_0_0: + case GAUDI2_QUEUE_ID_PDMA_0_1: + case GAUDI2_QUEUE_ID_PDMA_1_0: hw_cap_mask = HW_CAP_PDMA_MASK; break; - case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: hw_test_cap_bit = HW_CAP_EDMA_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); @@ -6129,7 +6233,7 @@ done: return ret_val; } -static int gaudi2_non_hard_reset_late_init(struct hl_device *hdev) +static int gaudi2_compute_reset_late_init(struct hl_device *hdev) { struct gaudi2_device *gaudi2 = hdev->asic_specific; size_t irq_arr_size; @@ -6147,9 +6251,9 @@ static int gaudi2_non_hard_reset_late_init(struct hl_device *hdev) } static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_idle_data *idle_data = (struct gaudi2_tpc_idle_data *)data; + struct gaudi2_tpc_idle_data *idle_data = ctx->data; u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; bool is_eng_idle; int engine_idx; @@ -6172,14 +6276,15 @@ static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int ins if (idle_data->mask && !is_eng_idle) set_bit(engine_idx, idle_data->mask); - if (idle_data->s) - seq_printf(idle_data->s, idle_data->tpc_fmt, dcore, inst, + if (idle_data->e) + hl_engine_data_sprintf(idle_data->e, + idle_data->tpc_fmt, dcore, inst, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); } -static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s) +static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) { u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, mme_arch_sts, dec_swreg15, dec_enabled_bit; @@ -6197,7 +6302,7 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, struct gaudi2_tpc_idle_data tpc_idle_data = { .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", - .s = s, + .e = e, .mask = mask, .is_idle = &is_idle, }; @@ -6209,8 +6314,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, int engine_idx, i, j; /* EDMA, Two engines per Dcore */ - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" "---- ---- ------- ------------ ----------------------\n"); @@ -6239,19 +6344,19 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, edma_fmt, i, j, - is_eng_idle ? "Y" : "N", - qm_glbl_sts0, - dma_core_idle_ind_mask); + if (e) + hl_engine_data_sprintf(e, edma_fmt, i, j, + is_eng_idle ? "Y" : "N", + qm_glbl_sts0, + dma_core_idle_ind_mask); } } /* PDMA, Two engines in Full chip */ - if (s) - seq_puts(s, - "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" - "---- ------- ------------ ----------------------\n"); + if (e) + hl_engine_data_sprintf(e, + "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" + "---- ------- ------------ ----------------------\n"); for (i = 0 ; i < NUM_OF_PDMA ; i++) { engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; @@ -6269,16 +6374,16 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, pdma_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, - dma_core_idle_ind_mask); + if (e) + hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N", + qm_glbl_sts0, dma_core_idle_ind_mask); } /* NIC, twelve macros in Full chip */ - if (s && hdev->nic_ports_mask) - seq_puts(s, - "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" - "--- ------- ------------ ----------\n"); + if (e && hdev->nic_ports_mask) + hl_engine_data_sprintf(e, + "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" + "--- ------- ------------ ----------\n"); for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { if (!(i & 1)) @@ -6302,15 +6407,15 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, nic_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, - qm_cgm_sts); + if (e) + hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N", + qm_glbl_sts0, qm_cgm_sts); } - if (s) - seq_puts(s, - "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" - "--- ---- ------- ------------ ---------------\n"); + if (e) + hl_engine_data_sprintf(e, + "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" + "--- ---- ------- ------------ ---------------\n"); /* MME, one per Dcore */ for (i = 0 ; i < NUM_OF_DCORES ; i++) { engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; @@ -6327,8 +6432,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, is_eng_idle &= IS_MME_IDLE(mme_arch_sts); is_idle &= is_eng_idle; - if (s) - seq_printf(s, mme_fmt, i, "N", + if (e) + hl_engine_data_sprintf(e, mme_fmt, i, "N", is_eng_idle ? "Y" : "N", qm_glbl_sts0, mme_arch_sts); @@ -6340,16 +6445,16 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, /* * TPC */ - if (s && prop->tpc_enabled_mask) - seq_puts(s, + if (e && prop->tpc_enabled_mask) + hl_engine_data_sprintf(e, "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" "---- --- -------- ------------ ---------- ----------------------\n"); gaudi2_iterate_tpcs(hdev, &tpc_iter); /* Decoders, two each Dcore and two shared PCIe decoders */ - if (s && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) - seq_puts(s, + if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) + hl_engine_data_sprintf(e, "\nCORE DEC is_idle VSI_CMD_SWREG15\n" "---- --- ------- ---------------\n"); @@ -6370,13 +6475,14 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, dec_fmt, i, j, is_eng_idle ? "Y" : "N", dec_swreg15); + if (e) + hl_engine_data_sprintf(e, dec_fmt, i, j, + is_eng_idle ? "Y" : "N", dec_swreg15); } } - if (s && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) - seq_puts(s, + if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) + hl_engine_data_sprintf(e, "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" "-------- ------- ---------------\n"); @@ -6395,12 +6501,13 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, pcie_dec_fmt, i, is_eng_idle ? "Y" : "N", dec_swreg15); + if (e) + hl_engine_data_sprintf(e, pcie_dec_fmt, i, + is_eng_idle ? "Y" : "N", dec_swreg15); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" "---- ---- ------- ------------ ---------- -------------\n"); @@ -6419,8 +6526,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", + if (e) + hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, "-"); } @@ -6443,22 +6550,6 @@ static void gaudi2_hw_queues_unlock(struct hl_device *hdev) spin_unlock(&gaudi2->hw_queues_lock); } -static void gaudi2_kdma_lock(struct hl_device *hdev, int dcore_id) - __acquires(&gaudi2->kdma_lock) -{ - struct gaudi2_device *gaudi2 = hdev->asic_specific; - - spin_lock(&gaudi2->kdma_lock); -} - -static void gaudi2_kdma_unlock(struct hl_device *hdev, int dcore_id) - __releases(&gaudi2->kdma_lock) -{ - struct gaudi2_device *gaudi2 = hdev->asic_specific; - - spin_unlock(&gaudi2->kdma_lock); -} - static u32 gaudi2_get_pci_id(struct hl_device *hdev) { return hdev->pdev->device; @@ -6725,9 +6816,9 @@ static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) } static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_mmu_data *mmu_data = (struct gaudi2_tpc_mmu_data *)data; + struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); @@ -7020,10 +7111,6 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg); razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg); } - - dev_err_ratelimited(hdev->dev, - "%s-RAZWI SHARED RR HBW WR error, captured address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n", - name, razwi_hi, razwi_lo, razwi_xy); } else { if (read_razwi_regs) { razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); @@ -7034,11 +7121,11 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg); razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg); } - - dev_err_ratelimited(hdev->dev, - "%s-RAZWI SHARED RR HBW AR error, captured address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n", - name, razwi_hi, razwi_lo, razwi_xy); } + + dev_err_ratelimited(hdev->dev, + "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", + name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); } static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, @@ -7296,7 +7383,79 @@ static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); } -static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, +static const char *gaudi2_get_initiators_name(u32 rtr_id) +{ + switch (rtr_id) { + case DCORE0_RTR0: + return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; + case DCORE0_RTR1: + return "TPC0/1"; + case DCORE0_RTR2: + return "TPC2/3"; + case DCORE0_RTR3: + return "TPC4/5"; + case DCORE0_RTR4: + return "MME0_SBTE0/1"; + case DCORE0_RTR5: + return "MME0_WAP0/SBTE2"; + case DCORE0_RTR6: + return "MME0_CTRL_WR/SBTE3"; + case DCORE0_RTR7: + return "MME0_WAP1/CTRL_RD/SBTE4"; + case DCORE1_RTR0: + return "MME1_WAP1/CTRL_RD/SBTE4"; + case DCORE1_RTR1: + return "MME1_CTRL_WR/SBTE3"; + case DCORE1_RTR2: + return "MME1_WAP0/SBTE2"; + case DCORE1_RTR3: + return "MME1_SBTE0/1"; + case DCORE1_RTR4: + return "TPC10/11"; + case DCORE1_RTR5: + return "TPC8/9"; + case DCORE1_RTR6: + return "TPC6/7"; + case DCORE1_RTR7: + return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7"; + case DCORE2_RTR0: + return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; + case DCORE2_RTR1: + return "TPC16/17"; + case DCORE2_RTR2: + return "TPC14/15"; + case DCORE2_RTR3: + return "TPC12/13"; + case DCORE2_RTR4: + return "MME2_SBTE0/1"; + case DCORE2_RTR5: + return "MME2_WAP0/SBTE2"; + case DCORE2_RTR6: + return "MME2_CTRL_WR/SBTE3"; + case DCORE2_RTR7: + return "MME2_WAP1/CTRL_RD/SBTE4"; + case DCORE3_RTR0: + return "MME3_WAP1/CTRL_RD/SBTE4"; + case DCORE3_RTR1: + return "MME3_CTRL_WR/SBTE3"; + case DCORE3_RTR2: + return "MME3_WAP0/SBTE2"; + case DCORE3_RTR3: + return "MME3_SBTE0/1"; + case DCORE3_RTR4: + return "TPC18/19"; + case DCORE3_RTR5: + return "TPC20/21"; + case DCORE3_RTR6: + return "TPC22/23"; + case DCORE3_RTR7: + return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; + default: + return "N/A"; + } +} + +static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, u64 rtr_ctrl_base_addr, bool is_write) { u32 razwi_hi, razwi_lo; @@ -7305,50 +7464,47 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped HBW WR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n", - rtr_ctrl_base_addr, razwi_hi, razwi_lo); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); } else { razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); - razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped HBW AR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n", - rtr_ctrl_base_addr, razwi_hi, razwi_lo); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); } + + dev_err_ratelimited(hdev->dev, + "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", + is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); + + dev_err_ratelimited(hdev->dev, + "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); } -static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, - u64 rtr_ctrl_base_addr, bool is_write) +static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, + u64 rtr_ctrl_base_addr, bool is_write) { u32 razwi_addr; if (is_write) { razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped LBW WR error, ctr_base 0x%llx, captured address 0x%x\n", - rtr_ctrl_base_addr, razwi_addr); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); } else { razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped LBW AR error, ctr_base 0x%llx, captured address 0x%x\n", - rtr_ctrl_base_addr, razwi_addr); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); } + + dev_err_ratelimited(hdev->dev, + "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", + is_write ? "WR" : "RD", rtr_id, razwi_addr); + + dev_err_ratelimited(hdev->dev, + "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); } /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ @@ -7366,21 +7522,16 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev) } razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); - - xy = (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_SHIFT; + xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); dev_err_ratelimited(hdev->dev, - "PSOC RAZWI interrupt: Mask %d, WAS_AR %d, WAS_AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_SHIFT, - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_SHIFT, - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_SHIFT, xy, - (razwi_mask_info & - PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_SHIFT); + "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), + xy, + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); + if (xy == 0) { dev_err_ratelimited(hdev->dev, "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); @@ -7410,16 +7561,20 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev) lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); if (hbw_aw_set) - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_ctrl_base_addr, true); + gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, + rtr_ctrl_base_addr, true); if (hbw_ar_set) - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_ctrl_base_addr, false); + gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, + rtr_ctrl_base_addr, false); if (lbw_aw_set) - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, true); + gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, + rtr_ctrl_base_addr, true); if (lbw_ar_set) - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, false); + gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, + rtr_ctrl_base_addr, false); clear: /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ @@ -7811,14 +7966,58 @@ static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_ gaudi2_dma_core_interrupts_cause[i]); } +static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev) +{ + u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { + gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { + gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { + gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { + gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } +} + static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data) { int i; - for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE; i++) - if (intr_cause_data & BIT_ULL(i)) - dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n", - gaudi2_pcie_addr_dec_error_cause[i]); + for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { + if (!(intr_cause_data & BIT_ULL(i))) + continue; + + dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n", + gaudi2_pcie_addr_dec_error_cause[i]); + + switch (intr_cause_data & BIT_ULL(i)) { + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: + break; + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: + gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev); + break; + } + } } static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data) @@ -8158,10 +8357,17 @@ static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type, return true; } - dev_err_ratelimited(hdev->dev, - "System Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Critical(%u). Error cause: %s\n", - hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, - sei_data->hdr.is_critical, hbm_mc_sei_cause[cause_idx]); + if (sei_data->hdr.is_critical) + dev_err(hdev->dev, + "System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n", + hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, + hbm_mc_sei_cause[cause_idx]); + + else + dev_err_ratelimited(hdev->dev, + "System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n", + hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, + hbm_mc_sei_cause[cause_idx]); /* Print error-specific info */ switch (cause_idx) { @@ -8371,6 +8577,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent struct gaudi2_device *gaudi2 = hdev->asic_specific; bool reset_required = false, skip_reset = false; int index, sbte_index; + u64 event_mask = 0; u16 event_type; ctl = le32_to_cpu(eq_entry->hdr.ctl); @@ -8392,6 +8599,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent fallthrough; case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); break; @@ -8401,21 +8609,25 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent fallthrough; case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1: gaudi2_handle_qman_err(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; gaudi2_handle_arc_farm_sei_err(hdev); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_AXI_ERR_RSP: gaudi2_handle_cpu_sei_err(hdev); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: @@ -8423,6 +8635,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause); gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: @@ -8430,11 +8643,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP", &eq_entry->razwi_with_intr_cause); gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_TPC0_KERNEL_ERR: @@ -8465,6 +8680,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_DEC0_SPI: @@ -8480,6 +8696,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = (event_type - GAUDI2_EVENT_DEC0_SPI) / (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: @@ -8492,6 +8709,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_handle_mme_err(hdev, index, "CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info); gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: @@ -8502,6 +8720,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - GAUDI2_EVENT_MME0_QMAN_SW_ERROR); gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: @@ -8512,22 +8731,27 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_KDMA0_CORE: gaudi2_handle_kdma_core_event(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: gaudi2_handle_dma_core_event(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: gaudi2_print_pcie_addr_dec_info(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: @@ -8536,25 +8760,30 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: gaudi2_handle_mmu_spi_sei_err(hdev, event_type); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL: gaudi2_handle_hif_fatal(hdev, event_type, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PMMU_FATAL_0: gaudi2_handle_pif_fatal(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: gaudi2_ack_psoc_razwi_event_handler(hdev); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; reset_required = true; @@ -8563,25 +8792,31 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI: gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_AXI_ECC: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_L2_RAM_ECC: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: @@ -8595,17 +8830,24 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP); gaudi2_handle_mme_sbte_err(hdev, index, sbte_index, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + break; case GAUDI2_EVENT_PSOC_PRSTN_FALL: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_APB_TIMEOUT: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_FATAL_ERR: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_TPC0_BMON_SPMU: case GAUDI2_EVENT_TPC1_BMON_SPMU: @@ -8657,6 +8899,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_DEC8_BMON_SPMU: case GAUDI2_EVENT_DEC9_BMON_SPMU: case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU: + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: @@ -8664,43 +8907,53 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: gaudi2_print_clk_change_info(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_FLR_REQUESTED: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; /* Do nothing- FW will handle it */ break; case GAUDI2_EVENT_PCIE_P2P_MSIX: gaudi2_handle_pcie_p2p_msix(hdev); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; skip_reset = !gaudi2_handle_sm_err(hdev, index); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", le64_to_cpu(eq_entry->data[0])); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", le64_to_cpu(eq_entry->data[0])); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_ARC_DCCM_FULL: hl_arc_event_handle(hdev, &eq_entry->arc_data); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; default: @@ -8716,15 +8969,22 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent if (!gaudi2_irq_map_table[event_type].msg) hl_fw_unmask_irq(hdev, event_type); + if (event_mask) + hl_notifier_event_send_all(hdev, event_mask); + return; reset_device: if (hdev->hard_reset_on_fw_events) { hl_device_reset(hdev, reset_flags); + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; } else { if (!gaudi2_irq_map_table[event_type].msg) hl_fw_unmask_irq(hdev, event_type); } + + if (event_mask) + hl_notifier_event_send_all(hdev, event_mask); } static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val) @@ -9090,19 +9350,17 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v } /* Create mapping on asic side */ - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M); hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, reserved_va_base, SZ_2M); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) { dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); goto unreserve_va; } - hdev->asic_funcs->kdma_lock(hdev, 0); - /* Enable MMU on KDMA */ gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); @@ -9130,13 +9388,11 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); - hdev->asic_funcs->kdma_unlock(hdev, 0); - - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, ctx->asid, reserved_va_base, SZ_2M); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); unreserve_va: hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); free_data_buffer: @@ -9189,11 +9445,11 @@ static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c goto destroy_internal_cb_pool; } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) goto unreserve_internal_cb_pool; @@ -9218,11 +9474,11 @@ static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx * if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) return; - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); @@ -9336,7 +9592,7 @@ static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) { - struct hl_cb *cb = (struct hl_cb *) data; + struct hl_cb *cb = data; struct packet_msg_short *pkt; u32 value, ctl, pkt_size = sizeof(*pkt); @@ -9429,7 +9685,7 @@ static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt) static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop) { - struct hl_cb *cb = (struct hl_cb *) prop->data; + struct hl_cb *cb = prop->data; void *buf = (void *) (uintptr_t) (cb->kernel_address); u64 monitor_base, fence_addr = 0; @@ -9481,7 +9737,7 @@ static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_propert static void gaudi2_reset_sob(struct hl_device *hdev, void *data) { - struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data; + struct hl_hw_sob *hw_sob = data; dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); @@ -9724,7 +9980,7 @@ static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) { - bool is_pmmu = (mmu_id == HW_CAP_PMMU ? true : false); + bool is_pmmu = (mmu_id == HW_CAP_PMMU); struct gaudi2_device *gaudi2 = hdev->asic_specific; u32 mmu_base; @@ -9881,6 +10137,17 @@ static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data) return -EOPNOTSUPP; } +int gaudi2_send_device_activity(struct hl_device *hdev, bool open) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + + if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37) + return 0; + + /* TODO: add check for FW version using minor ver once it's known */ + return hl_fw_send_device_activity(hdev, open); +} + static const struct hl_asic_funcs gaudi2_funcs = { .early_init = gaudi2_early_init, .early_fini = gaudi2_early_fini, @@ -9927,11 +10194,9 @@ static const struct hl_asic_funcs gaudi2_funcs = { .send_heartbeat = gaudi2_send_heartbeat, .debug_coresight = gaudi2_debug_coresight, .is_device_idle = gaudi2_is_device_idle, - .non_hard_reset_late_init = gaudi2_non_hard_reset_late_init, + .compute_reset_late_init = gaudi2_compute_reset_late_init, .hw_queues_lock = gaudi2_hw_queues_lock, .hw_queues_unlock = gaudi2_hw_queues_unlock, - .kdma_lock = gaudi2_kdma_lock, - .kdma_unlock = gaudi2_kdma_unlock, .get_pci_id = gaudi2_get_pci_id, .get_eeprom_data = gaudi2_get_eeprom_data, .get_monitor_dump = gaudi2_get_monitor_dump, @@ -9978,6 +10243,8 @@ static const struct hl_asic_funcs gaudi2_funcs = { .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi2_set_hbm_bar_base, + .set_engine_cores = gaudi2_set_engine_cores, + .send_device_activity = gaudi2_send_device_activity, }; void gaudi2_set_asic_funcs(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index e4bc4009f05b..a99c348bbf39 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -15,7 +15,6 @@ #include "../include/gaudi2/gaudi2_packets.h" #include "../include/gaudi2/gaudi2_fw_if.h" #include "../include/gaudi2/gaudi2_async_events.h" -#include "../include/gaudi2/gaudi2_async_virt_events.h" #define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb" #define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb" @@ -140,9 +139,6 @@ #define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull #define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */ -#define VA_HOST_SPACE_USER_MAPPED_CB_START 0xFFF1000000000000ull -#define VA_HOST_SPACE_USER_MAPPED_CB_END 0xFFF1000100000000ull /* 4GB */ - /* 140TB */ #define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START) @@ -458,7 +454,6 @@ struct dup_block_ctx { * the user can map. * @lfsr_rand_seeds: array of MME ACC random seeds to set. * @hw_queues_lock: protects the H/W queues from concurrent access. - * @kdma_lock: protects the KDMA engine from concurrent access. * @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory, * this memory region should be write-only. * currently used for HBW QMAN writes which is @@ -510,9 +505,6 @@ struct dup_block_ctx { * @flush_db_fifo: flag to force flush DB FIFO after a write. * @hbm_cfg: HBM subsystem settings * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock. - * @kdma_lock_mutex: used by simulator instead of kdma_lock. - * @use_deprecated_event_mappings: use old event mappings which are about to be - * deprecated */ struct gaudi2_device { int (*cpucp_info_get)(struct hl_device *hdev); @@ -521,7 +513,6 @@ struct gaudi2_device { int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS]; spinlock_t hw_queues_lock; - spinlock_t kdma_lock; void *scratchpad_kernel_address; dma_addr_t scratchpad_bus_address; @@ -562,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32 u32 offended_addr); int gaudi2_init_security(struct hl_device *hdev); void gaudi2_ack_protection_bits_errors(struct hl_device *hdev); +int gaudi2_send_device_activity(struct hl_device *hdev, bool open); #endif /* GAUDI2P_H_ */ diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h index eed16d642a5a..e9ac87828221 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h @@ -51,12 +51,18 @@ (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) -#define PDMA1_QMAN_ENABLE \ +#define PDMA0_QMAN_ENABLE \ ((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \ (0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \ (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) +#define PDMA1_QMAN_ENABLE \ + ((0x1 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \ + (0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \ + (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ + (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) + /* QM_IDLE_MASK is valid for all engines QM idle check */ #define QM_IDLE_MASK (DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \ DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \ @@ -138,4 +144,17 @@ #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT 0 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK 0x1 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT 1 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK 0x2 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT 2 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK 0x4 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT 3 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK 0x8 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT 4 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK 0x10 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT 5 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK 0x20 + #endif /* GAUDI2_MASKS_H_ */ diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c index 89a06ff5ba34..c6906fb14229 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c @@ -2559,6 +2559,10 @@ static const u32 gaudi2_pb_pcie[] = { mmPCIE_WRAP_BASE, }; +static const u32 gaudi2_pb_pcie_unsecured_regs[] = { + mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0, +}; + static const u32 gaudi2_pb_thermal_sensor0[] = { mmDCORE0_XFT_BASE, mmDCORE0_TSTDVS_BASE, @@ -2583,9 +2587,9 @@ struct gaudi2_tpc_pb_data { }; static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_pb_data *pb_data = (struct gaudi2_tpc_pb_data *)data; + struct gaudi2_tpc_pb_data *pb_data = ctx->data; hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec, offset, pb_data->block_array_size); @@ -2660,15 +2664,14 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev) struct gaudi2_tpc_arc_pb_data { u32 unsecured_regs_arr_size; u32 arc_regs_arr_size; - int rc; }; static void gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_arc_pb_data *pb_data = (struct gaudi2_tpc_arc_pb_data *)data; + struct gaudi2_tpc_arc_pb_data *pb_data = ctx->data; - pb_data->rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1, + ctx->rc = hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1, offset, gaudi2_pb_dcr0_tpc0_arc, pb_data->arc_regs_arr_size, gaudi2_pb_dcr0_tpc0_arc_unsecured_regs, @@ -2683,12 +2686,12 @@ static int gaudi2_init_pb_tpc_arc(struct hl_device *hdev) tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc); tpc_arc_pb_data.unsecured_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs); - tpc_arc_pb_data.rc = 0; + tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges; tpc_iter.data = &tpc_arc_pb_data; gaudi2_iterate_tpcs(hdev, &tpc_iter); - return tpc_arc_pb_data.rc; + return tpc_iter.rc; } static int gaudi2_init_pb_sm_objs(struct hl_device *hdev) @@ -3419,7 +3422,8 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev) rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, HL_PB_SINGLE_INSTANCE, HL_PB_NA, gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie), - NULL, HL_PB_NA); + gaudi2_pb_pcie_unsecured_regs, + ARRAY_SIZE(gaudi2_pb_pcie_unsecured_regs)); /* Thermal Sensor. * Skip when security is enabled in F/W, because the blocks are protected by privileged RR. @@ -3547,9 +3551,9 @@ struct gaudi2_ack_pb_tpc_data { }; static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_ack_pb_tpc_data *pb_data = (struct gaudi2_ack_pb_tpc_data *)data; + struct gaudi2_ack_pb_tpc_data *pb_data = ctx->data; hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA, gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index db4487c33582..5ef9e3ca97a6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -916,26 +916,11 @@ int goya_late_init(struct hl_device *hdev) */ void goya_late_fini(struct hl_device *hdev) { - const struct hwmon_channel_info **channel_info_arr; struct goya_device *goya = hdev->asic_specific; - int i = 0; cancel_delayed_work_sync(&goya->goya_work->work_freq); - if (!hdev->hl_chip_info->info) - return; - - channel_info_arr = hdev->hl_chip_info->info; - - while (channel_info_arr[i]) { - kfree(channel_info_arr[i]->config); - kfree(channel_info_arr[i]); - i++; - } - - kfree(channel_info_arr); - - hdev->hl_chip_info->info = NULL; + hl_hwmon_release_resources(hdev); } static void goya_set_pci_memory_regions(struct hl_device *hdev) @@ -1040,6 +1025,7 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_prop.supports_compute_reset = true; hdev->asic_prop.allow_inference_soft_reset = true; hdev->supports_wait_for_multi_cs = false; + hdev->supports_ctx_switch = true; hdev->asic_funcs->set_pci_memory_regions(hdev); @@ -4559,7 +4545,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, return rc; } -static int goya_non_hard_reset_late_init(struct hl_device *hdev) +static int goya_compute_reset_late_init(struct hl_device *hdev) { /* * Unmask all IRQs since some could have been received @@ -5137,8 +5123,8 @@ int goya_cpucp_info_get(struct hl_device *hdev) return 0; } -static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s) +static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) { const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n"; const char *dma_fmt = "%-5d%-9s%#-14x%#x\n"; @@ -5149,9 +5135,9 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u64 offset; int i; - if (s) - seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n" - "--- ------- ------------ -------------\n"); + if (e) + hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n" + "--- ------- ------------ -------------\n"); offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0; @@ -5164,13 +5150,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask); - if (s) - seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N", + if (e) + hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, dma_core_sts0); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n" "--- ------- ------------ -------------- ----------\n"); @@ -5187,13 +5173,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask); - if (s) - seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N", + if (e) + hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n" "--- ------- ------------ -------------- -----------\n"); @@ -5207,10 +5193,10 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GOYA_ENGINE_ID_MME_0, mask); - if (s) { - seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, + if (e) { + hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, cmdq_glbl_sts0, mme_arch_sts); - seq_puts(s, "\n"); + hl_engine_data_sprintf(e, "\n"); } return is_idle; @@ -5434,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val) return -EOPNOTSUPP; } +static int goya_send_device_activity(struct hl_device *hdev, bool open) +{ + return 0; +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5478,11 +5469,9 @@ static const struct hl_asic_funcs goya_funcs = { .send_heartbeat = goya_send_heartbeat, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, - .non_hard_reset_late_init = goya_non_hard_reset_late_init, + .compute_reset_late_init = goya_compute_reset_late_init, .hw_queues_lock = goya_hw_queues_lock, .hw_queues_unlock = goya_hw_queues_unlock, - .kdma_lock = NULL, - .kdma_unlock = NULL, .get_pci_id = goya_get_pci_id, .get_eeprom_data = goya_get_eeprom_data, .get_monitor_dump = goya_get_monitor_dump, @@ -5528,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_get_real_page_size = hl_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = goya_set_ddr_bar_base, + .send_device_activity = goya_send_device_activity, }; /* diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index abf40e1c4965..baa5aa43b6f4 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -629,6 +629,12 @@ enum pq_init_status { * CPUCP_PACKET_ENGINE_CORE_ASID_SET - * Packet to perform engine core ASID configuration * + * CPUCP_PACKET_SEC_ATTEST_GET - + * Get the attestaion data that is collected during various stages of the + * boot sequence. the attestation data is also hashed with some unique + * number (nonce) provided by the host to prevent replay attacks. + * public key and certificate also provided as part of the FW response. + * * CPUCP_PACKET_MONITOR_DUMP_GET - * Get monitors registers dump from the CpuCP kernel. * The CPU will put the registers dump in the a buffer allocated by the driver @@ -636,6 +642,10 @@ enum pq_init_status { * passes the max size it allows the CpuCP to write to the structure, to prevent * data corruption in case of mismatched driver/FW versions. * Relevant only to Gaudi. + * + * CPUCP_PACKET_ACTIVE_STATUS_SET - + * LKD sends FW indication whether device is free or in use, this indication is reported + * also to the BMC. */ enum cpucp_packet_id { @@ -687,10 +697,17 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED, /* not used */ CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ CPUCP_PACKET_RESERVED2, /* not used */ + CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ CPUCP_PACKET_RESERVED3, /* not used */ CPUCP_PACKET_RESERVED4, /* not used */ - CPUCP_PACKET_RESERVED5, /* not used */ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED5, /* not used */ + CPUCP_PACKET_RESERVED6, /* not used */ + CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_RESERVED8, /* not used */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 @@ -783,6 +800,9 @@ struct cpucp_packet { * result cannot be used to hold general purpose data. */ __le32 status_mask; + + /* random, used once number, for security packets */ + __le32 nonce; }; /* For NIC requests */ @@ -813,10 +833,25 @@ enum cpucp_led_index { CPUCP_LED2_INDEX }; +/* + * enum cpucp_packet_rc - Error return code + * @cpucp_packet_success -> in case of success. + * @cpucp_packet_invalid -> this is to support Goya and Gaudi platform. + * @cpucp_packet_fault -> in case of processing error like failing to + * get device binding or semaphore etc. + * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. This is + * supported Greco onwards. + * @cpucp_packet_invalid_params -> when checking parameter like length of buffer + * or attribute value etc. Supported Greco onwards. + * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. + */ enum cpucp_packet_rc { cpucp_packet_success, cpucp_packet_invalid, - cpucp_packet_fault + cpucp_packet_fault, + cpucp_packet_invalid_pkt, + cpucp_packet_invalid_params, + cpucp_packet_rc_max }; /* @@ -1193,6 +1228,70 @@ enum cpu_reset_status { CPU_RST_STATUS_SOFT_RST_DONE = 1, }; +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct cpucp_sec_attest_info - attestation report of the boot + * @pcr_data: raw values of the PCR registers + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. + * @pcr_quote_len: length of the attestation quote data (bytes) + * @pcr_quote: attestation report data structure + * @quote_sig_len: length of the attestation report signature (bytes) + * @quote_sig: signature structure of the attestation report + * @pub_data_len: length of the public data (bytes) + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the attestation signing key + */ +struct cpucp_sec_attest_info { + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __le16 pad0; + __le32 nonce; + __le16 pcr_quote_len; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig_len; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +/* + * struct cpucp_dev_info_signed - device information signed by a secured device + * @info: device information structure as defined above + * @nonce: number only used once. random number provided by host. this number is + * hashed and signed along with the device information. + * @info_sig_len: length of the attestation signature (bytes) + * @info_sig: signature of the info + nonce data. + * @pub_data_len: length of the public data (bytes) + * @public_data: public key info signed info data + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the signing key + */ +struct cpucp_dev_info_signed { + struct cpucp_info info; /* assumed to be 64bit aligned */ + __le32 nonce; + __le32 pad0; + __u8 info_sig_len; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + /* * struct dcore_monitor_regs_data - DCORE monitor regs data. * the structure follows sync manager block layout. relevant only to Gaudi. diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index a3594119bc51..e0ea51cc7475 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -34,6 +34,7 @@ enum cpu_boot_err { CPU_BOOT_ERR_BINNING_FAIL = 19, CPU_BOOT_ERR_TPM_FAIL = 20, CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, + CPU_BOOT_ERR_EEPROM_FAIL = 22, CPU_BOOT_ERR_ENABLED = 31, CPU_BOOT_ERR_SCND_EN = 63, CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ @@ -115,6 +116,9 @@ enum cpu_boot_err { * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature * sensor. * + * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults + * are used. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -139,6 +143,7 @@ enum cpu_boot_err { #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) #define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) #define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) +#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) @@ -426,7 +431,9 @@ struct cpu_dyn_regs { __le32 gic_host_ints_irq; __le32 gic_host_soft_rst_irq; __le32 gic_rot_qm_irq_ctrl; - __le32 reserved1[22]; /* reserve for future use */ + __le32 cpu_rst_status; + __le32 eng_arc_irq_ctrl; + __le32 reserved1[20]; /* reserve for future use */ }; /* TODO: remove the desc magic after the code is updated to use message */ @@ -465,6 +472,26 @@ enum comms_msg_type { HL_COMMS_BINNING_CONF_TYPE = 3, }; +/* + * Binning information shared between LKD and FW + * @tpc_mask - TPC binning information + * @dec_mask - Decoder binning information + * @hbm_mask - HBM binning information + * @edma_mask - EDMA binning information + * @mme_mask_l - MME binning information lower 32 + * @mme_mask_h - MME binning information upper 32 + * @reserved - reserved field for 64 bit alignment + */ +struct lkd_fw_binning_info { + __le64 tpc_mask; + __le32 dec_mask; + __le32 hbm_mask; + __le32 edma_mask; + __le32 mme_mask_l; + __le32 mme_mask_h; + __le32 reserved; +}; + /* TODO: remove this struct after the code is updated to use message */ /* this is the comms descriptor header - meta data */ struct comms_desc_header { @@ -525,13 +552,7 @@ struct lkd_fw_comms_msg { struct { __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ }; - struct { - __le64 tpc_binning_conf; - __le32 dec_binning_conf; - __le32 hbm_binning_conf; - __le32 edma_binning_conf; - __le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */ - }; + struct lkd_fw_binning_info binning_info; }; }; diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h index d0e2c68a639f..6aa1b1412462 100644 --- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h +++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h @@ -132,6 +132,7 @@ #include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h" #include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h" #include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h" +#include "pcie_wrap_special_regs.h" #include "pdma0_qm_masks.h" #include "pdma0_core_masks.h" @@ -239,6 +240,7 @@ #define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE) #define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE) +#define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE) #define ARC_REGION_CFG_OFFSET(region) \ (mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE) diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h new file mode 100644 index 000000000000..46558e7a7f63 --- /dev/null +++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +/************************************ + ** This is an auto-generated file ** + ** DO NOT EDIT BELOW ** + ************************************/ + +#ifndef ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ +#define ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ + +/* + ***************************************** + * PCIE_WRAP_SPECIAL + * (Prototype: SPECIAL_REGS) + ***************************************** + */ + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_0 0x4C01E80 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_1 0x4C01E84 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_2 0x4C01E88 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_3 0x4C01E8C + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_4 0x4C01E90 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_5 0x4C01E94 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_6 0x4C01E98 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_7 0x4C01E9C + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_8 0x4C01EA0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_9 0x4C01EA4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_10 0x4C01EA8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_11 0x4C01EAC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_12 0x4C01EB0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_13 0x4C01EB4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_14 0x4C01EB8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_15 0x4C01EBC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_16 0x4C01EC0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_17 0x4C01EC4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_18 0x4C01EC8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_19 0x4C01ECC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_20 0x4C01ED0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_21 0x4C01ED4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_22 0x4C01ED8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_23 0x4C01EDC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_24 0x4C01EE0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_25 0x4C01EE4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_26 0x4C01EE8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_27 0x4C01EEC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_28 0x4C01EF0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_29 0x4C01EF4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_30 0x4C01EF8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_31 0x4C01EFC + +#define mmPCIE_WRAP_SPECIAL_MEM_GW_DATA 0x4C01F00 + +#define mmPCIE_WRAP_SPECIAL_MEM_GW_REQ 0x4C01F04 + +#define mmPCIE_WRAP_SPECIAL_MEM_NUMOF 0x4C01F0C + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_SEL 0x4C01F10 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_CTL 0x4C01F14 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_MASK 0x4C01F18 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_GLBL_ERR_MASK 0x4C01F1C + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_STS 0x4C01F20 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_ADDR 0x4C01F24 + +#define mmPCIE_WRAP_SPECIAL_MEM_RM 0x4C01F28 + +#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_MASK 0x4C01F40 + +#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_ADDR 0x4C01F44 + +#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_CAUSE 0x4C01F48 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 0x4C01F60 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_1 0x4C01F64 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_2 0x4C01F68 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_3 0x4C01F6C + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_0 0x4C01F80 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_1 0x4C01F84 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_2 0x4C01F88 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_3 0x4C01F8C + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_4 0x4C01F90 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_5 0x4C01F94 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_6 0x4C01F98 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_7 0x4C01F9C + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_8 0x4C01FA0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_9 0x4C01FA4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_10 0x4C01FA8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_11 0x4C01FAC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_12 0x4C01FB0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_13 0x4C01FB4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_14 0x4C01FB8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_15 0x4C01FBC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_16 0x4C01FC0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_17 0x4C01FC4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_18 0x4C01FC8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_19 0x4C01FCC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_20 0x4C01FD0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_21 0x4C01FD4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_22 0x4C01FD8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_23 0x4C01FDC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_24 0x4C01FE0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_25 0x4C01FE4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_26 0x4C01FE8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_27 0x4C01FEC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_28 0x4C01FF0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_29 0x4C01FF4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_30 0x4C01FF8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_31 0x4C01FFC + +#endif /* ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ */ diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h deleted file mode 100644 index 6d6ed7838a64..000000000000 --- a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2022 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef __GAUDI2_ASYNC_VIRT_EVENTS_H_ -#define __GAUDI2_ASYNC_VIRT_EVENTS_H_ - -enum gaudi2_async_virt_event_id { - GAUDI2_EVENT_NIC3_QM1_OLD = 1206, - GAUDI2_EVENT_NIC4_QM0_OLD = 1207, - GAUDI2_EVENT_NIC4_QM1_OLD = 1208, - GAUDI2_EVENT_NIC5_QM0_OLD = 1209, - GAUDI2_EVENT_NIC5_QM1_OLD = 1210, - GAUDI2_EVENT_NIC6_QM0_OLD = 1211, - GAUDI2_EVENT_NIC6_QM1_OLD = 1212, - GAUDI2_EVENT_NIC7_QM0_OLD = 1213, - GAUDI2_EVENT_NIC7_QM1_OLD = 1214, - GAUDI2_EVENT_NIC8_QM0_OLD = 1215, - GAUDI2_EVENT_NIC8_QM1_OLD = 1216, - GAUDI2_EVENT_NIC9_QM0_OLD = 1217, - GAUDI2_EVENT_NIC9_QM1_OLD = 1218, - GAUDI2_EVENT_NIC10_QM0_OLD = 1219, - GAUDI2_EVENT_NIC10_QM1_OLD = 1220, - GAUDI2_EVENT_NIC11_QM0_OLD = 1221, - GAUDI2_EVENT_NIC11_QM1_OLD = 1222, - GAUDI2_EVENT_CPU_PKT_SANITY_FAILED_OLD = 1223, - GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0_OLD = 1224, - GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG1_OLD = 1225, - GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG0_OLD = 1226, - GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG1_OLD = 1227, - GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG0_OLD = 1228, - GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG1_OLD = 1229, - GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG0_OLD = 1230, - GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG1_OLD = 1231, - GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG0_OLD = 1232, - GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG1_OLD = 1233, - GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG0_OLD = 1234, - GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG1_OLD = 1235, - GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG0_OLD = 1236, - GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG1_OLD = 1237, - GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG0_OLD = 1238, - GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG1_OLD = 1239, - GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG0_OLD = 1240, - GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG1_OLD = 1241, - GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG0_OLD = 1242, - GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG1_OLD = 1243, - GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG0_OLD = 1244, - GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG1_OLD = 1245, - GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0_OLD = 1246, - GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1_OLD = 1247, - GAUDI2_EVENT_ARC_DCCM_FULL_OLD = 1248, -}; - -#endif /* __GAUDI2_ASYNC_VIRT_EVENTS_H_ */ |