diff options
Diffstat (limited to 'drivers/misc/habanalabs')
29 files changed, 2082 insertions, 481 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index d9adb9a5e4d8..719168c980a4 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -181,7 +181,7 @@ static void cb_release(struct kref *ref) static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int ctx_id, bool internal_cb) { - struct hl_cb *cb; + struct hl_cb *cb = NULL; u32 cb_offset; void *p; @@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, * the kernel's copy. Hence, we must never sleep in this code section * and must use GFP_ATOMIC for all memory allocations. */ - if (ctx_id == HL_KERNEL_ASID_ID) + if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled) cb = kzalloc(sizeof(*cb), GFP_ATOMIC); - else + + if (!cb) cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) @@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, } else if (ctx_id == HL_KERNEL_ASID_ID) { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC); + if (!p) + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + cb_size, &cb->bus_address, GFP_KERNEL); } else { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, @@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, spin_lock(&mgr->cb_lock); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); + if (rc < 0) + rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL); spin_unlock(&mgr->cb_lock); if (rc < 0) { diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 7bd4a03b3429..ff8791a651fd 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -84,6 +84,38 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) return 0; } +static void sob_reset_work(struct work_struct *work) +{ + struct hl_cs_compl *hl_cs_cmpl = + container_of(work, struct hl_cs_compl, sob_reset_work); + struct hl_device *hdev = hl_cs_cmpl->hdev; + + /* + * A signal CS can get completion while the corresponding wait + * for signal CS is on its way to the PQ. The wait for signal CS + * will get stuck if the signal CS incremented the SOB to its + * max value and there are no pending (submitted) waits on this + * SOB. + * We do the following to void this situation: + * 1. The wait for signal CS must get a ref for the signal CS as + * soon as possible in cs_ioctl_signal_wait() and put it + * before being submitted to the PQ but after it incremented + * the SOB refcnt in init_signal_wait_cs(). + * 2. Signal/Wait for signal CS will decrement the SOB refcnt + * here. + * These two measures guarantee that the wait for signal CS will + * reset the SOB upon completion rather than the signal CS and + * hence the above scenario is avoided. + */ + kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + + if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) + hdev->asic_funcs->reset_sob_group(hdev, + hl_cs_cmpl->sob_group); + + kfree(hl_cs_cmpl); +} + static void hl_fence_release(struct kref *kref) { struct hl_fence *fence = @@ -109,28 +141,9 @@ static void hl_fence_release(struct kref *kref) hl_cs_cmpl->hw_sob->sob_id, hl_cs_cmpl->sob_val); - /* - * A signal CS can get completion while the corresponding wait - * for signal CS is on its way to the PQ. The wait for signal CS - * will get stuck if the signal CS incremented the SOB to its - * max value and there are no pending (submitted) waits on this - * SOB. - * We do the following to void this situation: - * 1. The wait for signal CS must get a ref for the signal CS as - * soon as possible in cs_ioctl_signal_wait() and put it - * before being submitted to the PQ but after it incremented - * the SOB refcnt in init_signal_wait_cs(). - * 2. Signal/Wait for signal CS will decrement the SOB refcnt - * here. - * These two measures guarantee that the wait for signal CS will - * reset the SOB upon completion rather than the signal CS and - * hence the above scenario is avoided. - */ - kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work); - if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) - hdev->asic_funcs->reset_sob_group(hdev, - hl_cs_cmpl->sob_group); + return; } free: @@ -454,8 +467,7 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) if (next_entry_found && !next->tdr_active) { next->tdr_active = true; - schedule_delayed_work(&next->work_tdr, - hdev->timeout_jiffies); + schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); } spin_unlock(&hdev->cs_mirror_lock); @@ -492,24 +504,6 @@ static void cs_do_release(struct kref *ref) goto out; } - hdev->asic_funcs->hw_queues_lock(hdev); - - hdev->cs_active_cnt--; - if (!hdev->cs_active_cnt) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; - ts->busy_to_idle_ts = ktime_get(); - - if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) - hdev->idle_busy_ts_idx = 0; - } else if (hdev->cs_active_cnt < 0) { - dev_crit(hdev->dev, "CS active cnt %d is negative\n", - hdev->cs_active_cnt); - } - - hdev->asic_funcs->hw_queues_unlock(hdev); - /* Need to update CI for all queue jobs that does not get completion */ hl_hw_queue_update_ci(cs); @@ -620,14 +614,14 @@ static void cs_timedout(struct work_struct *work) cs_put(cs); if (hdev->reset_on_lockup) - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); else hdev->needs_reset = true; } static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_cs_type cs_type, u64 user_sequence, - struct hl_cs **cs_new) + struct hl_cs **cs_new, u32 flags, u32 timeout) { struct hl_cs_counters_atomic *cntr; struct hl_fence *other = NULL; @@ -638,6 +632,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; cs = kzalloc(sizeof(*cs), GFP_ATOMIC); + if (!cs) + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -651,12 +648,17 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->submitted = false; cs->completed = false; cs->type = cs_type; + cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); + cs->timeout_jiffies = timeout; INIT_LIST_HEAD(&cs->job_list); INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); kref_init(&cs->refcount); spin_lock_init(&cs->job_lock); cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + if (!cs_cmpl) + cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL); + if (!cs_cmpl) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -664,9 +666,23 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_cs; } + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); + + if (!cs->jobs_in_queue_cnt) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); + rc = -ENOMEM; + goto free_cs_cmpl; + } + cs_cmpl->hdev = hdev; cs_cmpl->type = cs->type; spin_lock_init(&cs_cmpl->lock); + INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work); cs->fence = &cs_cmpl->base_fence; spin_lock(&ctx->cs_lock); @@ -696,15 +712,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_fence; } - cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, - sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); - if (!cs->jobs_in_queue_cnt) { - atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); - atomic64_inc(&cntr->out_of_mem_drop_cnt); - rc = -ENOMEM; - goto free_fence; - } - /* init hl_fence */ hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); @@ -727,6 +734,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, free_fence: spin_unlock(&ctx->cs_lock); + kfree(cs->jobs_in_queue_cnt); +free_cs_cmpl: kfree(cs_cmpl); free_cs: kfree(cs); @@ -749,6 +758,8 @@ void hl_cs_rollback_all(struct hl_device *hdev) int i; struct hl_cs *cs, *tmp; + flush_workqueue(hdev->sob_reset_wq); + /* flush all completions before iterating over the CS mirror list in * order to avoid a race with the release functions */ @@ -778,6 +789,44 @@ void hl_pending_cb_list_flush(struct hl_ctx *ctx) } } +static void +wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) +{ + struct hl_user_pending_interrupt *pend; + + spin_lock(&interrupt->wait_list_lock); + list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); + } + spin_unlock(&interrupt->wait_list_lock); +} + +void hl_release_pending_user_interrupts(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_user_interrupt *interrupt; + int i; + + if (!prop->user_interrupt_count) + return; + + /* We iterate through the user interrupt requests and waking up all + * user threads waiting for interrupt completion. We iterate the + * list under a lock, this is why all user threads, once awake, + * will wait on the same lock and will release the waiting object upon + * unlock. + */ + + for (i = 0 ; i < prop->user_interrupt_count ; i++) { + interrupt = &hdev->user_interrupt[i]; + wake_pending_user_interrupt_threads(interrupt); + } + + interrupt = &hdev->common_user_interrupt; + wake_pending_user_interrupt_threads(interrupt); +} + static void job_wq_completion(struct work_struct *work) { struct hl_cs_job *job = container_of(work, struct hl_cs_job, @@ -890,6 +939,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, job = kzalloc(sizeof(*job), GFP_ATOMIC); if (!job) + job = kzalloc(sizeof(*job), GFP_KERNEL); + + if (!job) return NULL; kref_init(&job->refcount); @@ -991,6 +1043,9 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), GFP_ATOMIC); + if (!*cs_chunk_array) + *cs_chunk_array = kmalloc_array(num_chunks, + sizeof(**cs_chunk_array), GFP_KERNEL); if (!*cs_chunk_array) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); @@ -1038,7 +1093,8 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, } static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, - u32 num_chunks, u64 *cs_seq, u32 flags) + u32 num_chunks, u64 *cs_seq, u32 flags, + u32 timeout) { bool staged_mid, int_queues_only = true; struct hl_device *hdev = hpriv->hdev; @@ -1067,11 +1123,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, staged_mid = false; rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, - staged_mid ? user_sequence : ULLONG_MAX, &cs); + staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, + timeout); if (rc) goto free_cs_chunk_array; - cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); *cs_seq = cs->sequence; hl_debugfs_add_cs(cs); @@ -1269,7 +1325,8 @@ static int hl_submit_pending_cb(struct hl_fpriv *hpriv) list_move_tail(&pending_cb->cb_node, &local_cb_list); spin_unlock(&ctx->pending_cb_lock); - rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs); + rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0, + hdev->timeout_jiffies); if (rc) goto add_list_elements; @@ -1370,7 +1427,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, rc = 0; } else { rc = cs_ioctl_default(hpriv, chunks, num_chunks, - cs_seq, 0); + cs_seq, 0, hdev->timeout_jiffies); } mutex_unlock(&hpriv->restore_phase_mutex); @@ -1419,7 +1476,7 @@ wait_again: out: if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); return rc; } @@ -1445,6 +1502,10 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, signal_seq_arr = kmalloc_array(signal_seq_arr_len, sizeof(*signal_seq_arr), GFP_ATOMIC); + if (!signal_seq_arr) + signal_seq_arr = kmalloc_array(signal_seq_arr_len, + sizeof(*signal_seq_arr), + GFP_KERNEL); if (!signal_seq_arr) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); @@ -1536,7 +1597,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, void __user *chunks, u32 num_chunks, - u64 *cs_seq, bool timestamp) + u64 *cs_seq, u32 flags, u32 timeout) { struct hl_cs_chunk *cs_chunk_array, *chunk; struct hw_queue_properties *hw_queue_prop; @@ -1642,7 +1703,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } } - rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs); + rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); if (rc) { if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) @@ -1650,8 +1711,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, goto free_cs_chunk_array; } - cs->timestamp = !!timestamp; - /* * Save the signal CS fence for later initialization right before * hanging the wait CS on the queue. @@ -1709,7 +1768,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) enum hl_cs_type cs_type; u64 cs_seq = ULONG_MAX; void __user *chunks; - u32 num_chunks, flags; + u32 num_chunks, flags, timeout; int rc; rc = hl_cs_sanity_checks(hpriv, args); @@ -1735,16 +1794,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) cs_seq = args->in.seq; + timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT + ? msecs_to_jiffies(args->in.timeout * 1000) + : hpriv->hdev->timeout_jiffies; + switch (cs_type) { case CS_TYPE_SIGNAL: case CS_TYPE_WAIT: case CS_TYPE_COLLECTIVE_WAIT: rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, - &cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP); + &cs_seq, args->in.cs_flags, timeout); break; default: rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, - args->in.cs_flags); + args->in.cs_flags, timeout); break; } @@ -1818,7 +1881,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, return rc; } -int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) +static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) { struct hl_device *hdev = hpriv->hdev; union hl_wait_cs_args *args = data; @@ -1873,3 +1936,176 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } + +static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + u32 timeout_us, u64 user_address, + u32 target_value, u16 interrupt_offset, + enum hl_cs_wait_status *status) +{ + struct hl_user_pending_interrupt *pend; + struct hl_user_interrupt *interrupt; + unsigned long timeout; + long completion_rc; + u32 completion_value; + int rc = 0; + + if (timeout_us == U32_MAX) + timeout = timeout_us; + else + timeout = usecs_to_jiffies(timeout_us); + + hl_ctx_get(hdev, ctx); + + pend = kmalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + hl_ctx_put(ctx); + return -ENOMEM; + } + + hl_fence_init(&pend->fence, ULONG_MAX); + + if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID) + interrupt = &hdev->common_user_interrupt; + else + interrupt = &hdev->user_interrupt[interrupt_offset]; + + spin_lock(&interrupt->wait_list_lock); + if (!hl_device_operational(hdev, NULL)) { + rc = -EPERM; + goto unlock_and_free_fence; + } + + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { + dev_err(hdev->dev, + "Failed to copy completion value from user\n"); + rc = -EFAULT; + goto unlock_and_free_fence; + } + + if (completion_value >= target_value) + *status = CS_WAIT_STATUS_COMPLETED; + else + *status = CS_WAIT_STATUS_BUSY; + + if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) + goto unlock_and_free_fence; + + /* Add pending user interrupt to relevant list for the interrupt + * handler to monitor + */ + list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); + spin_unlock(&interrupt->wait_list_lock); + +wait_again: + /* Wait for interrupt handler to signal completion */ + completion_rc = + wait_for_completion_interruptible_timeout( + &pend->fence.completion, timeout); + + /* If timeout did not expire we need to perform the comparison. + * If comparison fails, keep waiting until timeout expires + */ + if (completion_rc > 0) { + if (copy_from_user(&completion_value, + u64_to_user_ptr(user_address), 4)) { + dev_err(hdev->dev, + "Failed to copy completion value from user\n"); + rc = -EFAULT; + goto remove_pending_user_interrupt; + } + + if (completion_value >= target_value) { + *status = CS_WAIT_STATUS_COMPLETED; + } else { + timeout -= jiffies_to_usecs(completion_rc); + goto wait_again; + } + } else { + *status = CS_WAIT_STATUS_BUSY; + } + +remove_pending_user_interrupt: + spin_lock(&interrupt->wait_list_lock); + list_del(&pend->wait_list_node); + +unlock_and_free_fence: + spin_unlock(&interrupt->wait_list_lock); + kfree(pend); + hl_ctx_put(ctx); + + return rc; +} + +static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) +{ + u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt; + struct hl_device *hdev = hpriv->hdev; + struct asic_fixed_properties *prop; + union hl_wait_cs_args *args = data; + enum hl_cs_wait_status status; + int rc; + + prop = &hdev->asic_prop; + + if (!prop->user_interrupt_count) { + dev_err(hdev->dev, "no user interrupts allowed"); + return -EPERM; + } + + interrupt_id = + FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); + + first_interrupt = prop->first_available_user_msix_interrupt; + last_interrupt = prop->first_available_user_msix_interrupt + + prop->user_interrupt_count - 1; + + if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) && + interrupt_id != HL_COMMON_USER_INTERRUPT_ID) { + dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); + return -EINVAL; + } + + if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID) + interrupt_offset = HL_COMMON_USER_INTERRUPT_ID; + else + interrupt_offset = interrupt_id - first_interrupt; + + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, + args->in.interrupt_timeout_us, args->in.addr, + args->in.target, interrupt_offset, &status); + + memset(args, 0, sizeof(*args)); + + if (rc) { + dev_err_ratelimited(hdev->dev, + "interrupt_wait_ioctl failed (%d)\n", rc); + + return rc; + } + + switch (status) { + case CS_WAIT_STATUS_COMPLETED: + args->out.status = HL_WAIT_CS_STATUS_COMPLETED; + break; + case CS_WAIT_STATUS_BUSY: + default: + args->out.status = HL_WAIT_CS_STATUS_BUSY; + break; + } + + return 0; +} + +int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) +{ + union hl_wait_cs_args *args = data; + u32 flags = args->in.flags; + int rc; + + if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) + rc = hl_interrupt_wait_ioctl(hpriv, data); + else + rc = hl_cs_wait_ioctl(hpriv, data); + + return rc; +} diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index cda871afb8f4..62d705889ca8 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -20,6 +20,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx) */ hl_pending_cb_list_flush(ctx); + /* Release all allocated HW block mapped list entries and destroy + * the mutex. + */ + hl_hw_block_mem_fini(ctx); + /* * If we arrived here, there are no jobs waiting for this context * on its queues so we can safely remove it. @@ -160,13 +165,15 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) if (!ctx->cs_pending) return -ENOMEM; + hl_hw_block_mem_init(ctx); + if (is_kernel_ctx) { ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ rc = hl_vm_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "Failed to init mem ctx module\n"); rc = -ENOMEM; - goto err_free_cs_pending; + goto err_hw_block_mem_fini; } rc = hdev->asic_funcs->ctx_init(ctx); @@ -179,7 +186,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) if (!ctx->asid) { dev_err(hdev->dev, "No free ASID, failed to create context\n"); rc = -ENOMEM; - goto err_free_cs_pending; + goto err_hw_block_mem_fini; } rc = hl_vm_ctx_init(ctx); @@ -214,7 +221,8 @@ err_vm_ctx_fini: err_asid_free: if (ctx->asid != HL_KERNEL_ASID_ID) hl_asid_free(hdev, ctx->asid); -err_free_cs_pending: +err_hw_block_mem_fini: + hl_hw_block_mem_fini(ctx); kfree(ctx->cs_pending); return rc; diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 9f19bee7b592..8381155578a0 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -9,8 +9,8 @@ #include "../include/hw_ip/mmu/mmu_general.h" #include <linux/pci.h> -#include <linux/debugfs.h> #include <linux/uaccess.h> +#include <linux/vmalloc.h> #define MMU_ADDR_BUF_SIZE 40 #define MMU_ASID_BUF_SIZE 10 @@ -229,6 +229,7 @@ static int vm_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_vm_hw_block_list_node *lnode; struct hl_ctx *ctx; struct hl_vm *vm; struct hl_vm_hash_node *hnode; @@ -272,6 +273,21 @@ static int vm_show(struct seq_file *s, void *data) } mutex_unlock(&ctx->mem_hash_lock); + if (ctx->asid != HL_KERNEL_ASID_ID && + !list_empty(&ctx->hw_block_mem_list)) { + seq_puts(s, "\nhw_block mappings:\n\n"); + seq_puts(s, " virtual address size HW block id\n"); + seq_puts(s, "-------------------------------------------\n"); + mutex_lock(&ctx->hw_block_list_lock); + list_for_each_entry(lnode, &ctx->hw_block_mem_list, + node) { + seq_printf(s, + " 0x%-14lx %-6u %-9u\n", + lnode->vaddr, lnode->size, lnode->id); + } + mutex_unlock(&ctx->hw_block_list_lock); + } + vm = &ctx->hdev->vm; spin_lock(&vm->idr_lock); @@ -441,21 +457,86 @@ out: return false; } -static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, - u64 *phys_addr) +static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr, + u32 size) { + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 dram_start_addr, dram_end_addr; + + if (!hdev->mmu_enable) + return false; + + if (prop->dram_supports_virtual_memory) { + dram_start_addr = prop->dmmu.start_addr; + dram_end_addr = prop->dmmu.end_addr; + } else { + dram_start_addr = prop->dram_base_address; + dram_end_addr = prop->dram_end_address; + } + + if (hl_mem_area_inside_range(addr, size, dram_start_addr, + dram_end_addr)) + return true; + + if (hl_mem_area_inside_range(addr, size, prop->sram_base_address, + prop->sram_end_address)) + return true; + + return false; +} + +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, + u64 *phys_addr) +{ + struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_ctx *ctx = hdev->compute_ctx; - int rc = 0; + struct hl_vm_hash_node *hnode; + struct hl_userptr *userptr; + enum vm_type_t *vm_type; + bool valid = false; + u64 end_address; + u32 range_size; + int i, rc = 0; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; } + /* Verify address is mapped */ + mutex_lock(&ctx->mem_hash_lock); + hash_for_each(ctx->mem_hash, i, hnode, node) { + vm_type = hnode->ptr; + + if (*vm_type == VM_TYPE_USERPTR) { + userptr = hnode->ptr; + range_size = userptr->size; + } else { + phys_pg_pack = hnode->ptr; + range_size = phys_pg_pack->total_size; + } + + end_address = virt_addr + size; + if ((virt_addr >= hnode->vaddr) && + (end_address <= hnode->vaddr + range_size)) { + valid = true; + break; + } + } + mutex_unlock(&ctx->mem_hash_lock); + + if (!valid) { + dev_err(hdev->dev, + "virt addr 0x%llx is not mapped\n", + virt_addr); + return -EINVAL; + } + rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr); if (rc) { - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); + dev_err(hdev->dev, + "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); rc = -EINVAL; } @@ -467,10 +548,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - char tmp_buf[32]; u64 addr = entry->addr; - u32 val; + bool user_address; + char tmp_buf[32]; ssize_t rc; + u32 val; if (atomic_read(&hdev->in_reset)) { dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); @@ -480,13 +562,14 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, if (*ppos) return 0; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val); + rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val); if (rc) { dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); return rc; @@ -503,6 +586,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; + bool user_address; u32 value; ssize_t rc; @@ -515,13 +599,14 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, if (rc) return rc; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(value), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value); + rc = hdev->asic_funcs->debugfs_write32(hdev, addr, user_address, value); if (rc) { dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", value, addr); @@ -536,21 +621,28 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - char tmp_buf[32]; u64 addr = entry->addr; - u64 val; + bool user_address; + char tmp_buf[32]; ssize_t rc; + u64 val; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } if (*ppos) return 0; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val); + rc = hdev->asic_funcs->debugfs_read64(hdev, addr, user_address, &val); if (rc) { dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); return rc; @@ -567,20 +659,27 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; + bool user_address; u64 value; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + rc = kstrtoull_from_user(buf, count, 16, &value); if (rc) return rc; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + rc = device_va_to_pa(hdev, addr, sizeof(value), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value); + rc = hdev->asic_funcs->debugfs_write64(hdev, addr, user_address, value); if (rc) { dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", value, addr); @@ -590,6 +689,63 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, return count; } +static ssize_t hl_dma_size_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + ssize_t rc; + u32 size; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 16, &size); + if (rc) + return rc; + + if (!size) { + dev_err(hdev->dev, "DMA read failed. size can't be 0\n"); + return -EINVAL; + } + + if (size > SZ_128M) { + dev_err(hdev->dev, + "DMA read failed. size can't be larger than 128MB\n"); + return -EINVAL; + } + + if (!hl_is_device_internal_memory_va(hdev, addr, size)) { + dev_err(hdev->dev, + "DMA read failed. Invalid 0x%010llx + 0x%08x\n", + addr, size); + return -EINVAL; + } + + /* Free the previous allocation, if there was any */ + entry->blob_desc.size = 0; + vfree(entry->blob_desc.data); + + entry->blob_desc.data = vmalloc(size); + if (!entry->blob_desc.data) + return -ENOMEM; + + rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size, + entry->blob_desc.data); + if (rc) { + dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr); + vfree(entry->blob_desc.data); + entry->blob_desc.data = NULL; + return -EIO; + } + + entry->blob_desc.size = size; + + return count; +} + static ssize_t hl_get_power_state(struct file *f, char __user *buf, size_t count, loff_t *ppos) { @@ -871,7 +1027,7 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, hdev->stop_on_err = value ? 1 : 0; - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); return count; } @@ -899,6 +1055,11 @@ static const struct file_operations hl_data64b_fops = { .write = hl_data_write64 }; +static const struct file_operations hl_dma_size_fops = { + .owner = THIS_MODULE, + .write = hl_dma_size_write +}; + static const struct file_operations hl_i2c_data_fops = { .owner = THIS_MODULE, .read = hl_i2c_data_read, @@ -1001,6 +1162,9 @@ void hl_debugfs_add_device(struct hl_device *hdev) if (!dev_entry->entry_arr) return; + dev_entry->blob_desc.size = 0; + dev_entry->blob_desc.data = NULL; + INIT_LIST_HEAD(&dev_entry->file_list); INIT_LIST_HEAD(&dev_entry->cb_list); INIT_LIST_HEAD(&dev_entry->cs_list); @@ -1103,6 +1267,17 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_security_violations_fops); + debugfs_create_file("dma_size", + 0200, + dev_entry->root, + dev_entry, + &hl_dma_size_fops); + + debugfs_create_blob("data_dma", + 0400, + dev_entry->root, + &dev_entry->blob_desc); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { debugfs_create_file(hl_debugfs_list[i].name, 0444, @@ -1121,6 +1296,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev) debugfs_remove_recursive(entry->root); mutex_destroy(&entry->file_mutex); + + vfree(entry->blob_desc.data); + kfree(entry->entry_arr); } diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 334009e83823..00e92b678828 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -70,6 +70,9 @@ static void hpriv_release(struct kref *ref) mutex_unlock(&hdev->fpriv_list_lock); kfree(hpriv); + + if (hdev->reset_upon_device_release) + hl_device_reset(hdev, 0); } void hl_hpriv_get(struct hl_fpriv *hpriv) @@ -77,9 +80,9 @@ void hl_hpriv_get(struct hl_fpriv *hpriv) kref_get(&hpriv->refcount); } -void hl_hpriv_put(struct hl_fpriv *hpriv) +int hl_hpriv_put(struct hl_fpriv *hpriv) { - kref_put(&hpriv->refcount, hpriv_release); + return kref_put(&hpriv->refcount, hpriv_release); } /* @@ -103,10 +106,17 @@ static int hl_device_release(struct inode *inode, struct file *filp) return 0; } - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); - hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + /* Each pending user interrupt holds the user's context, hence we + * must release them all before calling hl_ctx_mgr_fini(). + */ + hl_release_pending_user_interrupts(hpriv->hdev); + + hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); - hl_hpriv_put(hpriv); + if (!hl_hpriv_put(hpriv)) + dev_warn(hdev->dev, + "Device is still in use because there are live CS and/or memory mappings\n"); return 0; } @@ -283,7 +293,7 @@ static void device_hard_reset_pending(struct work_struct *work) struct hl_device *hdev = device_reset_work->hdev; int rc; - rc = hl_device_reset(hdev, true, true); + rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD); if ((rc == -EBUSY) && !hdev->device_fini_pending) { dev_info(hdev->dev, "Could not reset device. will try again in %u seconds", @@ -311,11 +321,15 @@ static int device_early_init(struct hl_device *hdev) switch (hdev->asic_type) { case ASIC_GOYA: goya_set_asic_funcs(hdev); - strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); + strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); break; case ASIC_GAUDI: gaudi_set_asic_funcs(hdev); - sprintf(hdev->asic_name, "GAUDI"); + strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name)); + break; + case ASIC_GAUDI_SEC: + gaudi_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name)); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", @@ -334,7 +348,7 @@ static int device_early_init(struct hl_device *hdev) if (hdev->asic_prop.completion_queues_count) { hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, sizeof(*hdev->cq_wq), - GFP_ATOMIC); + GFP_KERNEL); if (!hdev->cq_wq) { rc = -ENOMEM; goto asid_fini; @@ -358,24 +372,24 @@ static int device_early_init(struct hl_device *hdev) goto free_cq_wq; } - hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), - GFP_KERNEL); - if (!hdev->hl_chip_info) { + hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0); + if (!hdev->sob_reset_wq) { + dev_err(hdev->dev, + "Failed to allocate SOB reset workqueue\n"); rc = -ENOMEM; goto free_eq_wq; } - hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, - sizeof(struct hl_device_idle_busy_ts), - (GFP_KERNEL | __GFP_ZERO)); - if (!hdev->idle_busy_ts_arr) { + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), + GFP_KERNEL); + if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_chip_info; + goto free_sob_reset_wq; } rc = hl_mmu_if_set_funcs(hdev); if (rc) - goto free_idle_busy_ts_arr; + goto free_chip_info; hl_cb_mgr_init(&hdev->kernel_cb_mgr); @@ -404,10 +418,10 @@ static int device_early_init(struct hl_device *hdev) free_cb_mgr: hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); -free_idle_busy_ts_arr: - kfree(hdev->idle_busy_ts_arr); free_chip_info: kfree(hdev->hl_chip_info); +free_sob_reset_wq: + destroy_workqueue(hdev->sob_reset_wq); free_eq_wq: destroy_workqueue(hdev->eq_wq); free_cq_wq: @@ -441,9 +455,9 @@ static void device_early_fini(struct hl_device *hdev) hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); - kfree(hdev->idle_busy_ts_arr); kfree(hdev->hl_chip_info); + destroy_workqueue(hdev->sob_reset_wq); destroy_workqueue(hdev->eq_wq); destroy_workqueue(hdev->device_reset_work.wq); @@ -485,7 +499,7 @@ static void hl_device_heartbeat(struct work_struct *work) goto reschedule; dev_err(hdev->dev, "Device heartbeat failed!\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT); return; @@ -561,100 +575,24 @@ static void device_late_fini(struct hl_device *hdev) hdev->late_init_done = false; } -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) +int hl_device_utilization(struct hl_device *hdev, u32 *utilization) { - struct hl_device_idle_busy_ts *ts; - ktime_t zero_ktime, curr = ktime_get(); - u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; - s64 period_us, last_start_us, last_end_us, last_busy_time_us, - total_busy_time_us = 0, total_busy_time_ms; - - zero_ktime = ktime_set(0, 0); - period_us = period_ms * USEC_PER_MSEC; - ts = &hdev->idle_busy_ts_arr[last_index]; - - /* check case that device is currently in idle */ - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && - !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { - - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; - - ts = &hdev->idle_busy_ts_arr[last_index]; - } - - while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { - /* Check if we are in last sample case. i.e. if the sample - * begun before the sampling period. This could be a real - * sample or 0 so need to handle both cases - */ - last_start_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - if (last_start_us > period_us) { - - /* First check two cases: - * 1. If the device is currently busy - * 2. If the device was idle during the whole sampling - * period - */ - - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) { - /* Check if the device is currently busy */ - if (ktime_compare(ts->idle_to_busy_ts, - zero_ktime)) - return 100; - - /* We either didn't have any activity or we - * reached an entry which is 0. Either way, - * exit and return what was accumulated so far - */ - break; - } - - /* If sample has finished, check it is relevant */ - last_end_us = ktime_to_us( - ktime_sub(curr, ts->busy_to_idle_ts)); - - if (last_end_us > period_us) - break; - - /* It is relevant so add it but with adjustment */ - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - total_busy_time_us += last_busy_time_us - - (last_start_us - period_us); - break; - } - - /* Check if the sample is finished or still open */ - if (ktime_compare(ts->busy_to_idle_ts, zero_ktime)) - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - else - last_busy_time_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - total_busy_time_us += last_busy_time_us; + u64 max_power, curr_power, dc_power, dividend; + int rc; - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + max_power = hdev->asic_prop.max_power_default; + dc_power = hdev->asic_prop.dc_power_default; + rc = hl_fw_cpucp_power_get(hdev, &curr_power); - ts = &hdev->idle_busy_ts_arr[last_index]; + if (rc) + return rc; - overlap_cnt++; - } + curr_power = clamp(curr_power, dc_power, max_power); - total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us, - USEC_PER_MSEC); + dividend = (curr_power - dc_power) * 100; + *utilization = (u32) div_u64(dividend, (max_power - dc_power)); - return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms); + return 0; } /* @@ -809,7 +747,7 @@ int hl_device_resume(struct hl_device *hdev) hdev->disabled = false; atomic_set(&hdev->in_reset, 0); - rc = hl_device_reset(hdev, true, false); + rc = hl_device_reset(hdev, HL_RESET_HARD); if (rc) { dev_err(hdev->dev, "Failed to reset device during resume\n"); goto disable_device; @@ -915,9 +853,7 @@ static void device_disable_open_processes(struct hl_device *hdev) * hl_device_reset - reset the device * * @hdev: pointer to habanalabs device structure - * @hard_reset: should we do hard reset to all engines or just reset the - * compute/dma engines - * @from_hard_reset_thread: is the caller the hard-reset thread + * @flags: reset flags. * * Block future CS and wait for pending CS to be enqueued * Call ASIC H/W fini @@ -929,9 +865,10 @@ static void device_disable_open_processes(struct hl_device *hdev) * * Returns 0 for success or an error on failure. */ -int hl_device_reset(struct hl_device *hdev, bool hard_reset, - bool from_hard_reset_thread) +int hl_device_reset(struct hl_device *hdev, u32 flags) { + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; + bool hard_reset, from_hard_reset_thread; int i, rc; if (!hdev->init_done) { @@ -940,6 +877,9 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, return 0; } + hard_reset = (flags & HL_RESET_HARD) != 0; + from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0; + if ((!hard_reset) && (!hdev->supports_soft_reset)) { dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n"); hard_reset = true; @@ -960,7 +900,11 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, if (rc) return 0; - if (hard_reset) { + /* + * if reset is due to heartbeat, device CPU is no responsive in + * which case no point sending PCI disable message to it + */ + if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) { /* Disable PCI access from device F/W so he won't send * us additional interrupts. We disable MSI/MSI-X at * the halt_engines function and we can't have the F/W @@ -1030,6 +974,11 @@ again: /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); + /* Release all pending user interrupts, each pending user interrupt + * holds a reference to user context + */ + hl_release_pending_user_interrupts(hdev); + kill_processes: if (hard_reset) { /* Kill processes here after CS rollback. This is because the @@ -1078,14 +1027,6 @@ kill_processes: for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_reset(hdev, &hdev->completion_queue[i]); - hdev->idle_busy_ts_idx = 0; - hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0); - hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0); - - if (hdev->cs_active_cnt) - dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n", - hdev->cs_active_cnt); - mutex_lock(&hdev->fpriv_list_lock); /* Make sure the context switch phase will run again */ @@ -1151,6 +1092,16 @@ kill_processes: goto out_err; } + /* If device is not idle fail the reset process */ + if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { + dev_err(hdev->dev, + "device is not idle (mask %#llx %#llx) after reset\n", + idle_mask[0], idle_mask[1]); + rc = -EIO; + goto out_err; + } + /* Check that the communication with the device is working */ rc = hdev->asic_funcs->test_queues(hdev); if (rc) { @@ -1235,7 +1186,7 @@ out_err: */ int hl_device_init(struct hl_device *hdev, struct class *hclass) { - int i, rc, cq_cnt, cq_ready_cnt; + int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; char *name; bool add_cdev_sysfs_on_err = false; @@ -1274,13 +1225,26 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (rc) goto free_dev_ctrl; + user_interrupt_cnt = hdev->asic_prop.user_interrupt_count; + + if (user_interrupt_cnt) { + hdev->user_interrupt = kcalloc(user_interrupt_cnt, + sizeof(*hdev->user_interrupt), + GFP_KERNEL); + + if (!hdev->user_interrupt) { + rc = -ENOMEM; + goto early_fini; + } + } + /* * Start calling ASIC initialization. First S/W then H/W and finally * late init */ rc = hdev->asic_funcs->sw_init(hdev); if (rc) - goto early_fini; + goto user_interrupts_fini; /* * Initialize the H/W queues. Must be done before hw_init, because @@ -1478,6 +1442,8 @@ hw_queues_destroy: hl_hw_queues_destroy(hdev); sw_fini: hdev->asic_funcs->sw_fini(hdev); +user_interrupts_fini: + kfree(hdev->user_interrupt); early_fini: device_early_fini(hdev); free_dev_ctrl: @@ -1609,6 +1575,7 @@ void hl_device_fini(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_fini(hdev, &hdev->completion_queue[i]); kfree(hdev->completion_queue); + kfree(hdev->user_interrupt); hl_hw_queues_destroy(hdev); diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 09706c571e95..832dd5c5bb06 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -293,6 +293,7 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, u32 cpu_security_boot_status_reg) { u32 err_val, security_val; + bool err_exists = false; /* Some of the firmware status codes are deprecated in newer f/w * versions. In those versions, the errors are reported @@ -307,48 +308,102 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, if (!(err_val & CPU_BOOT_ERR0_ENABLED)) return 0; - if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) + if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) { dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) { dev_err(hdev->dev, "Device boot error - FIT image corrupted\n"); - if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) { dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { dev_warn(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED; + } if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) { - if (hdev->bmc_enable) - dev_warn(hdev->dev, + if (hdev->bmc_enable) { + dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n"); - else + err_exists = true; + } else { + dev_info(hdev->dev, + "Device boot message - Skipped waiting for BMC\n"); + /* This is an info so we don't want it to disable the + * device + */ err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED; + } } - if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) + if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) { dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n"); - if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) { dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) { dev_warn(hdev->dev, "Device boot warning - security not ready\n"); - if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY; + } + + if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) { dev_err(hdev->dev, "Device boot error - security failure\n"); - if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) { dev_err(hdev->dev, "Device boot error - eFuse failure\n"); - if (err_val & CPU_BOOT_ERR0_PLL_FAIL) + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_PLL_FAIL) { dev_err(hdev->dev, "Device boot error - PLL failure\n"); + err_exists = true; + } + + if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) { + dev_err(hdev->dev, + "Device boot error - device unusable\n"); + err_exists = true; + } security_val = RREG32(cpu_security_boot_status_reg); if (security_val & CPU_BOOT_DEV_STS0_ENABLED) dev_dbg(hdev->dev, "Device security status %#x\n", security_val); - if (err_val & ~CPU_BOOT_ERR0_ENABLED) + if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) { + dev_err(hdev->dev, + "Device boot error - unknown error 0x%08x\n", + err_val); + err_exists = true; + } + + if (err_exists) return -EIO; return 0; @@ -419,6 +474,73 @@ out: return rc; } +static int hl_fw_send_msi_info_msg(struct hl_device *hdev) +{ + struct cpucp_array_data_packet *pkt; + size_t total_pkt_size, data_size; + u64 result; + int rc; + + /* skip sending this info for unsupported ASICs */ + if (!hdev->asic_funcs->get_msi_info) + return 0; + + data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32); + total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size; + + /* data should be aligned to 8 bytes in order to CPU-CP to copy it */ + total_pkt_size = (total_pkt_size + 0x7) & ~0x7; + + /* total_pkt_size is casted to u16 later on */ + if (total_pkt_size > USHRT_MAX) { + dev_err(hdev->dev, "CPUCP array data is too big\n"); + return -EINVAL; + } + + pkt = kzalloc(total_pkt_size, GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES); + + hdev->asic_funcs->get_msi_info((u32 *)&pkt->data); + + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt, + total_pkt_size, 0, &result); + + /* + * in case packet result is invalid it means that FW does not support + * this feature and will use default/hard coded MSI values. no reason + * to stop the boot + */ + if (rc && result == cpucp_packet_invalid) + rc = 0; + + if (rc) + dev_err(hdev->dev, "failed to send CPUCP array data\n"); + + kfree(pkt); + + return rc; +} + +int hl_fw_cpucp_handshake(struct hl_device *hdev, + u32 cpu_security_boot_status_reg, + u32 boot_err0_reg) +{ + int rc; + + rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg, + boot_err0_reg); + if (rc) + return rc; + + return hl_fw_send_msi_info_msg(hdev); +} + int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { struct cpucp_packet pkt = {}; @@ -539,18 +661,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, + enum pll_index *pll_index) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u8 pll_byte, pll_bit_off; + bool dynamic_pll; + + if (input_pll_index >= PLL_MAX) { + dev_err(hdev->dev, "PLL index %d is out of range\n", + input_pll_index); + return -EINVAL; + } + + dynamic_pll = prop->fw_security_status_valid && + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); + + if (!dynamic_pll) { + /* + * in case we are working with legacy FW (each asic has unique + * PLL numbering) extract the legacy numbering + */ + *pll_index = hdev->legacy_pll_map[input_pll_index]; + return 0; + } + + /* PLL map is a u8 array */ + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; + pll_bit_off = input_pll_index & 0x7; + + if (!(pll_byte & BIT(pll_bit_off))) { + dev_err(hdev->dev, "PLL index %d is not supported\n", + input_pll_index); + return -EINVAL; + } + + *pll_index = input_pll_index; + + return 0; +} + +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr) { struct cpucp_packet pkt; + enum pll_index used_pll_idx; u64 result; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_type = __cpu_to_le16(pll_index); + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); @@ -565,6 +732,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, return rc; } +int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) +{ + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, "Failed to read power, error %d\n", rc); + return rc; + } + + *power = result; + + return rc; +} + static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) { /* Some of the status codes below are deprecated in newer f/w @@ -623,7 +813,11 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 status, security_status; int rc; - if (!hdev->cpu_enable) + /* pldm was added for cases in which we use preboot on pldm and want + * to load boot fit, but we can't wait for preboot because it runs + * very slowly + */ + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm) return 0; /* Need to check two possible scenarios: @@ -677,16 +871,16 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { prop->fw_security_status_valid = 1; + /* FW security should be derived from PCI ID, we keep this + * check for backward compatibility + */ if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN) prop->fw_security_disabled = false; - else - prop->fw_security_disabled = true; if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) prop->hard_reset_done_by_fw = true; } else { prop->fw_security_status_valid = 0; - prop->fw_security_disabled = true; } dev_dbg(hdev->dev, "Firmware preboot security status %#x\n", @@ -710,7 +904,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 status; int rc; - if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU)) + if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) return 0; dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", @@ -801,7 +995,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } - if (!(hdev->fw_loading & FW_TYPE_LINUX)) { + if (!(hdev->fw_components & FW_TYPE_LINUX)) { dev_info(hdev->dev, "Skip loading Linux F/W\n"); goto out; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 4b321e4f8059..44e89da30b4a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -19,6 +19,7 @@ #include <linux/dma-direction.h> #include <linux/scatterlist.h> #include <linux/hashtable.h> +#include <linux/debugfs.h> #include <linux/bitfield.h> #include <linux/genalloc.h> #include <linux/sched/signal.h> @@ -61,7 +62,7 @@ #define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ -#define HL_IDLE_BUSY_TS_ARR_SIZE 4096 +#define HL_COMMON_USER_INTERRUPT_ID 0xFFF /* Memory */ #define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ @@ -102,6 +103,23 @@ enum hl_mmu_page_table_location { #define HL_MAX_DCORES 4 +/* + * Reset Flags + * + * - HL_RESET_HARD + * If set do hard reset to all engines. If not set reset just + * compute/DMA engines. + * + * - HL_RESET_FROM_RESET_THREAD + * Set if the caller is the hard-reset thread + * + * - HL_RESET_HEARTBEAT + * Set if reset is due to heartbeat + */ +#define HL_RESET_HARD (1 << 0) +#define HL_RESET_FROM_RESET_THREAD (1 << 1) +#define HL_RESET_HEARTBEAT (1 << 2) + #define HL_MAX_SOBS_PER_MONITOR 8 /** @@ -169,15 +187,19 @@ enum hl_fw_component { }; /** - * enum hl_fw_types - F/W types to load + * enum hl_fw_types - F/W types present in the system * @FW_TYPE_LINUX: Linux image for device CPU * @FW_TYPE_BOOT_CPU: Boot image for device CPU + * @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system + * (preboot, ppboot etc...) * @FW_TYPE_ALL_TYPES: Mask for all types */ enum hl_fw_types { FW_TYPE_LINUX = 0x1, FW_TYPE_BOOT_CPU = 0x2, - FW_TYPE_ALL_TYPES = (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU) + FW_TYPE_PREBOOT_CPU = 0x4, + FW_TYPE_ALL_TYPES = + (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU | FW_TYPE_PREBOOT_CPU) }; /** @@ -368,6 +390,7 @@ struct hl_mmu_properties { * @dram_size: DRAM total size. * @dram_pci_bar_size: size of PCI bar towards DRAM. * @max_power_default: max power of the device after reset + * @dc_power_default: power consumed by the device in mode idle. * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page * fault. * @pcie_dbi_base_address: Base address of the PCIE_DBI block. @@ -412,6 +435,7 @@ struct hl_mmu_properties { * @first_available_user_msix_interrupt: first available msix interrupt * reserved for the user * @first_available_cq: first available CQ for the user. + * @user_interrupt_count: number of user interrupts. * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. * @fw_security_disabled: true if security measures are disabled in firmware, @@ -421,6 +445,7 @@ struct hl_mmu_properties { * @dram_supports_virtual_memory: is there an MMU towards the DRAM * @hard_reset_done_by_fw: true if firmware is handling hard reset flow * @num_functional_hbms: number of functional HBMs in each DCORE. + * @iatu_done_by_fw: true if iATU configuration is being done by FW. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -439,6 +464,7 @@ struct asic_fixed_properties { u64 dram_size; u64 dram_pci_bar_size; u64 max_power_default; + u64 dc_power_default; u64 dram_size_for_default_page_mapping; u64 pcie_dbi_base_address; u64 pcie_aux_dbi_reg_addr; @@ -475,6 +501,7 @@ struct asic_fixed_properties { u16 first_available_user_mon[HL_MAX_DCORES]; u16 first_available_user_msix_interrupt; u16 first_available_cq[HL_MAX_DCORES]; + u16 user_interrupt_count; u8 tpc_enabled_mask; u8 completion_queues_count; u8 fw_security_disabled; @@ -482,6 +509,7 @@ struct asic_fixed_properties { u8 dram_supports_virtual_memory; u8 hard_reset_done_by_fw; u8 num_functional_hbms; + u8 iatu_done_by_fw; }; /** @@ -503,6 +531,7 @@ struct hl_fence { /** * struct hl_cs_compl - command submission completion object. + * @sob_reset_work: workqueue object to run SOB reset flow. * @base_fence: hl fence object. * @lock: spinlock to protect fence. * @hdev: habanalabs device structure. @@ -513,6 +542,7 @@ struct hl_fence { * @sob_group: the SOB group that is used in this collective wait CS. */ struct hl_cs_compl { + struct work_struct sob_reset_work; struct hl_fence base_fence; spinlock_t lock; struct hl_device *hdev; @@ -690,6 +720,31 @@ struct hl_cq { }; /** + * struct hl_user_interrupt - holds user interrupt information + * @hdev: pointer to the device structure + * @wait_list_head: head to the list of user threads pending on this interrupt + * @wait_list_lock: protects wait_list_head + * @interrupt_id: msix interrupt id + */ +struct hl_user_interrupt { + struct hl_device *hdev; + struct list_head wait_list_head; + spinlock_t wait_list_lock; + u32 interrupt_id; +}; + +/** + * struct hl_user_pending_interrupt - holds a context to a user thread + * pending on an interrupt + * @wait_list_node: node in the list of user threads pending on an interrupt + * @fence: hl fence object for interrupt completion + */ +struct hl_user_pending_interrupt { + struct list_head wait_list_node; + struct hl_fence fence; +}; + +/** * struct hl_eq - describes the event queue (single one per device) * @hdev: pointer to the device structure * @kernel_address: holds the queue's kernel virtual address @@ -713,11 +768,13 @@ struct hl_eq { * @ASIC_INVALID: Invalid ASIC type. * @ASIC_GOYA: Goya device. * @ASIC_GAUDI: Gaudi device. + * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). */ enum hl_asic_type { ASIC_INVALID, ASIC_GOYA, - ASIC_GAUDI + ASIC_GAUDI, + ASIC_GAUDI_SEC }; struct hl_cs_parser; @@ -802,8 +859,12 @@ enum div_select_defs { * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. * @restore_phase_topology: clear all SOBs amd MONs. - * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM. - * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM. + * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM/Host memory. + * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM/Host memory. + * @debugfs_read64: debug interface for reading u64 from DRAM/SRAM/Host memory. + * @debugfs_write64: debug interface for writing u64 to DRAM/SRAM/Host memory. + * @debugfs_read_dma: debug interface for reading up to 2MB from the device's + * internal memory via DMA engine. * @add_device_attr: add ASIC specific device attributes. * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. * @set_pll_profile: change PLL profile (manual/automatic). @@ -919,10 +980,16 @@ struct hl_asic_funcs { void (*update_eq_ci)(struct hl_device *hdev, u32 val); int (*context_switch)(struct hl_device *hdev, u32 asid); void (*restore_phase_topology)(struct hl_device *hdev); - int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); - int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); - int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val); - int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val); + int (*debugfs_read32)(struct hl_device *hdev, u64 addr, + bool user_address, u32 *val); + int (*debugfs_write32)(struct hl_device *hdev, u64 addr, + bool user_address, u32 val); + int (*debugfs_read64)(struct hl_device *hdev, u64 addr, + bool user_address, u64 *val); + int (*debugfs_write64)(struct hl_device *hdev, u64 addr, + bool user_address, u64 val); + int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr); void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_attr_grp); void (*handle_eqe)(struct hl_device *hdev, @@ -986,6 +1053,7 @@ struct hl_asic_funcs { int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, u32 block_id, u32 block_size); void (*enable_events_from_fw)(struct hl_device *hdev); + void (*get_msi_info)(u32 *table); }; @@ -1070,9 +1138,11 @@ struct hl_pending_cb { * @mem_hash_lock: protects the mem_hash. * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the * MMU hash or walking the PGT requires talking this lock. + * @hw_block_list_lock: protects the HW block memory list. * @debugfs_list: node in debugfs list of contexts. * pending_cb_list: list of pending command buffers waiting to be sent upon * next user command submission context. + * @hw_block_mem_list: list of HW block virtual mapped addresses. * @cs_counters: context command submission counters. * @cb_va_pool: device VA pool for command buffers which are mapped to the * device's MMU. @@ -1109,8 +1179,10 @@ struct hl_ctx { struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; struct mutex mem_hash_lock; struct mutex mmu_lock; + struct mutex hw_block_list_lock; struct list_head debugfs_list; struct list_head pending_cb_list; + struct list_head hw_block_mem_list; struct hl_cs_counters_atomic cs_counters; struct gen_pool *cb_va_pool; u64 cs_sequence; @@ -1185,6 +1257,7 @@ struct hl_userptr { * @sequence: the sequence number of this CS. * @staged_sequence: the sequence of the staged submission this CS is part of, * relevant only if staged_cs is set. + * @timeout_jiffies: cs timeout in jiffies. * @type: CS_TYPE_*. * @submitted: true if CS was submitted to H/W. * @completed: true if CS was completed by device. @@ -1213,6 +1286,7 @@ struct hl_cs { struct list_head debugfs_list; u64 sequence; u64 staged_sequence; + u64 timeout_jiffies; enum hl_cs_type type; u8 submitted; u8 completed; @@ -1330,6 +1404,23 @@ struct hl_vm_hash_node { }; /** + * struct hl_vm_hw_block_list_node - list element from user virtual address to + * HW block id. + * @node: node to hang on the list in context object. + * @ctx: the context this node belongs to. + * @vaddr: virtual address of the HW block. + * @size: size of the block. + * @id: HW block id (handle). + */ +struct hl_vm_hw_block_list_node { + struct list_head node; + struct hl_ctx *ctx; + unsigned long vaddr; + u32 size; + u32 id; +}; + +/** * struct hl_vm_phys_pg_pack - physical page pack. * @vm_type: describes the type of the virtual area descriptor. * @pages: the physical page array. @@ -1490,12 +1581,13 @@ struct hl_debugfs_entry { * @userptr_spinlock: protects userptr_list. * @ctx_mem_hash_list: list of available contexts with MMU mappings. * @ctx_mem_hash_spinlock: protects cb_list. + * @blob_desc: descriptor of blob * @addr: next address to read/write from/to in read/write32. * @mmu_addr: next virtual address to translate to physical address in mmu_show. * @mmu_asid: ASID to use while translating in mmu_show. * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read. + * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. + * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. */ struct hl_dbg_device_entry { struct dentry *root; @@ -1513,6 +1605,7 @@ struct hl_dbg_device_entry { spinlock_t userptr_spinlock; struct list_head ctx_mem_hash_list; spinlock_t ctx_mem_hash_spinlock; + struct debugfs_blob_wrapper blob_desc; u64 addr; u64 mmu_addr; u32 mmu_asid; @@ -1684,16 +1777,6 @@ struct hl_device_reset_work { }; /** - * struct hl_device_idle_busy_ts - used for calculating device utilization rate. - * @idle_to_busy_ts: timestamp where device changed from idle to busy. - * @busy_to_idle_ts: timestamp where device changed from busy to idle. - */ -struct hl_device_idle_busy_ts { - ktime_t idle_to_busy_ts; - ktime_t busy_to_idle_ts; -}; - -/** * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop * information. * @virt_addr: the virtual address of the hop. @@ -1821,9 +1904,16 @@ struct hl_mmu_funcs { * @asic_name: ASIC specific name. * @asic_type: ASIC specific type. * @completion_queue: array of hl_cq. + * @user_interrupt: array of hl_user_interrupt. upon the corresponding user + * interrupt, driver will monitor the list of fences + * registered to this interrupt. + * @common_user_interrupt: common user interrupt for all user interrupts. + * upon any user interrupt, driver will monitor the + * list of fences registered to this common structure. * @cq_wq: work queues of completion queues for executing work in process * context. * @eq_wq: work queue of event queue for executing work in process context. + * @sob_reset_wq: work queue for sob reset executions. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. @@ -1857,11 +1947,11 @@ struct hl_mmu_funcs { * when a user opens the device * @fpriv_list_lock: protects the fpriv_list * @compute_ctx: current compute context executing. - * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy - * and vice-versa * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. + * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and + * static (asic specific) PLL indexes. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -1874,13 +1964,10 @@ struct hl_mmu_funcs { * @curr_pll_profile: current PLL profile. * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. - * @cs_active_cnt: number of active command submissions on this device (active - * means already in H/W queues) * @major: habanalabs kernel driver major. * @high_pll: high PLL profile frequency. * @soft_reset_cnt: number of soft reset since the driver was loaded. * @hard_reset_cnt: number of hard reset since the driver was loaded. - * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr * @clk_throttling_reason: bitmask represents the current clk throttling reasons * @id: device minor. * @id_control: minor of the control device @@ -1937,8 +2024,11 @@ struct hl_device { char status[HL_DEV_STS_MAX][HL_STR_MAX]; enum hl_asic_type asic_type; struct hl_cq *completion_queue; + struct hl_user_interrupt *user_interrupt; + struct hl_user_interrupt common_user_interrupt; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; + struct workqueue_struct *sob_reset_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; @@ -1976,13 +2066,13 @@ struct hl_device { struct hl_ctx *compute_ctx; - struct hl_device_idle_busy_ts *idle_busy_ts_arr; - struct hl_cs_counters_atomic aggregated_cs_counters; struct hl_mmu_priv mmu_priv; struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; + enum pll_index *legacy_pll_map; + atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -1990,12 +2080,10 @@ struct hl_device { atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; enum cpucp_card_types card_type; - int cs_active_cnt; u32 major; u32 high_pll; u32 soft_reset_cnt; u32 hard_reset_cnt; - u32 idle_busy_ts_idx; u32 clk_throttling_reason; u16 id; u16 id_control; @@ -2029,10 +2117,9 @@ struct hl_device { /* Parameters for bring-up */ u64 nic_ports_mask; - u64 fw_loading; + u64 fw_components; u8 mmu_enable; u8 mmu_huge_page_opt; - u8 cpu_enable; u8 reset_pcilink; u8 cpu_queues_enable; u8 pldm; @@ -2043,6 +2130,7 @@ struct hl_device { u8 bmc_enable; u8 rl_enable; u8 reset_on_preboot_fail; + u8 reset_upon_device_release; }; @@ -2157,6 +2245,8 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); +irqreturn_t hl_irq_handler_user_cq(int irq, void *arg); +irqreturn_t hl_irq_handler_default(int irq, void *arg); u32 hl_cq_inc_ptr(u32 ptr); int hl_asid_init(struct hl_device *hdev); @@ -2178,12 +2268,11 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass); void hl_device_fini(struct hl_device *hdev); int hl_device_suspend(struct hl_device *hdev); int hl_device_resume(struct hl_device *hdev); -int hl_device_reset(struct hl_device *hdev, bool hard_reset, - bool from_hard_reset_thread); +int hl_device_reset(struct hl_device *hdev, u32 flags); void hl_hpriv_get(struct hl_fpriv *hpriv); -void hl_hpriv_put(struct hl_fpriv *hpriv); +int hl_hpriv_put(struct hl_fpriv *hpriv); int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); +int hl_device_utilization(struct hl_device *hdev, u32 *utilization); int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr); @@ -2235,6 +2324,9 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx); int hl_vm_init(struct hl_device *hdev); void hl_vm_fini(struct hl_device *hdev); +void hl_hw_block_mem_init(struct hl_ctx *ctx); +void hl_hw_block_mem_fini(struct hl_ctx *ctx); + u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_va_range_type type, u32 size, u32 alignment); int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, @@ -2287,13 +2379,19 @@ int hl_fw_send_heartbeat(struct hl_device *hdev); int hl_fw_cpucp_info_get(struct hl_device *hdev, u32 cpu_security_boot_status_reg, u32 boot_err0_reg); +int hl_fw_cpucp_handshake(struct hl_device *hdev, + u32 cpu_security_boot_status_reg, + u32 boot_err0_reg); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, + enum pll_index *pll_index); +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr); +int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 cpu_security_boot_status_reg, u32 boot_err0_reg, @@ -2304,6 +2402,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); +int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, struct hl_inbound_pci_region *pci_region); @@ -2312,8 +2411,10 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, + bool curr); +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, + u64 freq); int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value); int hl_set_temperature(struct hl_device *hdev, @@ -2334,6 +2435,7 @@ int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); +void hl_release_pending_user_interrupts(struct hl_device *hdev); #ifdef CONFIG_DEBUG_FS @@ -2434,7 +2536,7 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data); int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); #endif /* HABANALABSP_H_ */ diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 032d114f01ea..7135f1e03864 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -27,13 +27,13 @@ static struct class *hl_class; static DEFINE_IDR(hl_devs_idr); static DEFINE_MUTEX(hl_devs_idr_lock); -static int timeout_locked = 5; +static int timeout_locked = 30; static int reset_on_lockup = 1; static int memory_scrub = 1; module_param(timeout_locked, int, 0444); MODULE_PARM_DESC(timeout_locked, - "Device lockup timeout in seconds (0 = disabled, default 5s)"); + "Device lockup timeout in seconds (0 = disabled, default 30s)"); module_param(reset_on_lockup, int, 0444); MODULE_PARM_DESC(reset_on_lockup, @@ -47,10 +47,12 @@ MODULE_PARM_DESC(memory_scrub, #define PCI_IDS_GOYA 0x0001 #define PCI_IDS_GAUDI 0x1000 +#define PCI_IDS_GAUDI_SEC 0x1010 static const struct pci_device_id ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, { 0, } }; MODULE_DEVICE_TABLE(pci, ids); @@ -74,6 +76,9 @@ static enum hl_asic_type get_asic_type(u16 device) case PCI_IDS_GAUDI: asic_type = ASIC_GAUDI; break; + case PCI_IDS_GAUDI_SEC: + asic_type = ASIC_GAUDI_SEC; + break; default: asic_type = ASIC_INVALID; break; @@ -82,6 +87,16 @@ static enum hl_asic_type get_asic_type(u16 device) return asic_type; } +static bool is_asic_secured(enum hl_asic_type asic_type) +{ + switch (asic_type) { + case ASIC_GAUDI_SEC: + return true; + default: + return false; + } +} + /* * hl_device_open - open function for habanalabs device * @@ -234,8 +249,7 @@ out_err: static void set_driver_behavior_per_device(struct hl_device *hdev) { - hdev->cpu_enable = 1; - hdev->fw_loading = FW_TYPE_ALL_TYPES; + hdev->fw_components = FW_TYPE_ALL_TYPES; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; hdev->mmu_enable = 1; @@ -288,6 +302,12 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->asic_type = asic_type; } + if (pdev) + hdev->asic_prop.fw_security_disabled = + !is_asic_secured(pdev->device); + else + hdev->asic_prop.fw_security_disabled = true; + /* Assign status description string */ strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 083a30969c5f..33841c272eb6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -226,19 +226,14 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) struct hl_info_device_utilization device_util = {0}; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; if ((!max_size) || (!out)) return -EINVAL; - if ((args->period_ms < 100) || (args->period_ms > 1000) || - (args->period_ms % 100)) { - dev_err(hdev->dev, - "period %u must be between 100 - 1000 and must be divisible by 100\n", - args->period_ms); + rc = hl_device_utilization(hdev, &device_util.utilization); + if (rc) return -EINVAL; - } - - device_util.utilization = hl_device_utilization(hdev, args->period_ms); return copy_to_user(out, &device_util, min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0; @@ -446,6 +441,25 @@ static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0; } +static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_power_info power_info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_cpucp_power_get(hdev, &power_info.power); + if (rc) + return rc; + + return copy_to_user(out, &power_info, + min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -526,6 +540,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_PLL_FREQUENCY: return pll_frequency_info(hpriv, args); + case HL_INFO_POWER: + return power_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; @@ -596,7 +613,7 @@ static const struct hl_ioctl_desc hl_ioctls[] = { HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), - HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl), + HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl), HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) }; diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 0f335182267f..173438461835 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -629,20 +629,12 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && first_entry && cs_needs_timeout(cs)) { cs->tdr_active = true; - schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies); + schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies); } spin_unlock(&hdev->cs_mirror_lock); - if (!hdev->cs_active_cnt++) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx]; - ts->busy_to_idle_ts = ktime_set(0, 0); - ts->idle_to_busy_ts = ktime_get(); - } - list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) switch (job->queue_type) { case QUEUE_TYPE_EXT: diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index 44a0522b59b9..27129868c711 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -137,6 +137,62 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) return IRQ_HANDLED; } +static void handle_user_cq(struct hl_device *hdev, + struct hl_user_interrupt *user_cq) +{ + struct hl_user_pending_interrupt *pend; + + spin_lock(&user_cq->wait_list_lock); + list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) + complete_all(&pend->fence.completion); + spin_unlock(&user_cq->wait_list_lock); +} + +/** + * hl_irq_handler_user_cq - irq handler for user completion queues + * + * @irq: irq number + * @arg: pointer to user interrupt structure + * + */ +irqreturn_t hl_irq_handler_user_cq(int irq, void *arg) +{ + struct hl_user_interrupt *user_cq = arg; + struct hl_device *hdev = user_cq->hdev; + + dev_dbg(hdev->dev, + "got user completion interrupt id %u", + user_cq->interrupt_id); + + /* Handle user cq interrupts registered on all interrupts */ + handle_user_cq(hdev, &hdev->common_user_interrupt); + + /* Handle user cq interrupts registered on this specific interrupt */ + handle_user_cq(hdev, user_cq); + + return IRQ_HANDLED; +} + +/** + * hl_irq_handler_default - default irq handler + * + * @irq: irq number + * @arg: pointer to user interrupt structure + * + */ +irqreturn_t hl_irq_handler_default(int irq, void *arg) +{ + struct hl_user_interrupt *user_interrupt = arg; + struct hl_device *hdev = user_interrupt->hdev; + u32 interrupt_id = user_interrupt->interrupt_id; + + dev_err(hdev->dev, + "got invalid user interrupt %u", + interrupt_id); + + return IRQ_HANDLED; +} + /** * hl_irq_handler_eq - irq handler for event queue * diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 1f5910517b0e..2938cbbafbbc 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -81,16 +81,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, num_pgs, total_size); return -ENOMEM; } - - if (hdev->memory_scrub) { - rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr, - total_size); - if (rc) { - dev_err(hdev->dev, - "Failed to scrub contiguous device memory\n"); - goto pages_pack_err; - } - } } phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); @@ -128,24 +118,13 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, goto page_err; } - if (hdev->memory_scrub) { - rc = hdev->asic_funcs->scrub_device_mem(hdev, - phys_pg_pack->pages[i], - page_size); - if (rc) { - dev_err(hdev->dev, - "Failed to scrub device memory\n"); - goto page_err; - } - } - num_curr_pgs++; } } spin_lock(&vm->idr_lock); handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, - GFP_ATOMIC); + GFP_KERNEL); spin_unlock(&vm->idr_lock); if (handle < 0) { @@ -280,37 +259,67 @@ static void dram_pg_pool_do_release(struct kref *ref) * @phys_pg_pack: physical page pack to free. * * This function does the following: - * - For DRAM memory only, iterate over the pack and free each physical block - * structure by returning it to the general pool. + * - For DRAM memory only + * - iterate over the pack, scrub and free each physical block structure by + * returning it to the general pool. + * In case of error during scrubbing, initiate hard reset. + * Once hard reset is triggered, scrubbing is bypassed while freeing the + * memory continues. * - Free the hl_vm_phys_pg_pack structure. */ -static void free_phys_pg_pack(struct hl_device *hdev, +static int free_phys_pg_pack(struct hl_device *hdev, struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_vm *vm = &hdev->vm; u64 i; + int rc = 0; - if (!phys_pg_pack->created_from_userptr) { - if (phys_pg_pack->contiguous) { - gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + if (phys_pg_pack->created_from_userptr) + goto end; + + if (phys_pg_pack->contiguous) { + if (hdev->memory_scrub && !hdev->disabled) { + rc = hdev->asic_funcs->scrub_device_mem(hdev, + phys_pg_pack->pages[0], phys_pg_pack->total_size); + if (rc) + dev_err(hdev->dev, + "Failed to scrub contiguous device memory\n"); + } - for (i = 0; i < phys_pg_pack->npages ; i++) - kref_put(&vm->dram_pg_pool_refcount, - dram_pg_pool_do_release); - } else { - for (i = 0 ; i < phys_pg_pack->npages ; i++) { - gen_pool_free(vm->dram_pg_pool, + gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + phys_pg_pack->total_size); + + for (i = 0; i < phys_pg_pack->npages ; i++) + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); + } else { + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + if (hdev->memory_scrub && !hdev->disabled && rc == 0) { + rc = hdev->asic_funcs->scrub_device_mem( + hdev, phys_pg_pack->pages[i], phys_pg_pack->page_size); - kref_put(&vm->dram_pg_pool_refcount, - dram_pg_pool_do_release); + if (rc) + dev_err(hdev->dev, + "Failed to scrub device memory\n"); } + gen_pool_free(vm->dram_pg_pool, + phys_pg_pack->pages[i], + phys_pg_pack->page_size); + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); } } + if (rc && !hdev->disabled) + hl_device_reset(hdev, HL_RESET_HARD); + +end: kvfree(phys_pg_pack->pages); kfree(phys_pg_pack); + + return rc; } /** @@ -349,7 +358,7 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args) atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); - free_phys_pg_pack(hdev, phys_pg_pack); + return free_phys_pg_pack(hdev, phys_pg_pack); } else { spin_unlock(&vm->idr_lock); dev_err(hdev->dev, @@ -857,6 +866,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; u32 page_size = phys_pg_pack->page_size; int rc = 0; + bool is_host_addr; for (i = 0 ; i < phys_pg_pack->npages ; i++) { paddr = phys_pg_pack->pages[i]; @@ -878,6 +888,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, return 0; err: + is_host_addr = !hl_is_dram_va(hdev, vaddr); + next_vaddr = vaddr; for (i = 0 ; i < mapped_pg_cnt ; i++) { if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, @@ -888,6 +900,17 @@ err: phys_pg_pack->pages[i], page_size); next_vaddr += page_size; + + /* + * unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + * + * In addition, on host num of pages could be huge, + * because page size could be 4KB, so when unmapping host + * pages sleep every 32K pages to avoid soft lockup + */ + if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) + usleep_range(50, 200); } return rc; @@ -921,9 +944,9 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, * unmapping on Palladium can be really long, so avoid a CPU * soft lockup bug by sleeping a little between unmapping pages * - * In addition, when unmapping host memory we pass through - * the Linux kernel to unpin the pages and that takes a long - * time. Therefore, sleep every 32K pages to avoid soft lockup + * In addition, on host num of pages could be huge, + * because page size could be 4KB, so when unmapping host + * pages sleep every 32K pages to avoid soft lockup */ if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) usleep_range(50, 200); @@ -1117,9 +1140,9 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, *device_addr = ret_vaddr; if (is_userptr) - free_phys_pg_pack(hdev, phys_pg_pack); + rc = free_phys_pg_pack(hdev, phys_pg_pack); - return 0; + return rc; map_err: if (add_va_block(hdev, va_range, ret_vaddr, @@ -1272,7 +1295,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, kfree(hnode); if (is_userptr) { - free_phys_pg_pack(hdev, phys_pg_pack); + rc = free_phys_pg_pack(hdev, phys_pg_pack); dma_unmap_host_va(hdev, userptr); } @@ -1305,9 +1328,15 @@ static int map_block(struct hl_device *hdev, u64 address, u64 *handle, static void hw_block_vm_close(struct vm_area_struct *vma) { - struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data; + struct hl_vm_hw_block_list_node *lnode = + (struct hl_vm_hw_block_list_node *) vma->vm_private_data; + struct hl_ctx *ctx = lnode->ctx; + mutex_lock(&ctx->hw_block_list_lock); + list_del(&lnode->node); + mutex_unlock(&ctx->hw_block_list_lock); hl_ctx_put(ctx); + kfree(lnode); vma->vm_private_data = NULL; } @@ -1325,7 +1354,9 @@ static const struct vm_operations_struct hw_block_vm_ops = { */ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) { + struct hl_vm_hw_block_list_node *lnode; struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; u32 block_id, block_size; int rc; @@ -1351,17 +1382,31 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) return -EINVAL; } + lnode = kzalloc(sizeof(*lnode), GFP_KERNEL); + if (!lnode) + return -ENOMEM; + vma->vm_ops = &hw_block_vm_ops; - vma->vm_private_data = hpriv->ctx; + vma->vm_private_data = lnode; - hl_ctx_get(hdev, hpriv->ctx); + hl_ctx_get(hdev, ctx); rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); if (rc) { - hl_ctx_put(hpriv->ctx); + hl_ctx_put(ctx); + kfree(lnode); return rc; } + lnode->ctx = ctx; + lnode->vaddr = vma->vm_start; + lnode->size = block_size; + lnode->id = block_id; + + mutex_lock(&ctx->hw_block_list_lock); + list_add_tail(&lnode->node, &ctx->hw_block_mem_list); + mutex_unlock(&ctx->hw_block_list_lock); + vma->vm_pgoff = block_id; return 0; @@ -1574,7 +1619,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, rc = sg_alloc_table_from_pages(userptr->sgt, userptr->pages, - npages, offset, size, GFP_ATOMIC); + npages, offset, size, GFP_KERNEL); if (rc < 0) { dev_err(hdev->dev, "failed to create SG table from pages\n"); goto put_pages; @@ -1624,11 +1669,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } - /* - * This function can be called also from data path, hence use atomic - * always as it is not a big allocation. - */ - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL); if (!userptr->sgt) return -ENOMEM; @@ -2122,3 +2163,38 @@ void hl_vm_fini(struct hl_device *hdev) vm->init_done = false; } + +/** + * hl_hw_block_mem_init() - HW block memory initialization. + * @ctx: pointer to the habanalabs context structure. + * + * This function initializes the HW block virtual mapped addresses list and + * it's lock. + */ +void hl_hw_block_mem_init(struct hl_ctx *ctx) +{ + mutex_init(&ctx->hw_block_list_lock); + INIT_LIST_HEAD(&ctx->hw_block_mem_list); +} + +/** + * hl_hw_block_mem_fini() - HW block memory teardown. + * @ctx: pointer to the habanalabs context structure. + * + * This function clears the HW block virtual mapped addresses list and destroys + * it's lock. + */ +void hl_hw_block_mem_fini(struct hl_ctx *ctx) +{ + struct hl_vm_hw_block_list_node *lnode, *tmp; + + if (!list_empty(&ctx->hw_block_mem_list)) + dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n"); + + list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) { + list_del(&lnode->node); + kfree(lnode); + } + + mutex_destroy(&ctx->hw_block_list_lock); +} diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 93c9e5f587e1..b37189956b14 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -532,6 +532,8 @@ int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr) struct hl_mmu_hop_info hops; int rc; + memset(&hops, 0, sizeof(hops)); + rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops); if (rc) return rc; @@ -589,6 +591,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) switch (hdev->asic_type) { case ASIC_GOYA: case ASIC_GAUDI: + case ASIC_GAUDI_SEC: hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); break; default: diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index b799f9258fb0..e941b7eef346 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -85,6 +85,58 @@ static void hl_pci_bars_unmap(struct hl_device *hdev) pci_release_regions(pdev); } +int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u64 msec; + u32 val; + + if (hdev->pldm) + msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC; + else + msec = HL_PCI_ELBI_TIMEOUT_MSEC; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, 0); + + timeout = ktime_add_ms(ktime_get(), msec); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + + return 0; + } + + if (val & PCI_CONFIG_ELBI_STS_ERR) { + dev_err(hdev->dev, "Error reading from ELBI\n"); + return -EIO; + } + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI read didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI read has undefined bits in status\n"); + return -EIO; +} + /** * hl_pci_elbi_write() - Write through the ELBI interface. * @hdev: Pointer to hl_device structure. diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 4366d8f93842..9fa61573a89d 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -9,12 +9,18 @@ #include <linux/pci.h> -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, + bool curr) { struct cpucp_packet pkt; + u32 used_pll_idx; u64 result; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + memset(&pkt, 0, sizeof(pkt)); if (curr) @@ -23,7 +29,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) else pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); @@ -31,23 +37,29 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) if (rc) { dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", - pll_index, rc); + used_pll_idx, rc); return rc; } return (long) result; } -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, + u64 freq) { struct cpucp_packet pkt; + u32 used_pll_idx; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return; + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -56,7 +68,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) if (rc) dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n", - pll_index, rc); + used_pll_idx, rc); } u64 hl_get_max_power(struct hl_device *hdev) @@ -203,7 +215,7 @@ static ssize_t soft_reset_store(struct device *dev, dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n"); - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); out: return count; @@ -226,7 +238,7 @@ static ssize_t hard_reset_store(struct device *dev, dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); out: return count; @@ -245,6 +257,9 @@ static ssize_t device_type_show(struct device *dev, case ASIC_GAUDI: str = "GAUDI"; break; + case ASIC_GAUDI_SEC: + str = "GAUDI SEC"; + break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", hdev->asic_type); @@ -344,7 +359,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t offset, size_t max_size) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct hl_device *hdev = dev_get_drvdata(dev); char *data; int rc; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 9152242778f5..b751652f80a8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -105,6 +105,36 @@ #define GAUDI_PLL_MAX 10 +/* + * this enum kept here for compatibility with old FW (in which each asic has + * unique PLL numbering + */ +enum gaudi_pll_index { + GAUDI_CPU_PLL = 0, + GAUDI_PCI_PLL, + GAUDI_SRAM_PLL, + GAUDI_HBM_PLL, + GAUDI_NIC_PLL, + GAUDI_DMA_PLL, + GAUDI_MESH_PLL, + GAUDI_MME_PLL, + GAUDI_TPC_PLL, + GAUDI_IF_PLL, +}; + +static enum pll_index gaudi_pll_map[PLL_MAX] = { + [CPU_PLL] = GAUDI_CPU_PLL, + [PCI_PLL] = GAUDI_PCI_PLL, + [SRAM_PLL] = GAUDI_SRAM_PLL, + [HBM_PLL] = GAUDI_HBM_PLL, + [NIC_PLL] = GAUDI_NIC_PLL, + [DMA_PLL] = GAUDI_DMA_PLL, + [MESH_PLL] = GAUDI_MESH_PLL, + [MME_PLL] = GAUDI_MME_PLL, + [TPC_PLL] = GAUDI_TPC_PLL, + [IF_PLL] = GAUDI_IF_PLL, +}; + static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -396,6 +426,19 @@ get_collective_mode(struct hl_device *hdev, u32 queue_id) return HL_COLLECTIVE_NOT_SUPPORTED; } +static inline void set_default_power_values(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (hdev->card_type == cpucp_card_type_pmc) { + prop->max_power_default = MAX_POWER_DEFAULT_PMC; + prop->dc_power_default = DC_POWER_DEFAULT_PMC; + } else { + prop->max_power_default = MAX_POWER_DEFAULT_PCI; + prop->dc_power_default = DC_POWER_DEFAULT_PCI; + } +} + static int gaudi_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -507,7 +550,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->num_of_events = GAUDI_EVENT_SIZE; prop->tpc_enabled_mask = TPC_ENABLED_MASK; - prop->max_power_default = MAX_POWER_DEFAULT_PCI; + set_default_power_values(hdev); prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; @@ -532,8 +575,6 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < HL_MAX_DCORES ; i++) prop->first_available_cq[i] = USHRT_MAX; - /* disable fw security for now, set it in a later stage */ - prop->fw_security_disabled = true; prop->fw_security_status_valid = false; prop->hard_reset_done_by_fw = false; @@ -588,6 +629,11 @@ static int gaudi_init_iatu(struct hl_device *hdev) struct hl_outbound_pci_region outbound_region; int rc; + if (hdev->asic_prop.iatu_done_by_fw) { + hdev->asic_funcs->set_dma_mask_from_fw(hdev); + return 0; + } + /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ inbound_region.mode = PCI_BAR_MATCH_MODE; inbound_region.bar = SRAM_BAR_ID; @@ -632,6 +678,7 @@ static int gaudi_early_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct pci_dev *pdev = hdev->pdev; + u32 fw_boot_status; int rc; rc = gaudi_get_fixed_properties(hdev); @@ -665,6 +712,23 @@ static int gaudi_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); + /* If FW security is enabled at this point it means no access to ELBI */ + if (!hdev->asic_prop.fw_security_disabled) { + hdev->asic_prop.iatu_done_by_fw = true; + goto pci_init; + } + + rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, + &fw_boot_status); + if (rc) + goto free_queue_props; + + /* Check whether FW is configuring iATU */ + if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && + (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) + hdev->asic_prop.iatu_done_by_fw = true; + +pci_init: rc = hl_pci_init(hdev); if (rc) goto free_queue_props; @@ -1588,6 +1652,9 @@ static int gaudi_sw_init(struct hl_device *hdev) hdev->asic_specific = gaudi; + /* store legacy PLL map */ + hdev->legacy_pll_map = gaudi_pll_map; + /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); @@ -1766,8 +1833,7 @@ static int gaudi_enable_msi(struct hl_device *hdev) if (gaudi->hw_cap_initialized & HW_CAP_MSI) return 0; - rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES, - PCI_IRQ_MSI); + rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); if (rc < 0) { dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); return rc; @@ -3701,7 +3767,7 @@ static int gaudi_init_cpu(struct hl_device *hdev) struct gaudi_device *gaudi = hdev->asic_specific; int rc; - if (!hdev->cpu_enable) + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) return 0; if (gaudi->hw_cap_initialized & HW_CAP_CPU) @@ -4873,7 +4939,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, parser->job_userptr_list, &userptr)) goto already_pinned; - userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); + userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); if (!userptr) return -ENOMEM; @@ -5684,18 +5750,26 @@ release_cb: static int gaudi_schedule_register_memset(struct hl_device *hdev, u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val) { - struct hl_ctx *ctx = hdev->compute_ctx; + struct hl_ctx *ctx; struct hl_pending_cb *pending_cb; struct packet_msg_long *pkt; u32 cb_size, ctl; struct hl_cb *cb; - int i; + int i, rc; + + mutex_lock(&hdev->fpriv_list_lock); + ctx = hdev->compute_ctx; /* If no compute context available or context is going down * memset registers directly */ - if (!ctx || kref_read(&ctx->refcount) == 0) - return gaudi_memset_registers(hdev, reg_base, num_regs, val); + if (!ctx || kref_read(&ctx->refcount) == 0) { + rc = gaudi_memset_registers(hdev, reg_base, num_regs, val); + mutex_unlock(&hdev->fpriv_list_lock); + return rc; + } + + mutex_unlock(&hdev->fpriv_list_lock); cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot) * 2; @@ -5911,13 +5985,16 @@ static void gaudi_restore_phase_topology(struct hl_device *hdev) } -static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) +static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, + bool user_address, u32 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -5949,6 +6026,9 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); } else { rc = -EFAULT; } @@ -5956,13 +6036,16 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) return rc; } -static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) +static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, + bool user_address, u32 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -5994,6 +6077,9 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; } else { rc = -EFAULT; } @@ -6001,13 +6087,16 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) return rc; } -static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) +static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, + bool user_address, u64 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -6043,6 +6132,9 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); } else { rc = -EFAULT; } @@ -6050,13 +6142,16 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) return rc; } -static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) +static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, + bool user_address, u64 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct gaudi_device *gaudi = hdev->asic_specific; - u64 hbm_bar_addr; + u64 hbm_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && @@ -6091,6 +6186,9 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) } if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; } else { rc = -EFAULT; } @@ -6098,6 +6196,164 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) return rc; } +static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, + u32 size_to_dma, dma_addr_t dma_addr) +{ + u32 err_cause, val; + u64 dma_offset; + int rc; + + dma_offset = dma_id * DMA_CORE_OFFSET; + + WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); + WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); + WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); + WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); + WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); + WREG32(mmDMA0_CORE_COMMIT + dma_offset, + (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); + + rc = hl_poll_timeout( + hdev, + mmDMA0_CORE_STS0 + dma_offset, + val, + ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), + 0, + 1000000); + + if (rc) { + dev_err(hdev->dev, + "DMA %d timed-out during reading of 0x%llx\n", + dma_id, addr); + return -EIO; + } + + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); + if (err_cause) { + dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); + + return -EIO; + } + + return 0; +} + +static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr) +{ + u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 dma_offset, qm_offset; + dma_addr_t dma_addr; + void *kernel_addr; + bool is_eng_idle; + int rc = 0, dma_id; + + kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( + hdev, SZ_2M, + &dma_addr, + GFP_KERNEL | __GFP_ZERO); + + if (!kernel_addr) + return -ENOMEM; + + mutex_lock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->disable_clock_gating(hdev); + + hdev->asic_funcs->hw_queues_lock(hdev); + + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; + dma_offset = dma_id * DMA_CORE_OFFSET; + qm_offset = dma_id * DMA_QMAN_OFFSET; + dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); + is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + + if (!is_eng_idle) { + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; + dma_offset = dma_id * DMA_CORE_OFFSET; + qm_offset = dma_id * DMA_QMAN_OFFSET; + dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); + is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + + if (!is_eng_idle) { + dev_err_ratelimited(hdev->dev, + "Can't read via DMA because it is BUSY\n"); + rc = -EAGAIN; + goto out; + } + } + + cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); + WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, + 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + + /* TODO: remove this by mapping the DMA temporary buffer to the MMU + * using the compute ctx ASID, if exists. If not, use the kernel ctx + * ASID + */ + WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); + if (err_cause) { + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); + } + + pos = 0; + size_left = size; + size_to_dma = SZ_2M; + + while (size_left > 0) { + + if (size_left < SZ_2M) + size_to_dma = size_left; + + rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, + dma_addr); + if (rc) + break; + + memcpy(blob_addr + pos, kernel_addr, size_to_dma); + + if (size_left <= SZ_2M) + break; + + pos += SZ_2M; + addr += SZ_2M; + size_left -= SZ_2M; + } + + /* TODO: remove this by mapping the DMA temporary buffer to the MMU + * using the compute ctx ASID, if exists. If not, use the kernel ctx + * ASID + */ + WREG32_AND(mmDMA0_CORE_PROT + dma_offset, + ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); + +out: + hdev->asic_funcs->hw_queues_unlock(hdev); + + hdev->asic_funcs->set_clock_gating(hdev); + + mutex_unlock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, + dma_addr); + + return rc; +} + static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -6851,7 +7107,8 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, } /* Write 1 clear errors */ - WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); + if (!hdev->stop_on_err) + WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); } arb_err_val = RREG32(arb_err_addr); @@ -7097,6 +7354,15 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, } } +static void gaudi_print_out_of_sync_info(struct hl_device *hdev, + struct cpucp_pkt_sync_err *sync_err) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; + + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", + sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); +} + static int gaudi_soft_reset_late_init(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -7371,18 +7637,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_MMU_DERR: gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_GIC500: case GAUDI_EVENT_AXI_ECC: case GAUDI_EVENT_L2_RAM_ECC: case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: gaudi_print_irq_info(hdev, event_type, false); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_HBM0_SPI_0: case GAUDI_EVENT_HBM1_SPI_0: @@ -7392,9 +7654,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, gaudi_hbm_read_interrupts(hdev, gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_HBM0_SPI_1: case GAUDI_EVENT_HBM1_SPI_1: @@ -7423,8 +7683,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, dev_err(hdev->dev, "hard reset required due to %s\n", gaudi_irq_map_table[event_type].name); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); + goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); } @@ -7446,8 +7705,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, dev_err(hdev->dev, "hard reset required due to %s\n", gaudi_irq_map_table[event_type].name); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); + goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); } @@ -7516,9 +7774,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_RAZWI_OR_ADC_SW: gaudi_print_irq_info(hdev, event_type, true); - if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); - break; + goto reset_device; case GAUDI_EVENT_TPC0_BMON_SPMU: case GAUDI_EVENT_TPC1_BMON_SPMU: @@ -7552,11 +7808,28 @@ static void gaudi_handle_eqe(struct hl_device *hdev, event_type, cause); break; + case GAUDI_EVENT_DEV_RESET_REQ: + gaudi_print_irq_info(hdev, event_type, false); + goto reset_device; + + case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: + gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + goto reset_device; + default: dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", event_type); break; } + + return; + +reset_device: + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, HL_RESET_HARD); + else + hl_fw_unmask_irq(hdev, event_type); } static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, @@ -7607,7 +7880,7 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, if (rc) { dev_err_ratelimited(hdev->dev, "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); } return rc; @@ -7656,7 +7929,7 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, if (rc) { dev_err_ratelimited(hdev->dev, "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); } return rc; @@ -7714,7 +7987,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); + rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); if (rc) return rc; @@ -7724,10 +7997,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); - if (hdev->card_type == cpucp_card_type_pci) - prop->max_power_default = MAX_POWER_DEFAULT_PCI; - else if (hdev->card_type == cpucp_card_type_pmc) - prop->max_power_default = MAX_POWER_DEFAULT_PMC; + set_default_power_values(hdev); hdev->max_power = prop->max_power_default; @@ -8549,6 +8819,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .debugfs_write32 = gaudi_debugfs_write32, .debugfs_read64 = gaudi_debugfs_read64, .debugfs_write64 = gaudi_debugfs_write64, + .debugfs_read_dma = gaudi_debugfs_read_dma, .add_device_attr = gaudi_add_device_attr, .handle_eqe = gaudi_handle_eqe, .set_pll_profile = gaudi_set_pll_profile, diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 50bb4ad570fd..5929be81ec23 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -47,6 +47,9 @@ #define MAX_POWER_DEFAULT_PCI 200000 /* 200W */ #define MAX_POWER_DEFAULT_PMC 350000 /* 350W */ +#define DC_POWER_DEFAULT_PCI 60000 /* 60W */ +#define DC_POWER_DEFAULT_PMC 60000 /* 60W */ + #define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */ #define TPC_ENABLED_MASK 0xFF diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c index 7085f45814ae..9a706c5980ef 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_security.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c @@ -9556,7 +9556,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC0_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC0_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2); @@ -10011,7 +10010,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC1_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC1_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2); @@ -10465,7 +10463,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC2_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC2_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2); @@ -10919,7 +10916,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC3_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC3_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2); @@ -11373,7 +11369,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC4_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC4_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2); @@ -11827,7 +11822,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC5_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC5_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2); @@ -12283,7 +12277,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC6_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC6_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2); @@ -12739,7 +12732,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC7_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC7_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ed566c52ccaa..e27338f4aad2 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -118,6 +118,29 @@ #define IS_MME_IDLE(mme_arch_sts) \ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) +/* + * this enum kept here for compatibility with old FW (in which each asic has + * unique PLL numbering + */ +enum goya_pll_index { + GOYA_CPU_PLL = 0, + GOYA_IC_PLL, + GOYA_MC_PLL, + GOYA_MME_PLL, + GOYA_PCI_PLL, + GOYA_EMMC_PLL, + GOYA_TPC_PLL, +}; + +static enum pll_index goya_pll_map[PLL_MAX] = { + [CPU_PLL] = GOYA_CPU_PLL, + [IC_PLL] = GOYA_IC_PLL, + [MC_PLL] = GOYA_MC_PLL, + [MME_PLL] = GOYA_MME_PLL, + [PCI_PLL] = GOYA_PCI_PLL, + [EMMC_PLL] = GOYA_EMMC_PLL, + [TPC_PLL] = GOYA_TPC_PLL, +}; static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", @@ -446,6 +469,7 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT; prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE; prop->max_power_default = MAX_POWER_DEFAULT; + prop->dc_power_default = DC_POWER_DEFAULT; prop->tpc_enabled_mask = TPC_ENABLED_MASK; prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; @@ -460,8 +484,6 @@ int goya_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < HL_MAX_DCORES ; i++) prop->first_available_cq[i] = USHRT_MAX; - /* disable fw security for now, set it in a later stage */ - prop->fw_security_disabled = true; prop->fw_security_status_valid = false; prop->hard_reset_done_by_fw = false; @@ -533,6 +555,11 @@ static int goya_init_iatu(struct hl_device *hdev) struct hl_outbound_pci_region outbound_region; int rc; + if (hdev->asic_prop.iatu_done_by_fw) { + hdev->asic_funcs->set_dma_mask_from_fw(hdev); + return 0; + } + /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */ inbound_region.mode = PCI_BAR_MATCH_MODE; inbound_region.bar = SRAM_CFG_BAR_ID; @@ -580,7 +607,7 @@ static int goya_early_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct pci_dev *pdev = hdev->pdev; - u32 val; + u32 fw_boot_status, val; int rc; rc = goya_get_fixed_properties(hdev); @@ -614,6 +641,23 @@ static int goya_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); + /* If FW security is enabled at this point it means no access to ELBI */ + if (!hdev->asic_prop.fw_security_disabled) { + hdev->asic_prop.iatu_done_by_fw = true; + goto pci_init; + } + + rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, + &fw_boot_status); + if (rc) + goto free_queue_props; + + /* Check whether FW is configuring iATU */ + if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && + (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) + hdev->asic_prop.iatu_done_by_fw = true; + +pci_init: rc = hl_pci_init(hdev); if (rc) goto free_queue_props; @@ -853,6 +897,9 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_specific = goya; + /* store legacy PLL map */ + hdev->legacy_pll_map = goya_pll_map; + /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); @@ -2429,7 +2476,7 @@ static int goya_init_cpu(struct hl_device *hdev) struct goya_device *goya = hdev->asic_specific; int rc; - if (!hdev->cpu_enable) + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) return 0; if (goya->hw_cap_initialized & HW_CAP_CPU) @@ -3221,7 +3268,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, parser->job_userptr_list, &userptr)) goto already_pinned; - userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); + userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); if (!userptr) return -ENOMEM; @@ -4101,12 +4148,15 @@ static void goya_clear_sm_regs(struct hl_device *hdev) * lead to undefined behavior and therefore, should be done with extreme care * */ -static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) +static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, + bool user_address, u32 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr; + u64 ddr_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { *val = RREG32(addr - CFG_BASE); @@ -4132,6 +4182,10 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) if (ddr_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { rc = -EFAULT; } @@ -4154,12 +4208,15 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) * lead to undefined behavior and therefore, should be done with extreme care * */ -static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) +static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, + bool user_address, u32 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr; + u64 ddr_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { WREG32(addr - CFG_BASE, val); @@ -4185,6 +4242,10 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) if (ddr_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { rc = -EFAULT; } @@ -4192,12 +4253,15 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) return rc; } -static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) +static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, + bool user_address, u64 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr; + u64 ddr_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { u32 val_l = RREG32(addr - CFG_BASE); u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); @@ -4227,6 +4291,10 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) if (ddr_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { rc = -EFAULT; } @@ -4234,12 +4302,15 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) return rc; } -static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) +static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, + bool user_address, u64 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 ddr_bar_addr; + u64 ddr_bar_addr, host_phys_end; int rc = 0; + host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { WREG32(addr - CFG_BASE, lower_32_bits(val)); WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); @@ -4267,6 +4338,10 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) if (ddr_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && + user_address && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { rc = -EFAULT; } @@ -4274,6 +4349,13 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) return rc; } +static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr) +{ + dev_err(hdev->dev, "Reading via DMA is unimplemented yet\n"); + return -EPERM; +} + static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; @@ -4401,6 +4483,8 @@ static const char *_goya_get_event_desc(u16 event_type) return "THERMAL_ENV_S"; case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: return "THERMAL_ENV_E"; + case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC: + return "QUEUE_OUT_OF_SYNC"; default: return "N/A"; } @@ -4483,6 +4567,9 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0; snprintf(desc, size, _goya_get_event_desc(event_type), index); break; + case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC: + snprintf(desc, size, _goya_get_event_desc(event_type)); + break; default: snprintf(desc, size, _goya_get_event_desc(event_type)); break; @@ -4534,6 +4621,15 @@ static void goya_print_mmu_error_info(struct hl_device *hdev) } } +static void goya_print_out_of_sync_info(struct hl_device *hdev, + struct cpucp_pkt_sync_err *sync_err) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ]; + + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", + sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); +} + static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, bool razwi) { @@ -4698,7 +4794,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: goya_print_irq_info(hdev, event_type, false); if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); break; case GOYA_ASYNC_EVENT_ID_PCIE_DEC: @@ -4754,6 +4850,15 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) goya_unmask_irq(hdev, event_type); break; + case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC: + goya_print_irq_info(hdev, event_type, false); + goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, HL_RESET_HARD); + else + hl_fw_unmask_irq(hdev, event_type); + break; + default: dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", event_type); @@ -5083,7 +5188,7 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, if (rc) { dev_err_ratelimited(hdev->dev, "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); } return rc; @@ -5134,7 +5239,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev, if (rc) { dev_err_ratelimited(hdev->dev, "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD); } return rc; @@ -5160,7 +5265,7 @@ int goya_cpucp_info_get(struct hl_device *hdev) if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); + rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); if (rc) return rc; @@ -5443,6 +5548,7 @@ static const struct hl_asic_funcs goya_funcs = { .debugfs_write32 = goya_debugfs_write32, .debugfs_read64 = goya_debugfs_read64, .debugfs_write64 = goya_debugfs_write64, + .debugfs_read_dma = goya_debugfs_read_dma, .add_device_attr = goya_add_device_attr, .handle_eqe = goya_handle_eqe, .set_pll_profile = goya_set_pll_profile, diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 23fe099ed218..ef8c6c8b5c8d 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -49,6 +49,8 @@ #define MAX_POWER_DEFAULT 200000 /* 200W */ +#define DC_POWER_DEFAULT 20000 /* 20W */ + #define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */ #define GOYA_DEFAULT_CARD_NAME "HL1000" diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index b77c1c16c32c..27cd0ba99aa3 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -11,6 +11,8 @@ #include <linux/types.h> #include <linux/if_ether.h> +#include "hl_boot_if.h" + #define NUM_HBM_PSEUDO_CH 2 #define NUM_HBM_CH_PER_DEV 8 #define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0 @@ -28,6 +30,17 @@ #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 +#define PLL_MAP_MAX_BITS 128 +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) + +/* + * info of the pkt queue pointers in the first async occurrence + */ +struct cpucp_pkt_sync_err { + __le32 pi; + __le32 ci; +}; + struct hl_eq_hbm_ecc_data { /* SERR counter */ __le32 sec_cnt; @@ -77,6 +90,7 @@ struct hl_eq_entry { struct hl_eq_ecc_data ecc_data; struct hl_eq_hbm_ecc_data hbm_ecc_data; struct hl_eq_sm_sei_data sm_sei_data; + struct cpucp_pkt_sync_err pkt_sync_err; __le64 data[7]; }; }; @@ -287,6 +301,30 @@ enum pq_init_status { * The result is composed of 4 outputs, each is 16-bit * frequency in MHz. * + * CPUCP_PACKET_POWER_GET + * Fetch the present power consumption of the device (Current * Voltage). + * + * CPUCP_PACKET_NIC_PFC_SET - + * Enable/Disable the NIC PFC feature. The packet's arguments specify the + * NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_FAULT_GET - + * Fetch the current indication for local/remote faults from the NIC MAC. + * The result is 32-bit value of the relevant register. + * + * CPUCP_PACKET_NIC_LPBK_SET - + * Enable/Disable the MAC loopback feature. The packet's arguments specify + * the NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_MAC_INIT - + * Configure the NIC MAC channels. The packet's arguments specify the + * NIC port and the speed. + * + * CPUCP_PACKET_MSI_INFO_SET - + * set the index number for each supported msi type going from + * host to device */ enum cpucp_packet_id { @@ -320,6 +358,13 @@ enum cpucp_packet_id { CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ CPUCP_PACKET_PLL_INFO_GET, /* internal */ + CPUCP_PACKET_NIC_STATUS, /* internal */ + CPUCP_PACKET_POWER_GET, /* internal */ + CPUCP_PACKET_NIC_PFC_SET, /* internal */ + CPUCP_PACKET_NIC_FAULT_GET, /* internal */ + CPUCP_PACKET_NIC_LPBK_SET, /* internal */ + CPUCP_PACKET_NIC_MAC_CFG, /* internal */ + CPUCP_PACKET_MSI_INFO_SET, /* internal */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 @@ -391,6 +436,12 @@ struct cpucp_unmask_irq_arr_packet { __le32 irqs[0]; }; +struct cpucp_array_data_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[0]; +}; + enum cpucp_packet_rc { cpucp_packet_success, cpucp_packet_invalid, @@ -459,6 +510,51 @@ enum cpucp_pll_type_attributes { cpucp_pll_pci, }; +/* + * MSI type enumeration table for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES). + * Changing the order of entries or removing entries is not allowed. + */ +enum cpucp_msi_type { + CPUCP_EVENT_QUEUE_MSI_TYPE, + CPUCP_NIC_PORT1_MSI_TYPE, + CPUCP_NIC_PORT3_MSI_TYPE, + CPUCP_NIC_PORT5_MSI_TYPE, + CPUCP_NIC_PORT7_MSI_TYPE, + CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_NUM_OF_MSI_TYPES +}; + +/* + * PLL enumeration table used for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table. + * Changing the order of entries or removing entries is not allowed. + */ +enum pll_index { + CPU_PLL = 0, + PCI_PLL = 1, + NIC_PLL = 2, + DMA_PLL = 3, + MESH_PLL = 4, + MME_PLL = 5, + TPC_PLL = 6, + IF_PLL = 7, + SRAM_PLL = 8, + NS_PLL = 9, + HBM_PLL = 10, + MSS_PLL = 11, + DDR_PLL = 12, + VID_PLL = 13, + BANK_PLL = 14, + MMU_PLL = 15, + IC_PLL = 16, + MC_PLL = 17, + EMMC_PLL = 18, + PLL_MAX +}; + /* Event Queue Packets */ struct eq_generic_event { @@ -470,7 +566,6 @@ struct eq_generic_event { */ #define CARD_NAME_MAX_LEN 16 -#define VERSION_MAX_LEN 128 #define CPUCP_MAX_SENSORS 128 #define CPUCP_MAX_NICS 128 #define CPUCP_LANES_PER_NIC 4 @@ -533,6 +628,7 @@ struct cpucp_security_info { * @dram_size: available DRAM size. * @card_name: card name that will be displayed in HWMON subsystem on the host * @sec_info: security information + * @pll_map: Bit map of supported PLLs for current ASIC version. */ struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; @@ -554,6 +650,7 @@ struct cpucp_info { __u8 pad[7]; struct cpucp_security_info sec_info; __le32 reserved6; + __u8 pll_map[PLL_MAP_LEN]; }; struct cpucp_mac_addr { diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index e87f5a98e193..e0a259e0495c 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -13,6 +13,8 @@ #define BOOT_FIT_SRAM_OFFSET 0x200000 +#define VERSION_MAX_LEN 128 + /* * CPU error bits in BOOT_ERROR registers * @@ -73,6 +75,9 @@ * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one * of the PLLs remains in REF_CLK * + * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support + * should be contacted. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -92,6 +97,7 @@ #define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << 10) #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11) #define CPU_BOOT_ERR0_PLL_FAIL (1 << 12) +#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13) #define CPU_BOOT_ERR0_ENABLED (1 << 31) /* @@ -170,6 +176,20 @@ * is set to the PI counter. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication + * protocol is enabled. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. + * This bit if set, means the iATU has been + * configured and is ready for use. + * Initialized in: ppboot + * + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. + * FW sends to host a bitmap of supported + * PLLs. + * Initialized in: linux + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -195,6 +215,9 @@ #define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) #define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14) #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15) +#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16) +#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17) +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) enum cpu_boot_status { @@ -230,6 +253,7 @@ enum kmd_msg { KMD_MSG_SKIP_BMC, RESERVED, KMD_MSG_RST_DEV, + KMD_MSG_LAST }; enum cpu_msg_status { @@ -238,4 +262,199 @@ enum cpu_msg_status { CPU_MSG_ERR, }; +/* communication registers mapping - consider ABI when changing */ +struct cpu_dyn_regs { + uint32_t cpu_pq_base_addr_low; + uint32_t cpu_pq_base_addr_high; + uint32_t cpu_pq_length; + uint32_t cpu_pq_init_status; + uint32_t cpu_eq_base_addr_low; + uint32_t cpu_eq_base_addr_high; + uint32_t cpu_eq_length; + uint32_t cpu_eq_ci; + uint32_t cpu_cq_base_addr_low; + uint32_t cpu_cq_base_addr_high; + uint32_t cpu_cq_length; + uint32_t cpu_pf_pq_pi; + uint32_t cpu_boot_dev_sts0; + uint32_t cpu_boot_dev_sts1; + uint32_t cpu_boot_err0; + uint32_t cpu_boot_err1; + uint32_t cpu_boot_status; + uint32_t fw_upd_sts; + uint32_t fw_upd_cmd; + uint32_t fw_upd_pending_sts; + uint32_t fuse_ver_offset; + uint32_t preboot_ver_offset; + uint32_t uboot_ver_offset; + uint32_t hw_state; + uint32_t kmd_msg_to_cpu; + uint32_t cpu_cmd_status_to_host; + uint32_t reserved1[32]; /* reserve for future use */ +}; + +/* HCDM - Habana Communications Descriptor Magic */ +#define HL_COMMS_DESC_MAGIC 0x4843444D +#define HL_COMMS_DESC_VER 1 + +/* this is the comms descriptor header - meta data */ +struct comms_desc_header { + uint32_t magic; /* magic for validation */ + uint32_t crc32; /* CRC32 of the descriptor w/o header */ + uint16_t size; /* size of the descriptor w/o header */ + uint8_t version; /* descriptor version */ + uint8_t reserved[5]; /* pad to 64 bit */ +}; + +/* this is the main FW descriptor - consider ABI when changing */ +struct lkd_fw_comms_desc { + struct comms_desc_header header; + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + uint64_t img_addr; /* address for next FW component load */ +}; + +/* + * LKD commands: + * + * COMMS_NOOP Used to clear the command register and no actual + * command is send. + * + * COMMS_CLR_STS Clear status command - FW should clear the + * status register. Used for synchronization + * between the commands as part of the race free + * protocol. + * + * COMMS_RST_STATE Reset the current communication state which is + * kept by FW for proper responses. + * Should be used in the beginning of the + * communication cycle to clean any leftovers from + * previous communication attempts. + * + * COMMS_PREP_DESC Prepare descriptor for setting up the + * communication and other dynamic data: + * struct lkd_fw_comms_desc. + * This command has a parameter stating the next FW + * component size, so the FW can actually prepare a + * space for it and in the status response provide + * the descriptor offset. The Offset of the next FW + * data component is a part of the descriptor + * structure. + * + * COMMS_DATA_RDY The FW data has been uploaded and is ready for + * validation. + * + * COMMS_EXEC Execute the next FW component. + * + * COMMS_RST_DEV Reset the device. + * + * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure + * devices. + * + * COMMS_SKIP_BMC Perform actions required for BMC-less servers. + * Do not wait for BMC response. + * + * COMMS_LOW_PLL_OPP Initialize PLLs for low OPP. + */ +enum comms_cmd { + COMMS_NOOP = 0, + COMMS_CLR_STS = 1, + COMMS_RST_STATE = 2, + COMMS_PREP_DESC = 3, + COMMS_DATA_RDY = 4, + COMMS_EXEC = 5, + COMMS_RST_DEV = 6, + COMMS_GOTO_WFE = 7, + COMMS_SKIP_BMC = 8, + COMMS_LOW_PLL_OPP = 9, + COMMS_INVLD_LAST +}; + +#define COMMS_COMMAND_SIZE_SHIFT 0 +#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF +#define COMMS_COMMAND_CMD_SHIFT 27 +#define COMMS_COMMAND_CMD_MASK 0xF8000000 + +/* + * LKD command to FW register structure + * @size - FW component size + * @cmd - command from enum comms_cmd + */ +struct comms_command { + union { /* bit fields are only for FW use */ + struct { + unsigned int size :25; /* 32MB max. */ + unsigned int reserved :2; + enum comms_cmd cmd :5; /* 32 commands */ + }; + unsigned int val; + }; +}; + +/* + * FW status + * + * COMMS_STS_NOOP Used to clear the status register and no actual + * status is provided. + * + * COMMS_STS_ACK Command has been received and recognized. + * + * COMMS_STS_OK Command execution has finished successfully. + * + * COMMS_STS_ERR Command execution was unsuccessful and resulted + * in error. + * + * COMMS_STS_VALID_ERR FW validation has failed. + * + * COMMS_STS_TIMEOUT_ERR Command execution has timed out. + */ +enum comms_sts { + COMMS_STS_NOOP = 0, + COMMS_STS_ACK = 1, + COMMS_STS_OK = 2, + COMMS_STS_ERR = 3, + COMMS_STS_VALID_ERR = 4, + COMMS_STS_TIMEOUT_ERR = 5, + COMMS_STS_INVLD_LAST +}; + +/* RAM types for FW components loading - defines the base address */ +enum comms_ram_types { + COMMS_SRAM = 0, + COMMS_DRAM = 1, +}; + +#define COMMS_STATUS_OFFSET_SHIFT 0 +#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF +#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2 +#define COMMS_STATUS_RAM_TYPE_SHIFT 26 +#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000 +#define COMMS_STATUS_STATUS_SHIFT 28 +#define COMMS_STATUS_STATUS_MASK 0xF0000000 + +/* + * FW status to LKD register structure + * @offset - an offset from the base of the ram_type shifted right by + * 2 bits (always aligned to 32 bits). + * Allows a maximum addressable offset of 256MB from RAM base. + * Example: for real offset in RAM of 0x800000 (8MB), the value + * in offset field is (0x800000 >> 2) = 0x200000. + * @ram_type - the RAM type that should be used for offset from + * enum comms_ram_types + * @status - status from enum comms_sts + */ +struct comms_status { + union { /* bit fields are only for FW use */ + struct { + unsigned int offset :26; + unsigned int ram_type :2; + enum comms_sts status :4; /* 16 statuses */ + }; + unsigned int val; + }; +}; + #endif /* HL_BOOT_IF_H */ diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi.h b/drivers/misc/habanalabs/include/gaudi/gaudi.h index f9ea897ae42c..ffae107b1693 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi.h @@ -38,7 +38,7 @@ #define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */ -#define MAX_ASID 1024 +#define MAX_ASID 2 #define PROT_BITS_OFFS 0xF80 diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index 49335e8334b4..e8651abf84f2 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -303,6 +303,8 @@ enum gaudi_async_event_id { GAUDI_EVENT_NIC3_QP1 = 619, GAUDI_EVENT_NIC4_QP0 = 620, GAUDI_EVENT_NIC4_QP1 = 621, + GAUDI_EVENT_DEV_RESET_REQ = 646, + GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647, GAUDI_EVENT_FIX_POWER_ENV_S = 658, GAUDI_EVENT_FIX_POWER_ENV_E = 659, GAUDI_EVENT_FIX_THERMAL_ENV_S = 660, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h index 737176ba06fb..3dc79c131805 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h @@ -301,10 +301,10 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 274, .cpu_id = 128, .valid = 0, .name = "" }, { .fc_id = 275, .cpu_id = 128, .valid = 0, .name = "" }, { .fc_id = 276, .cpu_id = 128, .valid = 0, .name = "" }, - { .fc_id = 277, .cpu_id = 129, .valid = 0, .name = "" }, - { .fc_id = 278, .cpu_id = 129, .valid = 0, .name = "" }, - { .fc_id = 279, .cpu_id = 129, .valid = 0, .name = "" }, - { .fc_id = 280, .cpu_id = 129, .valid = 0, .name = "" }, + { .fc_id = 277, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_0" }, + { .fc_id = 278, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_1" }, + { .fc_id = 279, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_2" }, + { .fc_id = 280, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_3" }, { .fc_id = 281, .cpu_id = 130, .valid = 0, .name = "" }, { .fc_id = 282, .cpu_id = 131, .valid = 0, .name = "" }, { .fc_id = 283, .cpu_id = 132, .valid = 0, .name = "" }, @@ -670,18 +670,29 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" }, { .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" }, { .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" }, - { .fc_id = 646, .cpu_id = 495, .valid = 0, .name = "" }, - { .fc_id = 647, .cpu_id = 496, .valid = 0, .name = "" }, - { .fc_id = 648, .cpu_id = 497, .valid = 0, .name = "" }, - { .fc_id = 649, .cpu_id = 498, .valid = 0, .name = "" }, - { .fc_id = 650, .cpu_id = 499, .valid = 0, .name = "" }, - { .fc_id = 651, .cpu_id = 500, .valid = 0, .name = "" }, - { .fc_id = 652, .cpu_id = 501, .valid = 0, .name = "" }, - { .fc_id = 653, .cpu_id = 502, .valid = 0, .name = "" }, - { .fc_id = 654, .cpu_id = 503, .valid = 0, .name = "" }, - { .fc_id = 655, .cpu_id = 504, .valid = 0, .name = "" }, - { .fc_id = 656, .cpu_id = 505, .valid = 0, .name = "" }, - { .fc_id = 657, .cpu_id = 506, .valid = 0, .name = "" }, + { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "DEV_RESET_REQ" }, + { .fc_id = 647, .cpu_id = 496, .valid = 1, + .name = "PKT_QUEUE_OUT_SYNC" }, + { .fc_id = 648, .cpu_id = 497, .valid = 1, + .name = "STATUS_NIC0_ENG0" }, + { .fc_id = 649, .cpu_id = 498, .valid = 1, + .name = "STATUS_NIC0_ENG1" }, + { .fc_id = 650, .cpu_id = 499, .valid = 1, + .name = "STATUS_NIC1_ENG0" }, + { .fc_id = 651, .cpu_id = 500, .valid = 1, + .name = "STATUS_NIC1_ENG1" }, + { .fc_id = 652, .cpu_id = 501, .valid = 1, + .name = "STATUS_NIC2_ENG0" }, + { .fc_id = 653, .cpu_id = 502, .valid = 1, + .name = "STATUS_NIC2_ENG1" }, + { .fc_id = 654, .cpu_id = 503, .valid = 1, + .name = "STATUS_NIC3_ENG0" }, + { .fc_id = 655, .cpu_id = 504, .valid = 1, + .name = "STATUS_NIC3_ENG1" }, + { .fc_id = 656, .cpu_id = 505, .valid = 1, + .name = "STATUS_NIC4_ENG0" }, + { .fc_id = 657, .cpu_id = 506, .valid = 1, + .name = "STATUS_NIC4_ENG1" }, { .fc_id = 658, .cpu_id = 507, .valid = 1, .name = "FIX_POWER_ENV_S" }, { .fc_id = 659, .cpu_id = 508, .valid = 1, .name = "FIX_POWER_ENV_E" }, { .fc_id = 660, .cpu_id = 509, .valid = 1, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h index 25acd9e87e20..a9f51f9f9e92 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h @@ -20,20 +20,6 @@ #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ #define LINUX_FW_OFFSET 0x800000 /* 8MB in HBM */ -enum gaudi_pll_index { - CPU_PLL = 0, - PCI_PLL, - SRAM_PLL, - HBM_PLL, - NIC_PLL, - DMA_PLL, - MESH_PLL, - MME_PLL, - TPC_PLL, - IF_PLL, - PLL_MAX -}; - enum gaudi_nic_axi_error { RXB, RXE, diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h index 43d241891e45..1b4ca435021d 100644 --- a/drivers/misc/habanalabs/include/goya/goya.h +++ b/drivers/misc/habanalabs/include/goya/goya.h @@ -30,7 +30,7 @@ #define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */ -#define MAX_ASID 1024 +#define MAX_ASID 2 #define PROT_BITS_OFFS 0xF80 diff --git a/drivers/misc/habanalabs/include/goya/goya_async_events.h b/drivers/misc/habanalabs/include/goya/goya_async_events.h index 5fb92362fc5f..09081401cb1d 100644 --- a/drivers/misc/habanalabs/include/goya/goya_async_events.h +++ b/drivers/misc/habanalabs/include/goya/goya_async_events.h @@ -188,6 +188,7 @@ enum goya_async_event_id { GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485, GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486, GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487, + GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC = 506, GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S = 507, GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E = 508, GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S = 509, diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h index daf8d8cd14be..bc05f86c73ac 100644 --- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h +++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h @@ -15,17 +15,6 @@ #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ #define LINUX_FW_OFFSET 0x800000 /* 8MB in DDR */ -enum goya_pll_index { - CPU_PLL = 0, - IC_PLL, - MC_PLL, - MME_PLL, - PCI_PLL, - EMMC_PLL, - TPC_PLL, - PLL_MAX -}; - #define GOYA_PLL_FREQ_LOW 50000000 /* 50 MHz */ #endif /* GOYA_FW_IF_H */ |