diff options
Diffstat (limited to 'drivers/misc/habanalabs')
28 files changed, 2031 insertions, 889 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 8132a84698d5..3c0ae07a2d80 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -57,7 +57,7 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) } va_block->start = virt_addr; - va_block->end = virt_addr + page_size; + va_block->end = virt_addr + page_size - 1; va_block->size = page_size; list_add_tail(&va_block->node, &cb->va_block_list); } @@ -80,13 +80,13 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) offset += va_block->size; } - hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR); + rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV); mutex_unlock(&ctx->mmu_lock); cb->is_mmu_mapped = true; - return 0; + return rc; err_va_umap: list_for_each_entry(va_block, &cb->va_block_list, node) { @@ -97,7 +97,7 @@ err_va_umap: offset -= va_block->size; } - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); @@ -126,7 +126,7 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) "Failed to unmap CB's va 0x%llx\n", va_block->start); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); @@ -250,8 +250,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, * Can't use generic function to check this because of special case * where we create a CB as part of the reset process */ - if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) && - (ctx_id != HL_KERNEL_ASID_ID))) { + if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) { dev_warn_ratelimited(hdev->dev, "Device is disabled or in reset. 
Can't create new CBs\n"); rc = -EBUSY; @@ -380,8 +379,9 @@ int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) } static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u64 cb_handle, u32 *usage_cnt) + u64 cb_handle, u32 flags, u32 *usage_cnt, u64 *device_va) { + struct hl_vm_va_block *va_block; struct hl_cb *cb; u32 handle; int rc = 0; @@ -402,7 +402,18 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, goto out; } - *usage_cnt = atomic_read(&cb->cs_cnt); + if (flags & HL_CB_FLAGS_GET_DEVICE_VA) { + va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node); + if (va_block) { + *device_va = va_block->start; + } else { + dev_err(hdev->dev, "CB is not mapped to the device's MMU\n"); + rc = -EINVAL; + goto out; + } + } else { + *usage_cnt = atomic_read(&cb->cs_cnt); + } out: spin_unlock(&mgr->cb_lock); @@ -414,7 +425,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) union hl_cb_args *args = data; struct hl_device *hdev = hpriv->hdev; enum hl_device_status status; - u64 handle = 0; + u64 handle = 0, device_va; u32 usage_cnt = 0; int rc; @@ -450,13 +461,20 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) case HL_CB_OP_INFO: rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle, - &usage_cnt); - memset(args, 0, sizeof(*args)); - args->out.usage_cnt = usage_cnt; + args->in.flags, + &usage_cnt, + &device_va); + + memset(&args->out, 0, sizeof(args->out)); + + if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA) + args->out.device_va = device_va; + else + args->out.usage_cnt = usage_cnt; break; default: - rc = -ENOTTY; + rc = -EINVAL; break; } diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 4c8000fd246c..0a4ef13d9ac4 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 
2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -533,8 +533,8 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) mcs_compl->stream_master_qid_map)) { /* extract the timestamp only of first completed CS */ if (!mcs_compl->timestamp) - mcs_compl->timestamp = - ktime_to_ns(fence->timestamp); + mcs_compl->timestamp = ktime_to_ns(fence->timestamp); + complete_all(&mcs_compl->completion); /* @@ -733,6 +733,14 @@ static void cs_timedout(struct work_struct *work) hdev = cs->ctx->hdev; + /* Save only the first CS timeout parameters */ + rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1); + if (!rc) { + hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; + hdev->last_error.cs_timeout_timestamp = ktime_get(); + hdev->last_error.cs_timeout_seq = cs->sequence; + } + switch (cs->type) { case CS_TYPE_SIGNAL: dev_err(hdev->dev, @@ -767,9 +775,9 @@ static void cs_timedout(struct work_struct *work) if (likely(!skip_reset_on_timeout)) { if (hdev->reset_on_lockup) - hl_device_reset(hdev, HL_RESET_TDR); + hl_device_reset(hdev, HL_DRV_RESET_TDR); else - hdev->needs_reset = true; + hdev->reset_info.needs_reset = true; } } @@ -806,7 +814,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); cs->timeout_jiffies = timeout; cs->skip_reset_on_timeout = - hdev->skip_reset_on_timeout || + hdev->reset_info.skip_reset_on_timeout || !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT); cs->submission_time_jiffies = jiffies; INIT_LIST_HEAD(&cs->job_list); @@ -1131,9 +1139,6 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) enum hl_cs_type cs_type; if (!hl_device_operational(hdev, &status)) { - dev_warn_ratelimited(hdev->dev, - "Device is %s. 
Can't submit new CS\n", - hdev->status[status]); return -EBUSY; } @@ -1262,7 +1267,8 @@ static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid) static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, u32 num_chunks, u64 *cs_seq, u32 flags, - u32 encaps_signals_handle, u32 timeout) + u32 encaps_signals_handle, u32 timeout, + u16 *signal_initial_sob_count) { bool staged_mid, int_queues_only = true; struct hl_device *hdev = hpriv->hdev; @@ -1429,6 +1435,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, goto free_cs_object; } + *signal_initial_sob_count = cs->initial_sob_count; + rc = HL_CS_STATUS_SUCCESS; goto put_cs; @@ -1457,6 +1465,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, int rc = 0, do_ctx_switch; void __user *chunks; u32 num_chunks, tmp; + u16 sob_count; int ret; do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); @@ -1497,7 +1506,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, rc = 0; } else { rc = cs_ioctl_default(hpriv, chunks, num_chunks, - cs_seq, 0, 0, hdev->timeout_jiffies); + cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); } mutex_unlock(&hpriv->restore_phase_mutex); @@ -1813,6 +1822,9 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, } handle->count = count; + + hl_ctx_get(hdev, hpriv->ctx); + handle->ctx = hpriv->ctx; mgr = &hpriv->ctx->sig_mgr; spin_lock(&mgr->lock); @@ -1822,7 +1834,7 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, if (hdl_id < 0) { dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); rc = -EINVAL; - goto out; + goto put_ctx; } handle->id = hdl_id; @@ -1875,7 +1887,10 @@ remove_idr: idr_remove(&mgr->handles, hdl_id); spin_unlock(&mgr->lock); +put_ctx: + hl_ctx_put(handle->ctx); kfree(handle); + out: return rc; } @@ -1935,6 +1950,7 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) /* Release the id and 
free allocated memory of the handle */ idr_remove(&mgr->handles, handle_id); + hl_ctx_put(encaps_sig_hdl->ctx); kfree(encaps_sig_hdl); } else { rc = -EINVAL; @@ -1948,7 +1964,8 @@ out: static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, void __user *chunks, u32 num_chunks, - u64 *cs_seq, u32 flags, u32 timeout) + u64 *cs_seq, u32 flags, u32 timeout, + u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count) { struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL; bool handle_found = false, is_wait_cs = false, @@ -2180,6 +2197,9 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, goto free_cs_object; } + *signal_sob_addr_offset = cs->sob_addr_offset; + *signal_initial_sob_count = cs->initial_sob_count; + rc = HL_CS_STATUS_SUCCESS; if (is_wait_cs) wait_cs_submitted = true; @@ -2210,6 +2230,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) void __user *chunks; u32 num_chunks, flags, timeout, signals_count = 0, sob_addr = 0, handle_id = 0; + u16 sob_initial_count = 0; int rc; rc = hl_cs_sanity_checks(hpriv, args); @@ -2240,7 +2261,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) case CS_TYPE_WAIT: case CS_TYPE_COLLECTIVE_WAIT: rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, - &cs_seq, args->in.cs_flags, timeout); + &cs_seq, args->in.cs_flags, timeout, + &sob_addr, &sob_initial_count); break; case CS_RESERVE_SIGNALS: rc = cs_ioctl_reserve_signals(hpriv, @@ -2256,20 +2278,33 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, args->in.cs_flags, args->in.encaps_sig_handle_id, - timeout); + timeout, &sob_initial_count); break; } out: if (rc != -EAGAIN) { memset(args, 0, sizeof(*args)); - if (cs_type == CS_RESERVE_SIGNALS) { + switch (cs_type) { + case CS_RESERVE_SIGNALS: args->out.handle_id = handle_id; args->out.sob_base_addr_offset = sob_addr; args->out.count = signals_count; - } else { + break; + case CS_TYPE_SIGNAL: + 
args->out.sob_base_addr_offset = sob_addr; + args->out.sob_count_before_submission = sob_initial_count; + args->out.seq = cs_seq; + break; + case CS_TYPE_DEFAULT: + args->out.sob_count_before_submission = sob_initial_count; args->out.seq = cs_seq; + break; + default: + args->out.seq = cs_seq; + break; } + args->out.status = rc; } @@ -2334,16 +2369,18 @@ static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence * hl_cs_poll_fences - iterate CS fences to check for CS completion * * @mcs_data: multi-CS internal data + * @mcs_compl: multi-CS completion structure * * @return 0 on success, otherwise non 0 error code * * The function iterates on all CS sequence in the list and set bit in * completion_bitmap for each completed CS. - * while iterating, the function can extracts the stream map to be later - * used by the waiting function. - * this function shall be called after taking context ref + * While iterating, the function sets the stream map of each fence in the fence + * array in the completion QID stream map to be used by CSs to perform + * completion to the multi-CS context. + * This function shall be called after taking context ref */ -static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) +static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl) { struct hl_fence **fence_ptr = mcs_data->fence_arr; struct hl_device *hdev = mcs_data->ctx->hdev; @@ -2360,6 +2397,15 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) return rc; /* + * re-initialize the completion here to handle 2 possible cases: + * 1. CS will complete the multi-CS prior clearing the completion. in which + * case the fence iteration is guaranteed to catch the CS completion. + * 2. the completion will occur after re-init of the completion. + * in which case we will wake up immediately in wait_for_completion. 
+ */ + reinit_completion(&mcs_compl->completion); + + /* * set to maximum time to verify timestamp is valid: if at the end * this value is maintained- no timestamp was updated */ @@ -2370,6 +2416,21 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) struct hl_fence *fence = *fence_ptr; /* + * In order to prevent case where we wait until timeout even though a CS associated + * with the multi-CS actually completed we do things in the below order: + * 1. for each fence set it's QID map in the multi-CS completion QID map. This way + * any CS can, potentially, complete the multi CS for the specific QID (note + * that once completion is initialized, calling complete* and then wait on the + * completion will cause it to return at once) + * 2. only after allowing multi-CS completion for the specific QID we check whether + * the specific CS already completed (and thus the wait for completion part will + * be skipped). if the CS not completed it is guaranteed that completing CS will + * wake up the completion. + */ + if (fence) + mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; + + /* * function won't sleep as it is called with timeout 0 (i.e. * poll the fence) */ @@ -2384,9 +2445,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) switch (status) { case CS_WAIT_STATUS_BUSY: - /* CS did not finished, keep waiting on its QID*/ - mcs_data->stream_master_qid_map |= - fence->stream_master_qid_map; + /* CS did not finished, QID to wait on already stored */ break; case CS_WAIT_STATUS_COMPLETED: /* @@ -2394,9 +2453,19 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) * returns to user indicating CS completed before it finished * all of its mcs handling, to avoid race the next time the * user waits for mcs. 
+ * note: when reaching this case fence is definitely not NULL + * but NULL check was added to overcome static analysis */ - if (!fence->mcs_handling_done) + if (fence && !fence->mcs_handling_done) { + /* + * in case multi CS is completed but MCS handling not done + * we "complete" the multi CS to prevent it from waiting + * until time-out and the "multi-CS handling done" will have + * another chance at the next iteration + */ + complete_all(&mcs_compl->completion); break; + } mcs_data->completion_bitmap |= BIT(i); /* @@ -2456,6 +2525,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, return rc; } +static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs) +{ + if (usecs <= U32_MAX) + return usecs_to_jiffies(usecs); + + /* + * If the value in nanoseconds is larger than 64 bit, use the largest + * 64 bit value. + */ + if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC))) + return nsecs_to_jiffies(U64_MAX); + + return nsecs_to_jiffies(usecs * NSEC_PER_USEC); +} + /* * hl_wait_multi_cs_completion_init - init completion structure * @@ -2469,9 +2553,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, * the function gets the first available completion (by marking it "used") * and initialize its values. */ -static struct multi_cs_completion *hl_wait_multi_cs_completion_init( - struct hl_device *hdev, - u8 stream_master_bitmap) +static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev) { struct multi_cs_completion *mcs_compl; int i; @@ -2483,8 +2565,11 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init( if (!mcs_compl->used) { mcs_compl->used = 1; mcs_compl->timestamp = 0; - mcs_compl->stream_master_qid_map = stream_master_bitmap; - reinit_completion(&mcs_compl->completion); + /* + * init QID map to 0 to avoid completion by CSs. 
the actual QID map + * to multi-CS CSs will be set incrementally at a later stage + */ + mcs_compl->stream_master_qid_map = 0; spin_unlock(&mcs_compl->lock); break; } @@ -2492,8 +2577,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init( } if (i == MULTI_CS_MAX_USER_CTX) { - dev_err(hdev->dev, - "no available multi-CS completion structure\n"); + dev_err(hdev->dev, "no available multi-CS completion structure\n"); return ERR_PTR(-ENOMEM); } return mcs_compl; @@ -2524,27 +2608,18 @@ static void hl_wait_multi_cs_completion_fini( * * @return 0 on success, otherwise non 0 error code */ -static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data) +static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data, + struct multi_cs_completion *mcs_compl) { - struct hl_device *hdev = mcs_data->ctx->hdev; - struct multi_cs_completion *mcs_compl; long completion_rc; - mcs_compl = hl_wait_multi_cs_completion_init(hdev, - mcs_data->stream_master_qid_map); - if (IS_ERR(mcs_compl)) - return PTR_ERR(mcs_compl); - - completion_rc = wait_for_completion_interruptible_timeout( - &mcs_compl->completion, - usecs_to_jiffies(mcs_data->timeout_us)); + completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, + mcs_data->timeout_jiffies); /* update timestamp */ if (completion_rc > 0) mcs_data->timestamp = mcs_compl->timestamp; - hl_wait_multi_cs_completion_fini(mcs_compl); - mcs_data->wait_status = completion_rc; return 0; @@ -2577,6 +2652,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev) */ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) { + struct multi_cs_completion *mcs_compl; struct hl_device *hdev = hpriv->hdev; struct multi_cs_data mcs_data = {0}; union hl_wait_cs_args *args = data; @@ -2631,9 +2707,17 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) hl_ctx_get(hdev, ctx); + /* wait (with timeout) for the first CS to be completed */ + mcs_data.timeout_jiffies = 
hl_usecs64_to_jiffies(args->in.timeout_us); + mcs_compl = hl_wait_multi_cs_completion_init(hdev); + if (IS_ERR(mcs_compl)) { + rc = PTR_ERR(mcs_compl); + goto put_ctx; + } + /* poll all CS fences, extract timestamp */ mcs_data.update_ts = true; - rc = hl_cs_poll_fences(&mcs_data); + rc = hl_cs_poll_fences(&mcs_data, mcs_compl); /* * skip wait for CS completion when one of the below is true: * - an error on the poll function @@ -2641,34 +2725,39 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) * - the user called ioctl with timeout 0 */ if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) - goto put_ctx; + goto completion_fini; - /* wait (with timeout) for the first CS to be completed */ - mcs_data.timeout_us = args->in.timeout_us; - rc = hl_wait_multi_cs_completion(&mcs_data); - if (rc) - goto put_ctx; + while (true) { + rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl); + if (rc || (mcs_data.wait_status == 0)) + break; - if (mcs_data.wait_status > 0) { /* * poll fences once again to update the CS map. * no timestamp should be updated this time. */ mcs_data.update_ts = false; - rc = hl_cs_poll_fences(&mcs_data); + rc = hl_cs_poll_fences(&mcs_data, mcs_compl); + + if (mcs_data.completion_bitmap) + break; /* * if hl_wait_multi_cs_completion returned before timeout (i.e. - * it got a completion) we expect to see at least one CS - * completed after the poll function. 
+ * it got a completion) it either got completed by CS in the multi CS list + * (in which case the indication will be non empty completion_bitmap) or it + * got completed by CS submitted to one of the shared stream master but + * not in the multi CS list (in which case we should wait again but modify + * the timeout and set timestamp as zero to let a CS related to the current + * multi-CS set a new, relevant, timestamp) */ - if (!mcs_data.completion_bitmap) { - dev_warn_ratelimited(hdev->dev, - "Multi-CS got completion on wait but no CS completed\n"); - rc = -EFAULT; - } + mcs_data.timeout_jiffies = mcs_data.wait_status; + mcs_compl->timestamp = 0; } +completion_fini: + hl_wait_multi_cs_completion_fini(mcs_compl); + put_ctx: hl_ctx_put(ctx); kfree(fence_arr); @@ -2699,7 +2788,7 @@ free_seq_arr: } /* update if some CS was gone */ - if (mcs_data.timestamp) + if (!mcs_data.timestamp) args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; } else { args->out.status = HL_WAIT_CS_STATUS_BUSY; @@ -2766,37 +2855,129 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) } static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, - u32 timeout_us, u64 user_address, - u64 target_value, u16 interrupt_offset, - enum hl_cs_wait_status *status, + struct hl_cb_mgr *cb_mgr, u64 timeout_us, + u64 cq_counters_handle, u64 cq_counters_offset, + u64 target_value, struct hl_user_interrupt *interrupt, + u32 *status, u64 *timestamp) { struct hl_user_pending_interrupt *pend; - struct hl_user_interrupt *interrupt; unsigned long timeout, flags; - u64 completion_value; long completion_rc; + struct hl_cb *cb; int rc = 0; + u32 handle; - if (timeout_us == U32_MAX) - timeout = timeout_us; - else - timeout = usecs_to_jiffies(timeout_us); + timeout = hl_usecs64_to_jiffies(timeout_us); hl_ctx_get(hdev, ctx); - pend = kmalloc(sizeof(*pend), GFP_KERNEL); + cq_counters_handle >>= PAGE_SHIFT; + handle = (u32) cq_counters_handle; + + cb = hl_cb_get(hdev, cb_mgr, handle); + if 
(!cb) { + hl_ctx_put(ctx); + return -EINVAL; + } + + pend = kzalloc(sizeof(*pend), GFP_KERNEL); if (!pend) { + hl_cb_put(cb); hl_ctx_put(ctx); return -ENOMEM; } hl_fence_init(&pend->fence, ULONG_MAX); - if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID) - interrupt = &hdev->common_user_interrupt; - else - interrupt = &hdev->user_interrupt[interrupt_offset]; + pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset; + pend->cq_target_value = target_value; + + /* We check for completion value as interrupt could have been received + * before we added the node to the wait list + */ + if (*pend->cq_kernel_addr >= target_value) { + *status = HL_WAIT_CS_STATUS_COMPLETED; + /* There was no interrupt, we assume the completion is now. */ + pend->fence.timestamp = ktime_get(); + } + + if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) + goto set_timestamp; + + /* Add pending user interrupt to relevant list for the interrupt + * handler to monitor + */ + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + + /* Wait for interrupt handler to signal completion */ + completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, + timeout); + if (completion_rc > 0) { + *status = HL_WAIT_CS_STATUS_COMPLETED; + } else { + if (completion_rc == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for interrupt ID %d\n", + interrupt->interrupt_id); + rc = -EINTR; + *status = HL_WAIT_CS_STATUS_ABORTED; + } else { + if (pend->fence.error == -EIO) { + dev_err_ratelimited(hdev->dev, + "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", + pend->fence.error); + rc = -EIO; + *status = HL_WAIT_CS_STATUS_ABORTED; + } else { + dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n", + interrupt->interrupt_id); + rc = -ETIMEDOUT; + } + 
*status = HL_WAIT_CS_STATUS_BUSY; + } + } + + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_del(&pend->wait_list_node); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + +set_timestamp: + *timestamp = ktime_to_ns(pend->fence.timestamp); + + kfree(pend); + hl_cb_put(cb); + hl_ctx_put(ctx); + + return rc; +} + +static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx, + u64 timeout_us, u64 user_address, + u64 target_value, struct hl_user_interrupt *interrupt, + + u32 *status, + u64 *timestamp) +{ + struct hl_user_pending_interrupt *pend; + unsigned long timeout, flags; + u64 completion_value; + long completion_rc; + int rc = 0; + + timeout = hl_usecs64_to_jiffies(timeout_us); + + hl_ctx_get(hdev, ctx); + + pend = kzalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + hl_ctx_put(ctx); + return -ENOMEM; + } + + hl_fence_init(&pend->fence, ULONG_MAX); /* Add pending user interrupt to relevant list for the interrupt * handler to monitor @@ -2815,13 +2996,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, } if (completion_value >= target_value) { - *status = CS_WAIT_STATUS_COMPLETED; + *status = HL_WAIT_CS_STATUS_COMPLETED; /* There was no interrupt, we assume the completion is now. 
*/ pend->fence.timestamp = ktime_get(); - } else - *status = CS_WAIT_STATUS_BUSY; + } else { + *status = HL_WAIT_CS_STATUS_BUSY; + } - if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) + if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) goto remove_pending_user_interrupt; wait_again: @@ -2850,7 +3032,13 @@ wait_again: } if (completion_value >= target_value) { - *status = CS_WAIT_STATUS_COMPLETED; + *status = HL_WAIT_CS_STATUS_COMPLETED; + } else if (pend->fence.error) { + dev_err_ratelimited(hdev->dev, + "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", + pend->fence.error); + /* set the command completion status as ABORTED */ + *status = HL_WAIT_CS_STATUS_ABORTED; } else { timeout = completion_rc; goto wait_again; @@ -2861,7 +3049,7 @@ wait_again: interrupt->interrupt_id); rc = -EINTR; } else { - *status = CS_WAIT_STATUS_BUSY; + *status = HL_WAIT_CS_STATUS_BUSY; } remove_pending_user_interrupt: @@ -2879,11 +3067,12 @@ remove_pending_user_interrupt: static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) { - u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt; + u16 interrupt_id, first_interrupt, last_interrupt; struct hl_device *hdev = hpriv->hdev; struct asic_fixed_properties *prop; + struct hl_user_interrupt *interrupt; union hl_wait_cs_args *args = data; - enum hl_cs_wait_status status; + u32 status = HL_WAIT_CS_STATUS_BUSY; u64 timestamp; int rc; @@ -2894,8 +3083,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) return -EPERM; } - interrupt_id = - FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); + interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); first_interrupt = prop->first_available_user_msix_interrupt; last_interrupt = prop->first_available_user_msix_interrupt + @@ -2908,15 +3096,21 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) } if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID) - 
interrupt_offset = HL_COMMON_USER_INTERRUPT_ID; + interrupt = &hdev->common_user_interrupt; else - interrupt_offset = interrupt_id - first_interrupt; + interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt]; - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, + if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, + args->in.interrupt_timeout_us, args->in.cq_counters_handle, + args->in.cq_counters_offset, + args->in.target, interrupt, &status, + &timestamp); + else + rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, - args->in.target, interrupt_offset, &status, + args->in.target, interrupt, &status, &timestamp); - if (rc) { if (rc != -EINTR) dev_err_ratelimited(hdev->dev, @@ -2926,22 +3120,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) } memset(args, 0, sizeof(*args)); + args->out.status = status; if (timestamp) { args->out.timestamp_nsec = timestamp; args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; } - switch (status) { - case CS_WAIT_STATUS_COMPLETED: - args->out.status = HL_WAIT_CS_STATUS_COMPLETED; - break; - case CS_WAIT_STATUS_BUSY: - default: - args->out.status = HL_WAIT_CS_STATUS_BUSY; - break; - } - return 0; } @@ -2955,7 +3140,7 @@ int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) * user interrupt */ if (!hl_device_operational(hpriv->hdev, NULL)) - return -EPERM; + return -EBUSY; if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) rc = hl_interrupt_wait_ioctl(hpriv, data); diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index d0aaccd4df2c..c6360e33bce8 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. 
*/ @@ -13,13 +13,13 @@ void hl_encaps_handle_do_release(struct kref *ref) { struct hl_cs_encaps_sig_handle *handle = container_of(ref, struct hl_cs_encaps_sig_handle, refcount); - struct hl_ctx *ctx = handle->hdev->compute_ctx; - struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr; + struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr; spin_lock(&mgr->lock); idr_remove(&mgr->handles, handle->id); spin_unlock(&mgr->lock); + hl_ctx_put(handle->ctx); kfree(handle); } @@ -27,8 +27,7 @@ static void hl_encaps_handle_do_release_sob(struct kref *ref) { struct hl_cs_encaps_sig_handle *handle = container_of(ref, struct hl_cs_encaps_sig_handle, refcount); - struct hl_ctx *ctx = handle->hdev->compute_ctx; - struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr; + struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr; /* if we're here, then there was a signals reservation but cs with * encaps signals wasn't submitted, so need to put refcount @@ -40,6 +39,7 @@ static void hl_encaps_handle_do_release_sob(struct kref *ref) idr_remove(&mgr->handles, handle->id); spin_unlock(&mgr->lock); + hl_ctx_put(handle->ctx); kfree(handle); } @@ -97,11 +97,9 @@ static void hl_ctx_fini(struct hl_ctx *ctx) /* The engines are stopped as there is no executing CS, but the * Coresight might be still working by accessing addresses * related to the stopped engines. Hence stop it explicitly. 
- * Stop only if this is the compute context, as there can be - * only one compute context */ - if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) - hl_device_set_debug_mode(hdev, false); + if (hdev->in_debug) + hl_device_set_debug_mode(hdev, ctx, false); hdev->asic_funcs->ctx_fini(ctx); hl_cb_va_pool_fini(ctx); @@ -167,7 +165,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) hpriv->ctx = ctx; /* TODO: remove the following line for multiple process support */ - hdev->compute_ctx = ctx; + hdev->is_compute_ctx_active = true; return 0; @@ -274,6 +272,27 @@ int hl_ctx_put(struct hl_ctx *ctx) return kref_put(&ctx->refcount, hl_ctx_do_release); } +struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev) +{ + struct hl_ctx *ctx = NULL; + struct hl_fpriv *hpriv; + + mutex_lock(&hdev->fpriv_list_lock); + + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { + /* There can only be a single user which has opened the compute device, so exit + * immediately once we find him + */ + ctx = hpriv->ctx; + hl_ctx_get(hdev, ctx); + break; + } + + mutex_unlock(&hdev->fpriv_list_lock); + + return ctx; +} + /* * hl_ctx_get_fence_locked - get CS fence under CS lock * diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 1f2a3dc6c4e2..fc084ee5106e 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. 
*/ @@ -15,19 +15,25 @@ #define MMU_ADDR_BUF_SIZE 40 #define MMU_ASID_BUF_SIZE 10 #define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) +#define I2C_MAX_TRANSACTION_LEN 8 static struct dentry *hl_debug_root; static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, - u8 i2c_reg, long *val) + u8 i2c_reg, u8 i2c_len, u64 *val) { struct cpucp_packet pkt; - u64 result; int rc; if (!hl_device_operational(hdev, NULL)) return -EBUSY; + if (i2c_len > I2C_MAX_TRANSACTION_LEN) { + dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n", + i2c_len, I2C_MAX_TRANSACTION_LEN); + return -EINVAL; + } + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD << @@ -35,12 +41,10 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; pkt.i2c_reg = i2c_reg; + pkt.i2c_len = i2c_len; rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, &result); - - *val = (long) result; - + 0, val); if (rc) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); @@ -48,7 +52,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, } static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, - u8 i2c_reg, u32 val) + u8 i2c_reg, u8 i2c_len, u64 val) { struct cpucp_packet pkt; int rc; @@ -56,6 +60,12 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, if (!hl_device_operational(hdev, NULL)) return -EBUSY; + if (i2c_len > I2C_MAX_TRANSACTION_LEN) { + dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n", + i2c_len, I2C_MAX_TRANSACTION_LEN); + return -EINVAL; + } + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR << @@ -63,6 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; pkt.i2c_reg = i2c_reg; + pkt.i2c_len = i2c_len; pkt.value = 
cpu_to_le64(val); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -235,6 +246,8 @@ static int vm_show(struct seq_file *s, void *data) struct hl_vm_hash_node *hnode; struct hl_userptr *userptr; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; + struct hl_va_range *va_range; + struct hl_vm_va_block *va_block; enum vm_type *vm_type; bool once = true; u64 j; @@ -314,6 +327,25 @@ static int vm_show(struct seq_file *s, void *data) spin_unlock(&dev_entry->ctx_mem_hash_spinlock); + ctx = hl_get_compute_ctx(dev_entry->hdev); + if (ctx) { + seq_puts(s, "\nVA ranges:\n\n"); + for (i = HL_VA_RANGE_TYPE_HOST ; i < HL_VA_RANGE_TYPE_MAX ; ++i) { + va_range = ctx->va_range[i]; + seq_printf(s, " va_range %d\n", i); + seq_puts(s, "---------------------\n"); + mutex_lock(&va_range->lock); + list_for_each_entry(va_block, &va_range->list, node) { + seq_printf(s, "%#16llx - %#16llx (%#llx)\n", + va_block->start, va_block->end, + va_block->size); + } + mutex_unlock(&va_range->lock); + seq_puts(s, "\n"); + } + hl_ctx_put(ctx); + } + if (!once) seq_puts(s, "\n"); @@ -407,7 +439,7 @@ static int mmu_show(struct seq_file *s, void *data) if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) ctx = hdev->kernel_ctx; else - ctx = hdev->compute_ctx; + ctx = hl_get_compute_ctx(hdev); if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); @@ -495,7 +527,7 @@ static int engines_show(struct seq_file *s, void *data) struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't check device idle during reset\n"); return 0; @@ -560,7 +592,7 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, u64 *phys_addr) { struct hl_vm_phys_pg_pack *phys_pg_pack; - struct hl_ctx *ctx = hdev->compute_ctx; + struct hl_ctx *ctx; struct hl_vm_hash_node *hnode; u64 end_address, range_size; struct hl_userptr *userptr; @@ -568,6 
+600,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, bool valid = false; int i, rc = 0; + ctx = hl_get_compute_ctx(hdev); + if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; @@ -624,7 +658,7 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, ssize_t rc; u32 val; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); return 0; } @@ -660,7 +694,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, u32 value; ssize_t rc; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); return 0; } @@ -697,7 +731,7 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, ssize_t rc; u64 val; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); return 0; } @@ -733,7 +767,7 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, u64 value; ssize_t rc; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); return 0; } @@ -768,7 +802,7 @@ static ssize_t hl_dma_size_write(struct file *f, const char __user *buf, ssize_t rc; u32 size; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n"); return 0; } @@ -874,22 +908,22 @@ static ssize_t hl_i2c_data_read(struct file *f, char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; char tmp_buf[32]; - long val; + u64 val; ssize_t rc; if (*ppos) return 0; rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr, - entry->i2c_reg, &val); + entry->i2c_reg, entry->i2c_len, &val); if (rc) { dev_err(hdev->dev, - "Failed to read from I2C bus %d, addr %d, reg %d\n", - 
entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); + "Failed to read from I2C bus %d, addr %d, reg %d, len %d\n", + entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len); return rc; } - sprintf(tmp_buf, "0x%02lx\n", val); + sprintf(tmp_buf, "%#02llx\n", val); rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, strlen(tmp_buf)); @@ -901,19 +935,19 @@ static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u32 value; + u64 value; ssize_t rc; - rc = kstrtouint_from_user(buf, count, 16, &value); + rc = kstrtou64_from_user(buf, count, 16, &value); if (rc) return rc; rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr, - entry->i2c_reg, value); + entry->i2c_reg, entry->i2c_len, value); if (rc) { dev_err(hdev->dev, - "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n", - value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); + "Failed to write %#02llx to I2C bus %d, addr %d, reg %d, len %d\n", + value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len); return rc; } @@ -1043,7 +1077,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, u64 value; ssize_t rc; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't change clock gating during reset\n"); return 0; @@ -1085,7 +1119,7 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, u32 value; ssize_t rc; - if (atomic_read(&hdev->in_reset)) { + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't change stop on error during reset\n"); return 0; @@ -1396,6 +1430,11 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry->root, &dev_entry->i2c_reg); + debugfs_create_u8("i2c_len", + 0644, + dev_entry->root, + &dev_entry->i2c_len); + debugfs_create_file("i2c_data", 0644, dev_entry->root, @@ -1458,7 +1497,7 @@ void 
hl_debugfs_add_device(struct hl_device *hdev) debugfs_create_x8("skip_reset_on_timeout", 0644, dev_entry->root, - &hdev->skip_reset_on_timeout); + &hdev->reset_info.skip_reset_on_timeout); debugfs_create_file("state_dump", 0600, diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 2022e5d7b3ad..733338ab6f1d 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -17,9 +17,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; - if (atomic_read(&hdev->in_reset)) + if (hdev->reset_info.in_reset) status = HL_DEVICE_STATUS_IN_RESET; - else if (hdev->needs_reset) + else if (hdev->reset_info.needs_reset) status = HL_DEVICE_STATUS_NEEDS_RESET; else if (hdev->disabled) status = HL_DEVICE_STATUS_MALFUNCTION; @@ -95,14 +95,14 @@ static void hpriv_release(struct kref *ref) if ((hdev->reset_if_device_not_idle && !device_is_idle) || hdev->reset_upon_device_release) - hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE); + hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE); - /* Now we can mark the compute_ctx as empty. Even if a reset is running in a different + /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different * thread, we don't care because the in_reset is marked so if a user will try to open - * the device it will fail on that, even if compute_ctx is NULL. + * the device it will fail on that, even if compute_ctx is false. 
*/ mutex_lock(&hdev->fpriv_list_lock); - hdev->compute_ctx = NULL; + hdev->is_compute_ctx_active = false; mutex_unlock(&hdev->fpriv_list_lock); kfree(hpriv); @@ -169,9 +169,9 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) goto out; } - mutex_lock(&hdev->fpriv_list_lock); + mutex_lock(&hdev->fpriv_ctrl_list_lock); list_del(&hpriv->dev_node); - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); out: put_pid(hpriv->taskpid); @@ -324,16 +324,12 @@ put_devices: static void device_hard_reset_pending(struct work_struct *work) { struct hl_device_reset_work *device_reset_work = - container_of(work, struct hl_device_reset_work, - reset_work.work); + container_of(work, struct hl_device_reset_work, reset_work.work); struct hl_device *hdev = device_reset_work->hdev; u32 flags; int rc; - flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD; - - if (device_reset_work->fw_reset) - flags |= HL_RESET_FW; + flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR; rc = hl_device_reset(hdev, flags); if ((rc == -EBUSY) && !hdev->device_fini_pending) { @@ -452,9 +448,12 @@ static int device_early_init(struct hl_device *hdev) mutex_init(&hdev->debug_lock); INIT_LIST_HEAD(&hdev->cs_mirror_list); spin_lock_init(&hdev->cs_mirror_lock); + spin_lock_init(&hdev->reset_info.lock); INIT_LIST_HEAD(&hdev->fpriv_list); + INIT_LIST_HEAD(&hdev->fpriv_ctrl_list); mutex_init(&hdev->fpriv_list_lock); - atomic_set(&hdev->in_reset, 0); + mutex_init(&hdev->fpriv_ctrl_list_lock); + mutex_init(&hdev->clk_throttling.lock); return 0; @@ -494,6 +493,9 @@ static void device_early_fini(struct hl_device *hdev) mutex_destroy(&hdev->send_cpu_message_lock); mutex_destroy(&hdev->fpriv_list_lock); + mutex_destroy(&hdev->fpriv_ctrl_list_lock); + + mutex_destroy(&hdev->clk_throttling.lock); hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); @@ -513,22 +515,6 @@ static void device_early_fini(struct hl_device *hdev) hdev->asic_funcs->early_fini(hdev); } -static 
void set_freq_to_low_job(struct work_struct *work) -{ - struct hl_device *hdev = container_of(work, struct hl_device, - work_freq.work); - - mutex_lock(&hdev->fpriv_list_lock); - - if (!hdev->compute_ctx) - hl_device_set_frequency(hdev, PLL_LOW); - - mutex_unlock(&hdev->fpriv_list_lock); - - schedule_delayed_work(&hdev->work_freq, - usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); -} - static void hl_device_heartbeat(struct work_struct *work) { struct hl_device *hdev = container_of(work, struct hl_device, @@ -540,8 +526,10 @@ static void hl_device_heartbeat(struct work_struct *work) if (!hdev->asic_funcs->send_heartbeat(hdev)) goto reschedule; - dev_err(hdev->dev, "Device heartbeat failed!\n"); - hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT); + if (hl_device_operational(hdev, NULL)) + dev_err(hdev->dev, "Device heartbeat failed!\n"); + + hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT); return; @@ -552,12 +540,12 @@ reschedule: * If control reached here, then at least one heartbeat work has been * scheduled since last reset/init cycle. * So if the device is not already in reset cycle, reset the flag - * prev_reset_trigger as no reset occurred with HL_RESET_FW_FATAL_ERR + * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR * status for at least one heartbeat. From this point driver restarts * tracking future consecutive fatal errors. 
*/ - if (!(atomic_read(&hdev->in_reset))) - hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; + if (!hdev->reset_info.in_reset) + hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; schedule_delayed_work(&hdev->work_heartbeat, usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); @@ -586,18 +574,6 @@ static int device_late_init(struct hl_device *hdev) hdev->high_pll = hdev->asic_prop.high_pll; - /* force setting to low frequency */ - hdev->curr_pll_profile = PLL_LOW; - - if (hdev->pm_mng_profile == PM_AUTO) - hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); - else - hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); - - INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job); - schedule_delayed_work(&hdev->work_freq, - usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); - if (hdev->heartbeat) { INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); schedule_delayed_work(&hdev->work_heartbeat, @@ -620,7 +596,6 @@ static void device_late_fini(struct hl_device *hdev) if (!hdev->late_init_done) return; - cancel_delayed_work_sync(&hdev->work_freq); if (hdev->heartbeat) cancel_delayed_work_sync(&hdev->work_heartbeat); @@ -650,36 +625,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization) return 0; } -/* - * hl_device_set_frequency - set the frequency of the device - * - * @hdev: pointer to habanalabs device structure - * @freq: the new frequency value - * - * Change the frequency if needed. This function has no protection against - * concurrency, therefore it is assumed that the calling function has protected - * itself against the case of calling this function from multiple threads with - * different values - * - * Returns 0 if no change was done, otherwise returns 1 - */ -int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) -{ - if ((hdev->pm_mng_profile == PM_MANUAL) || - (hdev->curr_pll_profile == freq)) - return 0; - - dev_dbg(hdev->dev, "Changing device frequency to %s\n", - freq == PLL_HIGH ? 
"high" : "low"); - - hdev->asic_funcs->set_pll_profile(hdev, freq); - - hdev->curr_pll_profile = freq; - - return 1; -} - -int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) +int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable) { int rc = 0; @@ -693,12 +639,12 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) goto out; } - if (!hdev->hard_reset_pending) - hdev->asic_funcs->halt_coresight(hdev); + if (!hdev->reset_info.hard_reset_pending) + hdev->asic_funcs->halt_coresight(hdev, ctx); hdev->in_debug = 0; - if (!hdev->hard_reset_pending) + if (!hdev->reset_info.hard_reset_pending) hdev->asic_funcs->set_clock_gating(hdev); goto out; @@ -735,6 +681,8 @@ static void take_release_locks(struct hl_device *hdev) /* Flush anyone that is inside device open */ mutex_lock(&hdev->fpriv_list_lock); mutex_unlock(&hdev->fpriv_list_lock); + mutex_lock(&hdev->fpriv_ctrl_list_lock); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); } static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset) @@ -774,11 +722,14 @@ int hl_device_suspend(struct hl_device *hdev) pci_save_state(hdev->pdev); /* Block future CS/VM/JOB completion operations */ - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); - if (rc) { + spin_lock(&hdev->reset_info.lock); + if (hdev->reset_info.in_reset) { + spin_unlock(&hdev->reset_info.lock); dev_err(hdev->dev, "Can't suspend while in reset\n"); return -EIO; } + hdev->reset_info.in_reset = 1; + spin_unlock(&hdev->reset_info.lock); /* This blocks all other stuff that is not blocked by in_reset */ hdev->disabled = true; @@ -828,10 +779,12 @@ int hl_device_resume(struct hl_device *hdev) } - hdev->disabled = false; - atomic_set(&hdev->in_reset, 0); + /* 'in_reset' was set to true during suspend, now we must clear it in order + * for hard reset to be performed + */ + hdev->reset_info.in_reset = 0; - rc = hl_device_reset(hdev, HL_RESET_HARD); + rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); if 
(rc) { dev_err(hdev->dev, "Failed to reset device during resume\n"); goto disable_device; @@ -846,17 +799,21 @@ disable_device: return rc; } -static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) +static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) { - struct hl_fpriv *hpriv; struct task_struct *task = NULL; + struct list_head *fd_list; + struct hl_fpriv *hpriv; + struct mutex *fd_lock; u32 pending_cnt; + fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; /* Giving time for user to close FD, and for processes that are inside * hl_device_open to finish */ - if (!list_empty(&hdev->fpriv_list)) + if (!list_empty(fd_list)) ssleep(1); if (timeout) { @@ -872,12 +829,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) } } - mutex_lock(&hdev->fpriv_list_lock); + mutex_lock(fd_lock); /* This section must be protected because we are dereferencing * pointers that are freed if the process exits */ - list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { + list_for_each_entry(hpriv, fd_list, dev_node) { task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); if (task) { dev_info(hdev->dev, "Killing user process pid=%d\n", @@ -889,12 +846,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) } else { dev_warn(hdev->dev, "Can't get task struct for PID so giving up on killing process\n"); - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(fd_lock); return -ETIME; } } - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(fd_lock); /* * We killed the open users, but that doesn't mean they are closed. 
@@ -906,7 +863,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) */ wait_for_processes: - while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { + while ((!list_empty(fd_list)) && (pending_cnt)) { dev_dbg(hdev->dev, "Waiting for all unmap operations to finish before hard reset\n"); @@ -916,7 +873,7 @@ wait_for_processes: } /* All processes exited successfully */ - if (list_empty(&hdev->fpriv_list)) + if (list_empty(fd_list)) return 0; /* Give up waiting for processes to exit */ @@ -928,14 +885,19 @@ wait_for_processes: return -EBUSY; } -static void device_disable_open_processes(struct hl_device *hdev) +static void device_disable_open_processes(struct hl_device *hdev, bool control_dev) { + struct list_head *fd_list; struct hl_fpriv *hpriv; + struct mutex *fd_lock; - mutex_lock(&hdev->fpriv_list_lock); - list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) + fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; + + mutex_lock(fd_lock); + list_for_each_entry(hpriv, fd_list, dev_node) hpriv->hdev = NULL; - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(fd_lock); } static void handle_reset_trigger(struct hl_device *hdev, u32 flags) @@ -948,17 +910,17 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) * ('in_reset' makes sure of it). This makes sure that * 'reset_cause' will continue holding its 1st recorded reason! 
*/ - if (flags & HL_RESET_HEARTBEAT) { - hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT; - cur_reset_trigger = HL_RESET_HEARTBEAT; - } else if (flags & HL_RESET_TDR) { - hdev->curr_reset_cause = HL_RESET_CAUSE_TDR; - cur_reset_trigger = HL_RESET_TDR; - } else if (flags & HL_RESET_FW_FATAL_ERR) { - hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; - cur_reset_trigger = HL_RESET_FW_FATAL_ERR; + if (flags & HL_DRV_RESET_HEARTBEAT) { + hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT; + cur_reset_trigger = HL_DRV_RESET_HEARTBEAT; + } else if (flags & HL_DRV_RESET_TDR) { + hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR; + cur_reset_trigger = HL_DRV_RESET_TDR; + } else if (flags & HL_DRV_RESET_FW_FATAL_ERR) { + hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; + cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR; } else { - hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; + hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; } /* @@ -966,11 +928,11 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) * is set and if this reset is due to a fatal FW error * device is set to an unstable state. 
*/ - if (hdev->prev_reset_trigger != cur_reset_trigger) { - hdev->prev_reset_trigger = cur_reset_trigger; - hdev->reset_trigger_repeated = 0; + if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) { + hdev->reset_info.prev_reset_trigger = cur_reset_trigger; + hdev->reset_info.reset_trigger_repeated = 0; } else { - hdev->reset_trigger_repeated = 1; + hdev->reset_info.reset_trigger_repeated = 1; } /* If reset is due to heartbeat, device CPU is no responsive in @@ -979,8 +941,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) * If F/W is performing the reset, no need to send it a message to disable * PCI access */ - if ((flags & HL_RESET_HARD) && - !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) { + if ((flags & HL_DRV_RESET_HARD) && + !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) { /* Disable PCI access from device F/W so he won't send * us additional interrupts. We disable MSI/MSI-X at * the halt_engines function and we can't have the F/W @@ -1015,34 +977,39 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) */ int hl_device_reset(struct hl_device *hdev, u32 flags) { + bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, + reset_upon_device_release = false, schedule_hard_reset = false; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; - bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false; + struct hl_ctx *ctx; int i, rc; if (!hdev->init_done) { - dev_err(hdev->dev, - "Can't reset before initialization is done\n"); + dev_err(hdev->dev, "Can't reset before initialization is done\n"); return 0; } - hard_reset = !!(flags & HL_RESET_HARD); - from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD); - fw_reset = !!(flags & HL_RESET_FW); + hard_reset = !!(flags & HL_DRV_RESET_HARD); + from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); + fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); - if (!hard_reset && 
!hdev->supports_soft_reset) { + if (!hard_reset && !hdev->asic_prop.supports_soft_reset) { hard_instead_soft = true; hard_reset = true; } - if (hdev->reset_upon_device_release && - (flags & HL_RESET_DEVICE_RELEASE)) { - dev_dbg(hdev->dev, - "Perform %s-reset upon device release\n", - hard_reset ? "hard" : "soft"); + if (hdev->reset_upon_device_release && (flags & HL_DRV_RESET_DEV_RELEASE)) { + if (hard_reset) { + dev_crit(hdev->dev, + "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n"); + return -EINVAL; + } + + reset_upon_device_release = true; + goto do_reset; } - if (!hard_reset && !hdev->allow_inference_soft_reset) { + if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) { hard_instead_soft = true; hard_reset = true; } @@ -1062,12 +1029,22 @@ do_reset: */ if (!from_hard_reset_thread) { /* Block future CS/VM/JOB completion operations */ - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); - if (rc) + spin_lock(&hdev->reset_info.lock); + if (hdev->reset_info.in_reset) { + /* We only allow scheduling of a hard reset during soft reset */ + if (hard_reset && hdev->reset_info.is_in_soft_reset) + hdev->reset_info.hard_reset_schedule_flags = flags; + spin_unlock(&hdev->reset_info.lock); return 0; + } + hdev->reset_info.in_reset = 1; + spin_unlock(&hdev->reset_info.lock); handle_reset_trigger(hdev, flags); + /* This still allows the completion of some KDMA ops */ + hdev->reset_info.is_in_soft_reset = !hard_reset; + /* This also blocks future CS/VM/JOB completion operations */ hdev->disabled = true; @@ -1075,21 +1052,19 @@ do_reset: if (hard_reset) dev_info(hdev->dev, "Going to reset device\n"); - else if (flags & HL_RESET_DEVICE_RELEASE) - dev_info(hdev->dev, - "Going to reset device after it was released by user\n"); + else if (reset_upon_device_release) + dev_info(hdev->dev, "Going to reset device after release by user\n"); else - dev_info(hdev->dev, - "Going to reset compute engines of inference device\n"); + 
dev_info(hdev->dev, "Going to reset engines of inference device\n"); } again: if ((hard_reset) && (!from_hard_reset_thread)) { - hdev->hard_reset_pending = true; + hdev->reset_info.hard_reset_pending = true; hdev->process_kill_trial_cnt = 0; - hdev->device_reset_work.fw_reset = fw_reset; + hdev->device_reset_work.flags = flags; /* * Because the reset function can't run from heartbeat work, @@ -1109,7 +1084,7 @@ kill_processes: * process can't really exit until all its CSs are done, which * is what we do in cs rollback */ - rc = device_kill_open_processes(hdev, 0); + rc = device_kill_open_processes(hdev, 0, false); if (rc == -EBUSY) { if (hdev->device_fini_pending) { @@ -1138,7 +1113,7 @@ kill_processes: hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); if (hard_reset) { - hdev->fw_loader.linux_loaded = false; + hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; /* Release kernel context */ if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) @@ -1154,24 +1129,23 @@ kill_processes: for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_reset(hdev, &hdev->completion_queue[i]); - mutex_lock(&hdev->fpriv_list_lock); - /* Make sure the context switch phase will run again */ - if (hdev->compute_ctx) { - atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1); - hdev->compute_ctx->thread_ctx_switch_wait_token = 0; + ctx = hl_get_compute_ctx(hdev); + if (ctx) { + atomic_set(&ctx->thread_ctx_switch_token, 1); + ctx->thread_ctx_switch_wait_token = 0; + hl_ctx_put(ctx); } - mutex_unlock(&hdev->fpriv_list_lock); - /* Finished tear-down, starting to re-initialize */ if (hard_reset) { hdev->device_cpu_disabled = false; - hdev->hard_reset_pending = false; + hdev->reset_info.hard_reset_pending = false; - if (hdev->reset_trigger_repeated && - (hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) { + if (hdev->reset_info.reset_trigger_repeated && + (hdev->reset_info.prev_reset_trigger == + HL_DRV_RESET_FW_FATAL_ERR)) { /* if there 2 back to back resets from 
FW, * ensure driver puts the driver in a unusable state */ @@ -1204,7 +1178,7 @@ kill_processes: goto out_err; } - hdev->compute_ctx = NULL; + hdev->is_compute_ctx_active = false; rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); if (rc) { @@ -1225,16 +1199,14 @@ kill_processes: rc = hdev->asic_funcs->hw_init(hdev); if (rc) { - dev_err(hdev->dev, - "failed to initialize the H/W after reset\n"); + dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); goto out_err; } /* If device is not idle fail the reset process */ if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { - dev_err(hdev->dev, - "device is not idle (mask 0x%llx_%llx) after reset\n", + dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n", idle_mask[1], idle_mask[0]); rc = -EIO; goto out_err; @@ -1243,43 +1215,56 @@ kill_processes: /* Check that the communication with the device is working */ rc = hdev->asic_funcs->test_queues(hdev); if (rc) { - dev_err(hdev->dev, - "Failed to detect if device is alive after reset\n"); + dev_err(hdev->dev, "Failed to detect if device is alive after reset\n"); goto out_err; } if (hard_reset) { rc = device_late_init(hdev); if (rc) { - dev_err(hdev->dev, - "Failed late init after hard reset\n"); + dev_err(hdev->dev, "Failed late init after hard reset\n"); goto out_err; } rc = hl_vm_init(hdev); if (rc) { - dev_err(hdev->dev, - "Failed to init memory module after hard reset\n"); + dev_err(hdev->dev, "Failed to init memory module after hard reset\n"); goto out_err; } hl_set_max_power(hdev); } else { - rc = hdev->asic_funcs->soft_reset_late_init(hdev); + rc = hdev->asic_funcs->non_hard_reset_late_init(hdev); if (rc) { - dev_err(hdev->dev, - "Failed late init after soft reset\n"); + if (reset_upon_device_release) + dev_err(hdev->dev, + "Failed late init in reset after device release\n"); + else + dev_err(hdev->dev, "Failed late init after soft reset\n"); goto out_err; } } - atomic_set(&hdev->in_reset, 0); - 
hdev->needs_reset = false; + spin_lock(&hdev->reset_info.lock); + hdev->reset_info.is_in_soft_reset = false; + + /* Schedule hard reset only if requested and if not already in hard reset. + * We keep 'in_reset' enabled, so no other reset can go in during the hard + * reset schedule + */ + if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags) + schedule_hard_reset = true; + else + hdev->reset_info.in_reset = 0; + + spin_unlock(&hdev->reset_info.lock); + + hdev->reset_info.needs_reset = false; dev_notice(hdev->dev, "Successfully finished resetting the device\n"); if (hard_reset) { - hdev->hard_reset_cnt++; + hdev->reset_info.hard_reset_cnt++; /* After reset is done, we are ready to receive events from * the F/W. We can't do it before because we will ignore events @@ -1287,28 +1272,41 @@ kill_processes: * the device will be operational although it shouldn't be */ hdev->asic_funcs->enable_events_from_fw(hdev); - } else { - hdev->soft_reset_cnt++; + } else if (!reset_upon_device_release) { + hdev->reset_info.soft_reset_cnt++; + } + + if (schedule_hard_reset) { + dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n"); + flags = hdev->reset_info.hard_reset_schedule_flags; + hdev->reset_info.hard_reset_schedule_flags = 0; + hdev->disabled = true; + hard_reset = true; + handle_reset_trigger(hdev, flags); + goto again; } return 0; out_err: hdev->disabled = true; + hdev->reset_info.is_in_soft_reset = false; if (hard_reset) { - dev_err(hdev->dev, - "Failed to reset! Device is NOT usable\n"); - hdev->hard_reset_cnt++; + dev_err(hdev->dev, "Failed to reset! 
Device is NOT usable\n"); + hdev->reset_info.hard_reset_cnt++; + } else if (reset_upon_device_release) { + dev_err(hdev->dev, "Failed to reset device after user release\n"); + hard_reset = true; + goto again; } else { - dev_err(hdev->dev, - "Failed to do soft-reset, trying hard reset\n"); - hdev->soft_reset_cnt++; + dev_err(hdev->dev, "Failed to do soft-reset\n"); + hdev->reset_info.soft_reset_cnt++; hard_reset = true; goto again; } - atomic_set(&hdev->in_reset, 0); + hdev->reset_info.in_reset = 0; return rc; } @@ -1455,7 +1453,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) goto mmu_fini; } - hdev->compute_ctx = NULL; + hdev->is_compute_ctx_active = false; hdev->asic_funcs->state_dump_init(hdev); @@ -1619,6 +1617,7 @@ out_disabled: */ void hl_device_fini(struct hl_device *hdev) { + bool device_in_reset; ktime_t timeout; u64 reset_sec; int i, rc; @@ -1642,10 +1641,22 @@ void hl_device_fini(struct hl_device *hdev) */ timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000); - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); - while (rc) { + + spin_lock(&hdev->reset_info.lock); + device_in_reset = !!hdev->reset_info.in_reset; + if (!device_in_reset) + hdev->reset_info.in_reset = 1; + spin_unlock(&hdev->reset_info.lock); + + while (device_in_reset) { usleep_range(50, 200); - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + + spin_lock(&hdev->reset_info.lock); + device_in_reset = !!hdev->reset_info.in_reset; + if (!device_in_reset) + hdev->reset_info.in_reset = 1; + spin_unlock(&hdev->reset_info.lock); + if (ktime_compare(ktime_get(), timeout) > 0) { dev_crit(hdev->dev, "Failed to remove device because reset function did not finish\n"); @@ -1667,7 +1678,7 @@ void hl_device_fini(struct hl_device *hdev) take_release_locks(hdev); - hdev->hard_reset_pending = true; + hdev->reset_info.hard_reset_pending = true; hl_hwmon_fini(hdev); @@ -1681,10 +1692,16 @@ void hl_device_fini(struct hl_device *hdev) "Waiting for all processes to exit (timeout of %u 
seconds)", HL_PENDING_RESET_LONG_SEC); - rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC); + rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false); if (rc) { dev_crit(hdev->dev, "Failed to kill all open processes\n"); - device_disable_open_processes(hdev); + device_disable_open_processes(hdev, false); + } + + rc = device_kill_open_processes(hdev, 0, true); + if (rc) { + dev_crit(hdev->dev, "Failed to kill all control device open processes\n"); + device_disable_open_processes(hdev, true); } hl_cb_pool_fini(hdev); @@ -1692,7 +1709,7 @@ void hl_device_fini(struct hl_device *hdev) /* Reset the H/W. It will be in idle state after this returns */ hdev->asic_funcs->hw_fini(hdev, true, false); - hdev->fw_loader.linux_loaded = false; + hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; /* Release kernel context */ if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 4e68fb9d2a6b..6775c5c3166b 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -15,8 +15,6 @@ #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ -#define FW_CPU_STATUS_POLL_INTERVAL_USEC 10000 - static char *extract_fw_ver_from_str(const char *fw_str) { char *str, *fw_ver, *whitespace; @@ -214,7 +212,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, struct asic_fixed_properties *prop = &hdev->asic_prop; struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; - u32 tmp, expected_ack_val; + struct hl_bd *sent_bd; + u32 tmp, expected_ack_val, pi; int rc = 0; pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, @@ -239,6 +238,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, /* set fence to a non valid value */ pkt->fence = cpu_to_le32(UINT_MAX); + pi = queue->pi; /* * The CPU queue is a synchronous queue with an effective 
depth of @@ -248,7 +248,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, * Which means that we don't need to lock the access to the entire H/W * queues module when submitting a JOB to the CPU queue. */ - hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr); + hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), len, pkt_dma_addr); if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) expected_ack_val = queue->pi; @@ -280,6 +280,14 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, *result = le64_to_cpu(pkt->result); } + /* Scrub previous buffer descriptor 'ctl' field which contains the + * previous PI value written during packet submission. + * We must do this or else F/W can read an old value upon queue wraparound. + */ + sent_bd = queue->kernel_address; + sent_bd += hl_pi_2_offset(pi); + sent_bd->ctl = cpu_to_le32(UINT_MAX); + out: mutex_unlock(&hdev->send_cpu_message_lock); @@ -445,15 +453,6 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, err_exists = true; } - if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { - dev_warn(hdev->dev, - "Device boot warning - Skipped DRAM initialization\n"); - /* This is a warning so we don't want it to disable the - * device - */ - err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED; - } - if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) { if (hdev->bmc_enable) { dev_err(hdev->dev, @@ -497,15 +496,6 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, err_exists = true; } - if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) { - dev_warn(hdev->dev, - "Device boot warning - Failed to load preboot primary image\n"); - /* This is a warning so we don't want it to disable the - * device as we have a secondary preboot image - */ - err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL; - } - if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) { dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n"); err_exists = true; 
@@ -525,6 +515,34 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, if (sts_val & CPU_BOOT_DEV_STS0_ENABLED) dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val); + /* All warnings should go here in order not to reach the unknown error validation */ + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { + dev_warn(hdev->dev, + "Device boot warning - Skipped DRAM initialization\n"); + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED; + } + + if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) { + dev_warn(hdev->dev, + "Device boot warning - Failed to load preboot primary image\n"); + /* This is a warning so we don't want it to disable the + * device as we have a secondary preboot image + */ + err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL; + } + + if (err_val & CPU_BOOT_ERR0_TPM_FAIL) { + dev_warn(hdev->dev, + "Device boot warning - TPM failure\n"); + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_TPM_FAIL; + } + if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) { dev_err(hdev->dev, "Device boot error - unknown ERR0 error 0x%08x\n", err_val); @@ -961,6 +979,7 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.type = cpu_to_le16(CPUCP_POWER_INPUT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); @@ -974,6 +993,92 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) return rc; } +int hl_fw_dram_replaced_row_get(struct hl_device *hdev, + struct cpucp_hbm_row_info *info) +{ + struct cpucp_hbm_row_info *cpucp_repl_rows_info_cpu_addr; + dma_addr_t cpucp_repl_rows_info_dma_addr; + struct cpucp_packet pkt = {}; + u64 result; + int rc; + + cpucp_repl_rows_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + sizeof(struct cpucp_hbm_row_info), + 
&cpucp_repl_rows_info_dma_addr); + if (!cpucp_repl_rows_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for CPU-CP replaced rows info packet\n"); + return -ENOMEM; + } + + memset(cpucp_repl_rows_info_cpu_addr, 0, sizeof(struct cpucp_hbm_row_info)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(cpucp_repl_rows_info_dma_addr); + pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_hbm_row_info)); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc); + goto out; + } + + memcpy(info, cpucp_repl_rows_info_cpu_addr, sizeof(*info)); + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + sizeof(struct cpucp_hbm_row_info), + cpucp_repl_rows_info_cpu_addr); + + return rc; +} + +int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num) +{ + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_PENDING_ROWS_STATUS << CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP pending rows info pkt, error %d\n", rc); + goto out; + } + + *pend_rows_num = (u32) result; +out: + return rc; +} + +int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_ENGINE_CORE_ASID_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(asid); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, NULL); + if (rc) + dev_err(hdev->dev, + "Failed on ASID configuration request for engine core, error %d\n", + rc); + + return rc; +} + void 
hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev) { struct static_fw_load_mgr *static_loader = @@ -1028,7 +1133,7 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) switch (status) { case CPU_BOOT_STATUS_NA: dev_err(hdev->dev, - "Device boot progress - BTL did NOT run\n"); + "Device boot progress - BTL/ROM did NOT run\n"); break; case CPU_BOOT_STATUS_IN_WFE: dev_err(hdev->dev, @@ -1101,9 +1206,8 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev, (status == CPU_BOOT_STATUS_DRAM_RDY) || (status == CPU_BOOT_STATUS_NIC_FW_RDY) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) || - (status == CPU_BOOT_STATUS_SRAM_AVAIL) || (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, timeout); if (rc) { @@ -1250,8 +1354,7 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev) * 3. FW application - a. Fetch fw application security status * b. Check whether hard reset is done by fw app */ - prop->hard_reset_done_by_fw = - !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); + prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n", cpu_boot_dev_sts0); @@ -1287,11 +1390,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, { int rc; - /* pldm was added for cases in which we use preboot on pldm and want - * to load boot fit, but we can't wait for preboot because it runs - * very slowly - */ - if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm) + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) return 0; /* @@ -1437,7 +1536,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, le32_to_cpu(dyn_regs->cpu_cmd_status_to_host), status, FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status, - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, timeout); if (rc) { @@ -1703,6 +1802,9 @@ static int 
hl_fw_dynamic_validate_descriptor(struct hl_device *hdev, return rc; } + /* here we can mark the descriptor as valid as the content has been validated */ + fw_loader->dynamic_loader.fw_desc_valid = true; + return 0; } @@ -1759,7 +1861,13 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev, return rc; } - /* extract address copy the descriptor from */ + /* + * extract address to copy the descriptor from + * in addition, as the descriptor value is going to be over-ridden by new data- we mark it + * as invalid. + * it will be marked again as valid once validated + */ + fw_loader->dynamic_loader.fw_desc_valid = false; src = hdev->pcie_bar[region->bar_id] + region->offset_in_bar + response->ram_offset; memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc)); @@ -1920,17 +2028,15 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev, { struct asic_fixed_properties *prop = &hdev->asic_prop; - /* Clear reset status since we need to read it again from boot CPU */ - prop->hard_reset_done_by_fw = false; + hdev->fw_loader.fw_comp_loaded |= FW_TYPE_BOOT_CPU; /* Read boot_cpu status bits */ if (prop->fw_preboot_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) { prop->fw_bootfit_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg); - if (prop->fw_bootfit_cpu_boot_dev_sts0 & - CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - prop->hard_reset_done_by_fw = true; + prop->hard_reset_done_by_fw = !!(prop->fw_bootfit_cpu_boot_dev_sts0 & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); dev_dbg(hdev->dev, "Firmware boot CPU status0 %#x\n", prop->fw_bootfit_cpu_boot_dev_sts0); @@ -2055,14 +2161,21 @@ static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev, dyn_loader = &fw_loader->dynamic_loader; - /* Make sure CPU boot-loader is running */ + /* + * Make sure CPU boot-loader is running + * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux + * yet there is a debug scenario in which we are loading uboot (without Linux) + * which at later stage is 
relocated to DRAM. In this case we expect + * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the + * poll flags + */ rc = hl_poll_timeout( hdev, le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status), status, - (status == CPU_BOOT_STATUS_NIC_FW_RDY) || - (status == CPU_BOOT_STATUS_READY_TO_BOOT), - FW_CPU_STATUS_POLL_INTERVAL_USEC, + (status == CPU_BOOT_STATUS_READY_TO_BOOT) || + (status == CPU_BOOT_STATUS_SRAM_AVAIL), + hdev->fw_poll_interval_usec, dyn_loader->wait_for_bl_timeout); if (rc) { dev_err(hdev->dev, "failed to wait for boot\n"); @@ -2082,14 +2195,14 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev, dyn_loader = &fw_loader->dynamic_loader; - /* Make sure CPU boot-loader is running */ + /* Make sure CPU linux is running */ rc = hl_poll_timeout( hdev, le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status), status, (status == CPU_BOOT_STATUS_SRAM_AVAIL), - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, fw_loader->cpu_timeout); if (rc) { dev_err(hdev->dev, "failed to wait for Linux\n"); @@ -2121,18 +2234,14 @@ static void hl_fw_linux_update_state(struct hl_device *hdev, { struct asic_fixed_properties *prop = &hdev->asic_prop; - hdev->fw_loader.linux_loaded = true; - - /* Clear reset status since we need to read again from app */ - prop->hard_reset_done_by_fw = false; + hdev->fw_loader.fw_comp_loaded |= FW_TYPE_LINUX; /* Read FW application security bits */ if (prop->fw_cpu_boot_dev_sts0_valid) { prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg); - if (prop->fw_app_cpu_boot_dev_sts0 & - CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - prop->hard_reset_done_by_fw = true; + prop->hard_reset_done_by_fw = !!(prop->fw_app_cpu_boot_dev_sts0 & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN) @@ -2247,6 +2356,9 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, dev_info(hdev->dev, "Loading firmware to device, may take 
some time...\n"); + /* initialize FW descriptor as invalid */ + fw_loader->dynamic_loader.fw_desc_valid = false; + /* * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values! * It will be updated from FW after hl_fw_dynamic_request_descriptor(). @@ -2259,14 +2371,14 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, if (rc) goto protocol_err; - if (hdev->curr_reset_cause) { + if (hdev->reset_info.curr_reset_cause) { rc = hl_fw_dynamic_send_msg(hdev, fw_loader, - HL_COMMS_RESET_CAUSE_TYPE, &hdev->curr_reset_cause); + HL_COMMS_RESET_CAUSE_TYPE, &hdev->reset_info.curr_reset_cause); if (rc) goto protocol_err; /* Clear current reset cause */ - hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; + hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; } if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) { @@ -2288,6 +2400,15 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, goto protocol_err; } + /* + * when testing FW load (without Linux) on PLDM we don't want to + * wait until boot fit is active as it may take several hours. + * instead, we load the bootfit and let it do all initializations in + * the background. 
+ */ + if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX)) + return 0; + rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader); if (rc) goto protocol_err; @@ -2333,7 +2454,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, return 0; protocol_err: - fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0), + if (fw_loader->dynamic_loader.fw_desc_valid) + fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0), le32_to_cpu(dyn_regs->cpu_boot_err1), le32_to_cpu(dyn_regs->cpu_boot_dev_sts0), le32_to_cpu(dyn_regs->cpu_boot_dev_sts1)); @@ -2380,7 +2502,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, cpu_boot_status_reg, status, status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT, - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, fw_loader->boot_fit_timeout); if (rc) { @@ -2403,7 +2525,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, cpu_msg_status_reg, status, status == CPU_MSG_OK, - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, fw_loader->boot_fit_timeout); if (rc) { @@ -2416,7 +2538,14 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, WREG32(msg_to_cpu_reg, KMD_MSG_NA); } - /* Make sure CPU boot-loader is running */ + /* + * Make sure CPU boot-loader is running + * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux + * yet there is a debug scenario in which we are loading uboot (without Linux) + * which at later stage is relocated to DRAM. 
In this case we expect + * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the + * poll flags + */ rc = hl_poll_timeout( hdev, cpu_boot_status_reg, @@ -2425,7 +2554,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, (status == CPU_BOOT_STATUS_NIC_FW_RDY) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_SRAM_AVAIL), - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, cpu_timeout); dev_dbg(hdev->dev, "uboot status = %d\n", status); @@ -2474,7 +2603,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, cpu_boot_status_reg, status, (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED), - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, cpu_timeout); if (rc) { @@ -2494,7 +2623,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, cpu_boot_status_reg, status, (status == CPU_BOOT_STATUS_SRAM_AVAIL), - FW_CPU_STATUS_POLL_INTERVAL_USEC, + hdev->fw_poll_interval_usec, cpu_timeout); /* Clear message */ diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index a2002cbf794b..cb710fd478b6 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -61,6 +61,8 @@ #define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */ + #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ #define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ @@ -117,37 +119,37 @@ enum hl_mmu_page_table_location { /* * Reset Flags * - * - HL_RESET_HARD + * - HL_DRV_RESET_HARD * If set do hard reset to all engines. If not set reset just * compute/DMA engines. 
* - * - HL_RESET_FROM_RESET_THREAD + * - HL_DRV_RESET_FROM_RESET_THR * Set if the caller is the hard-reset thread * - * - HL_RESET_HEARTBEAT + * - HL_DRV_RESET_HEARTBEAT * Set if reset is due to heartbeat * - * - HL_RESET_TDR + * - HL_DRV_RESET_TDR * Set if reset is due to TDR * - * - HL_RESET_DEVICE_RELEASE + * - HL_DRV_RESET_DEV_RELEASE * Set if reset is due to device release * - * - HL_RESET_FW + * - HL_DRV_RESET_BYPASS_REQ_TO_FW * F/W will perform the reset. No need to ask it to reset the device. This is relevant * only when running with secured f/w * - * - HL_RESET_FW_FATAL_ERR + * - HL_DRV_RESET_FW_FATAL_ERR * Set if reset is due to a fatal error from FW */ -#define HL_RESET_HARD (1 << 0) -#define HL_RESET_FROM_RESET_THREAD (1 << 1) -#define HL_RESET_HEARTBEAT (1 << 2) -#define HL_RESET_TDR (1 << 3) -#define HL_RESET_DEVICE_RELEASE (1 << 4) -#define HL_RESET_FW (1 << 5) -#define HL_RESET_FW_FATAL_ERR (1 << 6) +#define HL_DRV_RESET_HARD (1 << 0) +#define HL_DRV_RESET_FROM_RESET_THR (1 << 1) +#define HL_DRV_RESET_HEARTBEAT (1 << 2) +#define HL_DRV_RESET_TDR (1 << 3) +#define HL_DRV_RESET_DEV_RELEASE (1 << 4) +#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5) +#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6) #define HL_MAX_SOBS_PER_MONITOR 8 @@ -219,6 +221,7 @@ enum hl_fw_component { /** * enum hl_fw_types - F/W types present in the system + * @FW_TYPE_NONE: no FW component indication * @FW_TYPE_LINUX: Linux image for device CPU * @FW_TYPE_BOOT_CPU: Boot image for device CPU * @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system @@ -226,6 +229,7 @@ enum hl_fw_component { * @FW_TYPE_ALL_TYPES: Mask for all types */ enum hl_fw_types { + FW_TYPE_NONE = 0x0, FW_TYPE_LINUX = 0x1, FW_TYPE_BOOT_CPU = 0x2, FW_TYPE_PREBOOT_CPU = 0x4, @@ -353,6 +357,21 @@ enum vm_type { }; /** + * enum mmu_op_flags - mmu operation relevant information. + * @MMU_OP_USERPTR: operation on user memory (host resident). + * @MMU_OP_PHYS_PACK: operation on DRAM (device resident). 
+ * @MMU_OP_CLEAR_MEMCACHE: operation has to clear memcache. + * @MMU_OP_SKIP_LOW_CACHE_INV: operation is allowed to skip parts of cache invalidation. + */ +enum mmu_op_flags { + MMU_OP_USERPTR = 0x1, + MMU_OP_PHYS_PACK = 0x2, + MMU_OP_CLEAR_MEMCACHE = 0x4, + MMU_OP_SKIP_LOW_CACHE_INV = 0x8, +}; + + +/** * enum hl_device_hw_state - H/W device state. use this to understand whether * to do reset before hw_init or not * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset @@ -382,6 +401,7 @@ enum hl_device_hw_state { * @hop3_mask: mask to get the PTE address in hop 3. * @hop4_mask: mask to get the PTE address in hop 4. * @hop5_mask: mask to get the PTE address in hop 5. + * @last_mask: mask to get the bit indicating this is the last hop. * @page_size: default page size used to allocate memory. * @num_hops: The amount of hops supported by the translation table. * @host_resident: Should the MMU page table reside in host memory or in the @@ -402,6 +422,7 @@ struct hl_mmu_properties { u64 hop3_mask; u64 hop4_mask; u64 hop5_mask; + u64 last_mask; u32 page_size; u32 num_hops; u8 host_resident; @@ -524,6 +545,15 @@ struct hl_hints_range { * @dynamic_fw_load: is dynamic FW load is supported. * @gic_interrupts_enable: true if FW is not blocking GIC controller, * false otherwise. + * @use_get_power_for_reset_history: To support backward compatibility for Goya + * and Gaudi + * @supports_soft_reset: is soft reset supported. + * @allow_inference_soft_reset: true if the ASIC supports soft reset that is + * initiated by user or TDR. 
This is only true + * in inference ASICs, as there is no real-world + * use-case of doing soft-reset in training (due + * to the fact that training runs on multiple + * devices) */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -604,6 +634,9 @@ struct asic_fixed_properties { u8 iatu_done_by_fw; u8 dynamic_fw_load; u8 gic_interrupts_enable; + u8 use_get_power_for_reset_history; + u8 supports_soft_reset; + u8 allow_inference_soft_reset; }; /** @@ -852,10 +885,15 @@ struct hl_user_interrupt { * pending on an interrupt * @wait_list_node: node in the list of user threads pending on an interrupt * @fence: hl fence object for interrupt completion + * @cq_target_value: CQ target value + * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt + * handler for target value comparison */ struct hl_user_pending_interrupt { struct list_head wait_list_node; struct hl_fence fence; + u64 cq_target_value; + u64 *cq_kernel_addr; }; /** @@ -1010,6 +1048,7 @@ struct fw_response { * @image_region: region to copy the FW image to * @fw_image_size: size of FW image to load * @wait_for_bl_timeout: timeout for waiting for boot loader to respond + * @fw_desc_valid: true if FW descriptor has been validated and hence the data can be used */ struct dynamic_fw_load_mgr { struct fw_response response; @@ -1017,6 +1056,7 @@ struct dynamic_fw_load_mgr { struct pci_mem_region *image_region; size_t fw_image_size; u32 wait_for_bl_timeout; + bool fw_desc_valid; }; /** @@ -1042,7 +1082,8 @@ struct fw_image_props { * @skip_bmc: should BMC be skipped * @sram_bar_id: SRAM bar ID * @dram_bar_id: DRAM bar ID - * @linux_loaded: true if linux was loaded so far + * @fw_comp_loaded: bitmask of loaded FW components. set bit meaning loaded + * component. values are set according to enum hl_fw_types. 
*/ struct fw_load_mgr { union { @@ -1056,7 +1097,7 @@ struct fw_load_mgr { u8 skip_bmc; u8 sram_bar_id; u8 dram_bar_id; - u8 linux_loaded; + u8 fw_comp_loaded; }; /** @@ -1128,7 +1169,7 @@ struct fw_load_mgr { * @disable_clock_gating: disable clock gating completely * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. - * @soft_reset_late_init: perform certain actions needed after soft reset. + * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset * @hw_queues_lock: acquire H/W queues lock. * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. @@ -1261,10 +1302,10 @@ struct hl_asic_funcs { int (*send_heartbeat)(struct hl_device *hdev); void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); - int (*debug_coresight)(struct hl_device *hdev, void *data); + int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, struct seq_file *s); - int (*soft_reset_late_init)(struct hl_device *hdev); + int (*non_hard_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); u32 (*get_pci_id)(struct hl_device *hdev); @@ -1276,7 +1317,7 @@ struct hl_asic_funcs { int (*init_iatu)(struct hl_device *hdev); u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); - void (*halt_coresight)(struct hl_device *hdev); + void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx); int (*ctx_init)(struct hl_ctx *ctx); void (*ctx_fini)(struct hl_ctx *ctx); int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); @@ -1518,6 +1559,9 @@ struct hl_userptr { * @submission_time_jiffies: submission time of the cs * @type: CS_TYPE_*. 
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs. + * @sob_addr_offset: sob offset from the configuration base address. + * @initial_sob_count: count of completed signals in SOB before current submission of signal or + * cs with encaps signals. * @submitted: true if CS was submitted to H/W. * @completed: true if CS was completed by device. * @timedout : true if CS was timedout. @@ -1553,6 +1597,8 @@ struct hl_cs { u64 submission_time_jiffies; enum hl_cs_type type; u32 encaps_sig_hdl_id; + u32 sob_addr_offset; + u16 initial_sob_count; u8 submitted; u8 completed; u8 timedout; @@ -1792,7 +1838,6 @@ struct hl_debug_params { * @dev_node: node in the device list of file private data * @refcount: number of related contexts. * @restore_phase_mutex: lock for context switch and restore phase. - * @is_control: true for control device, false otherwise */ struct hl_fpriv { struct hl_device *hdev; @@ -1805,7 +1850,6 @@ struct hl_fpriv { struct list_head dev_node; struct kref refcount; struct mutex restore_phase_mutex; - u8 is_control; }; @@ -1864,6 +1908,7 @@ struct hl_debugfs_entry { * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. + * @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read. */ struct hl_dbg_device_entry { struct dentry *root; @@ -1892,6 +1937,7 @@ struct hl_dbg_device_entry { u8 i2c_bus; u8 i2c_addr; u8 i2c_reg; + u8 i2c_len; }; /** @@ -2180,13 +2226,13 @@ struct hwmon_chip_info; * @wq: work queue for device reset procedure. * @reset_work: reset work to be done. * @hdev: habanalabs device structure. - * @fw_reset: whether f/w will do the reset without us sending them a message to do it. + * @flags: reset flags. 
*/ struct hl_device_reset_work { struct workqueue_struct *wq; struct delayed_work reset_work; struct hl_device *hdev; - bool fw_reset; + u32 flags; }; /** @@ -2328,12 +2374,10 @@ struct multi_cs_completion { * @ctx: pointer to the context structure * @fence_arr: array of fences of all CSs * @seq_arr: array of CS sequence numbers - * @timeout_us: timeout in usec for waiting for CS to complete + * @timeout_jiffies: timeout in jiffies for waiting for CS to complete * @timestamp: timestamp of first completed CS * @wait_status: wait for CS status * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0) - * @stream_master_qid_map: bitmap of all stream master QIDs on which the - * multi-CS is waiting * @arr_len: fence_arr and seq_arr array length * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0) * @update_ts: update timestamp. 1- update the timestamp, otherwise 0. @@ -2342,17 +2386,114 @@ struct multi_cs_data { struct hl_ctx *ctx; struct hl_fence **fence_arr; u64 *seq_arr; - s64 timeout_us; + s64 timeout_jiffies; s64 timestamp; long wait_status; u32 completion_bitmap; - u32 stream_master_qid_map; u8 arr_len; u8 gone_cs; u8 update_ts; }; /** + * struct hl_clk_throttle_timestamp - current/last clock throttling timestamp + * @start: timestamp taken when 'start' event is received in driver + * @end: timestamp taken when 'end' event is received in driver + */ +struct hl_clk_throttle_timestamp { + ktime_t start; + ktime_t end; +}; + +/** + * struct hl_clk_throttle - keeps current/last clock throttling timestamps + * @timestamp: timestamp taken by driver and firmware, index 0 refers to POWER + * index 1 refers to THERMAL + * @lock: protects this structure as it can be accessed from both event queue + * context and info_ioctl context + * @current_reason: bitmask represents the current clk throttling reasons + * @aggregated_reason: bitmask represents aggregated clk throttling reasons since driver load + */ +struct hl_clk_throttle { + struct 
hl_clk_throttle_timestamp timestamp[HL_CLK_THROTTLE_TYPE_MAX]; + struct mutex lock; + u32 current_reason; + u32 aggregated_reason; +}; + +/** + * struct last_error_session_info - info about last session in which CS timeout or + * razwi error occurred. + * @open_dev_timestamp: device open timestamp. + * @cs_timeout_timestamp: CS timeout timestamp. + * @razwi_timestamp: razwi timestamp. + * @cs_write_disable: if set writing to CS parameters in the structure is disabled so the + * first (root cause) CS timeout will not be overwritten. + * @razwi_write_disable: if set writing to razwi parameters in the structure is disabled so the + * first (root cause) razwi will not be overwritten. + * @cs_timeout_seq: CS timeout sequence number. + * @razwi_addr: address that caused razwi. + * @razwi_engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does + * not have engine id it will be set to U16_MAX. + * @razwi_engine_id_2: second engine id of razwi initiator. It might happen that a razwi has 2 possible + * engines, one of which caused the razwi. In that case, it will contain the + * second possible engine id, otherwise it will be set to U16_MAX. + * @razwi_non_engine_initiator: in case the initiator of the razwi does not have engine id. + * @razwi_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + */ +struct last_error_session_info { + ktime_t open_dev_timestamp; + ktime_t cs_timeout_timestamp; + ktime_t razwi_timestamp; + atomic_t cs_write_disable; + atomic_t razwi_write_disable; + u64 cs_timeout_seq; + u64 razwi_addr; + u16 razwi_engine_id_1; + u16 razwi_engine_id_2; + u8 razwi_non_engine_initiator; + u8 razwi_type; +}; + +/** + * struct hl_reset_info - holds current device reset information. + * @lock: lock to protect critical reset flows. + * @soft_reset_cnt: number of soft reset since the driver was loaded. + * @hard_reset_cnt: number of hard reset since the driver was loaded. 
+ * @hard_reset_schedule_flags: hard reset is scheduled to after current soft reset, + * here we hold the hard reset flags. + * @in_reset: is device in reset flow. + * @is_in_soft_reset: Device is currently in soft reset process. + * @needs_reset: true if reset_on_lockup is false and device should be reset + * due to lockup. + * @hard_reset_pending: is there a hard reset work pending. + * @curr_reset_cause: saves an enumerated reset cause when a hard reset is + * triggered, and cleared after it is shared with preboot. + * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden + * with a new value on next reset + * @reset_trigger_repeated: set if device reset is triggered more than once with + * same cause. + * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to + * complete instead. + */ +struct hl_reset_info { + spinlock_t lock; + u32 soft_reset_cnt; + u32 hard_reset_cnt; + u32 hard_reset_schedule_flags; + u8 in_reset; + u8 is_in_soft_reset; + u8 needs_reset; + u8 hard_reset_pending; + + u8 curr_reset_cause; + u8 prev_reset_trigger; + u8 reset_trigger_repeated; + + u8 skip_reset_on_timeout; +}; + +/** * struct hl_device - habanalabs device structure. * @pdev: pointer to PCI device, can be NULL in case of simulator device. * @pcie_bar_phys: array of available PCIe bars physical addresses. @@ -2363,7 +2504,6 @@ struct multi_cs_data { * @cdev_ctrl: char device for control operations only (INFO IOCTL) * @dev: related kernel basic device structure. * @dev_ctrl: related kernel device structure for the control device - * @work_freq: delayed work to lower device frequency if possible. * @work_heartbeat: delayed work for CPU-CP is-alive check. * @device_reset_work: delayed work which performs hard reset * @asic_name: ASIC specific name. @@ -2398,7 +2538,6 @@ struct multi_cs_data { * @asic_specific: ASIC specific information to use only from ASIC files. * @vm: virtual memory manager for MMU. 
* @hwmon_dev: H/W monitor device. - * @pm_mng_profile: current power management profile. * @hl_chip_info: ASIC's sensors information. * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. @@ -2410,8 +2549,10 @@ struct multi_cs_data { * @internal_cb_va_base: internal cb pool mmu virtual address base * @fpriv_list: list of file private data structures. Each structure is created * when a user opens the device + * @fpriv_ctrl_list: list of file private data structures. Each structure is created + * when a user opens the control device * @fpriv_list_lock: protects the fpriv_list - * @compute_ctx: current compute context executing. + * @fpriv_ctrl_list_lock: protects the fpriv_ctrl_list * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. @@ -2419,6 +2560,10 @@ struct multi_cs_data { * @pci_mem_region: array of memory regions in the PCI * @state_dump_specs: constants and dictionaries needed to dump system state. * @multi_cs_completion: array of multi-CS completion. + * @clk_throttling: holds information about current/previous clock throttling events + * @reset_info: holds current device reset information. + * @last_error: holds information about last session in which CS timeout or razwi error occurred. + * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -2434,20 +2579,17 @@ struct multi_cs_data { * device initialization. Mainly used to debug and * workaround firmware bugs * @dram_pci_bar_start: start bus address of PCIe bar towards DRAM. + * @last_successful_open_ktime: timestamp (ktime) of the last successful device open. * @last_successful_open_jif: timestamp (jiffies) of the last successful * device open. 
* @last_open_session_duration_jif: duration (jiffies) of the last device open * session. * @open_counter: number of successful device open operations. - * @in_reset: is device in reset flow. - * @curr_pll_profile: current PLL profile. + * @fw_poll_interval_usec: FW status poll interval in usec. * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. * @major: habanalabs kernel driver major. * @high_pll: high PLL profile frequency. - * @soft_reset_cnt: number of soft reset since the driver was loaded. - * @hard_reset_cnt: number of hard reset since the driver was loaded. - * @clk_throttling_reason: bitmask represents the current clk throttling reasons * @id: device minor. * @id_control: minor of the control device * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit @@ -2455,7 +2597,6 @@ struct multi_cs_data { * @disabled: is device disabled. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. - * @hard_reset_pending: is there a hard reset work pending. * @heartbeat: is heartbeat sanity check towards CPU-CP enabled. * @reset_on_lockup: true if a reset should be done in case of stuck CS, false * otherwise. @@ -2467,8 +2608,9 @@ struct multi_cs_data { * @init_done: is the initialization of the device done. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device - * @in_debug: is device under debug. This, together with fpriv_list, enforces - * that only a single user is configuring the debug infrastructure. + * @in_debug: whether the device is in a state where the profiling/tracing infrastructure + * can be used. This indication is needed because in some ASICs we need to do + * specific operations to enable that infrastructure. * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant * only to POWER9 machines. 
* @cdev_sysfs_created: were char devices and sysfs nodes created. @@ -2477,34 +2619,18 @@ struct multi_cs_data { * @sync_stream_queue_idx: helper index for sync stream queues initialization. * @collective_mon_idx: helper index for collective initialization * @supports_coresight: is CoreSight supported. - * @supports_soft_reset: is soft reset supported. - * @allow_inference_soft_reset: true if the ASIC supports soft reset that is - * initiated by user or TDR. This is only true - * in inference ASICs, as there is no real-world - * use-case of doing soft-reset in training (due - * to the fact that training runs on multiple - * devices) * @supports_cb_mapping: is mapping a CB to the device's MMU supported. - * @needs_reset: true if reset_on_lockup is false and device should be reset - * due to lockup. * @process_kill_trial_cnt: number of trials reset thread tried killing * user processes * @device_fini_pending: true if device_fini was called and might be * waiting for the reset thread to finish * @supports_staged_submission: true if staged submissions are supported - * @curr_reset_cause: saves an enumerated reset cause when a hard reset is - * triggered, and cleared after it is shared with preboot. - * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden - * with a new value on next reset - * @reset_trigger_repeated: set if device reset is triggered more than once with - * same cause. - * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to - * complete instead. * @device_cpu_is_halted: Flag to indicate whether the device CPU was already * halted. We can't halt it again because the COMMS * protocol will throw an error. Relevant only for * cases where Linux was not loaded to device CPU * @supports_wait_for_multi_cs: true if wait for multi CS is supported + * @is_compute_ctx_active: Whether there is an active compute context executing. 
*/ struct hl_device { struct pci_dev *pdev; @@ -2515,7 +2641,6 @@ struct hl_device { struct cdev cdev_ctrl; struct device *dev; struct device *dev_ctrl; - struct delayed_work work_freq; struct delayed_work work_heartbeat; struct hl_device_reset_work device_reset_work; char asic_name[HL_STR_MAX]; @@ -2546,7 +2671,6 @@ struct hl_device { void *asic_specific; struct hl_vm vm; struct device *hwmon_dev; - enum hl_pm_mng_profile pm_mng_profile; struct hwmon_chip_info *hl_chip_info; struct hl_dbg_device_entry hl_debugfs; @@ -2560,9 +2684,9 @@ struct hl_device { u64 internal_cb_va_base; struct list_head fpriv_list; + struct list_head fpriv_ctrl_list; struct mutex fpriv_list_lock; - - struct hl_ctx *compute_ctx; + struct mutex fpriv_ctrl_list_lock; struct hl_cs_counters_atomic aggregated_cs_counters; @@ -2577,6 +2701,11 @@ struct hl_device { struct multi_cs_completion multi_cs_completion[ MULTI_CS_MAX_USER_CTX]; + struct hl_clk_throttle clk_throttling; + struct last_error_session_info last_error; + + struct hl_reset_info reset_info; + u32 *stream_master_qid_arr; atomic64_t dram_used_mem; u64 timeout_jiffies; @@ -2587,21 +2716,17 @@ struct hl_device { u64 last_successful_open_jif; u64 last_open_session_duration_jif; u64 open_counter; - atomic_t in_reset; - enum hl_pll_frequency curr_pll_profile; + u64 fw_poll_interval_usec; + ktime_t last_successful_open_ktime; enum cpucp_card_types card_type; u32 major; u32 high_pll; - u32 soft_reset_cnt; - u32 hard_reset_cnt; - u32 clk_throttling_reason; u16 id; u16 id_control; u16 cpu_pci_msb_addr; u8 disabled; u8 late_init_done; u8 hwmon_initialized; - u8 hard_reset_pending; u8 heartbeat; u8 reset_on_lockup; u8 dram_default_page_mapping; @@ -2618,20 +2743,14 @@ struct hl_device { u8 sync_stream_queue_idx; u8 collective_mon_idx; u8 supports_coresight; - u8 supports_soft_reset; - u8 allow_inference_soft_reset; u8 supports_cb_mapping; - u8 needs_reset; u8 process_kill_trial_cnt; u8 device_fini_pending; u8 supports_staged_submission; - u8 
curr_reset_cause; - u8 prev_reset_trigger; - u8 reset_trigger_repeated; - u8 skip_reset_on_timeout; u8 device_cpu_is_halted; u8 supports_wait_for_multi_cs; u8 stream_master_qid_arr_size; + u8 is_compute_ctx_active; /* Parameters for bring-up */ u64 nic_ports_mask; @@ -2659,6 +2778,7 @@ struct hl_device { * wait cs are used to wait of the reserved encaps signals. * @hdev: pointer to habanalabs device structure. * @hw_sob: pointer to H/W SOB used in the reservation. + * @ctx: pointer to the user's context data structure * @cs_seq: staged cs sequence which contains encapsulated signals * @id: idr handler id to be used to fetch the handler info * @q_idx: stream queue index @@ -2669,6 +2789,7 @@ struct hl_cs_encaps_sig_handle { struct kref refcount; struct hl_device *hdev; struct hl_hw_sob *hw_sob; + struct hl_ctx *ctx; u64 cs_seq; u32 id; u32 q_idx; @@ -2757,21 +2878,9 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size, static inline bool hl_mem_area_crosses_range(u64 address, u32 size, u64 range_start_address, u64 range_end_address) { - u64 end_address = address + size; + u64 end_address = address + size - 1; - if ((address >= range_start_address) && - (address < range_end_address)) - return true; - - if ((end_address >= range_start_address) && - (end_address < range_end_address)) - return true; - - if ((address < range_start_address) && - (end_address >= range_end_address)) - return true; - - return false; + return ((address <= range_end_address) && (range_start_address <= end_address)); } int hl_device_open(struct inode *inode, struct file *filp); @@ -2779,10 +2888,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_operational(struct hl_device *hdev, enum hl_device_status *status); enum hl_device_status hl_device_status(struct hl_device *hdev); -int hl_device_set_debug_mode(struct hl_device *hdev, bool enable); -int create_hdev(struct hl_device **dev, struct pci_dev *pdev, - enum hl_asic_type asic_type, int minor); 
-void destroy_hdev(struct hl_device *hdev); +int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable); int hl_hw_queues_create(struct hl_device *hdev); void hl_hw_queues_destroy(struct hl_device *hdev); int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, @@ -2821,6 +2927,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx); void hl_ctx_do_release(struct kref *ref); void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); int hl_ctx_put(struct hl_ctx *ctx); +struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev); struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr, struct hl_fence **fence, u32 arr_len); @@ -2834,7 +2941,6 @@ int hl_device_resume(struct hl_device *hdev); int hl_device_reset(struct hl_device *hdev, u32 flags); void hl_hpriv_get(struct hl_fpriv *hpriv); int hl_hpriv_put(struct hl_fpriv *hpriv); -int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); int hl_device_utilization(struct hl_device *hdev, u32 *utilization); int hl_build_hwmon_channel_info(struct hl_device *hdev, @@ -2915,6 +3021,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 size); int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size); +int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags); +int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, + u32 flags, u32 asid, u64 va, u64 size); void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); int hl_mmu_if_set_funcs(struct hl_device *hdev); @@ -2969,6 +3078,10 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, struct fw_load_mgr *fw_loader, enum comms_cmd cmd, unsigned int size, bool wait_ok, u32 timeout); +int hl_fw_dram_replaced_row_get(struct hl_device 
*hdev, + struct cpucp_hbm_row_info *info); +int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num); +int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 949d1b5c5c41..690b763c7a95 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -153,15 +153,7 @@ int hl_device_open(struct inode *inode, struct file *filp) goto out_err; } - if (hdev->in_debug) { - dev_err_ratelimited(hdev->dev, - "Can't open %s because it is being debugged by another user\n", - dev_name(hdev->dev)); - rc = -EPERM; - goto out_err; - } - - if (hdev->compute_ctx) { + if (hdev->is_compute_ctx_active) { dev_dbg_ratelimited(hdev->dev, "Can't open %s because another user is working on it\n", dev_name(hdev->dev)); @@ -175,20 +167,17 @@ int hl_device_open(struct inode *inode, struct file *filp) goto out_err; } - /* Device is IDLE at this point so it is legal to change PLLs. 
- * There is no need to check anything because if the PLL is - * already HIGH, the set function will return without doing - * anything - */ - hl_device_set_frequency(hdev, PLL_HIGH); - list_add(&hpriv->dev_node, &hdev->fpriv_list); mutex_unlock(&hdev->fpriv_list_lock); hl_debugfs_add_file(hpriv); + atomic_set(&hdev->last_error.cs_write_disable, 0); + atomic_set(&hdev->last_error.razwi_write_disable, 0); + hdev->open_counter++; hdev->last_successful_open_jif = jiffies; + hdev->last_successful_open_ktime = ktime_get(); return 0; @@ -231,12 +220,11 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) hpriv->hdev = hdev; filp->private_data = hpriv; hpriv->filp = filp; - hpriv->is_control = true; nonseekable_open(inode, filp); hpriv->taskpid = find_get_pid(current->pid); - mutex_lock(&hdev->fpriv_list_lock); + mutex_lock(&hdev->fpriv_ctrl_list_lock); if (!hl_device_operational(hdev, NULL)) { dev_err_ratelimited(hdev->dev_ctrl, @@ -246,13 +234,13 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) goto out_err; } - list_add(&hpriv->dev_node, &hdev->fpriv_list); - mutex_unlock(&hdev->fpriv_list_lock); + list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); return 0; out_err: - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); filp->private_data = NULL; put_pid(hpriv->taskpid); @@ -263,6 +251,7 @@ out_err: static void set_driver_behavior_per_device(struct hl_device *hdev) { + hdev->pldm = 0; hdev->fw_components = FW_TYPE_ALL_TYPES; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; @@ -279,23 +268,53 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->axi_drain = 0; } -/* +static void copy_kernel_module_params_to_device(struct hl_device *hdev) +{ + hdev->major = hl_major; + hdev->memory_scrub = memory_scrub; + hdev->reset_on_lockup = reset_on_lockup; + hdev->boot_error_status_mask = boot_error_status_mask; + + if (timeout_locked) + 
hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); + else + hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; + +} + +static int fixup_device_params(struct hl_device *hdev) +{ + hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); + + hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; + + hdev->stop_on_err = true; + hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; + hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; + + /* Enable only after the initialization of the device */ + hdev->disabled = true; + + /* Set default DMA mask to 32 bits */ + hdev->dma_mask = 32; + + return 0; +} + +/** * create_hdev - create habanalabs device instance * * @dev: will hold the pointer to the new habanalabs device structure * @pdev: pointer to the pci device - * @asic_type: in case of simulator device, which device is it - * @minor: in case of simulator device, the minor of the device * * Allocate memory for habanalabs device and initialize basic fields * Identify the ASIC type * Allocate ID (minor) for the device (only for real devices) */ -int create_hdev(struct hl_device **dev, struct pci_dev *pdev, - enum hl_asic_type asic_type, int minor) +static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) { + int main_id, ctrl_id = 0, rc = 0; struct hl_device *hdev; - int rc, main_id, ctrl_id = 0; *dev = NULL; @@ -303,69 +322,39 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, if (!hdev) return -ENOMEM; - /* First, we must find out which ASIC are we handling. 
This is needed - * to configure the behavior of the driver (kernel parameters) - */ - if (pdev) { - hdev->asic_type = get_asic_type(pdev->device); - if (hdev->asic_type == ASIC_INVALID) { - dev_err(&pdev->dev, "Unsupported ASIC\n"); - rc = -ENODEV; - goto free_hdev; - } - } else { - hdev->asic_type = asic_type; - } - - if (pdev) - hdev->asic_prop.fw_security_enabled = - is_asic_secured(hdev->asic_type); - else - hdev->asic_prop.fw_security_enabled = false; + /* can be NULL in case of simulator device */ + hdev->pdev = pdev; /* Assign status description string */ - strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], - "operational", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], - "in reset", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], - "disabled", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], - "needs reset", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], "in device creation", HL_STR_MAX); - hdev->major = hl_major; - hdev->reset_on_lockup = reset_on_lockup; - hdev->memory_scrub = memory_scrub; - hdev->boot_error_status_mask = boot_error_status_mask; - hdev->stop_on_err = true; + /* First, we must find out which ASIC are we handling. 
This is needed + * to configure the behavior of the driver (kernel parameters) + */ + hdev->asic_type = get_asic_type(pdev->device); + if (hdev->asic_type == ASIC_INVALID) { + dev_err(&pdev->dev, "Unsupported ASIC\n"); + rc = -ENODEV; + goto free_hdev; + } - hdev->pldm = 0; + copy_kernel_module_params_to_device(hdev); set_driver_behavior_per_device(hdev); - hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; - hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; - - if (timeout_locked) - hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); - else - hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; - - hdev->disabled = true; - hdev->pdev = pdev; /* can be NULL in case of simulator device */ - - /* Set default DMA mask to 32 bits */ - hdev->dma_mask = 32; + fixup_device_params(hdev); mutex_lock(&hl_devs_idr_lock); /* Always save 2 numbers, 1 for main device and 1 for control. * They must be consecutive */ - main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, - GFP_KERNEL); + main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); if (main_id >= 0) ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, @@ -405,7 +394,7 @@ free_hdev: * @dev: pointer to the habanalabs device structure * */ -void destroy_hdev(struct hl_device *hdev) +static void destroy_hdev(struct hl_device *hdev) { /* Remove device from the device list */ mutex_lock(&hl_devs_idr_lock); @@ -444,7 +433,7 @@ static int hl_pmops_resume(struct device *dev) return hl_device_resume(hdev); } -/* +/** * hl_pci_probe - probe PCI habanalabs devices * * @pdev: pointer to pci device @@ -454,8 +443,7 @@ static int hl_pmops_resume(struct device *dev) * Create a new habanalabs device and initialize it according to the * device's type */ -static int hl_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct hl_device *hdev; int rc; @@ -464,7 +452,7 @@ static int hl_pci_probe(struct pci_dev *pdev, " 
device found [%04x:%04x] (rev %x)\n", (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); - rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1); + rc = create_hdev(&hdev, pdev); if (rc) return rc; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 86c3257d9ae1..3ba3a8ffda3e 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -158,7 +158,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; } -static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) +static int debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, struct hl_debug_args *args) { struct hl_debug_params *params; void *input = NULL, *output = NULL; @@ -200,7 +200,7 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) params->output_size = args->output_size; } - rc = hdev->asic_funcs->debug_coresight(hdev, params); + rc = hdev->asic_funcs->debug_coresight(hdev, ctx, params); if (rc) { dev_err(hdev->dev, "debug coresight operation failed %d\n", rc); @@ -269,8 +269,8 @@ static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - reset_count.hard_reset_cnt = hdev->hard_reset_cnt; - reset_count.soft_reset_cnt = hdev->soft_reset_cnt; + reset_count.hard_reset_cnt = hdev->reset_info.hard_reset_cnt; + reset_count.soft_reset_cnt = hdev->reset_info.soft_reset_cnt; return copy_to_user(out, &reset_count, min((size_t) max_size, sizeof(reset_count))) ? 
-EFAULT : 0; @@ -313,15 +313,38 @@ static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { + void __user *out = (void __user *) (uintptr_t) args->return_pointer; struct hl_device *hdev = hpriv->hdev; struct hl_info_clk_throttle clk_throttle = {0}; + ktime_t end_time, zero_time = ktime_set(0, 0); u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int i; if ((!max_size) || (!out)) return -EINVAL; - clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason; + mutex_lock(&hdev->clk_throttling.lock); + + clk_throttle.clk_throttling_reason = hdev->clk_throttling.current_reason; + + for (i = 0 ; i < HL_CLK_THROTTLE_TYPE_MAX ; i++) { + if (!(hdev->clk_throttling.aggregated_reason & BIT(i))) + continue; + + clk_throttle.clk_throttling_timestamp_us[i] = + ktime_to_us(hdev->clk_throttling.timestamp[i].start); + + if (ktime_compare(hdev->clk_throttling.timestamp[i].end, zero_time)) + end_time = hdev->clk_throttling.timestamp[i].end; + else + end_time = ktime_get(); + + clk_throttle.clk_throttling_duration_ns[i] = + ktime_to_ns(ktime_sub(end_time, + hdev->clk_throttling.timestamp[i].start)); + + } + mutex_unlock(&hdev->clk_throttling.lock); return copy_to_user(out, &clk_throttle, min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0; @@ -480,6 +503,94 @@ static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args) min((size_t) max_size, sizeof(open_stats_info))) ? 
-EFAULT : 0; } +static int dram_pending_rows_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + u32 pend_rows_num = 0; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_dram_pending_row_get(hdev, &pend_rows_num); + if (rc) + return rc; + + return copy_to_user(out, &pend_rows_num, + min_t(size_t, max_size, sizeof(pend_rows_num))) ? -EFAULT : 0; +} + +static int dram_replaced_rows_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct cpucp_hbm_row_info info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_dram_replaced_row_get(hdev, &info); + if (rc) + return rc; + + return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; +} + +static int last_err_open_dev_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_info_last_err_open_dev_time info = {0}; + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + info.timestamp = ktime_to_ns(hdev->last_error.open_dev_timestamp); + + return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; +} + +static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_info_cs_timeout_event info = {0}; + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + info.seq = hdev->last_error.cs_timeout_seq; + info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout_timestamp); + + return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; +} + +static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_info_razwi_event info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + info.timestamp = ktime_to_ns(hdev->last_error.razwi_timestamp); + info.addr = hdev->last_error.razwi_addr; + info.engine_id_1 = hdev->last_error.razwi_engine_id_1; + info.engine_id_2 = hdev->last_error.razwi_engine_id_2; + info.no_engine_id = hdev->last_error.razwi_non_engine_initiator; + info.error_type = hdev->last_error.razwi_type; + + return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -503,6 +614,33 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_RESET_COUNT: return get_reset_count(hdev, args); + case HL_INFO_HW_EVENTS: + return hw_events_info(hdev, false, args); + + case HL_INFO_HW_EVENTS_AGGREGATE: + return hw_events_info(hdev, true, args); + + case HL_INFO_CS_COUNTERS: + return cs_counters_info(hpriv, args); + + case HL_INFO_CLK_THROTTLE_REASON: + return clk_throttle_info(hpriv, args); + + case HL_INFO_SYNC_MANAGER: + return sync_manager_info(hpriv, args); + + case HL_INFO_OPEN_STATS: + return open_stats_info(hpriv, args); + + case HL_INFO_LAST_ERR_OPEN_DEV_TIME: + return last_err_open_dev_info(hpriv, args); + + case HL_INFO_CS_TIMEOUT_EVENT: + return cs_timeout_info(hpriv, args); + + case HL_INFO_RAZWI_EVENT: + return razwi_info(hpriv, args); + default: break; } @@ -515,10 +653,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, } switch (args->op) { - case HL_INFO_HW_EVENTS: - rc = hw_events_info(hdev, false, args); - break; - case HL_INFO_DRAM_USAGE: rc = dram_usage_info(hpriv, args); break; @@ -531,10 +665,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, rc = device_utilization(hdev, args); break; - case HL_INFO_HW_EVENTS_AGGREGATE: - rc = hw_events_info(hdev, true, args); - break; - case HL_INFO_CLK_RATE: rc = get_clk_rate(hdev, args); break; @@ -542,18 +672,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_TIME_SYNC: return time_sync_info(hdev, args); - case HL_INFO_CS_COUNTERS: - return cs_counters_info(hpriv, args); - case HL_INFO_PCI_COUNTERS: return pci_counters_info(hpriv, args); - case HL_INFO_CLK_THROTTLE_REASON: - return clk_throttle_info(hpriv, args); - - case HL_INFO_SYNC_MANAGER: - return sync_manager_info(hpriv, args); - case HL_INFO_TOTAL_ENERGY: return total_energy_consumption_info(hpriv, args); @@ -563,12 +684,16 @@ static int 
_hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_POWER: return power_info(hpriv, args); - case HL_INFO_OPEN_STATS: - return open_stats_info(hpriv, args); + + case HL_INFO_DRAM_REPLACED_ROWS: + return dram_replaced_rows_info(hpriv, args); + + case HL_INFO_DRAM_PENDING_ROWS: + return dram_pending_rows_info(hpriv, args); default: dev_err(dev, "Invalid request %d\n", args->op); - rc = -ENOTTY; + rc = -EINVAL; break; } @@ -613,16 +738,17 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) "Rejecting debug configuration request because device not in debug mode\n"); return -EFAULT; } - args->input_size = - min(args->input_size, hl_debug_struct_size[args->op]); - rc = debug_coresight(hdev, args); + args->input_size = min(args->input_size, hl_debug_struct_size[args->op]); + rc = debug_coresight(hdev, hpriv->ctx, args); break; + case HL_DEBUG_OP_SET_MODE: - rc = hl_device_set_debug_mode(hdev, (bool) args->enable); + rc = hl_device_set_debug_mode(hdev, hpriv->ctx, (bool) args->enable); break; + default: dev_err(hdev->dev, "Invalid request %d\n", args->op); - rc = -ENOTTY; + rc = -EINVAL; break; } @@ -649,7 +775,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, const struct hl_ioctl_desc *ioctl, struct device *dev) { struct hl_fpriv *hpriv = filep->private_data; - struct hl_device *hdev = hpriv->hdev; unsigned int nr = _IOC_NR(cmd); char stack_kdata[128] = {0}; char *kdata = NULL; @@ -658,12 +783,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, u32 hl_size; int retcode; - if (hdev->hard_reset_pending) { - dev_crit_ratelimited(dev, - "Device HARD reset pending! 
Please close FD\n"); - return -ENODEV; - } - /* Do not trust userspace, use our own definition */ func = ioctl->func; diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 0743319b10c7..6103e479e855 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -429,6 +429,9 @@ static int init_signal_cs(struct hl_device *hdev, rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1, false); + job->cs->sob_addr_offset = hw_sob->sob_addr; + job->cs->initial_sob_count = prop->next_sob_val - 1; + return rc; } @@ -571,7 +574,7 @@ static int encaps_sig_first_staged_cs_handler struct hl_encaps_signals_mgr *mgr; int rc = 0; - mgr = &hdev->compute_ctx->sig_mgr; + mgr = &cs->ctx->sig_mgr; spin_lock(&mgr->lock); encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id); diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index e33f65be8a00..57f5d2c48330 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -10,17 +10,148 @@ #include <linux/pci.h> #include <linux/hwmon.h> -#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) +#define HWMON_NR_SENSOR_TYPES (hwmon_max) -int hl_build_hwmon_channel_info(struct hl_device *hdev, - struct cpucp_sensor *sensors_arr) +#ifdef _HAS_HWMON_HWMON_T_ENABLE + +static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types type, + u32 cpucp_flags) { - u32 counts[HWMON_NR_SENSOR_TYPES] = {0}; - u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL}; + u32 flags; + + switch (type) { + case hwmon_temp: + flags = (cpucp_flags << 1) | HWMON_T_ENABLE; + break; + + case hwmon_in: + flags = (cpucp_flags << 1) | HWMON_I_ENABLE; + break; + + case hwmon_curr: + flags = (cpucp_flags << 1) | HWMON_C_ENABLE; + break; + + case hwmon_fan: + flags = (cpucp_flags << 1) | HWMON_F_ENABLE; + break; + + case hwmon_power: + flags = (cpucp_flags << 1) | 
HWMON_P_ENABLE; + break; + + case hwmon_pwm: + /* enable bit was here from day 1, so no need to adjust */ + flags = cpucp_flags; + break; + + default: + dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type); + flags = cpucp_flags; + break; + } + + return flags; +} + +static u32 fixup_attr_legacy_fw(u32 attr) +{ + return (attr - 1); +} + +#else + +static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types type, + u32 cpucp_flags) +{ + return cpucp_flags; +} + +static u32 fixup_attr_legacy_fw(u32 attr) +{ + return attr; +} + +#endif /* !_HAS_HWMON_HWMON_T_ENABLE */ + +static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types type, u32 cpucp_flags) +{ + u32 flags, cpucp_input_val; + bool use_cpucp_enum; + + use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & + CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false; + + /* If f/w is using it's own enum, we need to check if the properties values are aligned. + * If not, it means we need to adjust the values to the new format that is used in the + * kernel since 5.6 (enum values were incremented by 1 by adding a new enable value). 
+ */ + if (use_cpucp_enum) { + switch (type) { + case hwmon_temp: + cpucp_input_val = cpucp_temp_input; + if (cpucp_input_val == hwmon_temp_input) + flags = cpucp_flags; + else + flags = (cpucp_flags << 1) | HWMON_T_ENABLE; + break; + + case hwmon_in: + cpucp_input_val = cpucp_in_input; + if (cpucp_input_val == hwmon_in_input) + flags = cpucp_flags; + else + flags = (cpucp_flags << 1) | HWMON_I_ENABLE; + break; + + case hwmon_curr: + cpucp_input_val = cpucp_curr_input; + if (cpucp_input_val == hwmon_curr_input) + flags = cpucp_flags; + else + flags = (cpucp_flags << 1) | HWMON_C_ENABLE; + break; + + case hwmon_fan: + cpucp_input_val = cpucp_fan_input; + if (cpucp_input_val == hwmon_fan_input) + flags = cpucp_flags; + else + flags = (cpucp_flags << 1) | HWMON_F_ENABLE; + break; + + case hwmon_pwm: + /* enable bit was here from day 1, so no need to adjust */ + flags = cpucp_flags; + break; + + case hwmon_power: + cpucp_input_val = CPUCP_POWER_INPUT; + if (cpucp_input_val == hwmon_power_input) + flags = cpucp_flags; + else + flags = (cpucp_flags << 1) | HWMON_P_ENABLE; + break; + + default: + dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type); + flags = cpucp_flags; + break; + } + } else { + flags = fixup_flags_legacy_fw(hdev, type, cpucp_flags); + } + + return flags; +} + +int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr) +{ + u32 num_sensors_for_type, flags, num_active_sensor_types = 0, arr_size = 0, *curr_arr; u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0}; + u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL}; struct hwmon_channel_info **channels_info; - u32 num_sensors_for_type, num_active_sensor_types = 0, - arr_size = 0, *curr_arr; + u32 counts[HWMON_NR_SENSOR_TYPES] = {0}; enum hwmon_sensor_types type; int rc, i, j; @@ -31,8 +162,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, break; if (type >= HWMON_NR_SENSOR_TYPES) { - dev_err(hdev->dev, - "Got wrong sensor type %d from device\n", 
type); + dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type); return -EINVAL; } @@ -45,8 +175,9 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, continue; num_sensors_for_type = counts[i] + 1; - curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr), - GFP_KERNEL); + dev_dbg(hdev->dev, "num_sensors_for_type %d = %d\n", i, num_sensors_for_type); + + curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr), GFP_KERNEL); if (!curr_arr) { rc = -ENOMEM; goto sensors_type_err; @@ -59,20 +190,18 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, for (i = 0 ; i < arr_size ; i++) { type = le32_to_cpu(sensors_arr[i].type); curr_arr = sensors_by_type[type]; - curr_arr[sensors_by_type_next_index[type]++] = - le32_to_cpu(sensors_arr[i].flags); + flags = adjust_hwmon_flags(hdev, type, le32_to_cpu(sensors_arr[i].flags)); + curr_arr[sensors_by_type_next_index[type]++] = flags; } - channels_info = kcalloc(num_active_sensor_types + 1, - sizeof(*channels_info), GFP_KERNEL); + channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL); if (!channels_info) { rc = -ENOMEM; goto channels_info_array_err; } for (i = 0 ; i < num_active_sensor_types ; i++) { - channels_info[i] = kzalloc(sizeof(*channels_info[i]), - GFP_KERNEL); + channels_info[i] = kzalloc(sizeof(*channels_info[i]), GFP_KERNEL); if (!channels_info[i]) { rc = -ENOMEM; goto channel_info_err; @@ -88,18 +217,19 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, j++; } - hdev->hl_chip_info->info = - (const struct hwmon_channel_info **)channels_info; + hdev->hl_chip_info->info = (const struct hwmon_channel_info **)channels_info; return 0; channel_info_err: - for (i = 0 ; i < num_active_sensor_types ; i++) + for (i = 0 ; i < num_active_sensor_types ; i++) { if (channels_info[i]) { kfree(channels_info[i]->config); kfree(channels_info[i]); } + } kfree(channels_info); + channels_info_array_err: sensors_type_err: for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) 
@@ -112,14 +242,16 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct hl_device *hdev = dev_get_drvdata(dev); - int rc; + bool use_cpucp_enum; u32 cpucp_attr; - bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & - CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false; + int rc; if (!hl_device_operational(hdev, NULL)) return -ENODEV; + use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & + CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false; + switch (type) { case hwmon_temp: switch (attr) { @@ -151,7 +283,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) rc = hl_get_temperature(hdev, channel, cpucp_attr, val); else - rc = hl_get_temperature(hdev, channel, attr, val); + rc = hl_get_temperature(hdev, channel, fixup_attr_legacy_fw(attr), val); break; case hwmon_in: switch (attr) { @@ -174,7 +306,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) rc = hl_get_voltage(hdev, channel, cpucp_attr, val); else - rc = hl_get_voltage(hdev, channel, attr, val); + rc = hl_get_voltage(hdev, channel, fixup_attr_legacy_fw(attr), val); break; case hwmon_curr: switch (attr) { @@ -197,7 +329,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) rc = hl_get_current(hdev, channel, cpucp_attr, val); else - rc = hl_get_current(hdev, channel, attr, val); + rc = hl_get_current(hdev, channel, fixup_attr_legacy_fw(attr), val); break; case hwmon_fan: switch (attr) { @@ -217,7 +349,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val); else - rc = hl_get_fan_speed(hdev, channel, attr, val); + rc = hl_get_fan_speed(hdev, channel, fixup_attr_legacy_fw(attr), val); break; case hwmon_pwm: switch (attr) { @@ -234,6 +366,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, if 
(use_cpucp_enum) rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val); else + /* no need for fixup as pwm was aligned from day 1 */ rc = hl_get_pwm_info(hdev, channel, attr, val); break; case hwmon_power: @@ -251,7 +384,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) rc = hl_get_power(hdev, channel, cpucp_attr, val); else - rc = hl_get_power(hdev, channel, attr, val); + rc = hl_get_power(hdev, channel, fixup_attr_legacy_fw(attr), val); break; default: return -EINVAL; @@ -286,7 +419,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) hl_set_temperature(hdev, channel, cpucp_attr, val); else - hl_set_temperature(hdev, channel, attr, val); + hl_set_temperature(hdev, channel, fixup_attr_legacy_fw(attr), val); break; case hwmon_pwm: switch (attr) { @@ -303,6 +436,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) hl_set_pwm_info(hdev, channel, cpucp_attr, val); else + /* no need for fixup as pwm was aligned from day 1 */ hl_set_pwm_info(hdev, channel, attr, val); break; case hwmon_in: @@ -317,7 +451,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) hl_set_voltage(hdev, channel, cpucp_attr, val); else - hl_set_voltage(hdev, channel, attr, val); + hl_set_voltage(hdev, channel, fixup_attr_legacy_fw(attr), val); break; case hwmon_curr: switch (attr) { @@ -331,7 +465,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) hl_set_current(hdev, channel, cpucp_attr, val); else - hl_set_current(hdev, channel, attr, val); + hl_set_current(hdev, channel, fixup_attr_legacy_fw(attr), val); break; case hwmon_power: switch (attr) { @@ -345,7 +479,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, if (use_cpucp_enum) hl_set_power(hdev, channel, cpucp_attr, val); else - hl_set_power(hdev, channel, attr, val); + hl_set_power(hdev, channel, 
fixup_attr_legacy_fw(attr), val); break; default: return -EINVAL; @@ -444,6 +578,9 @@ int hl_get_temperature(struct hl_device *hdev, pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); + dev_dbg(hdev->dev, "get temp, ctl 0x%x, sensor %d, type %d\n", + pkt.ctl, pkt.sensor_index, pkt.type); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); @@ -677,12 +814,18 @@ int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value) { struct cpucp_packet pkt; + struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << + if (prop->use_get_power_for_reset_history) + pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + else + pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index 96d82b682674..1b6bdc900c26 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -145,8 +145,12 @@ static void handle_user_cq(struct hl_device *hdev, spin_lock(&user_cq->wait_list_lock); list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) { - pend->fence.timestamp = now; - complete_all(&pend->fence.completion); + if ((pend->cq_kernel_addr && + *(pend->cq_kernel_addr) >= pend->cq_target_value) || + !pend->cq_kernel_addr) { + pend->fence.timestamp = now; + complete_all(&pend->fence.completion); + } } spin_unlock(&user_cq->wait_list_lock); } @@ -245,10 +249,8 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) */ dma_rmb(); - if (hdev->disabled) { - dev_warn(hdev->dev, - "Device disabled but received IRQ %d for EQ\n", - irq); + if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) { + dev_warn(hdev->dev, "Device disabled 
but received an EQ event\n"); goto skip_irq; } diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 9bd626a00de3..c1eefaebacb6 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -316,7 +316,7 @@ static int free_phys_pg_pack(struct hl_device *hdev, } if (rc && !hdev->disabled) - hl_device_reset(hdev, HL_RESET_HARD); + hl_device_reset(hdev, HL_DRV_RESET_HARD); end: kvfree(phys_pg_pack->pages); @@ -477,7 +477,7 @@ static int add_va_block_locked(struct hl_device *hdev, struct list_head *va_list, u64 start, u64 end) { struct hl_vm_va_block *va_block, *res = NULL; - u64 size = end - start; + u64 size = end - start + 1; print_va_list_locked(hdev, va_list); @@ -518,7 +518,7 @@ static int add_va_block_locked(struct hl_device *hdev, /** * add_va_block() - wrapper for add_va_block_locked. * @hdev: pointer to the habanalabs device structure. - * @va_list: pointer to the virtual addresses block list. + * @va_range: pointer to the virtual addresses range object. * @start: start virtual address. * @end: end virtual address. * @@ -538,8 +538,11 @@ static inline int add_va_block(struct hl_device *hdev, } /** - * is_hint_crossing_range() - check if hint address crossing specified reserved - * range. + * is_hint_crossing_range() - check if hint address crossing specified reserved. + * @range_type: virtual space range type. + * @start_addr: start virtual address. + * @size: block size. + * @prop: asic properties structure to retrieve reserved ranges from. 
*/ static inline bool is_hint_crossing_range(enum hl_va_range_type range_type, u64 start_addr, u32 size, struct asic_fixed_properties *prop) { @@ -644,7 +647,7 @@ static u64 get_va_block(struct hl_device *hdev, continue; } - valid_size = va_block->end - valid_start; + valid_size = va_block->end - valid_start + 1; if (valid_size < size) continue; @@ -707,7 +710,7 @@ static u64 get_va_block(struct hl_device *hdev, if (new_va_block->size > size) { new_va_block->start += size; - new_va_block->size = new_va_block->end - new_va_block->start; + new_va_block->size = new_va_block->end - new_va_block->start + 1; } else { list_del(&new_va_block->node); kfree(new_va_block); @@ -749,6 +752,7 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, /** * hl_get_va_range_type() - get va_range type for the given address and size. + * @ctx: context to fetch va_range from. * @address: the start address of the area we want to validate. * @size: the size in bytes of the area we want to validate. * @type: returned va_range type. @@ -776,8 +780,8 @@ static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size, * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block. * @hdev: pointer to the habanalabs device structure * @ctx: pointer to the context structure. - * @start: start virtual address. - * @end: end virtual address. + * @start_addr: start virtual address. + * @size: number of bytes to unreserve. * * This function does the following: * - Takes the list lock and calls add_va_block_locked. 
@@ -1201,17 +1205,13 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto map_err; } - rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false, - *vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size); + rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, + ctx->asid, ret_vaddr, phys_pg_pack->total_size); mutex_unlock(&ctx->mmu_lock); - if (rc) { - dev_err(hdev->dev, - "mapping handle %u failed due to MMU cache invalidation\n", - handle); + if (rc) goto map_err; - } ret_vaddr += phys_pg_pack->offset; @@ -1349,9 +1349,8 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, * at the loop end rather than for each iteration */ if (!ctx_free) - rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true, - *vm_type, ctx->asid, vaddr, - phys_pg_pack->total_size); + rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr, + phys_pg_pack->total_size); mutex_unlock(&ctx->mmu_lock); @@ -1364,11 +1363,6 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, if (!ctx_free) { int tmp_rc; - if (rc) - dev_err(hdev->dev, - "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n", - vaddr); - tmp_rc = add_va_block(hdev, va_range, vaddr, vaddr + phys_pg_pack->total_size - 1); if (tmp_rc) { @@ -2037,7 +2031,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) default: dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); - rc = -ENOTTY; + rc = -EINVAL; break; } @@ -2162,7 +2156,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) default: dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); - rc = -ENOTTY; + rc = -EINVAL; break; } @@ -2339,6 +2333,8 @@ void hl_userptr_delete_list(struct hl_device *hdev, /** * hl_userptr_is_pinned() - returns whether the given userptr is pinned. * @hdev: pointer to the habanalabs device structure. + * @addr: user address to check. + * @size: user block size to check. 
* @userptr_list: pointer to the list to clear. * @userptr: pointer to userptr to check. * @@ -2361,9 +2357,10 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, /** * va_range_init() - initialize virtual addresses range. * @hdev: pointer to the habanalabs device structure. - * @va_range: pointer to the range to initialize. + * @va_ranges: pointer to va_ranges array. * @start: range start address. * @end: range end address. + * @page_size: page size for this va_range. * * This function does the following: * - Initializes the virtual addresses list of the given range with the given @@ -2388,8 +2385,14 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, start += PAGE_SIZE; } - if (end & (PAGE_SIZE - 1)) - end &= PAGE_MASK; + /* + * The end of the range is inclusive, hence we need to align it + * to the end of the last full page in the range. For example if + * end = 0x3ff5 with page size 0x1000, we need to align it to + * 0x2fff. The remaining 0xff6 bytes do not form a full page. + */ + if ((end + 1) & (PAGE_SIZE - 1)) + end = ((end + 1) & PAGE_MASK) - 1; } if (start >= end) { @@ -2414,7 +2417,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, /** * va_range_fini() - clear a virtual addresses range. * @hdev: pointer to the habanalabs structure. - * va_range: pointer to virtual addresses rang.e + * @va_range: pointer to virtual addresses range. * * This function does the following: * - Frees the virtual addresses block list and its lock. @@ -2434,12 +2437,15 @@ static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range) * @ctx: pointer to the habanalabs context structure. * @host_range_start: host virtual addresses range start. * @host_range_end: host virtual addresses range end. + * @host_page_size: host page size. * @host_huge_range_start: host virtual addresses range start for memory * allocated with huge pages. 
* @host_huge_range_end: host virtual addresses range end for memory allocated * with huge pages. + * @host_huge_page_size: host huge page size. * @dram_range_start: dram virtual addresses range start. * @dram_range_end: dram virtual addresses range end. + * @dram_page_size: dram page size. * * This function initializes the following: * - MMU for context. @@ -2564,14 +2570,14 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) return 0; dram_range_start = prop->dmmu.start_addr; - dram_range_end = prop->dmmu.end_addr; + dram_range_end = prop->dmmu.end_addr - 1; dram_page_size = prop->dram_page_size ? prop->dram_page_size : prop->dmmu.page_size; host_range_start = prop->pmmu.start_addr; - host_range_end = prop->pmmu.end_addr; + host_range_end = prop->pmmu.end_addr - 1; host_page_size = prop->pmmu.page_size; host_huge_range_start = prop->pmmu_huge.start_addr; - host_huge_range_end = prop->pmmu_huge.end_addr; + host_huge_range_end = prop->pmmu_huge.end_addr - 1; host_huge_page_size = prop->pmmu_huge.page_size; return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, @@ -2618,7 +2624,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) * Clearly something went wrong on hard reset so no point in printing * another side effect error */ - if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) + if (!hdev->reset_info.hard_reset_pending && !hash_empty(ctx->mem_hash)) dev_dbg(hdev->dev, "user released device without removing its memory mappings\n"); @@ -2633,8 +2639,8 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) mutex_lock(&ctx->mmu_lock); /* invalidate the cache once after the unmapping loop */ - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK); mutex_unlock(&ctx->mmu_lock); diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 
aa96917f62e5..9153a1f55175 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -637,3 +637,28 @@ u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr) { return addr; } + +int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) +{ + int rc; + + rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); + if (rc) + dev_err_ratelimited(hdev->dev, "MMU cache invalidation failed\n"); + + return rc; +} + +int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, + u32 flags, u32 asid, u64 va, u64 size) +{ + int rc; + + rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags, + asid, va, size); + if (rc) + dev_err_ratelimited(hdev->dev, "MMU cache range invalidation failed\n"); + + return rc; +} + diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c index 0f536f79dd9c..6134b6ae7615 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c @@ -269,7 +269,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) num_of_hop3 = prop->dram_size_for_default_page_mapping; do_div(num_of_hop3, prop->dram_page_size); - do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); + do_div(num_of_hop3, HOP_PTE_ENTRIES_512); /* add hop1 and hop2 */ total_hops = num_of_hop3 + 2; @@ -330,7 +330,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) for (i = 0 ; i < num_of_hop3 ; i++) { hop3_pte_addr = ctx->dram_default_hops[i]; - for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { + for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) { write_final_pte(ctx, hop3_pte_addr, pte_val); get_pte(ctx, ctx->dram_default_hops[i]); hop3_pte_addr += HL_PTE_SIZE; @@ -369,7 +369,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) num_of_hop3 = prop->dram_size_for_default_page_mapping; do_div(num_of_hop3, prop->dram_page_size); - do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); + do_div(num_of_hop3, 
HOP_PTE_ENTRIES_512); hop0_addr = get_hop0_addr(ctx); /* add hop1 and hop2 */ @@ -379,7 +379,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) for (i = 0 ; i < num_of_hop3 ; i++) { hop3_pte_addr = ctx->dram_default_hops[i]; - for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { + for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) { clear_pte(ctx, hop3_pte_addr); put_pte(ctx, ctx->dram_default_hops[i]); hop3_pte_addr += HL_PTE_SIZE; @@ -573,7 +573,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; - is_huge = curr_pte & LAST_MASK; + is_huge = curr_pte & mmu_prop->last_mask; if (is_dram_addr && !is_huge) { dev_err(hdev->dev, @@ -597,7 +597,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - HOP_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask | PAGE_PRESENT_MASK; if (curr_pte == default_pte) { dev_err(hdev->dev, @@ -729,7 +729,7 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - HOP_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask | PAGE_PRESENT_MASK; if (curr_pte != default_pte) { @@ -769,7 +769,7 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, goto err; } - curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK + curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask | PAGE_PRESENT_MASK; if (is_huge) @@ -930,7 +930,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) return -EFAULT; - if (hops->hop_info[i].hop_pte_val & LAST_MASK) + if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask) break; } diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 
42c1769ad25d..45c715325e2a 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -139,7 +139,7 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, struct hl_device *hdev = dev_get_drvdata(dev); return sprintf(buf, "0x%08x\n", - hdev->asic_prop.cpucp_info.cpld_version); + le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version)); } static ssize_t cpucp_kernel_ver_show(struct device *dev, @@ -163,8 +163,13 @@ static ssize_t infineon_ver_show(struct device *dev, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "0x%04x\n", - hdev->asic_prop.cpucp_info.infineon_version); + if (hdev->asic_prop.cpucp_info.infineon_second_stage_version) + return sprintf(buf, "%#04x %#04x\n", + le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version), + le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version)); + else + return sprintf(buf, "%#04x\n", + le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version)); } static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, @@ -206,7 +211,7 @@ static ssize_t soft_reset_store(struct device *dev, goto out; } - if (!hdev->allow_inference_soft_reset) { + if (!hdev->asic_prop.allow_inference_soft_reset) { dev_err(hdev->dev, "Device does not support inference soft-reset\n"); goto out; } @@ -236,7 +241,7 @@ static ssize_t hard_reset_store(struct device *dev, dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n"); - hl_device_reset(hdev, HL_RESET_HARD); + hl_device_reset(hdev, HL_DRV_RESET_HARD); out: return count; @@ -298,7 +303,7 @@ static ssize_t soft_reset_cnt_show(struct device *dev, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%d\n", hdev->soft_reset_cnt); + return sprintf(buf, "%d\n", hdev->reset_info.soft_reset_cnt); } static ssize_t hard_reset_cnt_show(struct device *dev, @@ -306,7 +311,7 @@ static ssize_t hard_reset_cnt_show(struct device *dev, { struct hl_device *hdev = 
dev_get_drvdata(dev); - return sprintf(buf, "%d\n", hdev->hard_reset_cnt); + return sprintf(buf, "%d\n", hdev->reset_info.hard_reset_cnt); } static ssize_t max_power_show(struct device *dev, struct device_attribute *attr, @@ -419,8 +424,6 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_max_power.attr, &dev_attr_pci_addr.attr, &dev_attr_preboot_btl_ver.attr, - &dev_attr_soft_reset.attr, - &dev_attr_soft_reset_cnt.attr, &dev_attr_status.attr, &dev_attr_thermal_ver.attr, &dev_attr_uboot_ver.attr, @@ -445,15 +448,25 @@ static const struct attribute_group *hl_dev_attr_groups[] = { NULL, }; +static struct attribute *hl_dev_inference_attrs[] = { + &dev_attr_soft_reset.attr, + &dev_attr_soft_reset_cnt.attr, + NULL, +}; + +static struct attribute_group hl_dev_inference_attr_group = { + .attrs = hl_dev_inference_attrs, +}; + +static const struct attribute_group *hl_dev_inference_attr_groups[] = { + &hl_dev_inference_attr_group, + NULL, +}; + int hl_sysfs_init(struct hl_device *hdev) { int rc; - if (hdev->asic_type == ASIC_GOYA) - hdev->pm_mng_profile = PM_AUTO; - else - hdev->pm_mng_profile = PM_MANUAL; - hdev->max_power = hdev->asic_prop.max_power_default; hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group); @@ -465,10 +478,25 @@ int hl_sysfs_init(struct hl_device *hdev) return rc; } + if (!hdev->asic_prop.allow_inference_soft_reset) + return 0; + + rc = device_add_groups(hdev->dev, hl_dev_inference_attr_groups); + if (rc) { + dev_err(hdev->dev, + "Failed to add groups to device, error %d\n", rc); + return rc; + } + return 0; } void hl_sysfs_fini(struct hl_device *hdev) { device_remove_groups(hdev->dev, hl_dev_attr_groups); + + if (!hdev->asic_prop.allow_inference_soft_reset) + return; + + device_remove_groups(hdev->dev, hl_dev_inference_attr_groups); } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 825737dfe381..013c6da2e3ca 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ 
b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2020 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -593,26 +593,27 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) else prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; prop->mmu_pte_size = HL_PTE_SIZE; - prop->mmu_hop_table_size = HOP_TABLE_SIZE; - prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; + prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; prop->dram_supports_virtual_memory = false; - prop->pmmu.hop0_shift = HOP0_SHIFT; - prop->pmmu.hop1_shift = HOP1_SHIFT; - prop->pmmu.hop2_shift = HOP2_SHIFT; - prop->pmmu.hop3_shift = HOP3_SHIFT; - prop->pmmu.hop4_shift = HOP4_SHIFT; - prop->pmmu.hop0_mask = HOP0_MASK; - prop->pmmu.hop1_mask = HOP1_MASK; - prop->pmmu.hop2_mask = HOP2_MASK; - prop->pmmu.hop3_mask = HOP3_MASK; - prop->pmmu.hop4_mask = HOP4_MASK; + prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT; + prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT; + prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT; + prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT; + prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT; + prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK; + prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK; + prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK; + prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK; + prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK; prop->pmmu.start_addr = VA_HOST_SPACE_START; prop->pmmu.end_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.num_hops = MMU_ARCH_5_HOPS; + prop->pmmu.last_mask = LAST_MASK; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -664,6 +665,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->clk_pll_index = HL_GAUDI_MME_PLL; prop->max_freq_value = 
GAUDI_MAX_CLK_FREQ; + prop->use_get_power_for_reset_history = true; + return 0; } @@ -878,6 +881,11 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) int rc; if (hdev->asic_prop.fw_security_enabled) { + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; + rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); if (rc) @@ -1273,6 +1281,7 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs) container_of(cs->signal_fence, struct hl_cs_compl, base_fence); struct hl_cs_compl *cs_cmpl = container_of(cs->fence, struct hl_cs_compl, base_fence); + struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; struct gaudi_collective_properties *cprop; u32 stream, queue_id, sob_group_offset; struct gaudi_device *gaudi; @@ -1285,10 +1294,16 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs) gaudi = hdev->asic_specific; cprop = &gaudi->collective_props; - /* In encaps signals case the SOB info will be retrieved from - * the handle in gaudi_collective_slave_init_job. - */ - if (!cs->encaps_signals) { + if (cs->encaps_signals) { + cs_cmpl->hw_sob = handle->hw_sob; + /* at this checkpoint we only need the hw_sob pointer + * for the completion check before start going over the jobs + * of the master/slaves, the sob_value will be taken later on + * in gaudi_collective_slave_init_job depends on each + * job wait offset value. 
+ */ + cs_cmpl->sob_val = 0; + } else { /* copy the SOB id and value of the signal CS */ cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; cs_cmpl->sob_val = signal_cs_cmpl->sob_val; @@ -1621,6 +1636,8 @@ static int gaudi_late_init(struct hl_device *hdev) */ gaudi_mmu_prepare(hdev, 1); + hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); + return 0; disable_pci_access: @@ -4006,7 +4023,7 @@ static void gaudi_init_firmware_loader(struct hl_device *hdev) struct fw_load_mgr *fw_loader = &hdev->fw_loader; /* fill common fields */ - fw_loader->linux_loaded = false; + fw_loader->fw_comp_loaded = FW_TYPE_NONE; fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; @@ -4289,13 +4306,31 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU * registers in case of old F/Ws */ - if (hdev->fw_loader.linux_loaded) { + if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : le32_to_cpu(dyn_regs->gic_host_halt_irq); WREG32(irq_handler_offset, gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); + + /* This is a hail-mary attempt to revive the card in the small chance that the + * f/w has experienced a watchdog event, which caused it to return back to preboot. + * In that case, triggering reset through GIC won't help. We need to trigger the + * reset as if Linux wasn't loaded. + * + * We do it only if the reset cause was HB, because that would be the indication + * of such an event. + * + * In case watchdog hasn't expired but we still got HB, then this won't do any + * damage. 
+ */ + if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { + if (hdev->asic_prop.hard_reset_done_by_fw) + hl_fw_ask_hard_reset_without_linux(hdev); + else + hl_fw_ask_halt_machine_without_linux(hdev); + } } else { if (hdev->asic_prop.hard_reset_done_by_fw) hl_fw_ask_hard_reset_without_linux(hdev); @@ -6412,6 +6447,7 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, { u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; struct gaudi_device *gaudi = hdev->asic_specific; + u32 qm_glbl_sts0, qm_cgm_sts; u64 dma_offset, qm_offset; dma_addr_t dma_addr; void *kernel_addr; @@ -6436,14 +6472,20 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, dma_offset = dma_id * DMA_CORE_OFFSET; qm_offset = dma_id * DMA_QMAN_OFFSET; dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); - is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); + qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); + is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && + IS_DMA_IDLE(dma_core_sts0); if (!is_eng_idle) { dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; dma_offset = dma_id * DMA_CORE_OFFSET; qm_offset = dma_id * DMA_QMAN_OFFSET; dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); - is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); + qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); + is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && + IS_DMA_IDLE(dma_core_sts0); if (!is_eng_idle) { dev_err_ratelimited(hdev->dev, @@ -6522,7 +6564,7 @@ static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) { struct gaudi_device *gaudi = hdev->asic_specific; - if (hdev->hard_reset_pending) + if (hdev->reset_info.hard_reset_pending) return U64_MAX; return readq(hdev->pcie_bar[HBM_BAR_ID] + @@ -6533,7 +6575,7 @@ static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) { struct gaudi_device *gaudi = 
hdev->asic_specific; - if (hdev->hard_reset_pending) + if (hdev->reset_info.hard_reset_pending) return; writeq(val, hdev->pcie_bar[HBM_BAR_ID] + @@ -6935,8 +6977,9 @@ event_not_supported: snprintf(desc, size, "N/A"); } -static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, - u32 x_y, bool is_write) +static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, + bool is_write, s32 *engine_id_1, + s32 *engine_id_2) { u32 dma_id[2], dma_offset, err_cause[2], mask, i; @@ -6976,44 +7019,64 @@ static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, switch (x_y) { case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: - if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; return "DMA0"; - else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; return "DMA2"; - else + } else { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; + *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; return "DMA0 or DMA2"; + } case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: - if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; return "DMA1"; - else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; return "DMA3"; - else + } else { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; + *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; return "DMA1 or DMA3"; + } case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: - if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; return "DMA4"; - 
else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; return "DMA6"; - else + } else { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; + *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; return "DMA4 or DMA6"; + } case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: - if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; return "DMA5"; - else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; return "DMA7"; - else + } else { + *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; + *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; return "DMA5 or DMA7"; + } } unknown_initiator: return "unknown initiator"; } -static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, - bool is_write) +static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, + u32 *engine_id_1, u32 *engine_id_2) { u32 val, x_y, axi_id; @@ -7026,24 +7089,35 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, switch (x_y) { case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { + *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; return "TPC0"; - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) + } + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { + *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; return "NIC0"; + } break; case RAZWI_INITIATOR_ID_X_Y_TPC1: + *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; return "TPC1"; case RAZWI_INITIATOR_ID_X_Y_MME0_0: case RAZWI_INITIATOR_ID_X_Y_MME0_1: + *engine_id_1 = GAUDI_ENGINE_ID_MME_0; return "MME0"; case RAZWI_INITIATOR_ID_X_Y_MME1_0: case RAZWI_INITIATOR_ID_X_Y_MME1_1: + *engine_id_1 = GAUDI_ENGINE_ID_MME_1; return "MME1"; 
case RAZWI_INITIATOR_ID_X_Y_TPC2: + *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; return "TPC2"; case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { + *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; return "TPC3"; + } + /* PCI, CPU or PSOC does not have engine id*/ if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) return "PCI"; if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) @@ -7059,32 +7133,49 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: - return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write); + return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, + engine_id_1, engine_id_2); case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { + *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; return "TPC4"; - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) + } + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { + *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; return "NIC1"; - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) + } + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { + *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; return "NIC2"; + } break; case RAZWI_INITIATOR_ID_X_Y_TPC5: + *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; return "TPC5"; case RAZWI_INITIATOR_ID_X_Y_MME2_0: case RAZWI_INITIATOR_ID_X_Y_MME2_1: + *engine_id_1 = GAUDI_ENGINE_ID_MME_2; return "MME2"; case RAZWI_INITIATOR_ID_X_Y_MME3_0: case RAZWI_INITIATOR_ID_X_Y_MME3_1: + *engine_id_1 = GAUDI_ENGINE_ID_MME_3; return "MME3"; case RAZWI_INITIATOR_ID_X_Y_TPC6: + *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; return "TPC6"; case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + if (axi_id == 
RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { + *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; return "TPC7"; - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) + } + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { + *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; return "NIC4"; - if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) + } + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { + *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; return "NIC5"; + } break; default: break; @@ -7101,27 +7192,28 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, return "unknown initiator"; } -static void gaudi_print_razwi_info(struct hl_device *hdev) +static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1, + u32 *engine_id_2) { + if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { dev_err_ratelimited(hdev->dev, "RAZWI event caused by illegal write of %s\n", - gaudi_get_razwi_initiator_name(hdev, true)); + gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); } if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { dev_err_ratelimited(hdev->dev, "RAZWI event caused by illegal read of %s\n", - gaudi_get_razwi_initiator_name(hdev, false)); + gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); } } -static void gaudi_print_mmu_error_info(struct hl_device *hdev) +static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type) { struct gaudi_device *gaudi = hdev->asic_specific; - u64 addr; u32 val; if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) @@ -7129,24 +7221,24 @@ static void gaudi_print_mmu_error_info(struct hl_device *hdev) val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { - addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; - addr <<= 32; - addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); + *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; + *addr <<= 32; + *addr |= 
RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); - dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", - addr); + dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); + *type = HL_RAZWI_PAGE_FAULT; WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); } val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { - addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; - addr <<= 32; - addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); + *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; + *addr <<= 32; + *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); - dev_err_ratelimited(hdev->dev, - "MMU access error on va 0x%llx\n", addr); + dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); + *type = HL_RAZWI_MMU_ACCESS_ERROR; WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); } @@ -7665,15 +7757,46 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type) static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, bool razwi) { + u32 engine_id_1, engine_id_2; char desc[64] = ""; + u64 razwi_addr = 0; + u8 razwi_type; + int rc; + + /* + * Init engine id by default as not valid and only if razwi initiated from engine with + * engine id it will get valid value. 
+ * Init razwi type to default, will be changed only if razwi caused by page fault of + * MMU access error + */ + engine_id_1 = U16_MAX; + engine_id_2 = U16_MAX; + razwi_type = U8_MAX; gaudi_get_event_desc(event_type, desc, sizeof(desc)); dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", event_type, desc); if (razwi) { - gaudi_print_razwi_info(hdev); - gaudi_print_mmu_error_info(hdev); + gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2); + gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); + + /* In case it's the first razwi, save its parameters*/ + rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1); + if (!rc) { + hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; + hdev->last_error.razwi_timestamp = ktime_get(); + hdev->last_error.razwi_addr = razwi_addr; + hdev->last_error.razwi_engine_id_1 = engine_id_1; + hdev->last_error.razwi_engine_id_2 = engine_id_2; + /* + * If first engine id holds non valid value the razwi initiator + * does not have engine id + */ + hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX); + hdev->last_error.razwi_type = razwi_type; + + } } } @@ -7696,14 +7819,10 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev, fw_alive->thread_id, fw_alive->uptime_seconds); } -static int gaudi_soft_reset_late_init(struct hl_device *hdev) +static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) { - struct gaudi_device *gaudi = hdev->asic_specific; - - /* Unmask all IRQs since some could have been received - * during the soft reset - */ - return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events)); + /* GAUDI doesn't support any reset except hard-reset */ + return -EPERM; } static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, @@ -7897,27 +8016,39 @@ static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type) { + 
ktime_t zero_time = ktime_set(0, 0); + + mutex_lock(&hdev->clk_throttling.lock); + switch (event_type) { case GAUDI_EVENT_FIX_POWER_ENV_S: - hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); break; case GAUDI_EVENT_FIX_POWER_ENV_E: - hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); dev_info_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); break; case GAUDI_EVENT_FIX_THERMAL_ENV_S: - hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); break; case GAUDI_EVENT_FIX_THERMAL_ENV_E: - hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); break; @@ -7927,6 +8058,8 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, event_type); break; } + + mutex_unlock(&hdev->clk_throttling.lock); } static void gaudi_handle_eqe(struct hl_device *hdev, @@ -7975,7 +8108,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case 
GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); - fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR; + fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; goto reset_device; case GAUDI_EVENT_GIC500: @@ -7983,7 +8116,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_L2_RAM_ECC: case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: gaudi_print_irq_info(hdev, event_type, false); - fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR; + fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_0: @@ -7994,7 +8127,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, gaudi_hbm_read_interrupts(hdev, gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); - fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR; + fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_1: @@ -8177,9 +8310,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev, reset_device: if (hdev->asic_prop.fw_security_enabled) - hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag); + hl_device_reset(hdev, HL_DRV_RESET_HARD + | HL_DRV_RESET_BYPASS_REQ_TO_FW + | fw_fatal_err_flag); else if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag); + hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag); else hl_fw_unmask_irq(hdev, event_type); } @@ -8206,7 +8341,7 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, int rc; if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || - hdev->hard_reset_pending) + hdev->reset_info.hard_reset_pending) return 0; if (hdev->pldm) @@ -8229,12 +8364,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, WREG32(mmSTLB_INV_SET, 0); - if (rc) { - dev_err_ratelimited(hdev->dev, - "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, HL_RESET_HARD); - } - return rc; } @@ -8662,7 
+8791,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR); + hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); if (rc) @@ -8697,7 +8826,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); @@ -9458,7 +9587,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, - .soft_reset_late_init = gaudi_soft_reset_late_init, + .non_hard_reset_late_init = gaudi_non_hard_reset_late_init, .hw_queues_lock = gaudi_hw_queues_lock, .hw_queues_unlock = gaudi_hw_queues_unlock, .get_pci_id = gaudi_get_pci_id, diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index f325e36a71e6..8ac16a9b7d15 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -357,8 +357,8 @@ void gaudi_init_security(struct hl_device *hdev); void gaudi_ack_protection_bits_errors(struct hl_device *hdev); void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); -int gaudi_debug_coresight(struct hl_device *hdev, void *data); -void gaudi_halt_coresight(struct hl_device *hdev); +int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data); +void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx); void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid); #endif /* GAUDIP_H_ */ diff --git 
a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 5349c1be13f9..08108f5fed67 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -848,7 +848,7 @@ static int gaudi_config_spmu(struct hl_device *hdev, return 0; } -int gaudi_debug_coresight(struct hl_device *hdev, void *data) +int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data) { struct hl_debug_params *params = data; int rc = 0; @@ -887,7 +887,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data) return rc; } -void gaudi_halt_coresight(struct hl_device *hdev) +void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx) { struct hl_debug_params params = {}; int i, rc; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5536e8c27bd5..fbcc7bbf44b3 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. 
*/ @@ -410,25 +410,26 @@ int goya_set_fixed_properties(struct hl_device *hdev) else prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; prop->mmu_pte_size = HL_PTE_SIZE; - prop->mmu_hop_table_size = HOP_TABLE_SIZE; - prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; + prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; + prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; prop->dram_supports_virtual_memory = true; - prop->dmmu.hop0_shift = HOP0_SHIFT; - prop->dmmu.hop1_shift = HOP1_SHIFT; - prop->dmmu.hop2_shift = HOP2_SHIFT; - prop->dmmu.hop3_shift = HOP3_SHIFT; - prop->dmmu.hop4_shift = HOP4_SHIFT; - prop->dmmu.hop0_mask = HOP0_MASK; - prop->dmmu.hop1_mask = HOP1_MASK; - prop->dmmu.hop2_mask = HOP2_MASK; - prop->dmmu.hop3_mask = HOP3_MASK; - prop->dmmu.hop4_mask = HOP4_MASK; + prop->dmmu.hop0_shift = MMU_V1_0_HOP0_SHIFT; + prop->dmmu.hop1_shift = MMU_V1_0_HOP1_SHIFT; + prop->dmmu.hop2_shift = MMU_V1_0_HOP2_SHIFT; + prop->dmmu.hop3_shift = MMU_V1_0_HOP3_SHIFT; + prop->dmmu.hop4_shift = MMU_V1_0_HOP4_SHIFT; + prop->dmmu.hop0_mask = MMU_V1_0_HOP0_MASK; + prop->dmmu.hop1_mask = MMU_V1_0_HOP1_MASK; + prop->dmmu.hop2_mask = MMU_V1_0_HOP2_MASK; + prop->dmmu.hop3_mask = MMU_V1_0_HOP3_MASK; + prop->dmmu.hop4_mask = MMU_V1_0_HOP4_MASK; prop->dmmu.start_addr = VA_DDR_SPACE_START; prop->dmmu.end_addr = VA_DDR_SPACE_END; prop->dmmu.page_size = PAGE_SIZE_2MB; prop->dmmu.num_hops = MMU_ARCH_5_HOPS; + prop->dmmu.last_mask = LAST_MASK; /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); @@ -436,6 +437,7 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->pmmu.end_addr = VA_HOST_SPACE_END; prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.num_hops = MMU_ARCH_5_HOPS; + prop->pmmu.last_mask = LAST_MASK; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -473,6 +475,8 @@ int 
goya_set_fixed_properties(struct hl_device *hdev) prop->clk_pll_index = HL_GOYA_MME_PLL; + prop->use_get_power_for_reset_history = true; + return 0; } @@ -735,6 +739,11 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev) int rc; if (hdev->asic_prop.fw_security_enabled) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) + return; + rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL, pll_freq_arr); @@ -778,9 +787,59 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev) prop->psoc_pci_pll_div_factor = div_fctr; } +/* + * goya_set_frequency - set the frequency of the device + * + * @hdev: pointer to habanalabs device structure + * @freq: the new frequency value + * + * Change the frequency if needed. This function has no protection against + * concurrency, therefore it is assumed that the calling function has protected + * itself against the case of calling this function from multiple threads with + * different values + * + * Returns 0 if no change was done, otherwise returns 1 + */ +int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) +{ + struct goya_device *goya = hdev->asic_specific; + + if ((goya->pm_mng_profile == PM_MANUAL) || + (goya->curr_pll_profile == freq)) + return 0; + + dev_dbg(hdev->dev, "Changing device frequency to %s\n", + freq == PLL_HIGH ? 
"high" : "low"); + + goya_set_pll_profile(hdev, freq); + + goya->curr_pll_profile = freq; + + return 1; +} + +static void goya_set_freq_to_low_job(struct work_struct *work) +{ + struct goya_work_freq *goya_work = container_of(work, + struct goya_work_freq, + work_freq.work); + struct hl_device *hdev = goya_work->hdev; + + mutex_lock(&hdev->fpriv_list_lock); + + if (!hdev->is_compute_ctx_active) + goya_set_frequency(hdev, PLL_LOW); + + mutex_unlock(&hdev->fpriv_list_lock); + + schedule_delayed_work(&goya_work->work_freq, + usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); +} + int goya_late_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; + struct goya_device *goya = hdev->asic_specific; int rc; goya_fetch_psoc_frequency(hdev); @@ -829,6 +888,16 @@ int goya_late_init(struct hl_device *hdev) return rc; } + /* force setting to low frequency */ + goya->curr_pll_profile = PLL_LOW; + + goya->pm_mng_profile = PM_AUTO; + + hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); + + schedule_delayed_work(&goya->goya_work->work_freq, + usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); + return 0; } @@ -842,8 +911,11 @@ int goya_late_init(struct hl_device *hdev) void goya_late_fini(struct hl_device *hdev) { const struct hwmon_channel_info **channel_info_arr; + struct goya_device *goya = hdev->asic_specific; int i = 0; + cancel_delayed_work_sync(&goya->goya_work->work_freq); + if (!hdev->hl_chip_info->info) return; @@ -961,12 +1033,21 @@ static int goya_sw_init(struct hl_device *hdev) spin_lock_init(&goya->hw_queues_lock); hdev->supports_coresight = true; - hdev->supports_soft_reset = true; - hdev->allow_inference_soft_reset = true; + hdev->asic_prop.supports_soft_reset = true; + hdev->asic_prop.allow_inference_soft_reset = true; hdev->supports_wait_for_multi_cs = false; hdev->asic_funcs->set_pci_memory_regions(hdev); + goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL); + if (!goya->goya_work) { + rc = -ENOMEM; + goto 
free_cpu_accessible_dma_pool; + } + + goya->goya_work->hdev = hdev; + INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job); + return 0; free_cpu_accessible_dma_pool: @@ -1003,6 +1084,7 @@ static int goya_sw_fini(struct hl_device *hdev) dma_pool_destroy(hdev->dma_pool); + kfree(goya->goya_work); kfree(goya); return 0; @@ -2502,7 +2584,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev) struct fw_load_mgr *fw_loader = &hdev->fw_loader; /* fill common fields */ - fw_loader->linux_loaded = false; + fw_loader->fw_comp_loaded = FW_TYPE_NONE; fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE; fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE; fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC; @@ -2619,7 +2701,7 @@ int goya_mmu_init(struct hl_device *hdev) (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK)); hdev->asic_funcs->mmu_invalidate_cache(hdev, true, - VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK); + MMU_OP_USERPTR | MMU_OP_PHYS_PACK); WREG32(mmMMU_MMU_ENABLE, 1); WREG32(mmMMU_SPI_MASK, 0xF); @@ -4395,7 +4477,7 @@ static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; - if (hdev->hard_reset_pending) + if (hdev->reset_info.hard_reset_pending) return U64_MAX; return readq(hdev->pcie_bar[DDR_BAR_ID] + @@ -4406,7 +4488,7 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val) { struct goya_device *goya = hdev->asic_specific; - if (hdev->hard_reset_pending) + if (hdev->reset_info.hard_reset_pending) return; writeq(val, hdev->pcie_bar[DDR_BAR_ID] + @@ -4731,7 +4813,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, return rc; } -static int goya_soft_reset_late_init(struct hl_device *hdev) +static int goya_non_hard_reset_late_init(struct hl_device *hdev) { /* * Unmask all IRQs since some could have been received @@ -4764,24 +4846,39 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) static void goya_print_clk_change_info(struct hl_device 
*hdev, u16 event_type) { + ktime_t zero_time = ktime_set(0, 0); + + mutex_lock(&hdev->clk_throttling.lock); + switch (event_type) { case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: - hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); break; + case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: - hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); dev_info_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); break; + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: - hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); break; + case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: - hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; + hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); break; @@ -4791,6 +4888,8 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type) event_type); break; } + + mutex_unlock(&hdev->clk_throttling.lock); } void goya_handle_eqe(struct hl_device *hdev, struct 
hl_eq_entry *eq_entry) @@ -4834,14 +4933,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: goya_print_irq_info(hdev, event_type, false); if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, (HL_RESET_HARD | - HL_RESET_FW_FATAL_ERR)); + hl_device_reset(hdev, (HL_DRV_RESET_HARD | + HL_DRV_RESET_FW_FATAL_ERR)); break; case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: goya_print_irq_info(hdev, event_type, false); if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, HL_RESET_HARD); + hl_device_reset(hdev, HL_DRV_RESET_HARD); break; case GOYA_ASYNC_EVENT_ID_PCIE_DEC: @@ -4901,7 +5000,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) goya_print_irq_info(hdev, event_type, false); goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, HL_RESET_HARD); + hl_device_reset(hdev, HL_DRV_RESET_HARD); else hl_fw_unmask_irq(hdev, event_type); break; @@ -5209,7 +5308,7 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, int rc; if (!(goya->hw_cap_initialized & HW_CAP_MMU) || - hdev->hard_reset_pending) + hdev->reset_info.hard_reset_pending) return 0; /* no need in L1 only invalidation in Goya */ @@ -5232,12 +5331,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, 1000, timeout_usec); - if (rc) { - dev_err_ratelimited(hdev->dev, - "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, HL_RESET_HARD); - } - return rc; } @@ -5645,7 +5738,7 @@ static const struct hl_asic_funcs goya_funcs = { .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, - .soft_reset_late_init = goya_soft_reset_late_init, + .non_hard_reset_late_init = goya_non_hard_reset_late_init, .hw_queues_lock = goya_hw_queues_lock, .hw_queues_unlock = goya_hw_queues_unlock, .get_pci_id = goya_get_pci_id, diff --git 
a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 97add7b04f82..3740fd25bf84 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -153,9 +153,15 @@ #define HW_CAP_GOLDEN 0x00000400 #define HW_CAP_TPC 0x00000800 +struct goya_work_freq { + struct hl_device *hdev; + struct delayed_work work_freq; +}; + struct goya_device { /* TODO: remove hw_queues_lock after moving to scheduler code */ spinlock_t hw_queues_lock; + struct goya_work_freq *goya_work; u64 mme_clk; u64 tpc_clk; @@ -166,6 +172,9 @@ struct goya_device { u32 events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE]; u32 hw_cap_initialized; u8 device_cpu_mmu_mappings_done; + + enum hl_pll_frequency curr_pll_profile; + enum hl_pm_mng_profile pm_mng_profile; }; int goya_set_fixed_properties(struct hl_device *hdev); @@ -211,8 +220,8 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); int goya_cpucp_info_get(struct hl_device *hdev); -int goya_debug_coresight(struct hl_device *hdev, void *data); -void goya_halt_coresight(struct hl_device *hdev); +int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data); +void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx); int goya_suspend(struct hl_device *hdev); int goya_resume(struct hl_device *hdev); @@ -237,5 +246,6 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev); u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx); u64 goya_get_device_time(struct hl_device *hdev); +int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); #endif /* GOYAP_H_ */ diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index c55c100fdd24..2c5133cfae65 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -652,7 
+652,7 @@ static int goya_config_spmu(struct hl_device *hdev, return 0; } -int goya_debug_coresight(struct hl_device *hdev, void *data) +int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data) { struct hl_debug_params *params = data; int rc = 0; @@ -691,7 +691,7 @@ int goya_debug_coresight(struct hl_device *hdev, void *data) return rc; } -void goya_halt_coresight(struct hl_device *hdev) +void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx) { struct hl_debug_params params = {}; int i, rc; diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index 59b2624ff81a..76b47749affe 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -62,7 +62,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - if (hdev->pm_mng_profile == PM_AUTO) { + if (goya->pm_mng_profile == PM_AUTO) { count = -EPERM; goto fail; } @@ -111,7 +111,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - if (hdev->pm_mng_profile == PM_AUTO) { + if (goya->pm_mng_profile == PM_AUTO) { count = -EPERM; goto fail; } @@ -160,7 +160,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - if (hdev->pm_mng_profile == PM_AUTO) { + if (goya->pm_mng_profile == PM_AUTO) { count = -EPERM; goto fail; } @@ -234,13 +234,14 @@ static ssize_t pm_mng_profile_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hl_device *hdev = dev_get_drvdata(dev); + struct goya_device *goya = hdev->asic_specific; if (!hl_device_operational(hdev, NULL)) return -ENODEV; return sprintf(buf, "%s\n", - (hdev->pm_mng_profile == PM_AUTO) ? "auto" : - (hdev->pm_mng_profile == PM_MANUAL) ? 
"manual" : + (goya->pm_mng_profile == PM_AUTO) ? "auto" : + (goya->pm_mng_profile == PM_MANUAL) ? "manual" : "unknown"); } @@ -248,6 +249,7 @@ static ssize_t pm_mng_profile_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hl_device *hdev = dev_get_drvdata(dev); + struct goya_device *goya = hdev->asic_specific; if (!hl_device_operational(hdev, NULL)) { count = -ENODEV; @@ -256,7 +258,7 @@ static ssize_t pm_mng_profile_store(struct device *dev, mutex_lock(&hdev->fpriv_list_lock); - if (hdev->compute_ctx) { + if (hdev->is_compute_ctx_active) { dev_err(hdev->dev, "Can't change PM profile while compute context is opened on the device\n"); count = -EPERM; @@ -265,26 +267,27 @@ static ssize_t pm_mng_profile_store(struct device *dev, if (strncmp("auto", buf, strlen("auto")) == 0) { /* Make sure we are in LOW PLL when changing modes */ - if (hdev->pm_mng_profile == PM_MANUAL) { - hdev->curr_pll_profile = PLL_HIGH; - hdev->pm_mng_profile = PM_AUTO; - hl_device_set_frequency(hdev, PLL_LOW); + if (goya->pm_mng_profile == PM_MANUAL) { + goya->curr_pll_profile = PLL_HIGH; + goya->pm_mng_profile = PM_AUTO; + goya_set_frequency(hdev, PLL_LOW); } } else if (strncmp("manual", buf, strlen("manual")) == 0) { - if (hdev->pm_mng_profile == PM_AUTO) { + if (goya->pm_mng_profile == PM_AUTO) { /* Must release the lock because the work thread also * takes this lock. 
But before we release it, set * the mode to manual so nothing will change if a user * suddenly opens the device */ - hdev->pm_mng_profile = PM_MANUAL; + goya->pm_mng_profile = PM_MANUAL; mutex_unlock(&hdev->fpriv_list_lock); /* Flush the current work so we can return to the user * knowing that he is the only one changing frequencies */ - flush_delayed_work(&hdev->work_freq); + if (goya->goya_work) + flush_delayed_work(&goya->goya_work->work_freq); return count; } diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index ae13231fda94..737c39f33f05 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2020 HabanaLabs, Ltd. + * Copyright 2020-2021 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -376,6 +376,19 @@ enum pq_init_status { * and QMANs. The f/w will return a bitmask where each bit represents * a different engine or QMAN according to enum cpucp_idle_mask. * The bit will be 1 if the engine is NOT idle. + * + * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET - + * Fetch all HBM replaced-rows and pending to be replaced rows data. + * + * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS - + * Fetch status of HBM rows pending replacement and need a reboot to + * be replaced.
+ * + * CPUCP_PACKET_POWER_SET - + * Resets power history of device to 0 + * + * CPUCP_PACKET_ENGINE_CORE_ASID_SET - + * Packet to perform engine core ASID configuration */ enum cpucp_packet_id { @@ -421,6 +434,11 @@ enum cpucp_packet_id { CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ + CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */ + CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */ + CPUCP_PACKET_POWER_SET, /* internal */ + CPUCP_PACKET_RESERVED, /* not used */ + CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 @@ -480,7 +498,14 @@ struct cpucp_packet { __u8 i2c_bus; __u8 i2c_addr; __u8 i2c_reg; - __u8 pad; /* unused */ + /* + * In legacy implementations, i2c_len was not present, + * was unused and just added as pad. + * So if i2c_len is 0, it is treated as legacy + * and r/w 1 Byte, else if i2c_len is specified, + * it is treated as new multibyte r/w support. + */ + __u8 i2c_len; }; struct {/* For PLL info fetch */ @@ -688,6 +713,7 @@ struct eq_generic_event { #define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC) #define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64) #define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64) +#define CPUCP_HBM_ROW_REPLACE_MAX 32 struct cpucp_sensor { __le32 type; @@ -740,6 +766,7 @@ struct cpucp_security_info { * @fuse_version: silicon production FUSE information. * @thermal_version: thermald S/W version. * @cpucp_version: CpuCP S/W version. + * @infineon_second_stage_version: Infineon 2nd stage DC-DC version. * @dram_size: available DRAM size.
* @card_name: card name that will be displayed in HWMON subsystem on the host * @sec_info: security information @@ -749,6 +776,10 @@ struct cpucp_security_info { * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance * (0 = functional 1 = binned) * @memory_repair_flag: eFuse flag indicating memory repair + * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance + * (0 = functional 1 = binned) + * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance + * (0 = functional 1 = binned) */ struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; @@ -761,7 +792,7 @@ struct cpucp_info { __u8 fuse_version[VERSION_MAX_LEN]; __u8 thermal_version[VERSION_MAX_LEN]; __u8 cpucp_version[VERSION_MAX_LEN]; - __le32 reserved2; + __le32 infineon_second_stage_version; __le64 dram_size; char card_name[CARD_NAME_MAX_LEN]; __le64 reserved3; @@ -769,7 +800,9 @@ struct cpucp_info { __u8 reserved5; __u8 dram_binning_mask; __u8 memory_repair_flag; - __u8 pad[5]; + __u8 edma_binning_mask; + __u8 xbar_binning_mask; + __u8 pad[3]; struct cpucp_security_info sec_info; __le32 reserved6; __u8 pll_map[PLL_MAP_LEN]; @@ -833,4 +866,25 @@ struct cpucp_nic_status { __le32 high_ber_cnt; }; +enum cpucp_hbm_row_replace_cause { + REPLACE_CAUSE_DOUBLE_ECC_ERR, + REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR, +}; + +struct cpucp_hbm_row_info { + __u8 hbm_idx; + __u8 pc; + __u8 sid; + __u8 bank_idx; + __le16 row_addr; + __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */ + __u8 pad; +}; + +struct cpucp_hbm_row_replaced_rows_info { + __le16 num_replaced_rows; + __u8 pad[6]; + struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; +}; + #endif /* CPUCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 2626df6ef3ef..135e21d6edc9 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -32,6 +32,7 @@ enum 
cpu_boot_err { CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, CPU_BOOT_ERR_BINNING_FAIL = 19, + CPU_BOOT_ERR_TPM_FAIL = 20, CPU_BOOT_ERR_ENABLED = 31, CPU_BOOT_ERR_SCND_EN = 63, CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ @@ -108,6 +109,8 @@ enum cpu_boot_err { * malfunctioning components might still be * in use. * + * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -130,6 +133,7 @@ enum cpu_boot_err { #define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) #define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) +#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index dedf20e8f956..758f246627f8 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -16,27 +16,18 @@ #define PAGE_PRESENT_MASK 0x0000000000001ull #define SWAP_OUT_MASK 0x0000000000004ull #define LAST_MASK 0x0000000000800ull -#define HOP0_MASK 0x3000000000000ull -#define HOP1_MASK 0x0FF8000000000ull -#define HOP2_MASK 0x0007FC0000000ull -#define HOP3_MASK 0x000003FE00000ull -#define HOP4_MASK 0x00000001FF000ull #define FLAGS_MASK 0x0000000000FFFull -#define HOP0_SHIFT 48 -#define HOP1_SHIFT 39 -#define HOP2_SHIFT 30 -#define HOP3_SHIFT 21 -#define HOP4_SHIFT 12 - #define MMU_ARCH_5_HOPS 5 #define HOP_PHYS_ADDR_MASK (~FLAGS_MASK) #define HL_PTE_SIZE sizeof(u64) -#define HOP_TABLE_SIZE PAGE_SIZE_4KB -#define PTE_ENTRIES_IN_HOP (HOP_TABLE_SIZE / HL_PTE_SIZE) -#define HOP0_TABLES_TOTAL_SIZE 
(HOP_TABLE_SIZE * MAX_ASID) + +/* definitions for HOP with 512 PTE entries */ +#define HOP_PTE_ENTRIES_512 512 +#define HOP_TABLE_SIZE_512_PTE (HOP_PTE_ENTRIES_512 * HL_PTE_SIZE) +#define HOP0_512_PTE_TABLES_TOTAL_SIZE (HOP_TABLE_SIZE_512_PTE * MAX_ASID) #define MMU_HOP0_PA43_12_SHIFT 12 #define MMU_HOP0_PA49_44_SHIFT (12 + 32) diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h index 8539dd041f2c..86511002e367 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h @@ -8,8 +8,20 @@ #ifndef INCLUDE_MMU_V1_0_H_ #define INCLUDE_MMU_V1_0_H_ -#define MMU_HOP0_PA43_12 0x490004 -#define MMU_HOP0_PA49_44 0x490008 -#define MMU_ASID_BUSY 0x490000 +#define MMU_V1_0_HOP0_MASK 0x3000000000000ull +#define MMU_V1_0_HOP1_MASK 0x0FF8000000000ull +#define MMU_V1_0_HOP2_MASK 0x0007FC0000000ull +#define MMU_V1_0_HOP3_MASK 0x000003FE00000ull +#define MMU_V1_0_HOP4_MASK 0x00000001FF000ull + +#define MMU_V1_0_HOP0_SHIFT 48 +#define MMU_V1_0_HOP1_SHIFT 39 +#define MMU_V1_0_HOP2_SHIFT 30 +#define MMU_V1_0_HOP3_SHIFT 21 +#define MMU_V1_0_HOP4_SHIFT 12 + +#define MMU_HOP0_PA43_12 0x490004 +#define MMU_HOP0_PA49_44 0x490008 +#define MMU_ASID_BUSY 0x490000 #endif /* INCLUDE_MMU_V1_0_H_ */ diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h index b2a9570583ac..9c727a5d47b4 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_1.h @@ -8,9 +8,21 @@ #ifndef INCLUDE_MMU_V1_1_H_ #define INCLUDE_MMU_V1_1_H_ -#define MMU_ASID 0xC12004 -#define MMU_HOP0_PA43_12 0xC12008 -#define MMU_HOP0_PA49_44 0xC1200C -#define MMU_BUSY 0xC12000 +#define MMU_V1_1_HOP0_MASK 0x3000000000000ull +#define MMU_V1_1_HOP1_MASK 0x0FF8000000000ull +#define MMU_V1_1_HOP2_MASK 0x0007FC0000000ull +#define MMU_V1_1_HOP3_MASK 0x000003FE00000ull +#define 
MMU_V1_1_HOP4_MASK 0x00000001FF000ull + +#define MMU_V1_1_HOP0_SHIFT 48 +#define MMU_V1_1_HOP1_SHIFT 39 +#define MMU_V1_1_HOP2_SHIFT 30 +#define MMU_V1_1_HOP3_SHIFT 21 +#define MMU_V1_1_HOP4_SHIFT 12 + +#define MMU_ASID 0xC12004 +#define MMU_HOP0_PA43_12 0xC12008 +#define MMU_HOP0_PA49_44 0xC1200C +#define MMU_BUSY 0xC12000 #endif /* INCLUDE_MMU_V1_1_H_ */ |