summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c12
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c368
-rw-r--r--drivers/misc/habanalabs/common/context.c14
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c224
-rw-r--r--drivers/misc/habanalabs/common/device.c221
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c238
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h184
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c28
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c35
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c10
-rw-r--r--drivers/misc/habanalabs/common/irq.c56
-rw-r--r--drivers/misc/habanalabs/common/memory.c182
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu.c3
-rw-r--r--drivers/misc/habanalabs/common/pci/pci.c52
-rw-r--r--drivers/misc/habanalabs/common/sysfs.c33
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c357
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h3
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi_security.c8
-rw-r--r--drivers/misc/habanalabs/goya/goya.c140
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h2
-rw-r--r--drivers/misc/habanalabs/include/common/cpucp_if.h99
-rw-r--r--drivers/misc/habanalabs/include/common/hl_boot_if.h219
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi.h2
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h2
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h43
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h14
-rw-r--r--drivers/misc/habanalabs/include/goya/goya.h2
-rw-r--r--drivers/misc/habanalabs/include/goya/goya_async_events.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/goya_fw_if.h11
29 files changed, 2082 insertions, 481 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index d9adb9a5e4d8..719168c980a4 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -181,7 +181,7 @@ static void cb_release(struct kref *ref)
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
int ctx_id, bool internal_cb)
{
- struct hl_cb *cb;
+ struct hl_cb *cb = NULL;
u32 cb_offset;
void *p;
@@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
* the kernel's copy. Hence, we must never sleep in this code section
* and must use GFP_ATOMIC for all memory allocations.
*/
- if (ctx_id == HL_KERNEL_ASID_ID)
+ if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
- else
+
+ if (!cb)
cb = kzalloc(sizeof(*cb), GFP_KERNEL);
if (!cb)
@@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
} else if (ctx_id == HL_KERNEL_ASID_ID) {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address, GFP_ATOMIC);
+ if (!p)
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+ cb_size, &cb->bus_address, GFP_KERNEL);
} else {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address,
@@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
spin_lock(&mgr->cb_lock);
rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
+ if (rc < 0)
+ rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
spin_unlock(&mgr->cb_lock);
if (rc < 0) {
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 7bd4a03b3429..ff8791a651fd 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -84,6 +84,38 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
return 0;
}
+static void sob_reset_work(struct work_struct *work)
+{
+ struct hl_cs_compl *hl_cs_cmpl =
+ container_of(work, struct hl_cs_compl, sob_reset_work);
+ struct hl_device *hdev = hl_cs_cmpl->hdev;
+
+ /*
+ * A signal CS can get completion while the corresponding wait
+ * for signal CS is on its way to the PQ. The wait for signal CS
+ * will get stuck if the signal CS incremented the SOB to its
+ * max value and there are no pending (submitted) waits on this
+ * SOB.
+ * We do the following to void this situation:
+ * 1. The wait for signal CS must get a ref for the signal CS as
+ * soon as possible in cs_ioctl_signal_wait() and put it
+ * before being submitted to the PQ but after it incremented
+ * the SOB refcnt in init_signal_wait_cs().
+ * 2. Signal/Wait for signal CS will decrement the SOB refcnt
+ * here.
+ * These two measures guarantee that the wait for signal CS will
+ * reset the SOB upon completion rather than the signal CS and
+ * hence the above scenario is avoided.
+ */
+ kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+
+ if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+ hdev->asic_funcs->reset_sob_group(hdev,
+ hl_cs_cmpl->sob_group);
+
+ kfree(hl_cs_cmpl);
+}
+
static void hl_fence_release(struct kref *kref)
{
struct hl_fence *fence =
@@ -109,28 +141,9 @@ static void hl_fence_release(struct kref *kref)
hl_cs_cmpl->hw_sob->sob_id,
hl_cs_cmpl->sob_val);
- /*
- * A signal CS can get completion while the corresponding wait
- * for signal CS is on its way to the PQ. The wait for signal CS
- * will get stuck if the signal CS incremented the SOB to its
- * max value and there are no pending (submitted) waits on this
- * SOB.
- * We do the following to void this situation:
- * 1. The wait for signal CS must get a ref for the signal CS as
- * soon as possible in cs_ioctl_signal_wait() and put it
- * before being submitted to the PQ but after it incremented
- * the SOB refcnt in init_signal_wait_cs().
- * 2. Signal/Wait for signal CS will decrement the SOB refcnt
- * here.
- * These two measures guarantee that the wait for signal CS will
- * reset the SOB upon completion rather than the signal CS and
- * hence the above scenario is avoided.
- */
- kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+ queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
- if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
- hdev->asic_funcs->reset_sob_group(hdev,
- hl_cs_cmpl->sob_group);
+ return;
}
free:
@@ -454,8 +467,7 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
if (next_entry_found && !next->tdr_active) {
next->tdr_active = true;
- schedule_delayed_work(&next->work_tdr,
- hdev->timeout_jiffies);
+ schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
}
spin_unlock(&hdev->cs_mirror_lock);
@@ -492,24 +504,6 @@ static void cs_do_release(struct kref *ref)
goto out;
}
- hdev->asic_funcs->hw_queues_lock(hdev);
-
- hdev->cs_active_cnt--;
- if (!hdev->cs_active_cnt) {
- struct hl_device_idle_busy_ts *ts;
-
- ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
- ts->busy_to_idle_ts = ktime_get();
-
- if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
- hdev->idle_busy_ts_idx = 0;
- } else if (hdev->cs_active_cnt < 0) {
- dev_crit(hdev->dev, "CS active cnt %d is negative\n",
- hdev->cs_active_cnt);
- }
-
- hdev->asic_funcs->hw_queues_unlock(hdev);
-
/* Need to update CI for all queue jobs that does not get completion */
hl_hw_queue_update_ci(cs);
@@ -620,14 +614,14 @@ static void cs_timedout(struct work_struct *work)
cs_put(cs);
if (hdev->reset_on_lockup)
- hl_device_reset(hdev, false, false);
+ hl_device_reset(hdev, 0);
else
hdev->needs_reset = true;
}
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_cs_type cs_type, u64 user_sequence,
- struct hl_cs **cs_new)
+ struct hl_cs **cs_new, u32 flags, u32 timeout)
{
struct hl_cs_counters_atomic *cntr;
struct hl_fence *other = NULL;
@@ -638,6 +632,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cntr = &hdev->aggregated_cs_counters;
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
+ if (!cs)
+ cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+
if (!cs) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -651,12 +648,17 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs->submitted = false;
cs->completed = false;
cs->type = cs_type;
+ cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
+ cs->timeout_jiffies = timeout;
INIT_LIST_HEAD(&cs->job_list);
INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
kref_init(&cs->refcount);
spin_lock_init(&cs->job_lock);
cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+ if (!cs_cmpl)
+ cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
+
if (!cs_cmpl) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -664,9 +666,23 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
goto free_cs;
}
+ cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+ sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+ if (!cs->jobs_in_queue_cnt)
+ cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+ sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
+
+ if (!cs->jobs_in_queue_cnt) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
+ rc = -ENOMEM;
+ goto free_cs_cmpl;
+ }
+
cs_cmpl->hdev = hdev;
cs_cmpl->type = cs->type;
spin_lock_init(&cs_cmpl->lock);
+ INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
cs->fence = &cs_cmpl->base_fence;
spin_lock(&ctx->cs_lock);
@@ -696,15 +712,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
goto free_fence;
}
- cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
- sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
- if (!cs->jobs_in_queue_cnt) {
- atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
- atomic64_inc(&cntr->out_of_mem_drop_cnt);
- rc = -ENOMEM;
- goto free_fence;
- }
-
/* init hl_fence */
hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
@@ -727,6 +734,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
free_fence:
spin_unlock(&ctx->cs_lock);
+ kfree(cs->jobs_in_queue_cnt);
+free_cs_cmpl:
kfree(cs_cmpl);
free_cs:
kfree(cs);
@@ -749,6 +758,8 @@ void hl_cs_rollback_all(struct hl_device *hdev)
int i;
struct hl_cs *cs, *tmp;
+ flush_workqueue(hdev->sob_reset_wq);
+
/* flush all completions before iterating over the CS mirror list in
* order to avoid a race with the release functions
*/
@@ -778,6 +789,44 @@ void hl_pending_cb_list_flush(struct hl_ctx *ctx)
}
}
+static void
+wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
+{
+ struct hl_user_pending_interrupt *pend;
+
+ spin_lock(&interrupt->wait_list_lock);
+ list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
+ pend->fence.error = -EIO;
+ complete_all(&pend->fence.completion);
+ }
+ spin_unlock(&interrupt->wait_list_lock);
+}
+
+void hl_release_pending_user_interrupts(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_user_interrupt *interrupt;
+ int i;
+
+ if (!prop->user_interrupt_count)
+ return;
+
+ /* We iterate through the user interrupt requests and waking up all
+ * user threads waiting for interrupt completion. We iterate the
+ * list under a lock, this is why all user threads, once awake,
+ * will wait on the same lock and will release the waiting object upon
+ * unlock.
+ */
+
+ for (i = 0 ; i < prop->user_interrupt_count ; i++) {
+ interrupt = &hdev->user_interrupt[i];
+ wake_pending_user_interrupt_threads(interrupt);
+ }
+
+ interrupt = &hdev->common_user_interrupt;
+ wake_pending_user_interrupt_threads(interrupt);
+}
+
static void job_wq_completion(struct work_struct *work)
{
struct hl_cs_job *job = container_of(work, struct hl_cs_job,
@@ -890,6 +939,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
job = kzalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
+ job = kzalloc(sizeof(*job), GFP_KERNEL);
+
+ if (!job)
return NULL;
kref_init(&job->refcount);
@@ -991,6 +1043,9 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
GFP_ATOMIC);
+ if (!*cs_chunk_array)
+ *cs_chunk_array = kmalloc_array(num_chunks,
+ sizeof(**cs_chunk_array), GFP_KERNEL);
if (!*cs_chunk_array) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
@@ -1038,7 +1093,8 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
}
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
- u32 num_chunks, u64 *cs_seq, u32 flags)
+ u32 num_chunks, u64 *cs_seq, u32 flags,
+ u32 timeout)
{
bool staged_mid, int_queues_only = true;
struct hl_device *hdev = hpriv->hdev;
@@ -1067,11 +1123,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
staged_mid = false;
rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
- staged_mid ? user_sequence : ULLONG_MAX, &cs);
+ staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
+ timeout);
if (rc)
goto free_cs_chunk_array;
- cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
*cs_seq = cs->sequence;
hl_debugfs_add_cs(cs);
@@ -1269,7 +1325,8 @@ static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
list_move_tail(&pending_cb->cb_node, &local_cb_list);
spin_unlock(&ctx->pending_cb_lock);
- rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
+ rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
+ hdev->timeout_jiffies);
if (rc)
goto add_list_elements;
@@ -1370,7 +1427,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
rc = 0;
} else {
rc = cs_ioctl_default(hpriv, chunks, num_chunks,
- cs_seq, 0);
+ cs_seq, 0, hdev->timeout_jiffies);
}
mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1419,7 +1476,7 @@ wait_again:
out:
if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
- hl_device_reset(hdev, false, false);
+ hl_device_reset(hdev, 0);
return rc;
}
@@ -1445,6 +1502,10 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
signal_seq_arr = kmalloc_array(signal_seq_arr_len,
sizeof(*signal_seq_arr),
GFP_ATOMIC);
+ if (!signal_seq_arr)
+ signal_seq_arr = kmalloc_array(signal_seq_arr_len,
+ sizeof(*signal_seq_arr),
+ GFP_KERNEL);
if (!signal_seq_arr) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
@@ -1536,7 +1597,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
void __user *chunks, u32 num_chunks,
- u64 *cs_seq, bool timestamp)
+ u64 *cs_seq, u32 flags, u32 timeout)
{
struct hl_cs_chunk *cs_chunk_array, *chunk;
struct hw_queue_properties *hw_queue_prop;
@@ -1642,7 +1703,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
}
}
- rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
+ rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
if (rc) {
if (cs_type == CS_TYPE_WAIT ||
cs_type == CS_TYPE_COLLECTIVE_WAIT)
@@ -1650,8 +1711,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
goto free_cs_chunk_array;
}
- cs->timestamp = !!timestamp;
-
/*
* Save the signal CS fence for later initialization right before
* hanging the wait CS on the queue.
@@ -1709,7 +1768,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
enum hl_cs_type cs_type;
u64 cs_seq = ULONG_MAX;
void __user *chunks;
- u32 num_chunks, flags;
+ u32 num_chunks, flags, timeout;
int rc;
rc = hl_cs_sanity_checks(hpriv, args);
@@ -1735,16 +1794,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
cs_seq = args->in.seq;
+ timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
+ ? msecs_to_jiffies(args->in.timeout * 1000)
+ : hpriv->hdev->timeout_jiffies;
+
switch (cs_type) {
case CS_TYPE_SIGNAL:
case CS_TYPE_WAIT:
case CS_TYPE_COLLECTIVE_WAIT:
rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
- &cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+ &cs_seq, args->in.cs_flags, timeout);
break;
default:
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
- args->in.cs_flags);
+ args->in.cs_flags, timeout);
break;
}
@@ -1818,7 +1881,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
return rc;
}
-int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct hl_device *hdev = hpriv->hdev;
union hl_wait_cs_args *args = data;
@@ -1873,3 +1936,176 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
+
+static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+ u32 timeout_us, u64 user_address,
+ u32 target_value, u16 interrupt_offset,
+ enum hl_cs_wait_status *status)
+{
+ struct hl_user_pending_interrupt *pend;
+ struct hl_user_interrupt *interrupt;
+ unsigned long timeout;
+ long completion_rc;
+ u32 completion_value;
+ int rc = 0;
+
+ if (timeout_us == U32_MAX)
+ timeout = timeout_us;
+ else
+ timeout = usecs_to_jiffies(timeout_us);
+
+ hl_ctx_get(hdev, ctx);
+
+ pend = kmalloc(sizeof(*pend), GFP_KERNEL);
+ if (!pend) {
+ hl_ctx_put(ctx);
+ return -ENOMEM;
+ }
+
+ hl_fence_init(&pend->fence, ULONG_MAX);
+
+ if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
+ interrupt = &hdev->common_user_interrupt;
+ else
+ interrupt = &hdev->user_interrupt[interrupt_offset];
+
+ spin_lock(&interrupt->wait_list_lock);
+ if (!hl_device_operational(hdev, NULL)) {
+ rc = -EPERM;
+ goto unlock_and_free_fence;
+ }
+
+ if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+ dev_err(hdev->dev,
+ "Failed to copy completion value from user\n");
+ rc = -EFAULT;
+ goto unlock_and_free_fence;
+ }
+
+ if (completion_value >= target_value)
+ *status = CS_WAIT_STATUS_COMPLETED;
+ else
+ *status = CS_WAIT_STATUS_BUSY;
+
+ if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
+ goto unlock_and_free_fence;
+
+ /* Add pending user interrupt to relevant list for the interrupt
+ * handler to monitor
+ */
+ list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
+ spin_unlock(&interrupt->wait_list_lock);
+
+wait_again:
+ /* Wait for interrupt handler to signal completion */
+ completion_rc =
+ wait_for_completion_interruptible_timeout(
+ &pend->fence.completion, timeout);
+
+ /* If timeout did not expire we need to perform the comparison.
+ * If comparison fails, keep waiting until timeout expires
+ */
+ if (completion_rc > 0) {
+ if (copy_from_user(&completion_value,
+ u64_to_user_ptr(user_address), 4)) {
+ dev_err(hdev->dev,
+ "Failed to copy completion value from user\n");
+ rc = -EFAULT;
+ goto remove_pending_user_interrupt;
+ }
+
+ if (completion_value >= target_value) {
+ *status = CS_WAIT_STATUS_COMPLETED;
+ } else {
+ timeout -= jiffies_to_usecs(completion_rc);
+ goto wait_again;
+ }
+ } else {
+ *status = CS_WAIT_STATUS_BUSY;
+ }
+
+remove_pending_user_interrupt:
+ spin_lock(&interrupt->wait_list_lock);
+ list_del(&pend->wait_list_node);
+
+unlock_and_free_fence:
+ spin_unlock(&interrupt->wait_list_lock);
+ kfree(pend);
+ hl_ctx_put(ctx);
+
+ return rc;
+}
+
+static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
+ struct hl_device *hdev = hpriv->hdev;
+ struct asic_fixed_properties *prop;
+ union hl_wait_cs_args *args = data;
+ enum hl_cs_wait_status status;
+ int rc;
+
+ prop = &hdev->asic_prop;
+
+ if (!prop->user_interrupt_count) {
+ dev_err(hdev->dev, "no user interrupts allowed");
+ return -EPERM;
+ }
+
+ interrupt_id =
+ FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
+
+ first_interrupt = prop->first_available_user_msix_interrupt;
+ last_interrupt = prop->first_available_user_msix_interrupt +
+ prop->user_interrupt_count - 1;
+
+ if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
+ interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
+ dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
+ return -EINVAL;
+ }
+
+ if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
+ interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
+ else
+ interrupt_offset = interrupt_id - first_interrupt;
+
+ rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
+ args->in.interrupt_timeout_us, args->in.addr,
+ args->in.target, interrupt_offset, &status);
+
+ memset(args, 0, sizeof(*args));
+
+ if (rc) {
+ dev_err_ratelimited(hdev->dev,
+ "interrupt_wait_ioctl failed (%d)\n", rc);
+
+ return rc;
+ }
+
+ switch (status) {
+ case CS_WAIT_STATUS_COMPLETED:
+ args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+ break;
+ case CS_WAIT_STATUS_BUSY:
+ default:
+ args->out.status = HL_WAIT_CS_STATUS_BUSY;
+ break;
+ }
+
+ return 0;
+}
+
+int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ union hl_wait_cs_args *args = data;
+ u32 flags = args->in.flags;
+ int rc;
+
+ if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
+ rc = hl_interrupt_wait_ioctl(hpriv, data);
+ else
+ rc = hl_cs_wait_ioctl(hpriv, data);
+
+ return rc;
+}
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index cda871afb8f4..62d705889ca8 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -20,6 +20,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
*/
hl_pending_cb_list_flush(ctx);
+ /* Release all allocated HW block mapped list entries and destroy
+ * the mutex.
+ */
+ hl_hw_block_mem_fini(ctx);
+
/*
* If we arrived here, there are no jobs waiting for this context
* on its queues so we can safely remove it.
@@ -160,13 +165,15 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
if (!ctx->cs_pending)
return -ENOMEM;
+ hl_hw_block_mem_init(ctx);
+
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
rc = hl_vm_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mem ctx module\n");
rc = -ENOMEM;
- goto err_free_cs_pending;
+ goto err_hw_block_mem_fini;
}
rc = hdev->asic_funcs->ctx_init(ctx);
@@ -179,7 +186,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
if (!ctx->asid) {
dev_err(hdev->dev, "No free ASID, failed to create context\n");
rc = -ENOMEM;
- goto err_free_cs_pending;
+ goto err_hw_block_mem_fini;
}
rc = hl_vm_ctx_init(ctx);
@@ -214,7 +221,8 @@ err_vm_ctx_fini:
err_asid_free:
if (ctx->asid != HL_KERNEL_ASID_ID)
hl_asid_free(hdev, ctx->asid);
-err_free_cs_pending:
+err_hw_block_mem_fini:
+ hl_hw_block_mem_fini(ctx);
kfree(ctx->cs_pending);
return rc;
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 9f19bee7b592..8381155578a0 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -9,8 +9,8 @@
#include "../include/hw_ip/mmu/mmu_general.h"
#include <linux/pci.h>
-#include <linux/debugfs.h>
#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
#define MMU_ADDR_BUF_SIZE 40
#define MMU_ASID_BUF_SIZE 10
@@ -229,6 +229,7 @@ static int vm_show(struct seq_file *s, void *data)
{
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_vm_hw_block_list_node *lnode;
struct hl_ctx *ctx;
struct hl_vm *vm;
struct hl_vm_hash_node *hnode;
@@ -272,6 +273,21 @@ static int vm_show(struct seq_file *s, void *data)
}
mutex_unlock(&ctx->mem_hash_lock);
+ if (ctx->asid != HL_KERNEL_ASID_ID &&
+ !list_empty(&ctx->hw_block_mem_list)) {
+ seq_puts(s, "\nhw_block mappings:\n\n");
+ seq_puts(s, " virtual address size HW block id\n");
+ seq_puts(s, "-------------------------------------------\n");
+ mutex_lock(&ctx->hw_block_list_lock);
+ list_for_each_entry(lnode, &ctx->hw_block_mem_list,
+ node) {
+ seq_printf(s,
+ " 0x%-14lx %-6u %-9u\n",
+ lnode->vaddr, lnode->size, lnode->id);
+ }
+ mutex_unlock(&ctx->hw_block_list_lock);
+ }
+
vm = &ctx->hdev->vm;
spin_lock(&vm->idr_lock);
@@ -441,21 +457,86 @@ out:
return false;
}
-static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
- u64 *phys_addr)
+static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr,
+ u32 size)
{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 dram_start_addr, dram_end_addr;
+
+ if (!hdev->mmu_enable)
+ return false;
+
+ if (prop->dram_supports_virtual_memory) {
+ dram_start_addr = prop->dmmu.start_addr;
+ dram_end_addr = prop->dmmu.end_addr;
+ } else {
+ dram_start_addr = prop->dram_base_address;
+ dram_end_addr = prop->dram_end_address;
+ }
+
+ if (hl_mem_area_inside_range(addr, size, dram_start_addr,
+ dram_end_addr))
+ return true;
+
+ if (hl_mem_area_inside_range(addr, size, prop->sram_base_address,
+ prop->sram_end_address))
+ return true;
+
+ return false;
+}
+
+static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
+ u64 *phys_addr)
+{
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_ctx *ctx = hdev->compute_ctx;
- int rc = 0;
+ struct hl_vm_hash_node *hnode;
+ struct hl_userptr *userptr;
+ enum vm_type_t *vm_type;
+ bool valid = false;
+ u64 end_address;
+ u32 range_size;
+ int i, rc = 0;
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return -EINVAL;
}
+ /* Verify address is mapped */
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_for_each(ctx->mem_hash, i, hnode, node) {
+ vm_type = hnode->ptr;
+
+ if (*vm_type == VM_TYPE_USERPTR) {
+ userptr = hnode->ptr;
+ range_size = userptr->size;
+ } else {
+ phys_pg_pack = hnode->ptr;
+ range_size = phys_pg_pack->total_size;
+ }
+
+ end_address = virt_addr + size;
+ if ((virt_addr >= hnode->vaddr) &&
+ (end_address <= hnode->vaddr + range_size)) {
+ valid = true;
+ break;
+ }
+ }
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ if (!valid) {
+ dev_err(hdev->dev,
+ "virt addr 0x%llx is not mapped\n",
+ virt_addr);
+ return -EINVAL;
+ }
+
rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr);
if (rc) {
- dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
- virt_addr);
+ dev_err(hdev->dev,
+ "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
rc = -EINVAL;
}
@@ -467,10 +548,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- char tmp_buf[32];
u64 addr = entry->addr;
- u32 val;
+ bool user_address;
+ char tmp_buf[32];
ssize_t rc;
+ u32 val;
if (atomic_read(&hdev->in_reset)) {
dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
@@ -480,13 +562,14 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
if (*ppos)
return 0;
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
+ user_address = hl_is_device_va(hdev, addr);
+ if (user_address) {
+ rc = device_va_to_pa(hdev, addr, sizeof(val), &addr);
if (rc)
return rc;
}
- rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
+ rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val);
if (rc) {
dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
return rc;
@@ -503,6 +586,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 addr = entry->addr;
+ bool user_address;
u32 value;
ssize_t rc;
@@ -515,13 +599,14 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
if (rc)
return rc;
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
+ user_address = hl_is_device_va(hdev, addr);
+ if (user_address) {
+ rc = device_va_to_pa(hdev, addr, sizeof(value), &addr);
if (rc)
return rc;
}
- rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value);
+ rc = hdev->asic_funcs->debugfs_write32(hdev, addr, user_address, value);
if (rc) {
dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
value, addr);
@@ -536,21 +621,28 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- char tmp_buf[32];
u64 addr = entry->addr;
- u64 val;
+ bool user_address;
+ char tmp_buf[32];
ssize_t rc;
+ u64 val;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
+ return 0;
+ }
if (*ppos)
return 0;
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
+ user_address = hl_is_device_va(hdev, addr);
+ if (user_address) {
+ rc = device_va_to_pa(hdev, addr, sizeof(val), &addr);
if (rc)
return rc;
}
- rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
+ rc = hdev->asic_funcs->debugfs_read64(hdev, addr, user_address, &val);
if (rc) {
dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
return rc;
@@ -567,20 +659,27 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf,
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 addr = entry->addr;
+ bool user_address;
u64 value;
ssize_t rc;
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
+ return 0;
+ }
+
rc = kstrtoull_from_user(buf, count, 16, &value);
if (rc)
return rc;
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
+ user_address = hl_is_device_va(hdev, addr);
+ if (user_address) {
+ rc = device_va_to_pa(hdev, addr, sizeof(value), &addr);
if (rc)
return rc;
}
- rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
+ rc = hdev->asic_funcs->debugfs_write64(hdev, addr, user_address, value);
if (rc) {
dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
value, addr);
@@ -590,6 +689,63 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf,
return count;
}
+static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u64 addr = entry->addr;
+ ssize_t rc;
+ u32 size;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n");
+ return 0;
+ }
+ rc = kstrtouint_from_user(buf, count, 16, &size);
+ if (rc)
+ return rc;
+
+ if (!size) {
+ dev_err(hdev->dev, "DMA read failed. size can't be 0\n");
+ return -EINVAL;
+ }
+
+ if (size > SZ_128M) {
+ dev_err(hdev->dev,
+ "DMA read failed. size can't be larger than 128MB\n");
+ return -EINVAL;
+ }
+
+ if (!hl_is_device_internal_memory_va(hdev, addr, size)) {
+ dev_err(hdev->dev,
+ "DMA read failed. Invalid 0x%010llx + 0x%08x\n",
+ addr, size);
+ return -EINVAL;
+ }
+
+ /* Free the previous allocation, if there was any */
+ entry->blob_desc.size = 0;
+ vfree(entry->blob_desc.data);
+
+ entry->blob_desc.data = vmalloc(size);
+ if (!entry->blob_desc.data)
+ return -ENOMEM;
+
+ rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size,
+ entry->blob_desc.data);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr);
+ vfree(entry->blob_desc.data);
+ entry->blob_desc.data = NULL;
+ return -EIO;
+ }
+
+ entry->blob_desc.size = size;
+
+ return count;
+}
+
static ssize_t hl_get_power_state(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -871,7 +1027,7 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
hdev->stop_on_err = value ? 1 : 0;
- hl_device_reset(hdev, false, false);
+ hl_device_reset(hdev, 0);
return count;
}
@@ -899,6 +1055,11 @@ static const struct file_operations hl_data64b_fops = {
.write = hl_data_write64
};
+static const struct file_operations hl_dma_size_fops = {
+ .owner = THIS_MODULE,
+ .write = hl_dma_size_write
+};
+
static const struct file_operations hl_i2c_data_fops = {
.owner = THIS_MODULE,
.read = hl_i2c_data_read,
@@ -1001,6 +1162,9 @@ void hl_debugfs_add_device(struct hl_device *hdev)
if (!dev_entry->entry_arr)
return;
+ dev_entry->blob_desc.size = 0;
+ dev_entry->blob_desc.data = NULL;
+
INIT_LIST_HEAD(&dev_entry->file_list);
INIT_LIST_HEAD(&dev_entry->cb_list);
INIT_LIST_HEAD(&dev_entry->cs_list);
@@ -1103,6 +1267,17 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_security_violations_fops);
+ debugfs_create_file("dma_size",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_dma_size_fops);
+
+ debugfs_create_blob("data_dma",
+ 0400,
+ dev_entry->root,
+ &dev_entry->blob_desc);
+
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
@@ -1121,6 +1296,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
debugfs_remove_recursive(entry->root);
mutex_destroy(&entry->file_mutex);
+
+ vfree(entry->blob_desc.data);
+
kfree(entry->entry_arr);
}
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 334009e83823..00e92b678828 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -70,6 +70,9 @@ static void hpriv_release(struct kref *ref)
mutex_unlock(&hdev->fpriv_list_lock);
kfree(hpriv);
+
+ if (hdev->reset_upon_device_release)
+ hl_device_reset(hdev, 0);
}
void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -77,9 +80,9 @@ void hl_hpriv_get(struct hl_fpriv *hpriv)
kref_get(&hpriv->refcount);
}
-void hl_hpriv_put(struct hl_fpriv *hpriv)
+int hl_hpriv_put(struct hl_fpriv *hpriv)
{
- kref_put(&hpriv->refcount, hpriv_release);
+ return kref_put(&hpriv->refcount, hpriv_release);
}
/*
@@ -103,10 +106,17 @@ static int hl_device_release(struct inode *inode, struct file *filp)
return 0;
}
- hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
- hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+ /* Each pending user interrupt holds the user's context, hence we
+ * must release them all before calling hl_ctx_mgr_fini().
+ */
+ hl_release_pending_user_interrupts(hpriv->hdev);
+
+ hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
+ hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
- hl_hpriv_put(hpriv);
+ if (!hl_hpriv_put(hpriv))
+ dev_warn(hdev->dev,
+ "Device is still in use because there are live CS and/or memory mappings\n");
return 0;
}
@@ -283,7 +293,7 @@ static void device_hard_reset_pending(struct work_struct *work)
struct hl_device *hdev = device_reset_work->hdev;
int rc;
- rc = hl_device_reset(hdev, true, true);
+ rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
if ((rc == -EBUSY) && !hdev->device_fini_pending) {
dev_info(hdev->dev,
"Could not reset device. will try again in %u seconds",
@@ -311,11 +321,15 @@ static int device_early_init(struct hl_device *hdev)
switch (hdev->asic_type) {
case ASIC_GOYA:
goya_set_asic_funcs(hdev);
- strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
+ strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
break;
case ASIC_GAUDI:
gaudi_set_asic_funcs(hdev);
- sprintf(hdev->asic_name, "GAUDI");
+ strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
+ break;
+ case ASIC_GAUDI_SEC:
+ gaudi_set_asic_funcs(hdev);
+ strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
@@ -334,7 +348,7 @@ static int device_early_init(struct hl_device *hdev)
if (hdev->asic_prop.completion_queues_count) {
hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
sizeof(*hdev->cq_wq),
- GFP_ATOMIC);
+ GFP_KERNEL);
if (!hdev->cq_wq) {
rc = -ENOMEM;
goto asid_fini;
@@ -358,24 +372,24 @@ static int device_early_init(struct hl_device *hdev)
goto free_cq_wq;
}
- hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
- GFP_KERNEL);
- if (!hdev->hl_chip_info) {
+ hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
+ if (!hdev->sob_reset_wq) {
+ dev_err(hdev->dev,
+ "Failed to allocate SOB reset workqueue\n");
rc = -ENOMEM;
goto free_eq_wq;
}
- hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
- sizeof(struct hl_device_idle_busy_ts),
- (GFP_KERNEL | __GFP_ZERO));
- if (!hdev->idle_busy_ts_arr) {
+ hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
+ GFP_KERNEL);
+ if (!hdev->hl_chip_info) {
rc = -ENOMEM;
- goto free_chip_info;
+ goto free_sob_reset_wq;
}
rc = hl_mmu_if_set_funcs(hdev);
if (rc)
- goto free_idle_busy_ts_arr;
+ goto free_chip_info;
hl_cb_mgr_init(&hdev->kernel_cb_mgr);
@@ -404,10 +418,10 @@ static int device_early_init(struct hl_device *hdev)
free_cb_mgr:
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
-free_idle_busy_ts_arr:
- kfree(hdev->idle_busy_ts_arr);
free_chip_info:
kfree(hdev->hl_chip_info);
+free_sob_reset_wq:
+ destroy_workqueue(hdev->sob_reset_wq);
free_eq_wq:
destroy_workqueue(hdev->eq_wq);
free_cq_wq:
@@ -441,9 +455,9 @@ static void device_early_fini(struct hl_device *hdev)
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
- kfree(hdev->idle_busy_ts_arr);
kfree(hdev->hl_chip_info);
+ destroy_workqueue(hdev->sob_reset_wq);
destroy_workqueue(hdev->eq_wq);
destroy_workqueue(hdev->device_reset_work.wq);
@@ -485,7 +499,7 @@ static void hl_device_heartbeat(struct work_struct *work)
goto reschedule;
dev_err(hdev->dev, "Device heartbeat failed!\n");
- hl_device_reset(hdev, true, false);
+ hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);
return;
@@ -561,100 +575,24 @@ static void device_late_fini(struct hl_device *hdev)
hdev->late_init_done = false;
}
-uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
+int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
- struct hl_device_idle_busy_ts *ts;
- ktime_t zero_ktime, curr = ktime_get();
- u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
- s64 period_us, last_start_us, last_end_us, last_busy_time_us,
- total_busy_time_us = 0, total_busy_time_ms;
-
- zero_ktime = ktime_set(0, 0);
- period_us = period_ms * USEC_PER_MSEC;
- ts = &hdev->idle_busy_ts_arr[last_index];
-
- /* check case that device is currently in idle */
- if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
- !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
-
- last_index--;
- /* Handle case idle_busy_ts_idx was 0 */
- if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
- last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
-
- ts = &hdev->idle_busy_ts_arr[last_index];
- }
-
- while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
- /* Check if we are in last sample case. i.e. if the sample
- * begun before the sampling period. This could be a real
- * sample or 0 so need to handle both cases
- */
- last_start_us = ktime_to_us(
- ktime_sub(curr, ts->idle_to_busy_ts));
-
- if (last_start_us > period_us) {
-
- /* First check two cases:
- * 1. If the device is currently busy
- * 2. If the device was idle during the whole sampling
- * period
- */
-
- if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
- /* Check if the device is currently busy */
- if (ktime_compare(ts->idle_to_busy_ts,
- zero_ktime))
- return 100;
-
- /* We either didn't have any activity or we
- * reached an entry which is 0. Either way,
- * exit and return what was accumulated so far
- */
- break;
- }
-
- /* If sample has finished, check it is relevant */
- last_end_us = ktime_to_us(
- ktime_sub(curr, ts->busy_to_idle_ts));
-
- if (last_end_us > period_us)
- break;
-
- /* It is relevant so add it but with adjustment */
- last_busy_time_us = ktime_to_us(
- ktime_sub(ts->busy_to_idle_ts,
- ts->idle_to_busy_ts));
- total_busy_time_us += last_busy_time_us -
- (last_start_us - period_us);
- break;
- }
-
- /* Check if the sample is finished or still open */
- if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
- last_busy_time_us = ktime_to_us(
- ktime_sub(ts->busy_to_idle_ts,
- ts->idle_to_busy_ts));
- else
- last_busy_time_us = ktime_to_us(
- ktime_sub(curr, ts->idle_to_busy_ts));
-
- total_busy_time_us += last_busy_time_us;
+ u64 max_power, curr_power, dc_power, dividend;
+ int rc;
- last_index--;
- /* Handle case idle_busy_ts_idx was 0 */
- if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
- last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+ max_power = hdev->asic_prop.max_power_default;
+ dc_power = hdev->asic_prop.dc_power_default;
+ rc = hl_fw_cpucp_power_get(hdev, &curr_power);
- ts = &hdev->idle_busy_ts_arr[last_index];
+ if (rc)
+ return rc;
- overlap_cnt++;
- }
+ curr_power = clamp(curr_power, dc_power, max_power);
- total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
- USEC_PER_MSEC);
+ dividend = (curr_power - dc_power) * 100;
+ *utilization = (u32) div_u64(dividend, (max_power - dc_power));
- return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
+ return 0;
}
/*
@@ -809,7 +747,7 @@ int hl_device_resume(struct hl_device *hdev)
hdev->disabled = false;
atomic_set(&hdev->in_reset, 0);
- rc = hl_device_reset(hdev, true, false);
+ rc = hl_device_reset(hdev, HL_RESET_HARD);
if (rc) {
dev_err(hdev->dev, "Failed to reset device during resume\n");
goto disable_device;
@@ -915,9 +853,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
* hl_device_reset - reset the device
*
* @hdev: pointer to habanalabs device structure
- * @hard_reset: should we do hard reset to all engines or just reset the
- * compute/dma engines
- * @from_hard_reset_thread: is the caller the hard-reset thread
+ * @flags: reset flags.
*
* Block future CS and wait for pending CS to be enqueued
* Call ASIC H/W fini
@@ -929,9 +865,10 @@ static void device_disable_open_processes(struct hl_device *hdev)
*
* Returns 0 for success or an error on failure.
*/
-int hl_device_reset(struct hl_device *hdev, bool hard_reset,
- bool from_hard_reset_thread)
+int hl_device_reset(struct hl_device *hdev, u32 flags)
{
+ u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
+ bool hard_reset, from_hard_reset_thread;
int i, rc;
if (!hdev->init_done) {
@@ -940,6 +877,9 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
return 0;
}
+ hard_reset = (flags & HL_RESET_HARD) != 0;
+ from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;
+
if ((!hard_reset) && (!hdev->supports_soft_reset)) {
dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
hard_reset = true;
@@ -960,7 +900,11 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
if (rc)
return 0;
- if (hard_reset) {
+ /*
+ * if reset is due to heartbeat, device CPU is no responsive in
+ * which case no point sending PCI disable message to it
+ */
+ if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) {
/* Disable PCI access from device F/W so he won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
@@ -1030,6 +974,11 @@ again:
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
+ /* Release all pending user interrupts, each pending user interrupt
+ * holds a reference to user context
+ */
+ hl_release_pending_user_interrupts(hdev);
+
kill_processes:
if (hard_reset) {
/* Kill processes here after CS rollback. This is because the
@@ -1078,14 +1027,6 @@ kill_processes:
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_reset(hdev, &hdev->completion_queue[i]);
- hdev->idle_busy_ts_idx = 0;
- hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
- hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
-
- if (hdev->cs_active_cnt)
- dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
- hdev->cs_active_cnt);
-
mutex_lock(&hdev->fpriv_list_lock);
/* Make sure the context switch phase will run again */
@@ -1151,6 +1092,16 @@ kill_processes:
goto out_err;
}
+ /* If device is not idle fail the reset process */
+ if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
+ HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
+ dev_err(hdev->dev,
+ "device is not idle (mask %#llx %#llx) after reset\n",
+ idle_mask[0], idle_mask[1]);
+ rc = -EIO;
+ goto out_err;
+ }
+
/* Check that the communication with the device is working */
rc = hdev->asic_funcs->test_queues(hdev);
if (rc) {
@@ -1235,7 +1186,7 @@ out_err:
*/
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
- int i, rc, cq_cnt, cq_ready_cnt;
+ int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
char *name;
bool add_cdev_sysfs_on_err = false;
@@ -1274,13 +1225,26 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (rc)
goto free_dev_ctrl;
+ user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;
+
+ if (user_interrupt_cnt) {
+ hdev->user_interrupt = kcalloc(user_interrupt_cnt,
+ sizeof(*hdev->user_interrupt),
+ GFP_KERNEL);
+
+ if (!hdev->user_interrupt) {
+ rc = -ENOMEM;
+ goto early_fini;
+ }
+ }
+
/*
* Start calling ASIC initialization. First S/W then H/W and finally
* late init
*/
rc = hdev->asic_funcs->sw_init(hdev);
if (rc)
- goto early_fini;
+ goto user_interrupts_fini;
/*
* Initialize the H/W queues. Must be done before hw_init, because
@@ -1478,6 +1442,8 @@ hw_queues_destroy:
hl_hw_queues_destroy(hdev);
sw_fini:
hdev->asic_funcs->sw_fini(hdev);
+user_interrupts_fini:
+ kfree(hdev->user_interrupt);
early_fini:
device_early_fini(hdev);
free_dev_ctrl:
@@ -1609,6 +1575,7 @@ void hl_device_fini(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
+ kfree(hdev->user_interrupt);
hl_hw_queues_destroy(hdev);
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 09706c571e95..832dd5c5bb06 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -293,6 +293,7 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
u32 cpu_security_boot_status_reg)
{
u32 err_val, security_val;
+ bool err_exists = false;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
@@ -307,48 +308,102 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
if (!(err_val & CPU_BOOT_ERR0_ENABLED))
return 0;
- if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+ if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - DRAM initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
- if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - Thermal Sensor initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
+ }
if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
- if (hdev->bmc_enable)
- dev_warn(hdev->dev,
+ if (hdev->bmc_enable) {
+ dev_err(hdev->dev,
"Device boot error - Skipped waiting for BMC\n");
- else
+ err_exists = true;
+ } else {
+ dev_info(hdev->dev,
+ "Device boot message - Skipped waiting for BMC\n");
+ /* This is an info so we don't want it to disable the
+ * device
+ */
err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
+ }
}
- if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+ if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
dev_err(hdev->dev,
"Device boot error - Serdes data from BMC not available\n");
- if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
dev_err(hdev->dev,
"Device boot error - NIC F/W initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
dev_warn(hdev->dev,
"Device boot warning - security not ready\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
dev_err(hdev->dev, "Device boot error - security failure\n");
- if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
dev_err(hdev->dev, "Device boot error - eFuse failure\n");
- if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
dev_err(hdev->dev, "Device boot error - PLL failure\n");
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
+ dev_err(hdev->dev,
+ "Device boot error - device unusable\n");
+ err_exists = true;
+ }
security_val = RREG32(cpu_security_boot_status_reg);
if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device security status %#x\n",
security_val);
- if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+ if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
+ dev_err(hdev->dev,
+ "Device boot error - unknown error 0x%08x\n",
+ err_val);
+ err_exists = true;
+ }
+
+ if (err_exists)
return -EIO;
return 0;
@@ -419,6 +474,73 @@ out:
return rc;
}
+static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
+{
+ struct cpucp_array_data_packet *pkt;
+ size_t total_pkt_size, data_size;
+ u64 result;
+ int rc;
+
+ /* skip sending this info for unsupported ASICs */
+ if (!hdev->asic_funcs->get_msi_info)
+ return 0;
+
+ data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
+ total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
+
+ /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
+ total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+
+ /* total_pkt_size is casted to u16 later on */
+ if (total_pkt_size > USHRT_MAX) {
+ dev_err(hdev->dev, "CPUCP array data is too big\n");
+ return -EINVAL;
+ }
+
+ pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+ if (!pkt)
+ return -ENOMEM;
+
+ pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
+
+ hdev->asic_funcs->get_msi_info((u32 *)&pkt->data);
+
+ pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
+ total_pkt_size, 0, &result);
+
+ /*
+ * in case packet result is invalid it means that FW does not support
+ * this feature and will use default/hard coded MSI values. no reason
+ * to stop the boot
+ */
+ if (rc && result == cpucp_packet_invalid)
+ rc = 0;
+
+ if (rc)
+ dev_err(hdev->dev, "failed to send CPUCP array data\n");
+
+ kfree(pkt);
+
+ return rc;
+}
+
+int hl_fw_cpucp_handshake(struct hl_device *hdev,
+ u32 cpu_security_boot_status_reg,
+ u32 boot_err0_reg)
+{
+ int rc;
+
+ rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg,
+ boot_err0_reg);
+ if (rc)
+ return rc;
+
+ return hl_fw_send_msi_info_msg(hdev);
+}
+
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
struct cpucp_packet pkt = {};
@@ -539,18 +661,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
return rc;
}
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+ enum pll_index *pll_index)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u8 pll_byte, pll_bit_off;
+ bool dynamic_pll;
+
+ if (input_pll_index >= PLL_MAX) {
+ dev_err(hdev->dev, "PLL index %d is out of range\n",
+ input_pll_index);
+ return -EINVAL;
+ }
+
+ dynamic_pll = prop->fw_security_status_valid &&
+ (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
+
+ if (!dynamic_pll) {
+ /*
+ * in case we are working with legacy FW (each asic has unique
+ * PLL numbering) extract the legacy numbering
+ */
+ *pll_index = hdev->legacy_pll_map[input_pll_index];
+ return 0;
+ }
+
+ /* PLL map is a u8 array */
+ pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3];
+ pll_bit_off = input_pll_index & 0x7;
+
+ if (!(pll_byte & BIT(pll_bit_off))) {
+ dev_err(hdev->dev, "PLL index %d is not supported\n",
+ input_pll_index);
+ return -EINVAL;
+ }
+
+ *pll_index = input_pll_index;
+
+ return 0;
+}
+
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
u16 *pll_freq_arr)
{
struct cpucp_packet pkt;
+ enum pll_index used_pll_idx;
u64 result;
int rc;
+ rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
+ if (rc)
+ return rc;
+
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
- pkt.pll_type = __cpu_to_le16(pll_index);
+ pkt.pll_type = __cpu_to_le16((u16)used_pll_idx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
@@ -565,6 +732,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
return rc;
}
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
+{
+ struct cpucp_packet pkt;
+ u64 result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
+ return rc;
+ }
+
+ *power = result;
+
+ return rc;
+}
+
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
{
/* Some of the status codes below are deprecated in newer f/w
@@ -623,7 +813,11 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 status, security_status;
int rc;
- if (!hdev->cpu_enable)
+ /* pldm was added for cases in which we use preboot on pldm and want
+ * to load boot fit, but we can't wait for preboot because it runs
+ * very slowly
+ */
+ if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
return 0;
/* Need to check two possible scenarios:
@@ -677,16 +871,16 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
prop->fw_security_status_valid = 1;
+ /* FW security should be derived from PCI ID, we keep this
+ * check for backward compatibility
+ */
if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
prop->fw_security_disabled = false;
- else
- prop->fw_security_disabled = true;
if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
} else {
prop->fw_security_status_valid = 0;
- prop->fw_security_disabled = true;
}
dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
@@ -710,7 +904,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 status;
int rc;
- if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU))
+ if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
return 0;
dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
@@ -801,7 +995,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out;
}
- if (!(hdev->fw_loading & FW_TYPE_LINUX)) {
+ if (!(hdev->fw_components & FW_TYPE_LINUX)) {
dev_info(hdev->dev, "Skip loading Linux F/W\n");
goto out;
}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 4b321e4f8059..44e89da30b4a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -19,6 +19,7 @@
#include <linux/dma-direction.h>
#include <linux/scatterlist.h>
#include <linux/hashtable.h>
+#include <linux/debugfs.h>
#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/sched/signal.h>
@@ -61,7 +62,7 @@
#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
-#define HL_IDLE_BUSY_TS_ARR_SIZE 4096
+#define HL_COMMON_USER_INTERRUPT_ID 0xFFF
/* Memory */
#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
@@ -102,6 +103,23 @@ enum hl_mmu_page_table_location {
#define HL_MAX_DCORES 4
+/*
+ * Reset Flags
+ *
+ * - HL_RESET_HARD
+ * If set do hard reset to all engines. If not set reset just
+ * compute/DMA engines.
+ *
+ * - HL_RESET_FROM_RESET_THREAD
+ * Set if the caller is the hard-reset thread
+ *
+ * - HL_RESET_HEARTBEAT
+ * Set if reset is due to heartbeat
+ */
+#define HL_RESET_HARD (1 << 0)
+#define HL_RESET_FROM_RESET_THREAD (1 << 1)
+#define HL_RESET_HEARTBEAT (1 << 2)
+
#define HL_MAX_SOBS_PER_MONITOR 8
/**
@@ -169,15 +187,19 @@ enum hl_fw_component {
};
/**
- * enum hl_fw_types - F/W types to load
+ * enum hl_fw_types - F/W types present in the system
* @FW_TYPE_LINUX: Linux image for device CPU
* @FW_TYPE_BOOT_CPU: Boot image for device CPU
+ * @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system
+ * (preboot, ppboot etc...)
* @FW_TYPE_ALL_TYPES: Mask for all types
*/
enum hl_fw_types {
FW_TYPE_LINUX = 0x1,
FW_TYPE_BOOT_CPU = 0x2,
- FW_TYPE_ALL_TYPES = (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU)
+ FW_TYPE_PREBOOT_CPU = 0x4,
+ FW_TYPE_ALL_TYPES =
+ (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU | FW_TYPE_PREBOOT_CPU)
};
/**
@@ -368,6 +390,7 @@ struct hl_mmu_properties {
* @dram_size: DRAM total size.
* @dram_pci_bar_size: size of PCI bar towards DRAM.
* @max_power_default: max power of the device after reset
+ * @dc_power_default: power consumed by the device in mode idle.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault.
* @pcie_dbi_base_address: Base address of the PCIE_DBI block.
@@ -412,6 +435,7 @@ struct hl_mmu_properties {
* @first_available_user_msix_interrupt: first available msix interrupt
* reserved for the user
* @first_available_cq: first available CQ for the user.
+ * @user_interrupt_count: number of user interrupts.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware,
@@ -421,6 +445,7 @@ struct hl_mmu_properties {
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
* @num_functional_hbms: number of functional HBMs in each DCORE.
+ * @iatu_done_by_fw: true if iATU configuration is being done by FW.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -439,6 +464,7 @@ struct asic_fixed_properties {
u64 dram_size;
u64 dram_pci_bar_size;
u64 max_power_default;
+ u64 dc_power_default;
u64 dram_size_for_default_page_mapping;
u64 pcie_dbi_base_address;
u64 pcie_aux_dbi_reg_addr;
@@ -475,6 +501,7 @@ struct asic_fixed_properties {
u16 first_available_user_mon[HL_MAX_DCORES];
u16 first_available_user_msix_interrupt;
u16 first_available_cq[HL_MAX_DCORES];
+ u16 user_interrupt_count;
u8 tpc_enabled_mask;
u8 completion_queues_count;
u8 fw_security_disabled;
@@ -482,6 +509,7 @@ struct asic_fixed_properties {
u8 dram_supports_virtual_memory;
u8 hard_reset_done_by_fw;
u8 num_functional_hbms;
+ u8 iatu_done_by_fw;
};
/**
@@ -503,6 +531,7 @@ struct hl_fence {
/**
* struct hl_cs_compl - command submission completion object.
+ * @sob_reset_work: workqueue object to run SOB reset flow.
* @base_fence: hl fence object.
* @lock: spinlock to protect fence.
* @hdev: habanalabs device structure.
@@ -513,6 +542,7 @@ struct hl_fence {
* @sob_group: the SOB group that is used in this collective wait CS.
*/
struct hl_cs_compl {
+ struct work_struct sob_reset_work;
struct hl_fence base_fence;
spinlock_t lock;
struct hl_device *hdev;
@@ -690,6 +720,31 @@ struct hl_cq {
};
/**
+ * struct hl_user_interrupt - holds user interrupt information
+ * @hdev: pointer to the device structure
+ * @wait_list_head: head to the list of user threads pending on this interrupt
+ * @wait_list_lock: protects wait_list_head
+ * @interrupt_id: msix interrupt id
+ */
+struct hl_user_interrupt {
+ struct hl_device *hdev;
+ struct list_head wait_list_head;
+ spinlock_t wait_list_lock;
+ u32 interrupt_id;
+};
+
+/**
+ * struct hl_user_pending_interrupt - holds a context to a user thread
+ * pending on an interrupt
+ * @wait_list_node: node in the list of user threads pending on an interrupt
+ * @fence: hl fence object for interrupt completion
+ */
+struct hl_user_pending_interrupt {
+ struct list_head wait_list_node;
+ struct hl_fence fence;
+};
+
+/**
* struct hl_eq - describes the event queue (single one per device)
* @hdev: pointer to the device structure
* @kernel_address: holds the queue's kernel virtual address
@@ -713,11 +768,13 @@ struct hl_eq {
* @ASIC_INVALID: Invalid ASIC type.
* @ASIC_GOYA: Goya device.
* @ASIC_GAUDI: Gaudi device.
+ * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
*/
enum hl_asic_type {
ASIC_INVALID,
ASIC_GOYA,
- ASIC_GAUDI
+ ASIC_GAUDI,
+ ASIC_GAUDI_SEC
};
struct hl_cs_parser;
@@ -802,8 +859,12 @@ enum div_select_defs {
* @update_eq_ci: update event queue CI.
* @context_switch: called upon ASID context switch.
* @restore_phase_topology: clear all SOBs amd MONs.
- * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
- * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
+ * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM/Host memory.
+ * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM/Host memory.
+ * @debugfs_read64: debug interface for reading u64 from DRAM/SRAM/Host memory.
+ * @debugfs_write64: debug interface for writing u64 to DRAM/SRAM/Host memory.
+ * @debugfs_read_dma: debug interface for reading up to 2MB from the device's
+ * internal memory via DMA engine.
* @add_device_attr: add ASIC specific device attributes.
* @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
* @set_pll_profile: change PLL profile (manual/automatic).
@@ -919,10 +980,16 @@ struct hl_asic_funcs {
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
int (*context_switch)(struct hl_device *hdev, u32 asid);
void (*restore_phase_topology)(struct hl_device *hdev);
- int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
- int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
- int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
- int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
+ int (*debugfs_read32)(struct hl_device *hdev, u64 addr,
+ bool user_address, u32 *val);
+ int (*debugfs_write32)(struct hl_device *hdev, u64 addr,
+ bool user_address, u32 val);
+ int (*debugfs_read64)(struct hl_device *hdev, u64 addr,
+ bool user_address, u64 *val);
+ int (*debugfs_write64)(struct hl_device *hdev, u64 addr,
+ bool user_address, u64 val);
+ int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
+ void *blob_addr);
void (*add_device_attr)(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void (*handle_eqe)(struct hl_device *hdev,
@@ -986,6 +1053,7 @@ struct hl_asic_funcs {
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
+ void (*get_msi_info)(u32 *table);
};
@@ -1070,9 +1138,11 @@ struct hl_pending_cb {
* @mem_hash_lock: protects the mem_hash.
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
* MMU hash or walking the PGT requires talking this lock.
+ * @hw_block_list_lock: protects the HW block memory list.
* @debugfs_list: node in debugfs list of contexts.
* pending_cb_list: list of pending command buffers waiting to be sent upon
* next user command submission context.
+ * @hw_block_mem_list: list of HW block virtual mapped addresses.
* @cs_counters: context command submission counters.
* @cb_va_pool: device VA pool for command buffers which are mapped to the
* device's MMU.
@@ -1109,8 +1179,10 @@ struct hl_ctx {
struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
struct mutex mem_hash_lock;
struct mutex mmu_lock;
+ struct mutex hw_block_list_lock;
struct list_head debugfs_list;
struct list_head pending_cb_list;
+ struct list_head hw_block_mem_list;
struct hl_cs_counters_atomic cs_counters;
struct gen_pool *cb_va_pool;
u64 cs_sequence;
@@ -1185,6 +1257,7 @@ struct hl_userptr {
* @sequence: the sequence number of this CS.
* @staged_sequence: the sequence of the staged submission this CS is part of,
* relevant only if staged_cs is set.
+ * @timeout_jiffies: cs timeout in jiffies.
* @type: CS_TYPE_*.
* @submitted: true if CS was submitted to H/W.
* @completed: true if CS was completed by device.
@@ -1213,6 +1286,7 @@ struct hl_cs {
struct list_head debugfs_list;
u64 sequence;
u64 staged_sequence;
+ u64 timeout_jiffies;
enum hl_cs_type type;
u8 submitted;
u8 completed;
@@ -1330,6 +1404,23 @@ struct hl_vm_hash_node {
};
/**
+ * struct hl_vm_hw_block_list_node - list element from user virtual address to
+ * HW block id.
+ * @node: node to hang on the list in context object.
+ * @ctx: the context this node belongs to.
+ * @vaddr: virtual address of the HW block.
+ * @size: size of the block.
+ * @id: HW block id (handle).
+ */
+struct hl_vm_hw_block_list_node {
+ struct list_head node;
+ struct hl_ctx *ctx;
+ unsigned long vaddr;
+ u32 size;
+ u32 id;
+};
+
+/**
* struct hl_vm_phys_pg_pack - physical page pack.
* @vm_type: describes the type of the virtual area descriptor.
* @pages: the physical page array.
@@ -1490,12 +1581,13 @@ struct hl_debugfs_entry {
* @userptr_spinlock: protects userptr_list.
* @ctx_mem_hash_list: list of available contexts with MMU mappings.
* @ctx_mem_hash_spinlock: protects cb_list.
+ * @blob_desc: descriptor of blob
* @addr: next address to read/write from/to in read/write32.
* @mmu_addr: next virtual address to translate to physical address in mmu_show.
* @mmu_asid: ASID to use while translating in mmu_show.
* @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
- * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read.
- * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read.
+ * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
+ * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
*/
struct hl_dbg_device_entry {
struct dentry *root;
@@ -1513,6 +1605,7 @@ struct hl_dbg_device_entry {
spinlock_t userptr_spinlock;
struct list_head ctx_mem_hash_list;
spinlock_t ctx_mem_hash_spinlock;
+ struct debugfs_blob_wrapper blob_desc;
u64 addr;
u64 mmu_addr;
u32 mmu_asid;
@@ -1684,16 +1777,6 @@ struct hl_device_reset_work {
};
/**
- * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
- * @idle_to_busy_ts: timestamp where device changed from idle to busy.
- * @busy_to_idle_ts: timestamp where device changed from busy to idle.
- */
-struct hl_device_idle_busy_ts {
- ktime_t idle_to_busy_ts;
- ktime_t busy_to_idle_ts;
-};
-
-/**
* struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop
* information.
* @virt_addr: the virtual address of the hop.
@@ -1821,9 +1904,16 @@ struct hl_mmu_funcs {
* @asic_name: ASIC specific name.
* @asic_type: ASIC specific type.
* @completion_queue: array of hl_cq.
+ * @user_interrupt: array of hl_user_interrupt. upon the corresponding user
+ * interrupt, driver will monitor the list of fences
+ * registered to this interrupt.
+ * @common_user_interrupt: common user interrupt for all user interrupts.
+ * upon any user interrupt, driver will monitor the
+ * list of fences registered to this common structure.
* @cq_wq: work queues of completion queues for executing work in process
* context.
* @eq_wq: work queue of event queue for executing work in process context.
+ * @sob_reset_wq: work queue for sob reset executions.
* @kernel_ctx: Kernel driver context structure.
* @kernel_queues: array of hl_hw_queue.
* @cs_mirror_list: CS mirror list for TDR.
@@ -1857,11 +1947,11 @@ struct hl_mmu_funcs {
* when a user opens the device
* @fpriv_list_lock: protects the fpriv_list
* @compute_ctx: current compute context executing.
- * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
- * and vice-versa
* @aggregated_cs_counters: aggregated cs counters among all contexts
* @mmu_priv: device-specific MMU data.
* @mmu_func: device-related MMU functions.
+ * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and
+ * static (asic specific) PLL indexes.
* @dram_used_mem: current DRAM memory consumption.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
@@ -1874,13 +1964,10 @@ struct hl_mmu_funcs {
* @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card
* type of the current device.
- * @cs_active_cnt: number of active command submissions on this device (active
- * means already in H/W queues)
* @major: habanalabs kernel driver major.
* @high_pll: high PLL profile frequency.
* @soft_reset_cnt: number of soft reset since the driver was loaded.
* @hard_reset_cnt: number of hard reset since the driver was loaded.
- * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
* @clk_throttling_reason: bitmask represents the current clk throttling reasons
* @id: device minor.
* @id_control: minor of the control device
@@ -1937,8 +2024,11 @@ struct hl_device {
char status[HL_DEV_STS_MAX][HL_STR_MAX];
enum hl_asic_type asic_type;
struct hl_cq *completion_queue;
+ struct hl_user_interrupt *user_interrupt;
+ struct hl_user_interrupt common_user_interrupt;
struct workqueue_struct **cq_wq;
struct workqueue_struct *eq_wq;
+ struct workqueue_struct *sob_reset_wq;
struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues;
struct list_head cs_mirror_list;
@@ -1976,13 +2066,13 @@ struct hl_device {
struct hl_ctx *compute_ctx;
- struct hl_device_idle_busy_ts *idle_busy_ts_arr;
-
struct hl_cs_counters_atomic aggregated_cs_counters;
struct hl_mmu_priv mmu_priv;
struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];
+ enum pll_index *legacy_pll_map;
+
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
@@ -1990,12 +2080,10 @@ struct hl_device {
atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type;
- int cs_active_cnt;
u32 major;
u32 high_pll;
u32 soft_reset_cnt;
u32 hard_reset_cnt;
- u32 idle_busy_ts_idx;
u32 clk_throttling_reason;
u16 id;
u16 id_control;
@@ -2029,10 +2117,9 @@ struct hl_device {
/* Parameters for bring-up */
u64 nic_ports_mask;
- u64 fw_loading;
+ u64 fw_components;
u8 mmu_enable;
u8 mmu_huge_page_opt;
- u8 cpu_enable;
u8 reset_pcilink;
u8 cpu_queues_enable;
u8 pldm;
@@ -2043,6 +2130,7 @@ struct hl_device {
u8 bmc_enable;
u8 rl_enable;
u8 reset_on_preboot_fail;
+ u8 reset_upon_device_release;
};
@@ -2157,6 +2245,8 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+irqreturn_t hl_irq_handler_user_cq(int irq, void *arg);
+irqreturn_t hl_irq_handler_default(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);
int hl_asid_init(struct hl_device *hdev);
@@ -2178,12 +2268,11 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass);
void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev);
-int hl_device_reset(struct hl_device *hdev, bool hard_reset,
- bool from_hard_reset_thread);
+int hl_device_reset(struct hl_device *hdev, u32 flags);
void hl_hpriv_get(struct hl_fpriv *hpriv);
-void hl_hpriv_put(struct hl_fpriv *hpriv);
+int hl_hpriv_put(struct hl_fpriv *hpriv);
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
-uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
+int hl_device_utilization(struct hl_device *hdev, u32 *utilization);
int hl_build_hwmon_channel_info(struct hl_device *hdev,
struct cpucp_sensor *sensors_arr);
@@ -2235,6 +2324,9 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx);
int hl_vm_init(struct hl_device *hdev);
void hl_vm_fini(struct hl_device *hdev);
+void hl_hw_block_mem_init(struct hl_ctx *ctx);
+void hl_hw_block_mem_fini(struct hl_ctx *ctx);
+
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_va_range_type type, u32 size, u32 alignment);
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
@@ -2287,13 +2379,19 @@ int hl_fw_send_heartbeat(struct hl_device *hdev);
int hl_fw_cpucp_info_get(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg);
+int hl_fw_cpucp_handshake(struct hl_device *hdev,
+ u32 cpu_security_boot_status_reg,
+ u32 boot_err0_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
u64 *total_energy);
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+ enum pll_index *pll_index);
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
u16 *pll_freq_arr);
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
@@ -2304,6 +2402,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
+int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
struct hl_inbound_pci_region *pci_region);
@@ -2312,8 +2411,10 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
+long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
+ bool curr);
+void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index,
+ u64 freq);
int hl_get_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev,
@@ -2334,6 +2435,7 @@ int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
+void hl_release_pending_user_interrupts(struct hl_device *hdev);
#ifdef CONFIG_DEBUG_FS
@@ -2434,7 +2536,7 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
#endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 032d114f01ea..7135f1e03864 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -27,13 +27,13 @@ static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);
-static int timeout_locked = 5;
+static int timeout_locked = 30;
static int reset_on_lockup = 1;
static int memory_scrub = 1;
module_param(timeout_locked, int, 0444);
MODULE_PARM_DESC(timeout_locked,
- "Device lockup timeout in seconds (0 = disabled, default 5s)");
+ "Device lockup timeout in seconds (0 = disabled, default 30s)");
module_param(reset_on_lockup, int, 0444);
MODULE_PARM_DESC(reset_on_lockup,
@@ -47,10 +47,12 @@ MODULE_PARM_DESC(memory_scrub,
#define PCI_IDS_GOYA 0x0001
#define PCI_IDS_GAUDI 0x1000
+#define PCI_IDS_GAUDI_SEC 0x1010
static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
+ { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
@@ -74,6 +76,9 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GAUDI:
asic_type = ASIC_GAUDI;
break;
+ case PCI_IDS_GAUDI_SEC:
+ asic_type = ASIC_GAUDI_SEC;
+ break;
default:
asic_type = ASIC_INVALID;
break;
@@ -82,6 +87,16 @@ static enum hl_asic_type get_asic_type(u16 device)
return asic_type;
}
+static bool is_asic_secured(enum hl_asic_type asic_type)
+{
+ switch (asic_type) {
+ case ASIC_GAUDI_SEC:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* hl_device_open - open function for habanalabs device
*
@@ -234,8 +249,7 @@ out_err:
static void set_driver_behavior_per_device(struct hl_device *hdev)
{
- hdev->cpu_enable = 1;
- hdev->fw_loading = FW_TYPE_ALL_TYPES;
+ hdev->fw_components = FW_TYPE_ALL_TYPES;
hdev->cpu_queues_enable = 1;
hdev->heartbeat = 1;
hdev->mmu_enable = 1;
@@ -288,6 +302,12 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->asic_type = asic_type;
}
+ if (pdev)
+ hdev->asic_prop.fw_security_disabled =
+ !is_asic_secured(pdev->device);
+ else
+ hdev->asic_prop.fw_security_disabled = true;
+
/* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
"disabled", HL_STR_MAX);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 083a30969c5f..33841c272eb6 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -226,19 +226,14 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
struct hl_info_device_utilization device_util = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
if ((!max_size) || (!out))
return -EINVAL;
- if ((args->period_ms < 100) || (args->period_ms > 1000) ||
- (args->period_ms % 100)) {
- dev_err(hdev->dev,
- "period %u must be between 100 - 1000 and must be divisible by 100\n",
- args->period_ms);
+ rc = hl_device_utilization(hdev, &device_util.utilization);
+ if (rc)
return -EINVAL;
- }
-
- device_util.utilization = hl_device_utilization(hdev, args->period_ms);
return copy_to_user(out, &device_util,
min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
@@ -446,6 +441,25 @@ static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0;
}
+static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ struct hl_power_info power_info = {0};
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ rc = hl_fw_cpucp_power_get(hdev, &power_info.power);
+ if (rc)
+ return rc;
+
+ return copy_to_user(out, &power_info,
+ min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -526,6 +540,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_PLL_FREQUENCY:
return pll_frequency_info(hpriv, args);
+ case HL_INFO_POWER:
+ return power_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
@@ -596,7 +613,7 @@ static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 0f335182267f..173438461835 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -629,20 +629,12 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
first_entry && cs_needs_timeout(cs)) {
cs->tdr_active = true;
- schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
+ schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);
}
spin_unlock(&hdev->cs_mirror_lock);
- if (!hdev->cs_active_cnt++) {
- struct hl_device_idle_busy_ts *ts;
-
- ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
- ts->busy_to_idle_ts = ktime_set(0, 0);
- ts->idle_to_busy_ts = ktime_get();
- }
-
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
switch (job->queue_type) {
case QUEUE_TYPE_EXT:
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index 44a0522b59b9..27129868c711 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -137,6 +137,62 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
return IRQ_HANDLED;
}
+static void handle_user_cq(struct hl_device *hdev,
+ struct hl_user_interrupt *user_cq)
+{
+ struct hl_user_pending_interrupt *pend;
+
+ spin_lock(&user_cq->wait_list_lock);
+ list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node)
+ complete_all(&pend->fence.completion);
+ spin_unlock(&user_cq->wait_list_lock);
+}
+
+/**
+ * hl_irq_handler_user_cq - irq handler for user completion queues
+ *
+ * @irq: irq number
+ * @arg: pointer to user interrupt structure
+ *
+ */
+irqreturn_t hl_irq_handler_user_cq(int irq, void *arg)
+{
+ struct hl_user_interrupt *user_cq = arg;
+ struct hl_device *hdev = user_cq->hdev;
+
+ dev_dbg(hdev->dev,
+ "got user completion interrupt id %u",
+ user_cq->interrupt_id);
+
+ /* Handle user cq interrupts registered on all interrupts */
+ handle_user_cq(hdev, &hdev->common_user_interrupt);
+
+ /* Handle user cq interrupts registered on this specific interrupt */
+ handle_user_cq(hdev, user_cq);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * hl_irq_handler_default - default irq handler
+ *
+ * @irq: irq number
+ * @arg: pointer to user interrupt structure
+ *
+ */
+irqreturn_t hl_irq_handler_default(int irq, void *arg)
+{
+ struct hl_user_interrupt *user_interrupt = arg;
+ struct hl_device *hdev = user_interrupt->hdev;
+ u32 interrupt_id = user_interrupt->interrupt_id;
+
+ dev_err(hdev->dev,
+ "got invalid user interrupt %u",
+ interrupt_id);
+
+ return IRQ_HANDLED;
+}
+
/**
* hl_irq_handler_eq - irq handler for event queue
*
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 1f5910517b0e..2938cbbafbbc 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -81,16 +81,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
num_pgs, total_size);
return -ENOMEM;
}
-
- if (hdev->memory_scrub) {
- rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr,
- total_size);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to scrub contiguous device memory\n");
- goto pages_pack_err;
- }
- }
}
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
@@ -128,24 +118,13 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
goto page_err;
}
- if (hdev->memory_scrub) {
- rc = hdev->asic_funcs->scrub_device_mem(hdev,
- phys_pg_pack->pages[i],
- page_size);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to scrub device memory\n");
- goto page_err;
- }
- }
-
num_curr_pgs++;
}
}
spin_lock(&vm->idr_lock);
handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
- GFP_ATOMIC);
+ GFP_KERNEL);
spin_unlock(&vm->idr_lock);
if (handle < 0) {
@@ -280,37 +259,67 @@ static void dram_pg_pool_do_release(struct kref *ref)
* @phys_pg_pack: physical page pack to free.
*
* This function does the following:
- * - For DRAM memory only, iterate over the pack and free each physical block
- * structure by returning it to the general pool.
+ * - For DRAM memory only
+ * - iterate over the pack, scrub and free each physical block structure by
+ * returning it to the general pool.
+ * In case of error during scrubbing, initiate hard reset.
+ * Once hard reset is triggered, scrubbing is bypassed while freeing the
+ * memory continues.
* - Free the hl_vm_phys_pg_pack structure.
*/
-static void free_phys_pg_pack(struct hl_device *hdev,
+static int free_phys_pg_pack(struct hl_device *hdev,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct hl_vm *vm = &hdev->vm;
u64 i;
+ int rc = 0;
- if (!phys_pg_pack->created_from_userptr) {
- if (phys_pg_pack->contiguous) {
- gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+ if (phys_pg_pack->created_from_userptr)
+ goto end;
+
+ if (phys_pg_pack->contiguous) {
+ if (hdev->memory_scrub && !hdev->disabled) {
+ rc = hdev->asic_funcs->scrub_device_mem(hdev,
+ phys_pg_pack->pages[0],
phys_pg_pack->total_size);
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to scrub contiguous device memory\n");
+ }
- for (i = 0; i < phys_pg_pack->npages ; i++)
- kref_put(&vm->dram_pg_pool_refcount,
- dram_pg_pool_do_release);
- } else {
- for (i = 0 ; i < phys_pg_pack->npages ; i++) {
- gen_pool_free(vm->dram_pg_pool,
+ gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+ phys_pg_pack->total_size);
+
+ for (i = 0; i < phys_pg_pack->npages ; i++)
+ kref_put(&vm->dram_pg_pool_refcount,
+ dram_pg_pool_do_release);
+ } else {
+ for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+ if (hdev->memory_scrub && !hdev->disabled && rc == 0) {
+ rc = hdev->asic_funcs->scrub_device_mem(
+ hdev,
phys_pg_pack->pages[i],
phys_pg_pack->page_size);
- kref_put(&vm->dram_pg_pool_refcount,
- dram_pg_pool_do_release);
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to scrub device memory\n");
}
+ gen_pool_free(vm->dram_pg_pool,
+ phys_pg_pack->pages[i],
+ phys_pg_pack->page_size);
+ kref_put(&vm->dram_pg_pool_refcount,
+ dram_pg_pool_do_release);
}
}
+ if (rc && !hdev->disabled)
+ hl_device_reset(hdev, HL_RESET_HARD);
+
+end:
kvfree(phys_pg_pack->pages);
kfree(phys_pg_pack);
+
+ return rc;
}
/**
@@ -349,7 +358,7 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
- free_phys_pg_pack(hdev, phys_pg_pack);
+ return free_phys_pg_pack(hdev, phys_pg_pack);
} else {
spin_unlock(&vm->idr_lock);
dev_err(hdev->dev,
@@ -857,6 +866,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
u32 page_size = phys_pg_pack->page_size;
int rc = 0;
+ bool is_host_addr;
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
@@ -878,6 +888,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
return 0;
err:
+ is_host_addr = !hl_is_dram_va(hdev, vaddr);
+
next_vaddr = vaddr;
for (i = 0 ; i < mapped_pg_cnt ; i++) {
if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
@@ -888,6 +900,17 @@ err:
phys_pg_pack->pages[i], page_size);
next_vaddr += page_size;
+
+ /*
+ * unmapping on Palladium can be really long, so avoid a CPU
+ * soft lockup bug by sleeping a little between unmapping pages
+ *
+ * In addition, on host num of pages could be huge,
+ * because page size could be 4KB, so when unmapping host
+ * pages sleep every 32K pages to avoid soft lockup
+ */
+ if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
+ usleep_range(50, 200);
}
return rc;
@@ -921,9 +944,9 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
* unmapping on Palladium can be really long, so avoid a CPU
* soft lockup bug by sleeping a little between unmapping pages
*
- * In addition, when unmapping host memory we pass through
- * the Linux kernel to unpin the pages and that takes a long
- * time. Therefore, sleep every 32K pages to avoid soft lockup
+ * In addition, on host num of pages could be huge,
+ * because page size could be 4KB, so when unmapping host
+ * pages sleep every 32K pages to avoid soft lockup
*/
if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
usleep_range(50, 200);
@@ -1117,9 +1140,9 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
*device_addr = ret_vaddr;
if (is_userptr)
- free_phys_pg_pack(hdev, phys_pg_pack);
+ rc = free_phys_pg_pack(hdev, phys_pg_pack);
- return 0;
+ return rc;
map_err:
if (add_va_block(hdev, va_range, ret_vaddr,
@@ -1272,7 +1295,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
kfree(hnode);
if (is_userptr) {
- free_phys_pg_pack(hdev, phys_pg_pack);
+ rc = free_phys_pg_pack(hdev, phys_pg_pack);
dma_unmap_host_va(hdev, userptr);
}
@@ -1305,9 +1328,15 @@ static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
static void hw_block_vm_close(struct vm_area_struct *vma)
{
- struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+ struct hl_vm_hw_block_list_node *lnode =
+ (struct hl_vm_hw_block_list_node *) vma->vm_private_data;
+ struct hl_ctx *ctx = lnode->ctx;
+ mutex_lock(&ctx->hw_block_list_lock);
+ list_del(&lnode->node);
+ mutex_unlock(&ctx->hw_block_list_lock);
hl_ctx_put(ctx);
+ kfree(lnode);
vma->vm_private_data = NULL;
}
@@ -1325,7 +1354,9 @@ static const struct vm_operations_struct hw_block_vm_ops = {
*/
int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
+ struct hl_vm_hw_block_list_node *lnode;
struct hl_device *hdev = hpriv->hdev;
+ struct hl_ctx *ctx = hpriv->ctx;
u32 block_id, block_size;
int rc;
@@ -1351,17 +1382,31 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
return -EINVAL;
}
+ lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
+ if (!lnode)
+ return -ENOMEM;
+
vma->vm_ops = &hw_block_vm_ops;
- vma->vm_private_data = hpriv->ctx;
+ vma->vm_private_data = lnode;
- hl_ctx_get(hdev, hpriv->ctx);
+ hl_ctx_get(hdev, ctx);
rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
if (rc) {
- hl_ctx_put(hpriv->ctx);
+ hl_ctx_put(ctx);
+ kfree(lnode);
return rc;
}
+ lnode->ctx = ctx;
+ lnode->vaddr = vma->vm_start;
+ lnode->size = block_size;
+ lnode->id = block_id;
+
+ mutex_lock(&ctx->hw_block_list_lock);
+ list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
+ mutex_unlock(&ctx->hw_block_list_lock);
+
vma->vm_pgoff = block_id;
return 0;
@@ -1574,7 +1619,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
rc = sg_alloc_table_from_pages(userptr->sgt,
userptr->pages,
- npages, offset, size, GFP_ATOMIC);
+ npages, offset, size, GFP_KERNEL);
if (rc < 0) {
dev_err(hdev->dev, "failed to create SG table from pages\n");
goto put_pages;
@@ -1624,11 +1669,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
return -EINVAL;
}
- /*
- * This function can be called also from data path, hence use atomic
- * always as it is not a big allocation.
- */
- userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+ userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
if (!userptr->sgt)
return -ENOMEM;
@@ -2122,3 +2163,38 @@ void hl_vm_fini(struct hl_device *hdev)
vm->init_done = false;
}
+
+/**
+ * hl_hw_block_mem_init() - HW block memory initialization.
+ * @ctx: pointer to the habanalabs context structure.
+ *
+ * This function initializes the HW block virtual mapped addresses list and
+ * it's lock.
+ */
+void hl_hw_block_mem_init(struct hl_ctx *ctx)
+{
+ mutex_init(&ctx->hw_block_list_lock);
+ INIT_LIST_HEAD(&ctx->hw_block_mem_list);
+}
+
+/**
+ * hl_hw_block_mem_fini() - HW block memory teardown.
+ * @ctx: pointer to the habanalabs context structure.
+ *
+ * This function clears the HW block virtual mapped addresses list and destroys
+ * it's lock.
+ */
+void hl_hw_block_mem_fini(struct hl_ctx *ctx)
+{
+ struct hl_vm_hw_block_list_node *lnode, *tmp;
+
+ if (!list_empty(&ctx->hw_block_mem_list))
+ dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");
+
+ list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
+ list_del(&lnode->node);
+ kfree(lnode);
+ }
+
+ mutex_destroy(&ctx->hw_block_list_lock);
+}
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 93c9e5f587e1..b37189956b14 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -532,6 +532,8 @@ int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
struct hl_mmu_hop_info hops;
int rc;
+ memset(&hops, 0, sizeof(hops));
+
rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
if (rc)
return rc;
@@ -589,6 +591,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
switch (hdev->asic_type) {
case ASIC_GOYA:
case ASIC_GAUDI:
+ case ASIC_GAUDI_SEC:
hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
break;
default:
diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index b799f9258fb0..e941b7eef346 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -85,6 +85,58 @@ static void hl_pci_bars_unmap(struct hl_device *hdev)
pci_release_regions(pdev);
}
+int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ ktime_t timeout;
+ u64 msec;
+ u32 val;
+
+ if (hdev->pldm)
+ msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
+ else
+ msec = HL_PCI_ELBI_TIMEOUT_MSEC;
+
+ /* Clear previous status */
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
+
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, 0);
+
+ timeout = ktime_add_ms(ktime_get(), msec);
+ for (;;) {
+ pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
+ if (val & PCI_CONFIG_ELBI_STS_MASK)
+ break;
+ if (ktime_compare(ktime_get(), timeout) > 0) {
+ pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
+ &val);
+ break;
+ }
+
+ usleep_range(300, 500);
+ }
+
+ if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) {
+ pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
+
+ return 0;
+ }
+
+ if (val & PCI_CONFIG_ELBI_STS_ERR) {
+ dev_err(hdev->dev, "Error reading from ELBI\n");
+ return -EIO;
+ }
+
+ if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
+ dev_err(hdev->dev, "ELBI read didn't finish in time\n");
+ return -EIO;
+ }
+
+ dev_err(hdev->dev, "ELBI read has undefined bits in status\n");
+ return -EIO;
+}
+
/**
* hl_pci_elbi_write() - Write through the ELBI interface.
* @hdev: Pointer to hl_device structure.
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 4366d8f93842..9fa61573a89d 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -9,12 +9,18 @@
#include <linux/pci.h>
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
+long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
+ bool curr)
{
struct cpucp_packet pkt;
+ u32 used_pll_idx;
u64 result;
int rc;
+ rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
+ if (rc)
+ return rc;
+
memset(&pkt, 0, sizeof(pkt));
if (curr)
@@ -23,7 +29,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
else
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
- pkt.pll_index = cpu_to_le32(pll_index);
+ pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
@@ -31,23 +37,29 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
if (rc) {
dev_err(hdev->dev,
"Failed to get frequency of PLL %d, error %d\n",
- pll_index, rc);
+ used_pll_idx, rc);
return rc;
}
return (long) result;
}
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
+void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index,
+ u64 freq)
{
struct cpucp_packet pkt;
+ u32 used_pll_idx;
int rc;
+ rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
+ if (rc)
+ return;
+
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
- pkt.pll_index = cpu_to_le32(pll_index);
+ pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
pkt.value = cpu_to_le64(freq);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -56,7 +68,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
if (rc)
dev_err(hdev->dev,
"Failed to set frequency to PLL %d, error %d\n",
- pll_index, rc);
+ used_pll_idx, rc);
}
u64 hl_get_max_power(struct hl_device *hdev)
@@ -203,7 +215,7 @@ static ssize_t soft_reset_store(struct device *dev,
dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
- hl_device_reset(hdev, false, false);
+ hl_device_reset(hdev, 0);
out:
return count;
@@ -226,7 +238,7 @@ static ssize_t hard_reset_store(struct device *dev,
dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
- hl_device_reset(hdev, true, false);
+ hl_device_reset(hdev, HL_RESET_HARD);
out:
return count;
@@ -245,6 +257,9 @@ static ssize_t device_type_show(struct device *dev,
case ASIC_GAUDI:
str = "GAUDI";
break;
+ case ASIC_GAUDI_SEC:
+ str = "GAUDI SEC";
+ break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
hdev->asic_type);
@@ -344,7 +359,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf, loff_t offset,
size_t max_size)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct hl_device *hdev = dev_get_drvdata(dev);
char *data;
int rc;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9152242778f5..b751652f80a8 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -105,6 +105,36 @@
#define GAUDI_PLL_MAX 10
+/*
+ * this enum kept here for compatibility with old FW (in which each asic has
+ * unique PLL numbering
+ */
+enum gaudi_pll_index {
+ GAUDI_CPU_PLL = 0,
+ GAUDI_PCI_PLL,
+ GAUDI_SRAM_PLL,
+ GAUDI_HBM_PLL,
+ GAUDI_NIC_PLL,
+ GAUDI_DMA_PLL,
+ GAUDI_MESH_PLL,
+ GAUDI_MME_PLL,
+ GAUDI_TPC_PLL,
+ GAUDI_IF_PLL,
+};
+
+static enum pll_index gaudi_pll_map[PLL_MAX] = {
+ [CPU_PLL] = GAUDI_CPU_PLL,
+ [PCI_PLL] = GAUDI_PCI_PLL,
+ [SRAM_PLL] = GAUDI_SRAM_PLL,
+ [HBM_PLL] = GAUDI_HBM_PLL,
+ [NIC_PLL] = GAUDI_NIC_PLL,
+ [DMA_PLL] = GAUDI_DMA_PLL,
+ [MESH_PLL] = GAUDI_MESH_PLL,
+ [MME_PLL] = GAUDI_MME_PLL,
+ [TPC_PLL] = GAUDI_TPC_PLL,
+ [IF_PLL] = GAUDI_IF_PLL,
+};
+
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -396,6 +426,19 @@ get_collective_mode(struct hl_device *hdev, u32 queue_id)
return HL_COLLECTIVE_NOT_SUPPORTED;
}
+static inline void set_default_power_values(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ if (hdev->card_type == cpucp_card_type_pmc) {
+ prop->max_power_default = MAX_POWER_DEFAULT_PMC;
+ prop->dc_power_default = DC_POWER_DEFAULT_PMC;
+ } else {
+ prop->max_power_default = MAX_POWER_DEFAULT_PCI;
+ prop->dc_power_default = DC_POWER_DEFAULT_PCI;
+ }
+}
+
static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -507,7 +550,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->num_of_events = GAUDI_EVENT_SIZE;
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
- prop->max_power_default = MAX_POWER_DEFAULT_PCI;
+ set_default_power_values(hdev);
prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
@@ -532,8 +575,6 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
- /* disable fw security for now, set it in a later stage */
- prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
prop->hard_reset_done_by_fw = false;
@@ -588,6 +629,11 @@ static int gaudi_init_iatu(struct hl_device *hdev)
struct hl_outbound_pci_region outbound_region;
int rc;
+ if (hdev->asic_prop.iatu_done_by_fw) {
+ hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+ return 0;
+ }
+
/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
inbound_region.mode = PCI_BAR_MATCH_MODE;
inbound_region.bar = SRAM_BAR_ID;
@@ -632,6 +678,7 @@ static int gaudi_early_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pci_dev *pdev = hdev->pdev;
+ u32 fw_boot_status;
int rc;
rc = gaudi_get_fixed_properties(hdev);
@@ -665,6 +712,23 @@ static int gaudi_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
+ /* If FW security is enabled at this point it means no access to ELBI */
+ if (!hdev->asic_prop.fw_security_disabled) {
+ hdev->asic_prop.iatu_done_by_fw = true;
+ goto pci_init;
+ }
+
+ rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
+ &fw_boot_status);
+ if (rc)
+ goto free_queue_props;
+
+ /* Check whether FW is configuring iATU */
+ if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
+ (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
+ hdev->asic_prop.iatu_done_by_fw = true;
+
+pci_init:
rc = hl_pci_init(hdev);
if (rc)
goto free_queue_props;
@@ -1588,6 +1652,9 @@ static int gaudi_sw_init(struct hl_device *hdev)
hdev->asic_specific = gaudi;
+ /* store legacy PLL map */
+ hdev->legacy_pll_map = gaudi_pll_map;
+
/* Create DMA pool for small allocations */
hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
@@ -1766,8 +1833,7 @@ static int gaudi_enable_msi(struct hl_device *hdev)
if (gaudi->hw_cap_initialized & HW_CAP_MSI)
return 0;
- rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
- PCI_IRQ_MSI);
+ rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
if (rc < 0) {
dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
return rc;
@@ -3701,7 +3767,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
- if (!hdev->cpu_enable)
+ if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
return 0;
if (gaudi->hw_cap_initialized & HW_CAP_CPU)
@@ -4873,7 +4939,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
parser->job_userptr_list, &userptr))
goto already_pinned;
- userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+ userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
if (!userptr)
return -ENOMEM;
@@ -5684,18 +5750,26 @@ release_cb:
static int gaudi_schedule_register_memset(struct hl_device *hdev,
u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
{
- struct hl_ctx *ctx = hdev->compute_ctx;
+ struct hl_ctx *ctx;
struct hl_pending_cb *pending_cb;
struct packet_msg_long *pkt;
u32 cb_size, ctl;
struct hl_cb *cb;
- int i;
+ int i, rc;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+ ctx = hdev->compute_ctx;
/* If no compute context available or context is going down
* memset registers directly
*/
- if (!ctx || kref_read(&ctx->refcount) == 0)
- return gaudi_memset_registers(hdev, reg_base, num_regs, val);
+ if (!ctx || kref_read(&ctx->refcount) == 0) {
+ rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
+ mutex_unlock(&hdev->fpriv_list_lock);
+ return rc;
+ }
+
+ mutex_unlock(&hdev->fpriv_list_lock);
cb_size = (sizeof(*pkt) * num_regs) +
sizeof(struct packet_msg_prot) * 2;
@@ -5911,13 +5985,16 @@ static void gaudi_restore_phase_topology(struct hl_device *hdev)
}
-static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
+static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
+ bool user_address, u32 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
- u64 hbm_bar_addr;
+ u64 hbm_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
@@ -5949,6 +6026,9 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
} else {
rc = -EFAULT;
}
@@ -5956,13 +6036,16 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
return rc;
}
-static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
+static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
+ bool user_address, u32 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
- u64 hbm_bar_addr;
+ u64 hbm_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
@@ -5994,6 +6077,9 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
} else {
rc = -EFAULT;
}
@@ -6001,13 +6087,16 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
return rc;
}
-static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
+static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
+ bool user_address, u64 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
- u64 hbm_bar_addr;
+ u64 hbm_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
@@ -6043,6 +6132,9 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
} else {
rc = -EFAULT;
}
@@ -6050,13 +6142,16 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
return rc;
}
-static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
+static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
+ bool user_address, u64 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi_device *gaudi = hdev->asic_specific;
- u64 hbm_bar_addr;
+ u64 hbm_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
@@ -6091,6 +6186,9 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
}
if (hbm_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
} else {
rc = -EFAULT;
}
@@ -6098,6 +6196,164 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
return rc;
}
+static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
+ u32 size_to_dma, dma_addr_t dma_addr)
+{
+ u32 err_cause, val;
+ u64 dma_offset;
+ int rc;
+
+ dma_offset = dma_id * DMA_CORE_OFFSET;
+
+ WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
+ WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
+ WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
+ WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
+ WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
+ WREG32(mmDMA0_CORE_COMMIT + dma_offset,
+ (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
+
+ rc = hl_poll_timeout(
+ hdev,
+ mmDMA0_CORE_STS0 + dma_offset,
+ val,
+ ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
+ 0,
+ 1000000);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "DMA %d timed-out during reading of 0x%llx\n",
+ dma_id, addr);
+ return -EIO;
+ }
+
+ /* Verify DMA is OK */
+ err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
+ if (err_cause) {
+ dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
+ dev_dbg(hdev->dev,
+ "Clearing DMA0 engine from errors (cause 0x%x)\n",
+ err_cause);
+ WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
+
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
+ void *blob_addr)
+{
+ u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
+ struct gaudi_device *gaudi = hdev->asic_specific;
+ u64 dma_offset, qm_offset;
+ dma_addr_t dma_addr;
+ void *kernel_addr;
+ bool is_eng_idle;
+ int rc = 0, dma_id;
+
+ kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
+ hdev, SZ_2M,
+ &dma_addr,
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (!kernel_addr)
+ return -ENOMEM;
+
+ mutex_lock(&gaudi->clk_gate_mutex);
+
+ hdev->asic_funcs->disable_clock_gating(hdev);
+
+ hdev->asic_funcs->hw_queues_lock(hdev);
+
+ dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
+ dma_offset = dma_id * DMA_CORE_OFFSET;
+ qm_offset = dma_id * DMA_QMAN_OFFSET;
+ dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
+ is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
+
+ if (!is_eng_idle) {
+ dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
+ dma_offset = dma_id * DMA_CORE_OFFSET;
+ qm_offset = dma_id * DMA_QMAN_OFFSET;
+ dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
+ is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
+
+ if (!is_eng_idle) {
+ dev_err_ratelimited(hdev->dev,
+ "Can't read via DMA because it is BUSY\n");
+ rc = -EAGAIN;
+ goto out;
+ }
+ }
+
+ cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
+ WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
+ 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
+
+ /* TODO: remove this by mapping the DMA temporary buffer to the MMU
+ * using the compute ctx ASID, if exists. If not, use the kernel ctx
+ * ASID
+ */
+ WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
+
+ /* Verify DMA is OK */
+ err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
+ if (err_cause) {
+ dev_dbg(hdev->dev,
+ "Clearing DMA0 engine from errors (cause 0x%x)\n",
+ err_cause);
+ WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
+ }
+
+ pos = 0;
+ size_left = size;
+ size_to_dma = SZ_2M;
+
+ while (size_left > 0) {
+
+ if (size_left < SZ_2M)
+ size_to_dma = size_left;
+
+ rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
+ dma_addr);
+ if (rc)
+ break;
+
+ memcpy(blob_addr + pos, kernel_addr, size_to_dma);
+
+ if (size_left <= SZ_2M)
+ break;
+
+ pos += SZ_2M;
+ addr += SZ_2M;
+ size_left -= SZ_2M;
+ }
+
+ /* TODO: remove this by mapping the DMA temporary buffer to the MMU
+ * using the compute ctx ASID, if exists. If not, use the kernel ctx
+ * ASID
+ */
+ WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
+ ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
+
+ WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
+
+out:
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ hdev->asic_funcs->set_clock_gating(hdev);
+
+ mutex_unlock(&gaudi->clk_gate_mutex);
+
+ hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
+ dma_addr);
+
+ return rc;
+}
+
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
struct gaudi_device *gaudi = hdev->asic_specific;
@@ -6851,7 +7107,8 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
}
/* Write 1 clear errors */
- WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
+ if (!hdev->stop_on_err)
+ WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
}
arb_err_val = RREG32(arb_err_addr);
@@ -7097,6 +7354,15 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
}
}
+static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
+ struct cpucp_pkt_sync_err *sync_err)
+{
+ struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
+
+ dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
+ sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+}
+
static int gaudi_soft_reset_late_init(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
@@ -7371,18 +7637,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_MMU_DERR:
gaudi_print_irq_info(hdev, event_type, true);
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
- if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, true, false);
- break;
+ goto reset_device;
case GAUDI_EVENT_GIC500:
case GAUDI_EVENT_AXI_ECC:
case GAUDI_EVENT_L2_RAM_ECC:
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
gaudi_print_irq_info(hdev, event_type, false);
- if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, true, false);
- break;
+ goto reset_device;
case GAUDI_EVENT_HBM0_SPI_0:
case GAUDI_EVENT_HBM1_SPI_0:
@@ -7392,9 +7654,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
gaudi_hbm_read_interrupts(hdev,
gaudi_hbm_event_to_dev(event_type),
&eq_entry->hbm_ecc_data);
- if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, true, false);
- break;
+ goto reset_device;
case GAUDI_EVENT_HBM0_SPI_1:
case GAUDI_EVENT_HBM1_SPI_1:
@@ -7423,8 +7683,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
dev_err(hdev->dev, "hard reset required due to %s\n",
gaudi_irq_map_table[event_type].name);
- if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, true, false);
+ goto reset_device;
} else {
hl_fw_unmask_irq(hdev, event_type);
}
@@ -7446,8 +7705,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
dev_err(hdev->dev, "hard reset required due to %s\n",
gaudi_irq_map_table[event_type].name);
- if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, true, false);
+ goto reset_device;
} else {
hl_fw_unmask_irq(hdev, event_type);
}
@@ -7516,9 +7774,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
gaudi_print_irq_info(hdev, event_type, true);
- if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, true, false);
- break;
+ goto reset_device;
case GAUDI_EVENT_TPC0_BMON_SPMU:
case GAUDI_EVENT_TPC1_BMON_SPMU:
@@ -7552,11 +7808,28 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
event_type, cause);
break;
+ case GAUDI_EVENT_DEV_RESET_REQ:
+ gaudi_print_irq_info(hdev, event_type, false);
+ goto reset_device;
+
+ case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
+ gaudi_print_irq_info(hdev, event_type, false);
+ gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
+ goto reset_device;
+
default:
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
event_type);
break;
}
+
+ return;
+
+reset_device:
+ if (hdev->hard_reset_on_fw_events)
+ hl_device_reset(hdev, HL_RESET_HARD);
+ else
+ hl_fw_unmask_irq(hdev, event_type);
}
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
@@ -7607,7 +7880,7 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
- hl_device_reset(hdev, true, false);
+ hl_device_reset(hdev, HL_RESET_HARD);
}
return rc;
@@ -7656,7 +7929,7 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
- hl_device_reset(hdev, true, false);
+ hl_device_reset(hdev, HL_RESET_HARD);
}
return rc;
@@ -7714,7 +7987,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
- rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
+ rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
if (rc)
return rc;
@@ -7724,10 +7997,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
- if (hdev->card_type == cpucp_card_type_pci)
- prop->max_power_default = MAX_POWER_DEFAULT_PCI;
- else if (hdev->card_type == cpucp_card_type_pmc)
- prop->max_power_default = MAX_POWER_DEFAULT_PMC;
+ set_default_power_values(hdev);
hdev->max_power = prop->max_power_default;
@@ -8549,6 +8819,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.debugfs_write32 = gaudi_debugfs_write32,
.debugfs_read64 = gaudi_debugfs_read64,
.debugfs_write64 = gaudi_debugfs_write64,
+ .debugfs_read_dma = gaudi_debugfs_read_dma,
.add_device_attr = gaudi_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
.set_pll_profile = gaudi_set_pll_profile,
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 50bb4ad570fd..5929be81ec23 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -47,6 +47,9 @@
#define MAX_POWER_DEFAULT_PCI 200000 /* 200W */
#define MAX_POWER_DEFAULT_PMC 350000 /* 350W */
+#define DC_POWER_DEFAULT_PCI 60000 /* 60W */
+#define DC_POWER_DEFAULT_PMC 60000 /* 60W */
+
#define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */
#define TPC_ENABLED_MASK 0xFF
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index 7085f45814ae..9a706c5980ef 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -9556,7 +9556,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC0_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC0_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
@@ -10011,7 +10010,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC1_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC1_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2);
@@ -10465,7 +10463,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC2_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC2_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2);
@@ -10919,7 +10916,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC3_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC3_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2);
@@ -11373,7 +11369,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC4_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC4_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2);
@@ -11827,7 +11822,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC5_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC5_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2);
@@ -12283,7 +12277,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC6_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC6_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2);
@@ -12739,7 +12732,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
mask = 1U << ((mmTPC7_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_SFLAGS & 0x7F) >> 2);
- mask |= 1U << ((mmTPC7_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ed566c52ccaa..e27338f4aad2 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -118,6 +118,29 @@
#define IS_MME_IDLE(mme_arch_sts) \
(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
+/*
+ * this enum kept here for compatibility with old FW (in which each asic has
+ * unique PLL numbering
+ */
+enum goya_pll_index {
+ GOYA_CPU_PLL = 0,
+ GOYA_IC_PLL,
+ GOYA_MC_PLL,
+ GOYA_MME_PLL,
+ GOYA_PCI_PLL,
+ GOYA_EMMC_PLL,
+ GOYA_TPC_PLL,
+};
+
+static enum pll_index goya_pll_map[PLL_MAX] = {
+ [CPU_PLL] = GOYA_CPU_PLL,
+ [IC_PLL] = GOYA_IC_PLL,
+ [MC_PLL] = GOYA_MC_PLL,
+ [MME_PLL] = GOYA_MME_PLL,
+ [PCI_PLL] = GOYA_PCI_PLL,
+ [EMMC_PLL] = GOYA_EMMC_PLL,
+ [TPC_PLL] = GOYA_TPC_PLL,
+};
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
@@ -446,6 +469,7 @@ int goya_get_fixed_properties(struct hl_device *hdev)
prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
prop->max_power_default = MAX_POWER_DEFAULT;
+ prop->dc_power_default = DC_POWER_DEFAULT;
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
@@ -460,8 +484,6 @@ int goya_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
- /* disable fw security for now, set it in a later stage */
- prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
prop->hard_reset_done_by_fw = false;
@@ -533,6 +555,11 @@ static int goya_init_iatu(struct hl_device *hdev)
struct hl_outbound_pci_region outbound_region;
int rc;
+ if (hdev->asic_prop.iatu_done_by_fw) {
+ hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+ return 0;
+ }
+
/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
inbound_region.mode = PCI_BAR_MATCH_MODE;
inbound_region.bar = SRAM_CFG_BAR_ID;
@@ -580,7 +607,7 @@ static int goya_early_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pci_dev *pdev = hdev->pdev;
- u32 val;
+ u32 fw_boot_status, val;
int rc;
rc = goya_get_fixed_properties(hdev);
@@ -614,6 +641,23 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
+ /* If FW security is enabled at this point it means no access to ELBI */
+ if (!hdev->asic_prop.fw_security_disabled) {
+ hdev->asic_prop.iatu_done_by_fw = true;
+ goto pci_init;
+ }
+
+ rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
+ &fw_boot_status);
+ if (rc)
+ goto free_queue_props;
+
+ /* Check whether FW is configuring iATU */
+ if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
+ (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
+ hdev->asic_prop.iatu_done_by_fw = true;
+
+pci_init:
rc = hl_pci_init(hdev);
if (rc)
goto free_queue_props;
@@ -853,6 +897,9 @@ static int goya_sw_init(struct hl_device *hdev)
hdev->asic_specific = goya;
+ /* store legacy PLL map */
+ hdev->legacy_pll_map = goya_pll_map;
+
/* Create DMA pool for small allocations */
hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
@@ -2429,7 +2476,7 @@ static int goya_init_cpu(struct hl_device *hdev)
struct goya_device *goya = hdev->asic_specific;
int rc;
- if (!hdev->cpu_enable)
+ if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
return 0;
if (goya->hw_cap_initialized & HW_CAP_CPU)
@@ -3221,7 +3268,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
parser->job_userptr_list, &userptr))
goto already_pinned;
- userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+ userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
if (!userptr)
return -ENOMEM;
@@ -4101,12 +4148,15 @@ static void goya_clear_sm_regs(struct hl_device *hdev)
* lead to undefined behavior and therefore, should be done with extreme care
*
*/
-static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
+static int goya_debugfs_read32(struct hl_device *hdev, u64 addr,
+ bool user_address, u32 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr;
+ u64 ddr_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
*val = RREG32(addr - CFG_BASE);
@@ -4132,6 +4182,10 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
} else {
rc = -EFAULT;
}
@@ -4154,12 +4208,15 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
* lead to undefined behavior and therefore, should be done with extreme care
*
*/
-static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
+static int goya_debugfs_write32(struct hl_device *hdev, u64 addr,
+ bool user_address, u32 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr;
+ u64 ddr_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
WREG32(addr - CFG_BASE, val);
@@ -4185,6 +4242,10 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
} else {
rc = -EFAULT;
}
@@ -4192,12 +4253,15 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
return rc;
}
-static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
+static int goya_debugfs_read64(struct hl_device *hdev, u64 addr,
+ bool user_address, u64 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr;
+ u64 ddr_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
u32 val_l = RREG32(addr - CFG_BASE);
u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
@@ -4227,6 +4291,10 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
} else {
rc = -EFAULT;
}
@@ -4234,12 +4302,15 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
return rc;
}
-static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
+static int goya_debugfs_write64(struct hl_device *hdev, u64 addr,
+ bool user_address, u64 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr;
+ u64 ddr_bar_addr, host_phys_end;
int rc = 0;
+ host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
+
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
WREG32(addr - CFG_BASE, lower_32_bits(val));
WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
@@ -4267,6 +4338,10 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+ } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
+ user_address && !iommu_present(&pci_bus_type)) {
+ *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
} else {
rc = -EFAULT;
}
@@ -4274,6 +4349,13 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
return rc;
}
+static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
+ void *blob_addr)
+{
+ dev_err(hdev->dev, "Reading via DMA is unimplemented yet\n");
+ return -EPERM;
+}
+
static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
struct goya_device *goya = hdev->asic_specific;
@@ -4401,6 +4483,8 @@ static const char *_goya_get_event_desc(u16 event_type)
return "THERMAL_ENV_S";
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
return "THERMAL_ENV_E";
+ case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
+ return "QUEUE_OUT_OF_SYNC";
default:
return "N/A";
}
@@ -4483,6 +4567,9 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
+ case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
+ snprintf(desc, size, _goya_get_event_desc(event_type));
+ break;
default:
snprintf(desc, size, _goya_get_event_desc(event_type));
break;
@@ -4534,6 +4621,15 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
}
}
+static void goya_print_out_of_sync_info(struct hl_device *hdev,
+ struct cpucp_pkt_sync_err *sync_err)
+{
+ struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
+
+ dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
+ sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+}
+
static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
bool razwi)
{
@@ -4698,7 +4794,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, true, false);
+ hl_device_reset(hdev, HL_RESET_HARD);
break;
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
@@ -4754,6 +4850,15 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
goya_unmask_irq(hdev, event_type);
break;
+ case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
+ goya_print_irq_info(hdev, event_type, false);
+ goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
+ if (hdev->hard_reset_on_fw_events)
+ hl_device_reset(hdev, HL_RESET_HARD);
+ else
+ hl_fw_unmask_irq(hdev, event_type);
+ break;
+
default:
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
event_type);
@@ -5083,7 +5188,7 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
- hl_device_reset(hdev, true, false);
+ hl_device_reset(hdev, HL_RESET_HARD);
}
return rc;
@@ -5134,7 +5239,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
- hl_device_reset(hdev, true, false);
+ hl_device_reset(hdev, HL_RESET_HARD);
}
return rc;
@@ -5160,7 +5265,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
- rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
+ rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
if (rc)
return rc;
@@ -5443,6 +5548,7 @@ static const struct hl_asic_funcs goya_funcs = {
.debugfs_write32 = goya_debugfs_write32,
.debugfs_read64 = goya_debugfs_read64,
.debugfs_write64 = goya_debugfs_write64,
+ .debugfs_read_dma = goya_debugfs_read_dma,
.add_device_attr = goya_add_device_attr,
.handle_eqe = goya_handle_eqe,
.set_pll_profile = goya_set_pll_profile,
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 23fe099ed218..ef8c6c8b5c8d 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -49,6 +49,8 @@
#define MAX_POWER_DEFAULT 200000 /* 200W */
+#define DC_POWER_DEFAULT 20000 /* 20W */
+
#define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */
#define GOYA_DEFAULT_CARD_NAME "HL1000"
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index b77c1c16c32c..27cd0ba99aa3 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -11,6 +11,8 @@
#include <linux/types.h>
#include <linux/if_ether.h>
+#include "hl_boot_if.h"
+
#define NUM_HBM_PSEUDO_CH 2
#define NUM_HBM_CH_PER_DEV 8
#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0
@@ -28,6 +30,17 @@
#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6
#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0
+#define PLL_MAP_MAX_BITS 128
+#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8)
+
+/*
+ * info of the pkt queue pointers in the first async occurrence
+ */
+struct cpucp_pkt_sync_err {
+ __le32 pi;
+ __le32 ci;
+};
+
struct hl_eq_hbm_ecc_data {
/* SERR counter */
__le32 sec_cnt;
@@ -77,6 +90,7 @@ struct hl_eq_entry {
struct hl_eq_ecc_data ecc_data;
struct hl_eq_hbm_ecc_data hbm_ecc_data;
struct hl_eq_sm_sei_data sm_sei_data;
+ struct cpucp_pkt_sync_err pkt_sync_err;
__le64 data[7];
};
};
@@ -287,6 +301,30 @@ enum pq_init_status {
* The result is composed of 4 outputs, each is 16-bit
* frequency in MHz.
*
+ * CPUCP_PACKET_POWER_GET
+ * Fetch the present power consumption of the device (Current * Voltage).
+ *
+ * CPUCP_PACKET_NIC_PFC_SET -
+ * Enable/Disable the NIC PFC feature. The packet's arguments specify the
+ * NIC port, relevant lanes to configure and one bit indication for
+ * enable/disable.
+ *
+ * CPUCP_PACKET_NIC_FAULT_GET -
+ * Fetch the current indication for local/remote faults from the NIC MAC.
+ * The result is 32-bit value of the relevant register.
+ *
+ * CPUCP_PACKET_NIC_LPBK_SET -
+ * Enable/Disable the MAC loopback feature. The packet's arguments specify
+ * the NIC port, relevant lanes to configure and one bit indication for
+ * enable/disable.
+ *
+ * CPUCP_PACKET_NIC_MAC_INIT -
+ * Configure the NIC MAC channels. The packet's arguments specify the
+ * NIC port and the speed.
+ *
+ * CPUCP_PACKET_MSI_INFO_SET -
+ * set the index number for each supported msi type going from
+ * host to device
*/
enum cpucp_packet_id {
@@ -320,6 +358,13 @@ enum cpucp_packet_id {
CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */
CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */
CPUCP_PACKET_PLL_INFO_GET, /* internal */
+ CPUCP_PACKET_NIC_STATUS, /* internal */
+ CPUCP_PACKET_POWER_GET, /* internal */
+ CPUCP_PACKET_NIC_PFC_SET, /* internal */
+ CPUCP_PACKET_NIC_FAULT_GET, /* internal */
+ CPUCP_PACKET_NIC_LPBK_SET, /* internal */
+ CPUCP_PACKET_NIC_MAC_CFG, /* internal */
+ CPUCP_PACKET_MSI_INFO_SET, /* internal */
};
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -391,6 +436,12 @@ struct cpucp_unmask_irq_arr_packet {
__le32 irqs[0];
};
+struct cpucp_array_data_packet {
+ struct cpucp_packet cpucp_pkt;
+ __le32 length;
+ __le32 data[0];
+};
+
enum cpucp_packet_rc {
cpucp_packet_success,
cpucp_packet_invalid,
@@ -459,6 +510,51 @@ enum cpucp_pll_type_attributes {
cpucp_pll_pci,
};
+/*
+ * MSI type enumeration table for all ASICs and future SW versions.
+ * For future ASIC-LKD compatibility, we can only add new enumerations.
+ * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES).
+ * Changing the order of entries or removing entries is not allowed.
+ */
+enum cpucp_msi_type {
+ CPUCP_EVENT_QUEUE_MSI_TYPE,
+ CPUCP_NIC_PORT1_MSI_TYPE,
+ CPUCP_NIC_PORT3_MSI_TYPE,
+ CPUCP_NIC_PORT5_MSI_TYPE,
+ CPUCP_NIC_PORT7_MSI_TYPE,
+ CPUCP_NIC_PORT9_MSI_TYPE,
+ CPUCP_NUM_OF_MSI_TYPES
+};
+
+/*
+ * PLL enumeration table used for all ASICs and future SW versions.
+ * For future ASIC-LKD compatibility, we can only add new enumerations.
+ * at the end of the table.
+ * Changing the order of entries or removing entries is not allowed.
+ */
+enum pll_index {
+ CPU_PLL = 0,
+ PCI_PLL = 1,
+ NIC_PLL = 2,
+ DMA_PLL = 3,
+ MESH_PLL = 4,
+ MME_PLL = 5,
+ TPC_PLL = 6,
+ IF_PLL = 7,
+ SRAM_PLL = 8,
+ NS_PLL = 9,
+ HBM_PLL = 10,
+ MSS_PLL = 11,
+ DDR_PLL = 12,
+ VID_PLL = 13,
+ BANK_PLL = 14,
+ MMU_PLL = 15,
+ IC_PLL = 16,
+ MC_PLL = 17,
+ EMMC_PLL = 18,
+ PLL_MAX
+};
+
/* Event Queue Packets */
struct eq_generic_event {
@@ -470,7 +566,6 @@ struct eq_generic_event {
*/
#define CARD_NAME_MAX_LEN 16
-#define VERSION_MAX_LEN 128
#define CPUCP_MAX_SENSORS 128
#define CPUCP_MAX_NICS 128
#define CPUCP_LANES_PER_NIC 4
@@ -533,6 +628,7 @@ struct cpucp_security_info {
* @dram_size: available DRAM size.
* @card_name: card name that will be displayed in HWMON subsystem on the host
* @sec_info: security information
+ * @pll_map: Bit map of supported PLLs for current ASIC version.
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
@@ -554,6 +650,7 @@ struct cpucp_info {
__u8 pad[7];
struct cpucp_security_info sec_info;
__le32 reserved6;
+ __u8 pll_map[PLL_MAP_LEN];
};
struct cpucp_mac_addr {
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index e87f5a98e193..e0a259e0495c 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -13,6 +13,8 @@
#define BOOT_FIT_SRAM_OFFSET 0x200000
+#define VERSION_MAX_LEN 128
+
/*
* CPU error bits in BOOT_ERROR registers
*
@@ -73,6 +75,9 @@
* CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one
* of the PLLs remains in REF_CLK
*
+ * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support
+ * should be contacted.
+ *
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
@@ -92,6 +97,7 @@
#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << 10)
#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11)
#define CPU_BOOT_ERR0_PLL_FAIL (1 << 12)
+#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13)
#define CPU_BOOT_ERR0_ENABLED (1 << 31)
/*
@@ -170,6 +176,20 @@
* is set to the PI counter.
* Initialized in: linux
*
+ * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication
+ * protocol is enabled.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled.
+ * This bit if set, means the iATU has been
+ * configured and is ready for use.
+ * Initialized in: ppboot
+ *
+ * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled.
+ * FW sends to host a bitmap of supported
+ * PLLs.
+ * Initialized in: linux
+ *
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the
* running FW populates the device status
@@ -195,6 +215,9 @@
#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13)
#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14)
#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15)
+#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16)
+#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17)
+#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
enum cpu_boot_status {
@@ -230,6 +253,7 @@ enum kmd_msg {
KMD_MSG_SKIP_BMC,
RESERVED,
KMD_MSG_RST_DEV,
+ KMD_MSG_LAST
};
enum cpu_msg_status {
@@ -238,4 +262,199 @@ enum cpu_msg_status {
CPU_MSG_ERR,
};
+/* communication registers mapping - consider ABI when changing */
+struct cpu_dyn_regs {
+ uint32_t cpu_pq_base_addr_low;
+ uint32_t cpu_pq_base_addr_high;
+ uint32_t cpu_pq_length;
+ uint32_t cpu_pq_init_status;
+ uint32_t cpu_eq_base_addr_low;
+ uint32_t cpu_eq_base_addr_high;
+ uint32_t cpu_eq_length;
+ uint32_t cpu_eq_ci;
+ uint32_t cpu_cq_base_addr_low;
+ uint32_t cpu_cq_base_addr_high;
+ uint32_t cpu_cq_length;
+ uint32_t cpu_pf_pq_pi;
+ uint32_t cpu_boot_dev_sts0;
+ uint32_t cpu_boot_dev_sts1;
+ uint32_t cpu_boot_err0;
+ uint32_t cpu_boot_err1;
+ uint32_t cpu_boot_status;
+ uint32_t fw_upd_sts;
+ uint32_t fw_upd_cmd;
+ uint32_t fw_upd_pending_sts;
+ uint32_t fuse_ver_offset;
+ uint32_t preboot_ver_offset;
+ uint32_t uboot_ver_offset;
+ uint32_t hw_state;
+ uint32_t kmd_msg_to_cpu;
+ uint32_t cpu_cmd_status_to_host;
+ uint32_t reserved1[32]; /* reserve for future use */
+};
+
+/* HCDM - Habana Communications Descriptor Magic */
+#define HL_COMMS_DESC_MAGIC 0x4843444D
+#define HL_COMMS_DESC_VER 1
+
+/* this is the comms descriptor header - meta data */
+struct comms_desc_header {
+ uint32_t magic; /* magic for validation */
+ uint32_t crc32; /* CRC32 of the descriptor w/o header */
+ uint16_t size; /* size of the descriptor w/o header */
+ uint8_t version; /* descriptor version */
+ uint8_t reserved[5]; /* pad to 64 bit */
+};
+
+/* this is the main FW descriptor - consider ABI when changing */
+struct lkd_fw_comms_desc {
+ struct comms_desc_header header;
+ struct cpu_dyn_regs cpu_dyn_regs;
+ char fuse_ver[VERSION_MAX_LEN];
+ char cur_fw_ver[VERSION_MAX_LEN];
+ /* can be used for 1 more version w/o ABI change */
+ char reserved0[VERSION_MAX_LEN];
+ uint64_t img_addr; /* address for next FW component load */
+};
+
+/*
+ * LKD commands:
+ *
+ * COMMS_NOOP Used to clear the command register and no actual
+ * command is send.
+ *
+ * COMMS_CLR_STS Clear status command - FW should clear the
+ * status register. Used for synchronization
+ * between the commands as part of the race free
+ * protocol.
+ *
+ * COMMS_RST_STATE Reset the current communication state which is
+ * kept by FW for proper responses.
+ * Should be used in the beginning of the
+ * communication cycle to clean any leftovers from
+ * previous communication attempts.
+ *
+ * COMMS_PREP_DESC Prepare descriptor for setting up the
+ * communication and other dynamic data:
+ * struct lkd_fw_comms_desc.
+ * This command has a parameter stating the next FW
+ * component size, so the FW can actually prepare a
+ * space for it and in the status response provide
+ * the descriptor offset. The Offset of the next FW
+ * data component is a part of the descriptor
+ * structure.
+ *
+ * COMMS_DATA_RDY The FW data has been uploaded and is ready for
+ * validation.
+ *
+ * COMMS_EXEC Execute the next FW component.
+ *
+ * COMMS_RST_DEV Reset the device.
+ *
+ * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure
+ * devices.
+ *
+ * COMMS_SKIP_BMC Perform actions required for BMC-less servers.
+ * Do not wait for BMC response.
+ *
+ * COMMS_LOW_PLL_OPP Initialize PLLs for low OPP.
+ */
+enum comms_cmd {
+ COMMS_NOOP = 0,
+ COMMS_CLR_STS = 1,
+ COMMS_RST_STATE = 2,
+ COMMS_PREP_DESC = 3,
+ COMMS_DATA_RDY = 4,
+ COMMS_EXEC = 5,
+ COMMS_RST_DEV = 6,
+ COMMS_GOTO_WFE = 7,
+ COMMS_SKIP_BMC = 8,
+ COMMS_LOW_PLL_OPP = 9,
+ COMMS_INVLD_LAST
+};
+
+#define COMMS_COMMAND_SIZE_SHIFT 0
+#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF
+#define COMMS_COMMAND_CMD_SHIFT 27
+#define COMMS_COMMAND_CMD_MASK 0xF8000000
+
+/*
+ * LKD command to FW register structure
+ * @size - FW component size
+ * @cmd - command from enum comms_cmd
+ */
+struct comms_command {
+ union { /* bit fields are only for FW use */
+ struct {
+ unsigned int size :25; /* 32MB max. */
+ unsigned int reserved :2;
+ enum comms_cmd cmd :5; /* 32 commands */
+ };
+ unsigned int val;
+ };
+};
+
+/*
+ * FW status
+ *
+ * COMMS_STS_NOOP Used to clear the status register and no actual
+ * status is provided.
+ *
+ * COMMS_STS_ACK Command has been received and recognized.
+ *
+ * COMMS_STS_OK Command execution has finished successfully.
+ *
+ * COMMS_STS_ERR Command execution was unsuccessful and resulted
+ * in error.
+ *
+ * COMMS_STS_VALID_ERR FW validation has failed.
+ *
+ * COMMS_STS_TIMEOUT_ERR Command execution has timed out.
+ */
+enum comms_sts {
+ COMMS_STS_NOOP = 0,
+ COMMS_STS_ACK = 1,
+ COMMS_STS_OK = 2,
+ COMMS_STS_ERR = 3,
+ COMMS_STS_VALID_ERR = 4,
+ COMMS_STS_TIMEOUT_ERR = 5,
+ COMMS_STS_INVLD_LAST
+};
+
+/* RAM types for FW components loading - defines the base address */
+enum comms_ram_types {
+ COMMS_SRAM = 0,
+ COMMS_DRAM = 1,
+};
+
+#define COMMS_STATUS_OFFSET_SHIFT 0
+#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF
+#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2
+#define COMMS_STATUS_RAM_TYPE_SHIFT 26
+#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000
+#define COMMS_STATUS_STATUS_SHIFT 28
+#define COMMS_STATUS_STATUS_MASK 0xF0000000
+
+/*
+ * FW status to LKD register structure
+ * @offset - an offset from the base of the ram_type shifted right by
+ * 2 bits (always aligned to 32 bits).
+ * Allows a maximum addressable offset of 256MB from RAM base.
+ * Example: for real offset in RAM of 0x800000 (8MB), the value
+ * in offset field is (0x800000 >> 2) = 0x200000.
+ * @ram_type - the RAM type that should be used for offset from
+ * enum comms_ram_types
+ * @status - status from enum comms_sts
+ */
+struct comms_status {
+ union { /* bit fields are only for FW use */
+ struct {
+ unsigned int offset :26;
+ unsigned int ram_type :2;
+ enum comms_sts status :4; /* 16 statuses */
+ };
+ unsigned int val;
+ };
+};
+
#endif /* HL_BOOT_IF_H */
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi.h b/drivers/misc/habanalabs/include/gaudi/gaudi.h
index f9ea897ae42c..ffae107b1693 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi.h
@@ -38,7 +38,7 @@
#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */
-#define MAX_ASID 1024
+#define MAX_ASID 2
#define PROT_BITS_OFFS 0xF80
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index 49335e8334b4..e8651abf84f2 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -303,6 +303,8 @@ enum gaudi_async_event_id {
GAUDI_EVENT_NIC3_QP1 = 619,
GAUDI_EVENT_NIC4_QP0 = 620,
GAUDI_EVENT_NIC4_QP1 = 621,
+ GAUDI_EVENT_DEV_RESET_REQ = 646,
+ GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
GAUDI_EVENT_FIX_POWER_ENV_S = 658,
GAUDI_EVENT_FIX_POWER_ENV_E = 659,
GAUDI_EVENT_FIX_THERMAL_ENV_S = 660,
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
index 737176ba06fb..3dc79c131805 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
@@ -301,10 +301,10 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
{ .fc_id = 274, .cpu_id = 128, .valid = 0, .name = "" },
{ .fc_id = 275, .cpu_id = 128, .valid = 0, .name = "" },
{ .fc_id = 276, .cpu_id = 128, .valid = 0, .name = "" },
- { .fc_id = 277, .cpu_id = 129, .valid = 0, .name = "" },
- { .fc_id = 278, .cpu_id = 129, .valid = 0, .name = "" },
- { .fc_id = 279, .cpu_id = 129, .valid = 0, .name = "" },
- { .fc_id = 280, .cpu_id = 129, .valid = 0, .name = "" },
+ { .fc_id = 277, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_0" },
+ { .fc_id = 278, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_1" },
+ { .fc_id = 279, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_2" },
+ { .fc_id = 280, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_3" },
{ .fc_id = 281, .cpu_id = 130, .valid = 0, .name = "" },
{ .fc_id = 282, .cpu_id = 131, .valid = 0, .name = "" },
{ .fc_id = 283, .cpu_id = 132, .valid = 0, .name = "" },
@@ -670,18 +670,29 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
{ .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" },
{ .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" },
{ .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" },
- { .fc_id = 646, .cpu_id = 495, .valid = 0, .name = "" },
- { .fc_id = 647, .cpu_id = 496, .valid = 0, .name = "" },
- { .fc_id = 648, .cpu_id = 497, .valid = 0, .name = "" },
- { .fc_id = 649, .cpu_id = 498, .valid = 0, .name = "" },
- { .fc_id = 650, .cpu_id = 499, .valid = 0, .name = "" },
- { .fc_id = 651, .cpu_id = 500, .valid = 0, .name = "" },
- { .fc_id = 652, .cpu_id = 501, .valid = 0, .name = "" },
- { .fc_id = 653, .cpu_id = 502, .valid = 0, .name = "" },
- { .fc_id = 654, .cpu_id = 503, .valid = 0, .name = "" },
- { .fc_id = 655, .cpu_id = 504, .valid = 0, .name = "" },
- { .fc_id = 656, .cpu_id = 505, .valid = 0, .name = "" },
- { .fc_id = 657, .cpu_id = 506, .valid = 0, .name = "" },
+ { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "DEV_RESET_REQ" },
+ { .fc_id = 647, .cpu_id = 496, .valid = 1,
+ .name = "PKT_QUEUE_OUT_SYNC" },
+ { .fc_id = 648, .cpu_id = 497, .valid = 1,
+ .name = "STATUS_NIC0_ENG0" },
+ { .fc_id = 649, .cpu_id = 498, .valid = 1,
+ .name = "STATUS_NIC0_ENG1" },
+ { .fc_id = 650, .cpu_id = 499, .valid = 1,
+ .name = "STATUS_NIC1_ENG0" },
+ { .fc_id = 651, .cpu_id = 500, .valid = 1,
+ .name = "STATUS_NIC1_ENG1" },
+ { .fc_id = 652, .cpu_id = 501, .valid = 1,
+ .name = "STATUS_NIC2_ENG0" },
+ { .fc_id = 653, .cpu_id = 502, .valid = 1,
+ .name = "STATUS_NIC2_ENG1" },
+ { .fc_id = 654, .cpu_id = 503, .valid = 1,
+ .name = "STATUS_NIC3_ENG0" },
+ { .fc_id = 655, .cpu_id = 504, .valid = 1,
+ .name = "STATUS_NIC3_ENG1" },
+ { .fc_id = 656, .cpu_id = 505, .valid = 1,
+ .name = "STATUS_NIC4_ENG0" },
+ { .fc_id = 657, .cpu_id = 506, .valid = 1,
+ .name = "STATUS_NIC4_ENG1" },
{ .fc_id = 658, .cpu_id = 507, .valid = 1, .name = "FIX_POWER_ENV_S" },
{ .fc_id = 659, .cpu_id = 508, .valid = 1, .name = "FIX_POWER_ENV_E" },
{ .fc_id = 660, .cpu_id = 509, .valid = 1,
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h
index 25acd9e87e20..a9f51f9f9e92 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h
@@ -20,20 +20,6 @@
#define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */
#define LINUX_FW_OFFSET 0x800000 /* 8MB in HBM */
-enum gaudi_pll_index {
- CPU_PLL = 0,
- PCI_PLL,
- SRAM_PLL,
- HBM_PLL,
- NIC_PLL,
- DMA_PLL,
- MESH_PLL,
- MME_PLL,
- TPC_PLL,
- IF_PLL,
- PLL_MAX
-};
-
enum gaudi_nic_axi_error {
RXB,
RXE,
diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h
index 43d241891e45..1b4ca435021d 100644
--- a/drivers/misc/habanalabs/include/goya/goya.h
+++ b/drivers/misc/habanalabs/include/goya/goya.h
@@ -30,7 +30,7 @@
#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */
-#define MAX_ASID 1024
+#define MAX_ASID 2
#define PROT_BITS_OFFS 0xF80
diff --git a/drivers/misc/habanalabs/include/goya/goya_async_events.h b/drivers/misc/habanalabs/include/goya/goya_async_events.h
index 5fb92362fc5f..09081401cb1d 100644
--- a/drivers/misc/habanalabs/include/goya/goya_async_events.h
+++ b/drivers/misc/habanalabs/include/goya/goya_async_events.h
@@ -188,6 +188,7 @@ enum goya_async_event_id {
GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
+ GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC = 506,
GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S = 507,
GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E = 508,
GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S = 509,
diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h
index daf8d8cd14be..bc05f86c73ac 100644
--- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h
+++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h
@@ -15,17 +15,6 @@
#define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */
#define LINUX_FW_OFFSET 0x800000 /* 8MB in DDR */
-enum goya_pll_index {
- CPU_PLL = 0,
- IC_PLL,
- MC_PLL,
- MME_PLL,
- PCI_PLL,
- EMMC_PLL,
- TPC_PLL,
- PLL_MAX
-};
-
#define GOYA_PLL_FREQ_LOW 50000000 /* 50 MHz */
#endif /* GOYA_FW_IF_H */