diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-28 21:27:35 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-28 21:27:35 +0200 |
commit | 02e2af20f4f9f2aa0c84e9a30a35c02f0fbb7daa (patch) | |
tree | d126449a7d2ea2270627183f7cebd726fbe56a9d /drivers/misc/habanalabs | |
parent | Merge tag 'pinctrl-v5.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff) | |
parent | firmware: google: Properly state IOMEM dependency (diff) | |
download | linux-02e2af20f4f9f2aa0c84e9a30a35c02f0fbb7daa.tar.xz linux-02e2af20f4f9f2aa0c84e9a30a35c02f0fbb7daa.zip |
Merge tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc and other driver updates from Greg KH:
"Here is the big set of char/misc and other small driver subsystem
updates for 5.18-rc1.
Included in here are merges from driver subsystems which contain:
- iio driver updates and new drivers
- fsi driver updates
- fpga driver updates
- habanalabs driver updates and support for new hardware
- soundwire driver updates and new drivers
- phy driver updates and new drivers
- coresight driver updates
- icc driver updates
Individual changes include:
- mei driver updates
- interconnect driver updates
- new PECI driver subsystem added
- vmci driver updates
- lots of tiny misc/char driver updates
All of these have been in linux-next for a while with no reported
problems"
* tag 'char-misc-5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (556 commits)
firmware: google: Properly state IOMEM dependency
kgdbts: fix return value of __setup handler
firmware: sysfb: fix platform-device leak in error path
firmware: stratix10-svc: add missing callback parameter on RSU
arm64: dts: qcom: add non-secure domain property to fastrpc nodes
misc: fastrpc: Add dma handle implementation
misc: fastrpc: Add fdlist implementation
misc: fastrpc: Add helper function to get list and page
misc: fastrpc: Add support to secure memory map
dt-bindings: misc: add fastrpc domain vmid property
misc: fastrpc: check before loading process to the DSP
misc: fastrpc: add secure domain support
dt-bindings: misc: add property to support non-secure DSP
misc: fastrpc: Add support to get DSP capabilities
misc: fastrpc: add support for FASTRPC_IOCTL_MEM_MAP/UNMAP
misc: fastrpc: separate fastrpc device from channel context
dt-bindings: nvmem: brcm,nvram: add basic NVMEM cells
dt-bindings: nvmem: make "reg" property optional
nvmem: brcm_nvram: parse NVRAM content into NVMEM cells
nvmem: dt-bindings: Fix the error of dt-bindings check
...
Diffstat (limited to 'drivers/misc/habanalabs')
24 files changed, 1393 insertions, 844 deletions
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile index 82c3824cad00..6ebe3c7001ff 100644 --- a/drivers/misc/habanalabs/common/Makefile +++ b/drivers/misc/habanalabs/common/Makefile @@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/command_buffer.o common/hw_queue.o common/irq.o \ common/sysfs.o common/hwmon.o common/memory.o \ common/command_submission.o common/firmware_if.o \ - common/state_dump.o common/hwmgr.o + common/state_dump.o diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 3c0ae07a2d80..a507110f6443 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -424,8 +424,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cb_args *args = data; struct hl_device *hdev = hpriv->hdev; + u64 handle = 0, device_va = 0; enum hl_device_status status; - u64 handle = 0, device_va; u32 usage_cnt = 0; int rc; @@ -464,6 +464,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) args->in.flags, &usage_cnt, &device_va); + if (rc) + break; memset(&args->out, 0, sizeof(args->out)); diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 0a4ef13d9ac4..d93ef9f1c45c 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -14,6 +14,8 @@ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ HL_CS_FLAGS_COLLECTIVE_WAIT) +#define MAX_TS_ITER_NUM 10 + /** * enum hl_cs_wait_status - cs wait status * @CS_WAIT_STATUS_BUSY: cs was not completed yet @@ -919,18 +921,21 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) complete_job(hdev, job); } -void hl_cs_rollback_all(struct hl_device *hdev) +void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) { int i; struct hl_cs *cs, *tmp; - flush_workqueue(hdev->sob_reset_wq); + if (!skip_wq_flush) { + flush_workqueue(hdev->ts_free_obj_wq); - /* flush all completions before iterating over the CS mirror list in - * order to avoid a race with the release functions - */ - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - flush_workqueue(hdev->cq_wq[i]); + /* flush all completions before iterating over the CS mirror list in + * order to avoid a race with the release functions + */ + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + flush_workqueue(hdev->cq_wq[i]); + + } /* Make sure we don't have leftovers in the CS mirror list */ list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { @@ -948,13 +953,19 @@ void hl_cs_rollback_all(struct hl_device *hdev) static void wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) { - struct hl_user_pending_interrupt *pend; + struct hl_user_pending_interrupt *pend, *temp; unsigned long flags; spin_lock_irqsave(&interrupt->wait_list_lock, flags); - list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { - pend->fence.error = -EIO; - complete_all(&pend->fence.completion); + list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { + if (pend->ts_reg_info.ts_buff) { + list_del(&pend->wait_list_node); + hl_ts_put(pend->ts_reg_info.ts_buff); + hl_cb_put(pend->ts_reg_info.cq_cb); + } else { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); + } } spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); } @@ -2063,13 +2074,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, idp = &ctx->sig_mgr.handles; idr_for_each_entry(idp, encaps_sig_hdl, id) { if (encaps_sig_hdl->cs_seq == signal_seq) { - handle_found = true; - /* get refcount to protect removing - * this handle from idr, needed when - * multiple wait cs are used with offset + /* get refcount to protect removing this handle from idr, + * needed when multiple wait cs are used with offset * to wait on reserved encaps signals. + * Since kref_put of this handle is executed outside the + * current lock, it is possible that the handle refcount + * is 0 but it yet to be removed from the list. In this + * case need to consider the handle as not valid. */ - kref_get(&encaps_sig_hdl->refcount); + if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) + handle_found = true; break; } } @@ -2739,7 +2753,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) mcs_data.update_ts = false; rc = hl_cs_poll_fences(&mcs_data, mcs_compl); - if (mcs_data.completion_bitmap) + if (rc || mcs_data.completion_bitmap) break; /* @@ -2854,64 +2868,174 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } +static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff, + struct hl_cb *cq_cb, + u64 ts_offset, u64 cq_offset, u64 target_value, + spinlock_t *wait_list_lock, + struct hl_user_pending_interrupt **pend) +{ + struct hl_user_pending_interrupt *requested_offset_record = + (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + ts_offset; + struct hl_user_pending_interrupt *cb_last = + (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); + unsigned long flags, iter_counter = 0; + u64 current_cq_counter; + + /* Validate ts_offset not exceeding last max */ + if (requested_offset_record > cb_last) { + dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n", + (u64)(uintptr_t)cb_last); + return -EINVAL; + } + +start_over: + spin_lock_irqsave(wait_list_lock, flags); + + /* Unregister only if we didn't reach the target value + * since in this case there will be no handling in irq context + * and then it's safe to delete the node out of the interrupt list + * then re-use it on other interrupt + */ + if (requested_offset_record->ts_reg_info.in_use) { + current_cq_counter = *requested_offset_record->cq_kernel_addr; + if (current_cq_counter < requested_offset_record->cq_target_value) { + list_del(&requested_offset_record->wait_list_node); + spin_unlock_irqrestore(wait_list_lock, flags); + + hl_ts_put(requested_offset_record->ts_reg_info.ts_buff); + hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); + + dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n"); + } else { + dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n"); + + /* irq handling in the middle give it time to finish */ + spin_unlock_irqrestore(wait_list_lock, flags); + usleep_range(1, 10); + if (++iter_counter == MAX_TS_ITER_NUM) { + dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n"); + return -EINVAL; + } + + goto start_over; + } + } else { + spin_unlock_irqrestore(wait_list_lock, flags); + } + + /* Fill up the new registration node info */ + requested_offset_record->ts_reg_info.in_use = 1; + requested_offset_record->ts_reg_info.ts_buff = ts_buff; + requested_offset_record->ts_reg_info.cq_cb = cq_cb; + requested_offset_record->ts_reg_info.timestamp_kernel_addr = + (u64 *) ts_buff->user_buff_address + ts_offset; + requested_offset_record->cq_kernel_addr = + (u64 *) cq_cb->kernel_address + cq_offset; + requested_offset_record->cq_target_value = target_value; + + *pend = requested_offset_record; + + dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n", + (u64)(uintptr_t)requested_offset_record); + return 0; +} + static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, - struct hl_cb_mgr *cb_mgr, u64 timeout_us, - u64 cq_counters_handle, u64 cq_counters_offset, + struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr, + u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, u64 target_value, struct hl_user_interrupt *interrupt, - u32 *status, - u64 *timestamp) + bool register_ts_record, u64 ts_handle, u64 ts_offset, + u32 *status, u64 *timestamp) { + u32 cq_patched_handle, ts_patched_handle; struct hl_user_pending_interrupt *pend; + struct hl_ts_buff *ts_buff; + struct hl_cb *cq_cb; unsigned long timeout, flags; long completion_rc; - struct hl_cb *cb; int rc = 0; - u32 handle; timeout = hl_usecs64_to_jiffies(timeout_us); hl_ctx_get(hdev, ctx); - cq_counters_handle >>= PAGE_SHIFT; - handle = (u32) cq_counters_handle; - - cb = hl_cb_get(hdev, cb_mgr, handle); - if (!cb) { - hl_ctx_put(ctx); - return -EINVAL; + cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT); + cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle); + if (!cq_cb) { + rc = -EINVAL; + goto put_ctx; } - pend = kzalloc(sizeof(*pend), GFP_KERNEL); - if (!pend) { - hl_cb_put(cb); - hl_ctx_put(ctx); - return -ENOMEM; - } + if (register_ts_record) { + dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", + interrupt->interrupt_id, ts_offset, cq_counters_offset); - hl_fence_init(&pend->fence, ULONG_MAX); + ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT); + ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle); + if (!ts_buff) { + rc = -EINVAL; + goto put_cq_cb; + } - pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset; - pend->cq_target_value = target_value; + /* Find first available record */ + rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset, + cq_counters_offset, target_value, + &interrupt->wait_list_lock, &pend); + if (rc) + goto put_ts_buff; + } else { + pend = kzalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + rc = -ENOMEM; + goto put_cq_cb; + } + hl_fence_init(&pend->fence, ULONG_MAX); + pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; + pend->cq_target_value = target_value; + } + + spin_lock_irqsave(&interrupt->wait_list_lock, flags); /* We check for completion value as interrupt could have been received * before we added the node to the wait list */ if (*pend->cq_kernel_addr >= target_value) { + if (register_ts_record) + pend->ts_reg_info.in_use = 0; + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + *status = HL_WAIT_CS_STATUS_COMPLETED; - /* There was no interrupt, we assume the completion is now. */ - pend->fence.timestamp = ktime_get(); - } - if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) + if (register_ts_record) { + *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); + goto put_ts_buff; + } else { + pend->fence.timestamp = ktime_get(); + goto set_timestamp; + } + } else if (!timeout_us) { + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + *status = HL_WAIT_CS_STATUS_BUSY; + pend->fence.timestamp = ktime_get(); goto set_timestamp; + } /* Add pending user interrupt to relevant list for the interrupt - * handler to monitor + * handler to monitor. + * Note that we cannot have sorted list by target value, + * in order to shorten the list pass loop, since + * same list could have nodes for different cq counter handle. */ - spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + if (register_ts_record) { + rc = *status = HL_WAIT_CS_STATUS_COMPLETED; + goto ts_registration_exit; + } + /* Wait for interrupt handler to signal completion */ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, timeout); @@ -2932,23 +3056,41 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, rc = -EIO; *status = HL_WAIT_CS_STATUS_ABORTED; } else { - dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n", - interrupt->interrupt_id); - rc = -ETIMEDOUT; + /* The wait has timed-out. We don't know anything beyond that + * because the workload wasn't submitted through the driver. + * Therefore, from driver's perspective, the workload is still + * executing. + */ + rc = 0; + *status = HL_WAIT_CS_STATUS_BUSY; } - *status = HL_WAIT_CS_STATUS_BUSY; } } + /* + * We keep removing the node from list here, and not at the irq handler + * for completion timeout case. and if it's a registration + * for ts record, the node will be deleted in the irq handler after + * we reach the target value. + */ spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_del(&pend->wait_list_node); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); set_timestamp: *timestamp = ktime_to_ns(pend->fence.timestamp); - kfree(pend); - hl_cb_put(cb); + hl_cb_put(cq_cb); +ts_registration_exit: + hl_ctx_put(ctx); + + return rc; + +put_ts_buff: + hl_ts_put(ts_buff); +put_cq_cb: + hl_cb_put(cq_cb); +put_ctx: hl_ctx_put(ctx); return rc; @@ -3049,6 +3191,12 @@ wait_again: interrupt->interrupt_id); rc = -EINTR; } else { + /* The wait has timed-out. We don't know anything beyond that + * because the workload wasn't submitted through the driver. + * Therefore, from driver's perspective, the workload is still + * executing. + */ + rc = 0; *status = HL_WAIT_CS_STATUS_BUSY; } @@ -3101,23 +3249,20 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt]; if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr, args->in.interrupt_timeout_us, args->in.cq_counters_handle, args->in.cq_counters_offset, - args->in.target, interrupt, &status, - ×tamp); + args->in.target, interrupt, + !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), + args->in.timestamp_handle, args->in.timestamp_offset, + &status, ×tamp); else rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, args->in.target, interrupt, &status, ×tamp); - if (rc) { - if (rc != -EINTR) - dev_err_ratelimited(hdev->dev, - "interrupt_wait_ioctl failed (%d)\n", rc); - + if (rc) return rc; - } memset(args, 0, sizeof(*args)); args->out.status = status; diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index fc084ee5106e..f18495545854 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf, pci_set_power_state(hdev->pdev, PCI_D0); pci_restore_state(hdev->pdev); rc = pci_enable_device(hdev->pdev); + if (rc < 0) + return rc; } else if (value == 2) { pci_save_state(hdev->pdev); pci_disable_device(hdev->pdev); @@ -1054,42 +1056,12 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf, static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, size_t count, loff_t *ppos) { - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[200]; - ssize_t rc; - - if (*ppos) - return 0; - - sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); - rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + return 0; } static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, size_t count, loff_t *ppos) { - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u64 value; - ssize_t rc; - - if (hdev->reset_info.in_reset) { - dev_warn_ratelimited(hdev->dev, - "Can't change clock gating during reset\n"); - return 0; - } - - rc = kstrtoull_from_user(buf, count, 16, &value); - if (rc) - return rc; - - hdev->clock_gating_mask = value; - hdev->asic_funcs->set_clock_gating(hdev); - return count; } @@ -1101,6 +1073,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf, char tmp_buf[200]; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (*ppos) return 0; @@ -1119,6 +1094,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, u32 value; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't change stop on error during reset\n"); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 733338ab6f1d..dc9341a64541 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -13,6 +13,8 @@ #include <linux/pci.h> #include <linux/hwmon.h> +#define HL_RESET_DELAY_USEC 10000 /* 10ms */ + enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; @@ -145,6 +147,7 @@ static int hl_device_release(struct inode *inode, struct file *filp) hl_release_pending_user_interrupts(hpriv->hdev); hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); + hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr); hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); if (!hl_hpriv_put(hpriv)) @@ -209,6 +212,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma) case HL_MMAP_TYPE_BLOCK: return hl_hw_block_mmap(hpriv, vma); + + case HL_MMAP_TYPE_TS_BUFF: + return hl_ts_mmap(hpriv, vma); } return -EINVAL; @@ -410,10 +416,10 @@ static int device_early_init(struct hl_device *hdev) goto free_cq_wq; } - hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0); - if (!hdev->sob_reset_wq) { + hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0); + if (!hdev->ts_free_obj_wq) { dev_err(hdev->dev, - "Failed to allocate SOB reset workqueue\n"); + "Failed to allocate Timestamp registration free workqueue\n"); rc = -ENOMEM; goto free_eq_wq; } @@ -422,7 +428,7 @@ static int device_early_init(struct hl_device *hdev) GFP_KERNEL); if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_sob_reset_wq; + goto free_ts_free_wq; } rc = hl_mmu_if_set_funcs(hdev); @@ -461,8 +467,8 @@ free_cb_mgr: hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); free_chip_info: kfree(hdev->hl_chip_info); -free_sob_reset_wq: - destroy_workqueue(hdev->sob_reset_wq); +free_ts_free_wq: + destroy_workqueue(hdev->ts_free_obj_wq); free_eq_wq: destroy_workqueue(hdev->eq_wq); free_cq_wq: @@ -501,7 +507,7 @@ static void device_early_fini(struct hl_device *hdev) kfree(hdev->hl_chip_info); - destroy_workqueue(hdev->sob_reset_wq); + destroy_workqueue(hdev->ts_free_obj_wq); destroy_workqueue(hdev->eq_wq); destroy_workqueue(hdev->device_reset_work.wq); @@ -610,7 +616,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization) u64 max_power, curr_power, dc_power, dividend; int rc; - max_power = hdev->asic_prop.max_power_default; + max_power = hdev->max_power; dc_power = hdev->asic_prop.dc_power_default; rc = hl_fw_cpucp_power_get(hdev, &curr_power); @@ -644,9 +650,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en hdev->in_debug = 0; - if (!hdev->reset_info.hard_reset_pending) - hdev->asic_funcs->set_clock_gating(hdev); - goto out; } @@ -657,7 +660,6 @@ int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool en goto out; } - hdev->asic_funcs->disable_clock_gating(hdev); hdev->in_debug = 1; out: @@ -685,7 +687,8 @@ static void take_release_locks(struct hl_device *hdev) mutex_unlock(&hdev->fpriv_ctrl_list_lock); } -static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset) +static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, + bool skip_wq_flush) { if (hard_reset) device_late_fini(hdev); @@ -698,7 +701,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); /* Go over all the queues, release all CS and their jobs */ - hl_cs_rollback_all(hdev); + hl_cs_rollback_all(hdev, skip_wq_flush); /* Release all pending user interrupts, each pending user interrupt * holds a reference to user context @@ -978,7 +981,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) int hl_device_reset(struct hl_device *hdev, u32 flags) { bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, - reset_upon_device_release = false, schedule_hard_reset = false; + reset_upon_device_release = false, schedule_hard_reset = false, + skip_wq_flush, delay_reset; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; struct hl_ctx *ctx; int i, rc; @@ -991,6 +995,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) hard_reset = !!(flags & HL_DRV_RESET_HARD); from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); + skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE); + delay_reset = !!(flags & HL_DRV_RESET_DELAY); if (!hard_reset && !hdev->asic_prop.supports_soft_reset) { hard_instead_soft = true; @@ -1040,6 +1046,9 @@ do_reset: hdev->reset_info.in_reset = 1; spin_unlock(&hdev->reset_info.lock); + if (delay_reset) + usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); + handle_reset_trigger(hdev, flags); /* This still allows the completion of some KDMA ops */ @@ -1076,7 +1085,7 @@ again: return 0; } - cleanup_resources(hdev, hard_reset, fw_reset); + cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush); kill_processes: if (hard_reset) { @@ -1232,7 +1241,7 @@ kill_processes: goto out_err; } - hl_set_max_power(hdev); + hl_fw_set_max_power(hdev); } else { rc = hdev->asic_funcs->non_hard_reset_late_init(hdev); if (rc) { @@ -1297,11 +1306,14 @@ out_err: hdev->reset_info.hard_reset_cnt++; } else if (reset_upon_device_release) { dev_err(hdev->dev, "Failed to reset device after user release\n"); + flags |= HL_DRV_RESET_HARD; + flags &= ~HL_DRV_RESET_DEV_RELEASE; hard_reset = true; goto again; } else { dev_err(hdev->dev, "Failed to do soft-reset\n"); hdev->reset_info.soft_reset_cnt++; + flags |= HL_DRV_RESET_HARD; hard_reset = true; goto again; } @@ -1538,7 +1550,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) /* Need to call this again because the max power might change, * depending on card type for certain ASICs */ - hl_set_max_power(hdev); + if (hdev->asic_prop.set_max_power_on_device_init) + hl_fw_set_max_power(hdev); /* * hl_hwmon_init() must be called after device_late_init(), because only @@ -1682,7 +1695,7 @@ void hl_device_fini(struct hl_device *hdev) hl_hwmon_fini(hdev); - cleanup_resources(hdev, true, false); + cleanup_resources(hdev, true, false, false); /* Kill processes here after CS rollback. This is because the process * can't really exit until all its CSs are done, which is what we diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 6775c5c3166b..3262126cc7ca 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -214,7 +214,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, dma_addr_t pkt_dma_addr; struct hl_bd *sent_bd; u32 tmp, expected_ack_val, pi; - int rc = 0; + int rc; pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); @@ -228,8 +228,11 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, mutex_lock(&hdev->send_cpu_message_lock); - if (hdev->disabled) + /* CPU-CP messages can be sent during soft-reset */ + if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) { + rc = 0; goto out; + } if (hdev->device_cpu_disabled) { rc = -EIO; @@ -958,15 +961,17 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); - if (rc) + if (rc) { dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc); + return rc; + } pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result); pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result); pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result); pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result); - return rc; + return 0; } int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) @@ -1202,8 +1207,6 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev, hdev, cpu_boot_status_reg, status, - (status == CPU_BOOT_STATUS_IN_UBOOT) || - (status == CPU_BOOT_STATUS_DRAM_RDY) || (status == CPU_BOOT_STATUS_NIC_FW_RDY) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), @@ -2682,3 +2685,138 @@ int hl_fw_init_cpu(struct hl_device *hdev) hl_fw_dynamic_init_cpu(hdev, fw_loader) : hl_fw_static_init_cpu(hdev, fw_loader); } + +void hl_fw_set_pll_profile(struct hl_device *hdev) +{ + hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, + hdev->asic_prop.max_freq_value); +} + +int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) +{ + long value; + + if (!hl_device_operational(hdev, NULL)) + return -ENODEV; + + if (!hdev->pdev) { + *cur_clk = 0; + *max_clk = 0; + return 0; + } + + value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false); + + if (value < 0) { + dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value); + return value; + } + + *max_clk = (value / 1000 / 1000); + + value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true); + + if (value < 0) { + dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value); + return value; + } + + *cur_clk = (value / 1000 / 1000); + + return 0; +} + +long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +{ + struct cpucp_packet pkt; + u32 used_pll_idx; + u64 result; + int rc; + + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + + memset(&pkt, 0, sizeof(pkt)); + + if (curr) + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + else + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); + + if (rc) { + dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", + used_pll_idx, rc); + return rc; + } + + return (long) result; +} + +void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +{ + struct cpucp_packet pkt; + u32 used_pll_idx; + int rc; + + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.pll_index = cpu_to_le32((u32)used_pll_idx); + pkt.value = cpu_to_le64(freq); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n", + used_pll_idx, rc); +} + +long hl_fw_get_max_power(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); + + if (rc) { + dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); + return rc; + } + + return result; +} + +void hl_fw_set_max_power(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + int rc; + + /* TODO: remove this after simulator supports this packet */ + if (!hdev->pdev) + return; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(hdev->max_power); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index cb710fd478b6..1edaf6ab67bd 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -31,14 +31,15 @@ #define HL_NAME "habanalabs" /* Use upper bits of mmap offset to store habana driver specific information. - * bits[63:61] - Encode mmap type + * bits[63:59] - Encode mmap type * bits[45:0] - mmap offset value * * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these * defines are w.r.t to PAGE_SIZE */ -#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT) -#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT) +#define HL_MMAP_TYPE_SHIFT (59 - PAGE_SHIFT) +#define HL_MMAP_TYPE_MASK (0x1full << HL_MMAP_TYPE_SHIFT) +#define HL_MMAP_TYPE_TS_BUFF (0x10ull << HL_MMAP_TYPE_SHIFT) #define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT) #define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT) @@ -141,6 +142,9 @@ enum hl_mmu_page_table_location { * * - HL_DRV_RESET_FW_FATAL_ERR * Set if reset is due to a fatal error from FW + * + * - HL_DRV_RESET_DELAY + * Set if a delay should be added before the reset */ #define HL_DRV_RESET_HARD (1 << 0) @@ -150,6 +154,7 @@ enum hl_mmu_page_table_location { #define HL_DRV_RESET_DEV_RELEASE (1 << 4) #define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5) #define HL_DRV_RESET_FW_FATAL_ERR (1 << 6) +#define HL_DRV_RESET_DELAY (1 << 7) #define HL_MAX_SOBS_PER_MONITOR 8 @@ -402,8 +407,11 @@ enum hl_device_hw_state { * @hop4_mask: mask to get the PTE address in hop 4. * @hop5_mask: mask to get the PTE address in hop 5. * @last_mask: mask to get the bit indicating this is the last hop. + * @pgt_size: size for page tables. * @page_size: default page size used to allocate memory. * @num_hops: The amount of hops supported by the translation table. + * @hop_table_size: HOP table size. + * @hop0_tables_total_size: total size for all HOP0 tables. * @host_resident: Should the MMU page table reside in host memory or in the * device DRAM. */ @@ -423,8 +431,11 @@ struct hl_mmu_properties { u64 hop4_mask; u64 hop5_mask; u64 last_mask; + u64 pgt_size; u32 page_size; u32 num_hops; + u32 hop_table_size; + u32 hop0_tables_total_size; u8 host_resident; }; @@ -554,6 +565,9 @@ struct hl_hints_range { * use-case of doing soft-reset in training (due * to the fact that training runs on multiple * devices) + * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. + * @set_max_power_on_device_init: true if need to set max power in F/W on device init. + * @supports_user_set_page_size: true if user can set the allocation page size. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -637,6 +651,9 @@ struct asic_fixed_properties { u8 use_get_power_for_reset_history; u8 supports_soft_reset; u8 allow_inference_soft_reset; + u8 configurable_stop_on_err; + u8 set_max_power_on_device_init; + u8 supports_user_set_page_size; }; /** @@ -704,6 +721,40 @@ struct hl_cb_mgr { }; /** + * struct hl_ts_mgr - describes the timestamp registration memory manager. + * @ts_lock: protects ts_handles. + * @ts_handles: an idr to hold all ts bufferes handles. + */ +struct hl_ts_mgr { + spinlock_t ts_lock; + struct idr ts_handles; +}; + +/** + * struct hl_ts_buff - describes a timestamp buffer. + * @refcount: reference counter for usage of the buffer. + * @hdev: pointer to device this buffer belongs to. + * @mmap: true if the buff is currently mapped to user. + * @kernel_buff_address: Holds the internal buffer's kernel virtual address. + * @user_buff_address: Holds the user buffer's kernel virtual address. + * @id: the buffer ID. + * @mmap_size: Holds the buffer size that was mmaped. + * @kernel_buff_size: Holds the internal kernel buffer size. + * @user_buff_size: Holds the user buffer size. + */ +struct hl_ts_buff { + struct kref refcount; + struct hl_device *hdev; + atomic_t mmap; + void *kernel_buff_address; + void *user_buff_address; + u32 id; + u32 mmap_size; + u32 kernel_buff_size; + u32 user_buff_size; +}; + +/** * struct hl_cb - describes a Command Buffer. * @refcount: reference counter for usage of the CB. * @hdev: pointer to device this CB belongs to. @@ -881,8 +932,53 @@ struct hl_user_interrupt { }; /** + * struct timestamp_reg_free_node - holds the timestamp registration free objects node + * @free_objects_node: node in the list free_obj_jobs + * @cq_cb: pointer to cq command buffer to be freed + * @ts_buff: pointer to timestamp buffer to be freed + */ +struct timestamp_reg_free_node { + struct list_head free_objects_node; + struct hl_cb *cq_cb; + struct hl_ts_buff *ts_buff; +}; + +/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job + * the job will be to pass over the free_obj_jobs list and put refcount to objects + * in each node of the list + * @free_obj: workqueue object to free timestamp registration node objects + * @hdev: pointer to the device structure + * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node) + */ +struct timestamp_reg_work_obj { + struct work_struct free_obj; + struct hl_device *hdev; + struct list_head *free_obj_head; +}; + +/* struct timestamp_reg_info - holds the timestamp registration related data. + * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers. + * relevant only when doing timestamps records registration. + * @cq_cb: pointer to CQ counter CB. + * @timestamp_kernel_addr: timestamp handle address, where to set timestamp + * relevant only when doing timestamps records + * registration. + * @in_use: indicates if the node already in use. relevant only when doing + * timestamps records registration, since in this case the driver + * will have it's own buffer which serve as a records pool instead of + * allocating records dynamically. + */ +struct timestamp_reg_info { + struct hl_ts_buff *ts_buff; + struct hl_cb *cq_cb; + u64 *timestamp_kernel_addr; + u8 in_use; +}; + +/** * struct hl_user_pending_interrupt - holds a context to a user thread * pending on an interrupt + * @ts_reg_info: holds the timestamps registration nodes info * @wait_list_node: node in the list of user threads pending on an interrupt * @fence: hl fence object for interrupt completion * @cq_target_value: CQ target value @@ -890,10 +986,11 @@ struct hl_user_interrupt { * handler for taget value comparison */ struct hl_user_pending_interrupt { - struct list_head wait_list_node; - struct hl_fence fence; - u64 cq_target_value; - u64 *cq_kernel_addr; + struct timestamp_reg_info ts_reg_info; + struct list_head wait_list_node; + struct hl_fence fence; + u64 cq_target_value; + u64 *cq_kernel_addr; }; /** @@ -1155,7 +1252,6 @@ struct fw_load_mgr { * internal memory via DMA engine. * @add_device_attr: add ASIC specific device attributes. * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. - * @set_pll_profile: change PLL profile (manual/automatic). * @get_events_stat: retrieve event queue entries histogram. * @read_pte: read MMU page table entry from DRAM. * @write_pte: write MMU page table entry to DRAM. @@ -1164,9 +1260,6 @@ struct fw_load_mgr { * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to CPU-CP and verify response. - * @set_clock_gating: enable/disable clock gating per engine according to - * clock gating mask in hdev - * @disable_clock_gating: disable clock gating completely * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset @@ -1187,7 +1280,6 @@ struct fw_load_mgr { * @halt_coresight: stop the ETF and ETR traces. * @ctx_init: context dependent initialization. * @ctx_fini: context dependent cleanup. - * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. * @load_firmware_to_device: load the firmware to the device's memory * @load_boot_fit_to_device: load boot fit to device's memory @@ -1225,6 +1317,8 @@ struct fw_load_mgr { * @get_sob_addr: get SOB base address offset. * @set_pci_memory_regions: setting properties of PCI memory regions * @get_stream_master_qid_arr: get pointer to stream masters QID array + * @is_valid_dram_page_size: return true if page size is supported in device + * memory allocation, otherwise false. */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1285,12 +1379,10 @@ struct hl_asic_funcs { bool user_address, u64 val); int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr); - void (*add_device_attr)(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); + void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp); void (*handle_eqe)(struct hl_device *hdev, struct hl_eq_entry *eq_entry); - void (*set_pll_profile)(struct hl_device *hdev, - enum hl_pll_frequency freq); void* (*get_events_stat)(struct hl_device *hdev, bool aggregate, u32 *size); u64 (*read_pte)(struct hl_device *hdev, u64 addr); @@ -1300,8 +1392,6 @@ struct hl_asic_funcs { int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); - void (*set_clock_gating)(struct hl_device *hdev); - void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, struct seq_file *s); @@ -1320,7 +1410,6 @@ struct hl_asic_funcs { void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx); int (*ctx_init)(struct hl_ctx *ctx); void (*ctx_fini)(struct hl_ctx *ctx); - int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); int (*load_firmware_to_device)(struct hl_device *hdev); int (*load_boot_fit_to_device)(struct hl_device *hdev); @@ -1355,6 +1444,7 @@ struct hl_asic_funcs { u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id); void (*set_pci_memory_regions)(struct hl_device *hdev); u32* (*get_stream_master_qid_arr)(void); + bool (*is_valid_dram_page_size)(u32 page_size); }; @@ -1742,6 +1832,8 @@ struct hl_vm_hw_block_list_node { * @pages: the physical page array. * @npages: num physical pages in the pack. * @total_size: total size of all the pages in this list. + * @node: used to attach to deletion list that is used when all the allocations are cleared + * at the teardown of the context. * @mapping_cnt: number of shared mappings. * @exporting_cnt: number of dma-buf exporting. * @asid: the context related to this list. @@ -1757,6 +1849,7 @@ struct hl_vm_phys_pg_pack { u64 *pages; u64 npages; u64 total_size; + struct list_head node; atomic_t mapping_cnt; u32 exporting_cnt; u32 asid; @@ -1834,6 +1927,7 @@ struct hl_debug_params { * @ctx: current executing context. TODO: remove for multiple ctx per process * @ctx_mgr: context manager to handle multiple context for this FD. * @cb_mgr: command buffer manager to handle multiple buffers for this FD. + * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers. * @debugfs_list: list of relevant ASIC debugfs. * @dev_node: node in the device list of file private data * @refcount: number of related contexts. @@ -1846,6 +1940,7 @@ struct hl_fpriv { struct hl_ctx *ctx; struct hl_ctx_mgr ctx_mgr; struct hl_cb_mgr cb_mgr; + struct hl_ts_mgr ts_mem_mgr; struct list_head debugfs_list; struct list_head dev_node; struct kref refcount; @@ -2518,7 +2613,7 @@ struct hl_reset_info { * @cq_wq: work queues of completion queues for executing work in process * context. * @eq_wq: work queue of event queue for executing work in process context. - * @sob_reset_wq: work queue for sob reset executions. + * @ts_free_obj_wq: work queue for timestamp registration objects release. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. @@ -2569,9 +2664,6 @@ struct hl_reset_info { * @max_power: the max power of the device, as configured by the sysadmin. This * value is saved so in case of hard-reset, the driver will restore * this value and update the F/W after the re-initialization - * @clock_gating_mask: is clock gating enabled. bitmask that represents the - * different engines. See debugfs-driver-habanalabs for - * details. * @boot_error_status_mask: contains a mask of the device boot error status. * Each bit represents a different error, according to * the defines in hl_boot_if.h. If the bit is cleared, @@ -2611,8 +2703,6 @@ struct hl_reset_info { * @in_debug: whether the device is in a state where the profiling/tracing infrastructure * can be used. This indication is needed because in some ASICs we need to do * specific operations to enable that infrastructure. - * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant - * only to POWER9 machines. * @cdev_sysfs_created: were char devices and sysfs nodes created. * @stop_on_err: true if engines should stop on error. * @supports_sync_stream: is sync stream supported. @@ -2651,7 +2741,7 @@ struct hl_device { struct hl_user_interrupt common_user_interrupt; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; - struct workqueue_struct *sob_reset_wq; + struct workqueue_struct *ts_free_obj_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; @@ -2710,7 +2800,6 @@ struct hl_device { atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; - u64 clock_gating_mask; u64 boot_error_status_mask; u64 dram_pci_bar_start; u64 last_successful_open_jif; @@ -2736,7 +2825,6 @@ struct hl_device { u8 device_cpu_disabled; u8 dma_mask; u8 in_debug; - u8 power9_64bit_dma_enable; u8 cdev_sysfs_created; u8 stop_on_err; u8 supports_sync_stream; @@ -2970,7 +3058,7 @@ int hl_cb_pool_fini(struct hl_device *hdev); int hl_cb_va_pool_init(struct hl_ctx *ctx); void hl_cb_va_pool_fini(struct hl_ctx *ctx); -void hl_cs_rollback_all(struct hl_device *hdev); +void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void hl_sob_reset_error(struct kref *ref); @@ -3024,6 +3112,9 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size); int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags); int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); +u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte); +u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, + u8 hop_idx, u64 hop_addr, u64 virt_addr); void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); int hl_mmu_if_set_funcs(struct hl_device *hdev); @@ -3094,39 +3185,26 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr); int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, - bool curr); -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, - u64 freq); -int hl_get_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_set_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_get_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_current(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_fan_speed(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_pwm_info(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, - long value); -u64 hl_get_max_power(struct hl_device *hdev); -void hl_set_max_power(struct hl_device *hdev); -int hl_set_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_set_current(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_set_power(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_get_power(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_clk_rate(struct hl_device *hdev, - u32 *cur_clk, u32 *max_clk); -void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); -void hl_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); +long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); +void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); +int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value); +long hl_fw_get_max_power(struct hl_device *hdev); +void hl_fw_set_max_power(struct hl_device *hdev); +int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value); +int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); +void hl_fw_set_pll_profile(struct hl_device *hdev); +void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp); +void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp); + void hw_sob_get(struct hl_hw_sob *hw_sob); void hw_sob_put(struct hl_hw_sob *hw_sob); void hl_encaps_handle_do_release(struct kref *ref); @@ -3146,6 +3224,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset, const char *format, ...); char *hl_format_as_binary(char *buf, size_t buf_len, u32 n); const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type); +void hl_ts_mgr_init(struct hl_ts_mgr *mgr); +void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr); +int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); +struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle); +void hl_ts_put(struct hl_ts_buff *buff); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 690b763c7a95..ca404ed9d9a7 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_cb_mgr_init(&hpriv->cb_mgr); hl_ctx_mgr_init(&hpriv->ctx_mgr); + hl_ts_mgr_init(&hpriv->ts_mem_mgr); hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); @@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp) out_err: mutex_unlock(&hdev->fpriv_list_lock); hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); + hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); filp->private_data = NULL; mutex_destroy(&hpriv->restore_phase_mutex); @@ -256,7 +258,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; hdev->mmu_enable = 1; - hdev->clock_gating_mask = ULONG_MAX; hdev->sram_scrambler_enable = 1; hdev->dram_scrambler_enable = 1; hdev->bmc_enable = 1; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 3ba3a8ffda3e..c13a3c2a7013 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -92,8 +92,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor; - hw_ip.first_available_interrupt_id = - prop->first_available_user_msix_interrupt; + hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt; + hw_ip.number_of_user_interrupts = prop->user_interrupt_count; hw_ip.server_type = prop->server_type; return copy_to_user(out, &hw_ip, @@ -251,13 +251,12 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, - &clk_rate.max_clk_rate_mhz); + rc = hl_fw_get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, &clk_rate.max_clk_rate_mhz); if (rc) return rc; - return copy_to_user(out, &clk_rate, - min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; + return copy_to_user(out, &clk_rate, min_t(size_t, max_size, sizeof(clk_rate))) + ? -EFAULT : 0; } static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) diff --git a/drivers/misc/habanalabs/common/hwmgr.c b/drivers/misc/habanalabs/common/hwmgr.c deleted file mode 100644 index 5451019f143f..000000000000 --- a/drivers/misc/habanalabs/common/hwmgr.c +++ /dev/null @@ -1,117 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2019-2021 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include "habanalabs.h" - -void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) -{ - hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index, - hdev->asic_prop.max_freq_value); -} - -int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) -{ - long value; - - if (!hl_device_operational(hdev, NULL)) - return -ENODEV; - - value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false); - - if (value < 0) { - dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", - value); - return value; - } - - *max_clk = (value / 1000 / 1000); - - value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true); - - if (value < 0) { - dev_err(hdev->dev, - "Failed to retrieve device current clock %ld\n", - value); - return value; - } - - *cur_clk = (value / 1000 / 1000); - - return 0; -} - -static ssize_t clk_max_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - long value; - - if (!hl_device_operational(hdev, NULL)) - return -ENODEV; - - value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false); - - hdev->asic_prop.max_freq_value = value; - - return sprintf(buf, "%lu\n", (value / 1000 / 1000)); -} - -static ssize_t clk_max_freq_mhz_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - int rc; - u64 value; - - if (!hl_device_operational(hdev, NULL)) { - count = -ENODEV; - goto fail; - } - - rc = kstrtoull(buf, 0, &value); - if (rc) { - count = -EINVAL; - goto fail; - } - - hdev->asic_prop.max_freq_value = value * 1000 * 1000; - - hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index, - hdev->asic_prop.max_freq_value); - -fail: - return count; -} - -static ssize_t clk_cur_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - long value; - - if (!hl_device_operational(hdev, NULL)) - return -ENODEV; - - value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true); - - return sprintf(buf, "%lu\n", (value / 1000 / 1000)); -} - -static DEVICE_ATTR_RW(clk_max_freq_mhz); -static DEVICE_ATTR_RO(clk_cur_freq_mhz); - -static struct attribute *hl_dev_attrs[] = { - &dev_attr_clk_max_freq_mhz.attr, - &dev_attr_clk_cur_freq_mhz.attr, - NULL, -}; - -void hl_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp) -{ - dev_attr_grp->attrs = hl_dev_attrs; -} diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index 1b6bdc900c26..e2bc128f2291 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) return IRQ_HANDLED; } +/* + * hl_ts_free_objects - handler of the free objects workqueue. + * This function should put refcount to objects that the registration node + * took refcount to them. + * @work: workqueue object pointer + */ +static void hl_ts_free_objects(struct work_struct *work) +{ + struct timestamp_reg_work_obj *job = + container_of(work, struct timestamp_reg_work_obj, free_obj); + struct timestamp_reg_free_node *free_obj, *temp_free_obj; + struct list_head *free_list_head = job->free_obj_head; + struct hl_device *hdev = job->hdev; + + list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) { + dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n", + free_obj->ts_buff, + free_obj->cq_cb); + + hl_ts_put(free_obj->ts_buff); + hl_cb_put(free_obj->cq_cb); + kfree(free_obj); + } + + kfree(free_list_head); + kfree(job); +} + +/* + * This function called with spin_lock of wait_list_lock taken + * This function will set timestamp and delete the registration node from the + * wait_list_lock. + * and since we're protected with spin_lock here, so we cannot just put the refcount + * for the objects here, since the release function may be called and it's also a long + * logic (which might sleep also) that cannot be handled in irq context. + * so here we'll be filling a list with nodes of "put" jobs and then will send this + * list to a dedicated workqueue to do the actual put. + */ +static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend, + struct list_head **free_list) +{ + struct timestamp_reg_free_node *free_node; + u64 timestamp; + + if (!(*free_list)) { + /* Alloc/Init the timestamp registration free objects list */ + *free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); + if (!(*free_list)) + return -ENOMEM; + + INIT_LIST_HEAD(*free_list); + } + + free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC); + if (!free_node) + return -ENOMEM; + + timestamp = ktime_get_ns(); + + *pend->ts_reg_info.timestamp_kernel_addr = timestamp; + + dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n", + pend->ts_reg_info.timestamp_kernel_addr, + *(u64 *)pend->ts_reg_info.timestamp_kernel_addr); + + list_del(&pend->wait_list_node); + + /* Mark kernel CB node as free */ + pend->ts_reg_info.in_use = 0; + + /* Putting the refcount for ts_buff and cq_cb objects will be handled + * in workqueue context, just add job to free_list. + */ + free_node->ts_buff = pend->ts_reg_info.ts_buff; + free_node->cq_cb = pend->ts_reg_info.cq_cb; + list_add(&free_node->free_objects_node, *free_list); + + return 0; +} + static void handle_user_cq(struct hl_device *hdev, struct hl_user_interrupt *user_cq) { - struct hl_user_pending_interrupt *pend; + struct hl_user_pending_interrupt *pend, *temp_pend; + struct list_head *ts_reg_free_list_head = NULL; + struct timestamp_reg_work_obj *job; + bool reg_node_handle_fail = false; ktime_t now = ktime_get(); + int rc; + + /* For registration nodes: + * As part of handling the registration nodes, we should put refcount to + * some objects. the problem is that we cannot do that under spinlock + * or in irq handler context at all (since release functions are long and + * might sleep), so we will need to handle that part in workqueue context. + * To avoid handling kmalloc failure which compels us rolling back actions + * and move nodes hanged on the free list back to the interrupt wait list + * we always alloc the job of the WQ at the beginning. + */ + job = kmalloc(sizeof(*job), GFP_ATOMIC); + if (!job) + return; spin_lock(&user_cq->wait_list_lock); - list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) { - if ((pend->cq_kernel_addr && - *(pend->cq_kernel_addr) >= pend->cq_target_value) || + list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) { + if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || !pend->cq_kernel_addr) { - pend->fence.timestamp = now; - complete_all(&pend->fence.completion); + if (pend->ts_reg_info.ts_buff) { + if (!reg_node_handle_fail) { + rc = handle_registration_node(hdev, pend, + &ts_reg_free_list_head); + if (rc) + reg_node_handle_fail = true; + } + } else { + /* Handle wait target value node */ + pend->fence.timestamp = now; + complete_all(&pend->fence.completion); + } } } spin_unlock(&user_cq->wait_list_lock); + + if (ts_reg_free_list_head) { + INIT_WORK(&job->free_obj, hl_ts_free_objects); + job->free_obj_head = ts_reg_free_list_head; + job->hdev = hdev; + queue_work(hdev->ts_free_obj_wq, &job->free_obj); + } else { + kfree(job); + } } /** diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index c1eefaebacb6..e008d82e4ba3 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -11,6 +11,7 @@ #include <linux/uaccess.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/pci-p2pdma.h> MODULE_IMPORT_NS(DMA_BUF); @@ -20,6 +21,34 @@ MODULE_IMPORT_NS(DMA_BUF); /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */ #define DRAM_POOL_PAGE_SIZE SZ_8M +static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, + struct hl_mem_in *args, u64 *handle); + +static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 psize; + + /* + * for ASIC that supports setting the allocation page size by user we will address + * user's choice only if it is not 0 (as 0 means taking the default page size) + */ + if (prop->supports_user_set_page_size && args->alloc.page_size) { + psize = args->alloc.page_size; + + if (!hdev->asic_funcs->is_valid_dram_page_size(psize)) { + dev_err(hdev->dev, "user page size (%#x) is not valid\n", psize); + return -EINVAL; + } + } else { + psize = hdev->asic_prop.dram_page_size; + } + + *page_size = psize; + + return 0; +} + /* * The va ranges in context object contain a list with the available chunks of * device virtual memory. @@ -61,11 +90,15 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, struct hl_vm_phys_pg_pack *phys_pg_pack; u64 paddr = 0, total_size, num_pgs, i; u32 num_curr_pgs, page_size; - int handle, rc; bool contiguous; + int handle, rc; num_curr_pgs = 0; - page_size = hdev->asic_prop.dram_page_size; + + rc = set_alloc_page_size(hdev, args, &page_size); + if (rc) + return rc; + num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size); total_size = num_pgs * page_size; @@ -77,7 +110,11 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, contiguous = args->flags & HL_MEM_CONTIGUOUS; if (contiguous) { - paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size); + if (is_power_of_2(page_size)) + paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, + total_size, NULL, page_size); + else + paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size); if (!paddr) { dev_err(hdev->dev, "failed to allocate %llu contiguous pages with total size of %llu\n", @@ -111,9 +148,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, phys_pg_pack->pages[i] = paddr + i * page_size; } else { for (i = 0 ; i < num_pgs ; i++) { - phys_pg_pack->pages[i] = (u64) gen_pool_alloc( - vm->dram_pg_pool, - page_size); + if (is_power_of_2(page_size)) + phys_pg_pack->pages[i] = + (u64) gen_pool_dma_alloc_align(vm->dram_pg_pool, + page_size, NULL, + page_size); + else + phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool, + page_size); if (!phys_pg_pack->pages[i]) { dev_err(hdev->dev, "Failed to allocate device memory (out of memory)\n"); @@ -652,7 +694,7 @@ static u64 get_va_block(struct hl_device *hdev, continue; /* - * In case hint address is 0, and arc_hints_range_reservation + * In case hint address is 0, and hints_range_reservation * property enabled, then avoid allocating va blocks from the * range reserved for hint addresses */ @@ -1967,16 +2009,15 @@ err_dec_exporting_cnt: static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_ctx *ctx = hpriv->ctx; u64 block_handle, device_addr = 0; + struct hl_ctx *ctx = hpriv->ctx; u32 handle = 0, block_size; - int rc, dmabuf_fd = -EBADF; + int rc; switch (args->in.op) { case HL_MEM_OP_ALLOC: if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, - "alloc size must be larger than 0\n"); + dev_err(hdev->dev, "alloc size must be larger than 0\n"); rc = -EINVAL; goto out; } @@ -1997,15 +2038,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) case HL_MEM_OP_MAP: if (args->in.flags & HL_MEM_USERPTR) { - device_addr = args->in.map_host.host_virt_addr; - rc = 0; + dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n"); + rc = -EPERM; } else { - rc = get_paddr_from_handle(ctx, &args->in, - &device_addr); + rc = get_paddr_from_handle(ctx, &args->in, &device_addr); + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; } - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; break; case HL_MEM_OP_UNMAP: @@ -2013,22 +2053,19 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) break; case HL_MEM_OP_MAP_BLOCK: - rc = map_block(hdev, args->in.map_block.block_addr, - &block_handle, &block_size); + rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size); args->out.block_handle = block_handle; args->out.block_size = block_size; break; case HL_MEM_OP_EXPORT_DMABUF_FD: - rc = export_dmabuf_from_addr(ctx, - args->in.export_dmabuf_fd.handle, - args->in.export_dmabuf_fd.mem_size, - args->in.flags, - &dmabuf_fd); - memset(args, 0, sizeof(*args)); - args->out.fd = dmabuf_fd; + dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n"); + rc = -EPERM; break; + case HL_MEM_OP_TS_ALLOC: + rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle); + break; default: dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); rc = -EINVAL; @@ -2039,6 +2076,258 @@ out: return rc; } +static void ts_buff_release(struct kref *ref) +{ + struct hl_ts_buff *buff; + + buff = container_of(ref, struct hl_ts_buff, refcount); + + vfree(buff->kernel_buff_address); + vfree(buff->user_buff_address); + kfree(buff); +} + +struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, + u32 handle) +{ + struct hl_ts_buff *buff; + + spin_lock(&mgr->ts_lock); + buff = idr_find(&mgr->ts_handles, handle); + if (!buff) { + spin_unlock(&mgr->ts_lock); + dev_warn(hdev->dev, + "TS buff get failed, no match to handle 0x%x\n", handle); + return NULL; + } + kref_get(&buff->refcount); + spin_unlock(&mgr->ts_lock); + + return buff; +} + +void hl_ts_put(struct hl_ts_buff *buff) +{ + kref_put(&buff->refcount, ts_buff_release); +} + +static void buff_vm_close(struct vm_area_struct *vma) +{ + struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data; + long new_mmap_size; + + new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start); + + if (new_mmap_size > 0) { + buff->mmap_size = new_mmap_size; + return; + } + + atomic_set(&buff->mmap, 0); + hl_ts_put(buff); + vma->vm_private_data = NULL; +} + +static const struct vm_operations_struct ts_buff_vm_ops = { + .close = buff_vm_close +}; + +int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_ts_buff *buff; + u32 handle, user_buff_size; + int rc; + + /* We use the page offset to hold the idr and thus we need to clear + * it before doing the mmap itself + */ + handle = vma->vm_pgoff; + vma->vm_pgoff = 0; + + buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle); + if (!buff) { + dev_err(hdev->dev, + "TS buff mmap failed, no match to handle 0x%x\n", handle); + return -EINVAL; + } + + /* Validation check */ + user_buff_size = vma->vm_end - vma->vm_start; + if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) { + dev_err(hdev->dev, + "TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n", + user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE)); + rc = -EINVAL; + goto put_buff; + } + +#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK + if (!access_ok(VERIFY_WRITE, + (void __user *) (uintptr_t) vma->vm_start, user_buff_size)) { +#else + if (!access_ok((void __user *) (uintptr_t) vma->vm_start, + user_buff_size)) { +#endif + dev_err(hdev->dev, + "user pointer is invalid - 0x%lx\n", + vma->vm_start); + + rc = -EINVAL; + goto put_buff; + } + + if (atomic_cmpxchg(&buff->mmap, 0, 1)) { + dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n"); + rc = -EINVAL; + goto put_buff; + } + + vma->vm_ops = &ts_buff_vm_ops; + vma->vm_private_data = buff; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; + rc = remap_vmalloc_range(vma, buff->user_buff_address, 0); + if (rc) { + atomic_set(&buff->mmap, 0); + goto put_buff; + } + + buff->mmap_size = buff->user_buff_size; + vma->vm_pgoff = handle; + + return 0; + +put_buff: + hl_ts_put(buff); + return rc; +} + +void hl_ts_mgr_init(struct hl_ts_mgr *mgr) +{ + spin_lock_init(&mgr->ts_lock); + idr_init(&mgr->ts_handles); +} + +void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr) +{ + struct hl_ts_buff *buff; + struct idr *idp; + u32 id; + + idp = &mgr->ts_handles; + + idr_for_each_entry(idp, buff, id) { + if (kref_put(&buff->refcount, ts_buff_release) != 1) + dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n", + id); + } + + idr_destroy(&mgr->ts_handles); +} + +static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements) +{ + struct hl_ts_buff *ts_buff = NULL; + u32 size; + void *p; + + ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL); + if (!ts_buff) + return NULL; + + /* Allocate the user buffer */ + size = num_elements * sizeof(u64); + p = vmalloc_user(size); + if (!p) + goto free_mem; + + ts_buff->user_buff_address = p; + ts_buff->user_buff_size = size; + + /* Allocate the internal kernel buffer */ + size = num_elements * sizeof(struct hl_user_pending_interrupt); + p = vmalloc(size); + if (!p) + goto free_user_buff; + + ts_buff->kernel_buff_address = p; + ts_buff->kernel_buff_size = size; + + return ts_buff; + +free_user_buff: + vfree(ts_buff->user_buff_address); +free_mem: + kfree(ts_buff); + return NULL; +} + +/** + * allocate_timestamps_buffers() - allocate timestamps buffers + * This function will allocate ts buffer that will later on be mapped to the user + * in order to be able to read the timestamp. + * in additon it'll allocate an extra buffer for registration management. + * since we cannot fail during registration for out-of-memory situation, so + * we'll prepare a pool which will be used as user interrupt nodes and instead + * of dynamically allocating nodes while registration we'll pick the node from + * this pool. in addtion it'll add node to the mapping hash which will be used + * to map user ts buffer to the internal kernel ts buffer. + * @hpriv: pointer to the private data of the fd + * @args: ioctl input + * @handle: user timestamp buffer handle as an output + */ +static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle) +{ + struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr; + struct hl_device *hdev = hpriv->hdev; + struct hl_ts_buff *ts_buff; + int rc = 0; + + if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) { + dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n", + args->num_of_elements, TS_MAX_ELEMENTS_NUM); + return -EINVAL; + } + + /* Allocate ts buffer object + * This object will contain two buffers one that will be mapped to the user + * and another internal buffer for the driver use only, which won't be mapped + * to the user. + */ + ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements); + if (!ts_buff) { + rc = -ENOMEM; + goto out_err; + } + + spin_lock(&ts_mgr->ts_lock); + rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC); + spin_unlock(&ts_mgr->ts_lock); + if (rc < 0) { + dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n"); + goto release_ts_buff; + } + + ts_buff->id = rc; + ts_buff->hdev = hdev; + + kref_init(&ts_buff->refcount); + + /* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */ + *handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF; + *handle <<= PAGE_SHIFT; + + dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id); + + return 0; + +release_ts_buff: + kref_put(&ts_buff->refcount, ts_buff_release); +out_err: + *handle = 0; + return rc; +} + int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) { enum hl_device_status status; @@ -2154,6 +2443,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) args->out.fd = dmabuf_fd; break; + case HL_MEM_OP_TS_ALLOC: + rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle); + break; default: dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); rc = -EINVAL; @@ -2607,11 +2899,12 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) */ void hl_vm_ctx_fini(struct hl_ctx *ctx) { + struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node; struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; - struct hl_vm_phys_pg_pack *phys_pg_list; struct hl_vm_hash_node *hnode; + struct hl_vm *vm = &hdev->vm; struct hlist_node *tmp_node; + struct list_head free_list; struct hl_mem_in args; int i; @@ -2644,19 +2937,24 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) mutex_unlock(&ctx->mmu_lock); + INIT_LIST_HEAD(&free_list); + spin_lock(&vm->idr_lock); idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) if (phys_pg_list->asid == ctx->asid) { dev_dbg(hdev->dev, "page list 0x%px of asid %d is still alive\n", phys_pg_list, ctx->asid); - atomic64_sub(phys_pg_list->total_size, - &hdev->dram_used_mem); - free_phys_pg_pack(hdev, phys_pg_list); + + atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem); idr_remove(&vm->phys_pg_pack_handles, i); + list_add(&phys_pg_list->node, &free_list); } spin_unlock(&vm->idr_lock); + list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node) + free_phys_pg_pack(hdev, phys_pg_list); + va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]); va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]); diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 9153a1f55175..810b73421ce1 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -662,3 +662,58 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, return rc; } +u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) +{ + return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX; +} + +/** + * hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP + * @ctx: pointer to the context structure to initialize. + * @hop_idx: HOP index. + * @hop_addr: HOP address. + * @virt_addr: virtual address fro the translation. + * + * @return the matching PTE value on success, otherwise U64_MAX. + */ +u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, + u8 hop_idx, u64 hop_addr, u64 virt_addr) +{ + u64 mask, shift; + + if (hop_idx >= mmu_prop->num_hops) { + dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx); + return U64_MAX; + } + + /* currently max number of HOPs is 6 */ + switch (hop_idx) { + case 0: + mask = mmu_prop->hop0_mask; + shift = mmu_prop->hop0_shift; + break; + case 1: + mask = mmu_prop->hop1_mask; + shift = mmu_prop->hop1_shift; + break; + case 2: + mask = mmu_prop->hop2_mask; + shift = mmu_prop->hop2_shift; + break; + case 3: + mask = mmu_prop->hop3_mask; + shift = mmu_prop->hop3_shift; + break; + case 4: + mask = mmu_prop->hop4_mask; + shift = mmu_prop->hop4_shift; + break; + default: + mask = mmu_prop->hop5_mask; + shift = mmu_prop->hop5_shift; + break; + } + + return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); +} + diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c index 6134b6ae7615..d03786d0c407 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c @@ -217,18 +217,10 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, mmu_prop->hop4_shift); } -static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) -{ - if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & HOP_PHYS_ADDR_MASK; - else - return ULLONG_MAX; -} - static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop) { - u64 hop_addr = get_next_hop_addr(ctx, curr_pte); + u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop_addr == ULLONG_MAX) { hop_addr = alloc_hop(ctx); @@ -467,7 +459,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) { + if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); @@ -546,7 +538,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - hop1_addr = get_next_hop_addr(ctx, curr_pte); + hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop1_addr == ULLONG_MAX) goto not_mapped; @@ -555,7 +547,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - hop2_addr = get_next_hop_addr(ctx, curr_pte); + hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop2_addr == ULLONG_MAX) goto not_mapped; @@ -564,7 +556,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; - hop3_addr = get_next_hop_addr(ctx, curr_pte); + hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop3_addr == ULLONG_MAX) goto not_mapped; @@ -582,7 +574,7 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, } if (!is_huge) { - hop4_addr = get_next_hop_addr(ctx, curr_pte); + hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); if (hop4_addr == ULLONG_MAX) goto not_mapped; @@ -845,27 +837,6 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx) } -static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - int hop_num, u64 hop_addr, u64 virt_addr) -{ - switch (hop_num) { - case 0: - return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - case 1: - return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - case 2: - return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - case 3: - return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - case 4: - return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - default: - break; - } - return U64_MAX; -} - static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops) { @@ -906,7 +877,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx); hops->hop_info[0].hop_pte_addr = - get_hop_pte_addr(ctx, mmu_prop, 0, + hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0, hops->hop_info[0].hop_addr, virt_addr); hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev, @@ -914,13 +885,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, for (i = 1 ; i < used_hops ; i++) { hops->hop_info[i].hop_addr = - get_next_hop_addr(ctx, + hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val); if (hops->hop_info[i].hop_addr == ULLONG_MAX) return -EFAULT; hops->hop_info[i].hop_pte_addr = - get_hop_pte_addr(ctx, mmu_prop, i, + hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i, hops->hop_info[i].hop_addr, virt_addr); hops->hop_info[i].hop_pte_val = diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index 0b5366cc84fd..bb9ce22bafc4 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -338,10 +338,7 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, lower_32_bits(outbound_region_end_address)); rc |= hl_pci_iatu_write(hdev, 0x014, 0); - if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64)) - rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000); - else - rc |= hl_pci_iatu_write(hdev, 0x018, 0); + rc |= hl_pci_iatu_write(hdev, 0x018, 0); rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(outbound_region_end_address)); @@ -411,13 +408,13 @@ int hl_pci_init(struct hl_device *hdev) rc = hdev->asic_funcs->pci_bars_map(hdev); if (rc) { - dev_err(hdev->dev, "Failed to initialize PCI BARs\n"); + dev_err(hdev->dev, "Failed to map PCI BAR addresses\n"); goto disable_device; } rc = hdev->asic_funcs->init_iatu(hdev); if (rc) { - dev_err(hdev->dev, "Failed to initialize iATU\n"); + dev_err(hdev->dev, "PCI controller was not initialized successfully\n"); goto unmap_pci_bars; } diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 45c715325e2a..9ebeb18ab85e 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -9,105 +9,91 @@ #include <linux/pci.h> -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpucp_packet pkt; - u32 used_pll_idx; - u64 result; - int rc; - - rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); - if (rc) - return rc; - - memset(&pkt, 0, sizeof(pkt)); + struct hl_device *hdev = dev_get_drvdata(dev); + long value; - if (curr) - pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET << - CPUCP_PKT_CTL_OPCODE_SHIFT); - else - pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << - CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32((u32)used_pll_idx); + if (!hl_device_operational(hdev, NULL)) + return -ENODEV; - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, &result); + value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false); + if (value < 0) + return value; - if (rc) { - dev_err(hdev->dev, - "Failed to get frequency of PLL %d, error %d\n", - used_pll_idx, rc); - return rc; - } + hdev->asic_prop.max_freq_value = value; - return (long) result; + return sprintf(buf, "%lu\n", (value / 1000 / 1000)); } -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct cpucp_packet pkt; - u32 used_pll_idx; + struct hl_device *hdev = dev_get_drvdata(dev); int rc; + u64 value; - rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); - if (rc) - return; + if (!hl_device_operational(hdev, NULL)) { + count = -ENODEV; + goto fail; + } - memset(&pkt, 0, sizeof(pkt)); + rc = kstrtoull(buf, 0, &value); + if (rc) { + count = -EINVAL; + goto fail; + } - pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << - CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32((u32)used_pll_idx); - pkt.value = cpu_to_le64(freq); + hdev->asic_prop.max_freq_value = value * 1000 * 1000; - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); + hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index, hdev->asic_prop.max_freq_value); - if (rc) - dev_err(hdev->dev, - "Failed to set frequency to PLL %d, error %d\n", - used_pll_idx, rc); +fail: + return count; } -u64 hl_get_max_power(struct hl_device *hdev) +static ssize_t clk_cur_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpucp_packet pkt; - u64 result; - int rc; + struct hl_device *hdev = dev_get_drvdata(dev); + long value; - memset(&pkt, 0, sizeof(pkt)); + if (!hl_device_operational(hdev, NULL)) + return -ENODEV; - pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << - CPUCP_PKT_CTL_OPCODE_SHIFT); + value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true); + if (value < 0) + return value; - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, &result); + return sprintf(buf, "%lu\n", (value / 1000 / 1000)); +} - if (rc) { - dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); - return (u64) rc; - } +static DEVICE_ATTR_RW(clk_max_freq_mhz); +static DEVICE_ATTR_RO(clk_cur_freq_mhz); - return result; -} +static struct attribute *hl_dev_clk_attrs[] = { + &dev_attr_clk_max_freq_mhz.attr, + &dev_attr_clk_cur_freq_mhz.attr, +}; -void hl_set_max_power(struct hl_device *hdev) +static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpucp_packet pkt; - int rc; + struct hl_device *hdev = dev_get_drvdata(dev); + struct cpucp_info *cpucp_info; - memset(&pkt, 0, sizeof(pkt)); + cpucp_info = &hdev->asic_prop.cpucp_info; - pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << - CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.value = cpu_to_le64(hdev->max_power); + if (cpucp_info->infineon_second_stage_version) + return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version), + le32_to_cpu(cpucp_info->infineon_second_stage_version)); + else + return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); +} - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, NULL); +static DEVICE_ATTR_RO(vrm_ver); - if (rc) - dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); -} +static struct attribute *hl_dev_vrm_attrs[] = { + &dev_attr_vrm_ver.attr, +}; static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -158,20 +144,6 @@ static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version); } -static ssize_t infineon_ver_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - if (hdev->asic_prop.cpucp_info.infineon_second_stage_version) - return sprintf(buf, "%#04x %#04x\n", - le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version), - le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_second_stage_version)); - else - return sprintf(buf, "%#04x\n", - le32_to_cpu(hdev->asic_prop.cpucp_info.infineon_version)); -} - static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -188,6 +160,14 @@ static ssize_t thermal_ver_show(struct device *dev, return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version); } +static ssize_t fw_os_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.fw_os_version); +} + static ssize_t preboot_btl_ver_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -323,7 +303,9 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - val = hl_get_max_power(hdev); + val = hl_fw_get_max_power(hdev); + if (val < 0) + return val; return sprintf(buf, "%lu\n", val); } @@ -348,7 +330,7 @@ static ssize_t max_power_store(struct device *dev, } hdev->max_power = value; - hl_set_max_power(hdev); + hl_fw_set_max_power(hdev); out: return count; @@ -394,7 +376,6 @@ static DEVICE_ATTR_RO(device_type); static DEVICE_ATTR_RO(fuse_ver); static DEVICE_ATTR_WO(hard_reset); static DEVICE_ATTR_RO(hard_reset_cnt); -static DEVICE_ATTR_RO(infineon_ver); static DEVICE_ATTR_RW(max_power); static DEVICE_ATTR_RO(pci_addr); static DEVICE_ATTR_RO(preboot_btl_ver); @@ -403,6 +384,7 @@ static DEVICE_ATTR_RO(soft_reset_cnt); static DEVICE_ATTR_RO(status); static DEVICE_ATTR_RO(thermal_ver); static DEVICE_ATTR_RO(uboot_ver); +static DEVICE_ATTR_RO(fw_os_ver); static struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, @@ -420,13 +402,13 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_fuse_ver.attr, &dev_attr_hard_reset.attr, &dev_attr_hard_reset_cnt.attr, - &dev_attr_infineon_ver.attr, &dev_attr_max_power.attr, &dev_attr_pci_addr.attr, &dev_attr_preboot_btl_ver.attr, &dev_attr_status.attr, &dev_attr_thermal_ver.attr, &dev_attr_uboot_ver.attr, + &dev_attr_fw_os_ver.attr, NULL, }; @@ -441,10 +423,12 @@ static struct attribute_group hl_dev_attr_group = { }; static struct attribute_group hl_dev_clks_attr_group; +static struct attribute_group hl_dev_vrm_attr_group; static const struct attribute_group *hl_dev_attr_groups[] = { &hl_dev_attr_group, &hl_dev_clks_attr_group, + &hl_dev_vrm_attr_group, NULL, }; @@ -463,13 +447,23 @@ static const struct attribute_group *hl_dev_inference_attr_groups[] = { NULL, }; +void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp) +{ + dev_clk_attr_grp->attrs = hl_dev_clk_attrs; +} + +void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp) +{ + dev_vrm_attr_grp->attrs = hl_dev_vrm_attrs; +} + int hl_sysfs_init(struct hl_device *hdev) { int rc; hdev->max_power = hdev->asic_prop.max_power_default; - hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group); + hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group, &hl_dev_vrm_attr_group); rc = device_add_groups(hdev->dev, hl_dev_attr_groups); if (rc) { diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 013c6da2e3ca..21c2b678ff72 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -458,7 +458,6 @@ struct ecc_info_extract_params { u64 block_address; u32 num_memories; bool derr; - bool disable_clock_gating; }; static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, @@ -614,6 +613,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; + /* TODO: will be duplicated until implementing per-MMU props */ + prop->pmmu.hop_table_size = prop->mmu_hop_table_size; + prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -667,6 +669,10 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + + prop->set_max_power_on_device_init = true; + return 0; } @@ -1636,7 +1642,7 @@ static int gaudi_late_init(struct hl_device *hdev) */ gaudi_mmu_prepare(hdev, 1); - hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); + hl_fw_set_pll_profile(hdev); return 0; @@ -1896,7 +1902,6 @@ static int gaudi_sw_init(struct hl_device *hdev) goto free_cpu_accessible_dma_pool; spin_lock_init(&gaudi->hw_queues_lock); - mutex_init(&gaudi->clk_gate_mutex); hdev->supports_sync_stream = true; hdev->supports_coresight = true; @@ -1946,8 +1951,6 @@ static int gaudi_sw_fini(struct hl_device *hdev) dma_pool_destroy(hdev->dma_pool); - mutex_destroy(&gaudi->clk_gate_mutex); - kfree(gaudi); return 0; @@ -3738,76 +3741,8 @@ static void gaudi_tpc_stall(struct hl_device *hdev) WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); } -static void gaudi_set_clock_gating(struct hl_device *hdev) -{ - struct gaudi_device *gaudi = hdev->asic_specific; - u32 qman_offset; - bool enable; - int i; - - /* In case we are during debug session, don't enable the clock gate - * as it may interfere - */ - if (hdev->in_debug) - return; - - if (hdev->asic_prop.fw_security_enabled) - return; - - for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(gaudi_dma_assignment[i]))); - - qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; - WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmDMA0_QM_CGM_CFG + qman_offset, - enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0); - } - - for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(gaudi_dma_assignment[i]))); - - /* GC sends work to DMA engine through Upper CP in DMA5 so - * we need to not enable clock gating in that DMA - */ - if (i == GAUDI_HBM_DMA_4) - enable = 0; - - qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; - WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmDMA0_QM_CGM_CFG + qman_offset, - enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - } - - enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))); - WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))); - WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { - enable = !!(hdev->clock_gating_mask & - (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))); - - WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, - enable ? QMAN_CGM1_PWR_GATE_EN : 0); - WREG32(mmTPC0_QM_CGM_CFG + qman_offset, - enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); - - qman_offset += TPC_QMAN_OFFSET; - } - - gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE; -} - static void gaudi_disable_clock_gating(struct hl_device *hdev) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 qman_offset; int i; @@ -3832,8 +3767,6 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev) qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); } - - gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE); } static void gaudi_enable_timestamp(struct hl_device *hdev) @@ -3876,8 +3809,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_ gaudi_stop_hbm_dma_qmans(hdev); gaudi_stop_pci_dma_qmans(hdev); - hdev->asic_funcs->disable_clock_gating(hdev); - msleep(wait_timeout_ms); gaudi_pci_dma_stall(hdev); @@ -3931,7 +3862,7 @@ static int gaudi_mmu_init(struct hl_device *hdev) /* mem cache invalidation */ WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0); + hl_mmu_invalidate_cache(hdev, true, 0); WREG32(mmMMU_UP_MMU_ENABLE, 1); WREG32(mmMMU_UP_SPI_MASK, 0xF); @@ -4203,10 +4134,8 @@ static int gaudi_hw_init(struct hl_device *hdev) /* In case the clock gating was enabled in preboot we need to disable * it here before touching the MME/TPC registers. - * There is no need to take clk gating mutex because when this function - * runs, no other relevant code can run */ - hdev->asic_funcs->disable_clock_gating(hdev); + gaudi_disable_clock_gating(hdev); /* SRAM scrambler must be initialized after CPU is running from HBM */ gaudi_init_scrambler_sram(hdev); @@ -4232,8 +4161,6 @@ static int gaudi_hw_init(struct hl_device *hdev) gaudi_init_nic_qmans(hdev); - hdev->asic_funcs->set_clock_gating(hdev); - gaudi_enable_timestamp(hdev); /* MSI must be enabled before CPU queues and NIC are initialized */ @@ -4400,14 +4327,11 @@ skip_reset: status); if (gaudi) { - gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | - HW_CAP_HBM | HW_CAP_PCI_DMA | - HW_CAP_MME | HW_CAP_TPC_MASK | - HW_CAP_HBM_DMA | HW_CAP_PLL | - HW_CAP_NIC_MASK | HW_CAP_MMU | - HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER | - HW_CAP_CLK_GATE); + gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | + HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | + HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | + HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | + HW_CAP_HBM_SCRAMBLER); memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); @@ -4884,7 +4808,6 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; int rc = 0; u64 val = 0; @@ -4919,17 +4842,11 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) return rc; } - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); - /* Scrub HBM using all DMA channels in parallel */ rc = gaudi_hbm_scrubbing(hdev); if (rc) dev_err(hdev->dev, "Failed to clear HBM in mem scrub all\n"); - - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); } return rc; @@ -6188,7 +6105,6 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, bool user_address, u32 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6196,38 +6112,31 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { + *val = RREG32(addr - CFG_BASE); - dev_err_ratelimited(hdev->dev, - "Can't read register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - *val = RREG32(addr - CFG_BASE); - } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { + + *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - *val = readl(hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { rc = -EFAULT; } @@ -6239,7 +6148,6 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, bool user_address, u32 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6247,38 +6155,31 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { + WREG32(addr - CFG_BASE, val); - dev_err_ratelimited(hdev->dev, - "Can't write register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - WREG32(addr - CFG_BASE, val); - } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { + + writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - } else if ((addr >= SRAM_BASE_ADDR) && - (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - writel(val, hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - writel(val, hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { rc = -EFAULT; } @@ -6290,7 +6191,6 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, bool user_address, u64 *val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6298,42 +6198,35 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't read register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - u32 val_l = RREG32(addr - CFG_BASE); - u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); + u32 val_l = RREG32(addr - CFG_BASE); + u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); - *val = (((u64) val_h) << 32) | val_l; - } + *val = (((u64) val_h) << 32) | val_l; } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { + + *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); + + } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - *val = readq(hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { rc = -EFAULT; } @@ -6345,7 +6238,6 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, bool user_address, u64 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct gaudi_device *gaudi = hdev->asic_specific; u64 hbm_bar_addr, host_phys_end; int rc = 0; @@ -6353,41 +6245,33 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && - (hdev->clock_gating_mask & - GAUDI_CLK_GATE_DEBUGFS_MASK)) { - - dev_err_ratelimited(hdev->dev, - "Can't write register - clock gating is enabled!\n"); - rc = -EFAULT; - } else { - WREG32(addr - CFG_BASE, lower_32_bits(val)); - WREG32(addr + sizeof(u32) - CFG_BASE, - upper_32_bits(val)); - } + WREG32(addr - CFG_BASE, lower_32_bits(val)); + WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + - (addr - SRAM_BASE_ADDR)); - } else if (addr <= - DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - u64 bar_base_addr = DRAM_PHYS_BASE + - (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { + + writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); + + } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { + + u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - if (hbm_bar_addr != U64_MAX) { - writeq(val, hdev->pcie_bar[HBM_BAR_ID] + - (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, - hbm_bar_addr); + if (hbm_bar_addr != U64_MAX) { + writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); } + if (hbm_bar_addr == U64_MAX) rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && user_address && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { rc = -EFAULT; } @@ -6446,7 +6330,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) { u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; - struct gaudi_device *gaudi = hdev->asic_specific; u32 qm_glbl_sts0, qm_cgm_sts; u64 dma_offset, qm_offset; dma_addr_t dma_addr; @@ -6462,10 +6345,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, if (!kernel_addr) return -ENOMEM; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->asic_funcs->hw_queues_lock(hdev); dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; @@ -6550,10 +6429,6 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, out: hdev->asic_funcs->hw_queues_unlock(hdev); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); @@ -6601,10 +6476,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) return; } - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); @@ -6882,10 +6753,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); } static int gaudi_send_job_on_qman0(struct hl_device *hdev, @@ -7266,10 +7133,8 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, struct ecc_info_extract_params *params, u64 *ecc_address, u64 *ecc_syndrom, u8 *memory_wrapper_idx) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 i, num_mem_regs, reg, err_bit; u64 err_addr, err_word = 0; - int rc = 0; num_mem_regs = params->num_memories / 32 + ((params->num_memories % 32) ? 1 : 0); @@ -7282,11 +7147,6 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, else err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; - if (params->disable_clock_gating) { - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); - } - /* Set invalid wrapper index */ *memory_wrapper_idx = 0xFF; @@ -7303,8 +7163,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, if (*memory_wrapper_idx == 0xFF) { dev_err(hdev->dev, "ECC error information cannot be found\n"); - rc = -EINVAL; - goto enable_clk_gate; + return -EINVAL; } WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, @@ -7324,14 +7183,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev, WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); -enable_clk_gate: - if (params->disable_clock_gating) { - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - } - - return rc; + return 0; } /* @@ -7589,7 +7441,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; params.num_memories = 90; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: @@ -7598,7 +7449,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; params.num_memories = 90; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_ACC_SERR: @@ -7609,7 +7459,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; params.num_memories = 128; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_ACC_DERR: @@ -7620,7 +7469,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; params.num_memories = 128; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_SBAB_SERR: @@ -7632,7 +7480,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; params.num_memories = 33; params.derr = false; - params.disable_clock_gating = true; extract_info_from_fw = false; break; case GAUDI_EVENT_MME0_SBAB_DERR: @@ -7644,7 +7491,6 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; params.num_memories = 33; params.derr = true; - params.disable_clock_gating = true; extract_info_from_fw = false; break; default: @@ -7819,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev, fw_alive->thread_id, fw_alive->uptime_seconds); } +static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, + void *data) +{ + char desc[64] = "", *type; + struct eq_nic_sei_event *eq_nic_sei = data; + u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; + + switch (eq_nic_sei->axi_error_cause) { + case RXB: + type = "RXB"; + break; + case RXE: + type = "RXE"; + break; + case TXS: + type = "TXS"; + break; + case TXE: + type = "TXE"; + break; + case QPC_RESP: + type = "QPC_RESP"; + break; + case NON_AXI_ERR: + type = "NON_AXI_ERR"; + break; + case TMR: + type = "TMR"; + break; + default: + dev_err(hdev->dev, "unknown NIC AXI cause %d\n", + eq_nic_sei->axi_error_cause); + type = "N/A"; + break; + } + + snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, + eq_nic_sei->id); + dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", + event_type, desc); +} + static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) { /* GAUDI doesn't support any reset except hard-reset */ @@ -7966,19 +7854,9 @@ static int gaudi_hbm_event_to_dev(u16 hbm_event_type) static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, char *interrupt_name) { - struct gaudi_device *gaudi = hdev->asic_specific; u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; bool soft_reset_required = false; - /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock - * gating, and thus cannot be done in CPU-CP and should be done instead - * by the driver. - */ - - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; @@ -7996,10 +7874,6 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, /* Clear interrupts */ WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - return soft_reset_required; } @@ -8066,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi_device *gaudi = hdev->asic_specific; + u64 data = le64_to_cpu(eq_entry->data[0]); u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u32 fw_fatal_err_flag = 0; u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) @@ -8102,6 +7977,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_PSOC_MEM_DERR: case GAUDI_EVENT_PSOC_CORESIGHT_DERR: case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: + case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: case GAUDI_EVENT_MMU_DERR: @@ -8202,6 +8078,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_PSOC_MEM_SERR: case GAUDI_EVENT_PSOC_CORESIGHT_SERR: case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: + case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: fallthrough; @@ -8263,6 +8140,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: + gaudi_print_nic_axi_irq_info(hdev, event_type, &data); + hl_fw_unmask_irq(hdev, event_type); + break; + case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_sm_sei_info(hdev, event_type, @@ -8274,6 +8156,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: + break; + case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: gaudi_print_clk_change_info(hdev, event_type); hl_fw_unmask_irq(hdev, event_type); @@ -8314,7 +8199,7 @@ reset_device: | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag); else if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag); + hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag); else hl_fw_unmask_irq(hdev, event_type); } @@ -8461,10 +8346,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u64 offset; int i, dma_id, port; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - if (s) seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" @@ -8585,10 +8466,6 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (s) seq_puts(s, "\n"); - hdev->asic_funcs->set_clock_gating(hdev); - - mutex_unlock(&gaudi->clk_gate_mutex); - return is_idle; } @@ -8628,10 +8505,8 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, * this function should be used only during initialization and/or after reset, * when there are no active users. */ -static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, - u32 tpc_id) +static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) { - struct gaudi_device *gaudi = hdev->asic_specific; u64 kernel_timeout; u32 status, offset; int rc; @@ -8643,10 +8518,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, else kernel_timeout = HL_DEVICE_TIMEOUT_USEC; - mutex_lock(&gaudi->clk_gate_mutex); - - hdev->asic_funcs->disable_clock_gating(hdev); - WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, lower_32_bits(tpc_kernel)); WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, @@ -8686,8 +8557,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d icache prefetch\n", tpc_id); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -8711,8 +8580,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d vector pipe\n", tpc_id); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -8724,9 +8591,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 1000, kernel_timeout); - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - if (rc) { dev_err(hdev->dev, "Timeout while waiting for TPC%d kernel to execute\n", @@ -8791,7 +8655,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); + hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); if (rc) @@ -8826,7 +8690,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); mutex_unlock(&ctx->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); @@ -9204,14 +9068,7 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data) static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev) { - if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == - HL_POWER9_HOST_MAGIC) { - hdev->power9_64bit_dma_enable = 1; - hdev->dma_mask = 64; - } else { - hdev->power9_64bit_dma_enable = 0; - hdev->dma_mask = 48; - } + hdev->dma_mask = 48; } static u64 gaudi_get_device_time(struct hl_device *hdev) @@ -9293,23 +9150,15 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map) { struct hl_state_dump_specs *sds = &hdev->state_dump_specs; - struct gaudi_device *gaudi = hdev->asic_specific; int i, j, rc; u32 reg_value; /* Iterate over TPC engines */ for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) { - /* TPC registered must be accessed with clock gating disabled */ - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] + sds->props[SP_NEXT_TPC] * i); - /* We can reenable clock_gating */ - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - rc = gaudi_add_sync_to_engine_map_entry(map, reg_value, ENGINE_TPC, i); if (rc) @@ -9319,20 +9168,11 @@ static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, /* Iterate over MME engines */ for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) { for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) { - /* MME registered must be accessed with clock gating - * disabled - */ - mutex_lock(&gaudi->clk_gate_mutex); - hdev->asic_funcs->disable_clock_gating(hdev); reg_value = RREG32(sds->props[SP_MME_CFG_SO] + sds->props[SP_NEXT_MME] * i + j * sizeof(u32)); - /* We can reenable clock_gating */ - hdev->asic_funcs->set_clock_gating(hdev); - mutex_unlock(&gaudi->clk_gate_mutex); - rc = gaudi_add_sync_to_engine_map_entry( map, reg_value, ENGINE_MME, i * sds->props[SP_SUB_MME_ENG_NUM] + j); @@ -9537,6 +9377,29 @@ static u32 *gaudi_get_stream_master_qid_arr(void) return gaudi_stream_master; } +static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + struct cpucp_info *cpucp_info; + + cpucp_info = &hdev->asic_prop.cpucp_info; + + return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); +} + +static DEVICE_ATTR_RO(infineon_ver); + +static struct attribute *gaudi_vrm_dev_attrs[] = { + &dev_attr_infineon_ver.attr, +}; + +static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp) +{ + hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp); + dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -9574,17 +9437,14 @@ static const struct hl_asic_funcs gaudi_funcs = { .debugfs_read64 = gaudi_debugfs_read64, .debugfs_write64 = gaudi_debugfs_write64, .debugfs_read_dma = gaudi_debugfs_read_dma, - .add_device_attr = hl_add_device_attr, + .add_device_attr = gaudi_add_device_attr, .handle_eqe = gaudi_handle_eqe, - .set_pll_profile = hl_set_pll_profile, .get_events_stat = gaudi_get_events_stat, .read_pte = gaudi_read_pte, .write_pte = gaudi_write_pte, .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, .send_heartbeat = gaudi_send_heartbeat, - .set_clock_gating = gaudi_set_clock_gating, - .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, .non_hard_reset_late_init = gaudi_non_hard_reset_late_init, @@ -9600,7 +9460,6 @@ static const struct hl_asic_funcs gaudi_funcs = { .halt_coresight = gaudi_halt_coresight, .ctx_init = gaudi_ctx_init, .ctx_fini = gaudi_ctx_fini, - .get_clk_rate = hl_get_clk_rate, .get_queue_id_for_cq = gaudi_get_queue_id_for_cq, .load_firmware_to_device = gaudi_load_firmware_to_device, .load_boot_fit_to_device = gaudi_load_boot_fit_to_device, @@ -9626,7 +9485,8 @@ static const struct hl_asic_funcs gaudi_funcs = { .state_dump_init = gaudi_state_dump_init, .get_sob_addr = gaudi_get_sob_addr, .set_pci_memory_regions = gaudi_set_pci_memory_regions, - .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr + .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr, + .is_valid_dram_page_size = NULL }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 8ac16a9b7d15..54de7c599072 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2019-2020 HabanaLabs, Ltd. + * Copyright 2019-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -177,7 +177,6 @@ #define HW_CAP_MSI BIT(6) #define HW_CAP_CPU_Q BIT(7) #define HW_CAP_HBM_DMA BIT(8) -#define HW_CAP_CLK_GATE BIT(9) #define HW_CAP_SRAM_SCRAMBLER BIT(10) #define HW_CAP_HBM_SCRAMBLER BIT(11) @@ -313,8 +312,6 @@ struct gaudi_internal_qman_info { * struct gaudi_device - ASIC specific manage structure. * @cpucp_info_get: get information on device from CPU-CP * @hw_queues_lock: protects the H/W queues from concurrent access. - * @clk_gate_mutex: protects code areas that require clock gating to be disabled - * temporarily * @internal_qmans: Internal QMANs information. The array size is larger than * the actual number of internal queues because they are not in * consecutive order. @@ -337,7 +334,6 @@ struct gaudi_device { /* TODO: remove hw_queues_lock after moving to scheduler code */ spinlock_t hw_queues_lock; - struct mutex clk_gate_mutex; struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE]; @@ -355,8 +351,6 @@ struct gaudi_device { void gaudi_init_security(struct hl_device *hdev); void gaudi_ack_protection_bits_errors(struct hl_device *hdev); -void gaudi_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data); void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx); void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index fbcc7bbf44b3..ec9358bcbf0b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -430,6 +430,9 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->dmmu.page_size = PAGE_SIZE_2MB; prop->dmmu.num_hops = MMU_ARCH_5_HOPS; prop->dmmu.last_mask = LAST_MASK; + /* TODO: will be duplicated until implementing per-MMU props */ + prop->dmmu.hop_table_size = prop->mmu_hop_table_size; + prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); @@ -438,6 +441,9 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->pmmu.page_size = PAGE_SIZE_4KB; prop->pmmu.num_hops = MMU_ARCH_5_HOPS; prop->pmmu.last_mask = LAST_MASK; + /* TODO: will be duplicated until implementing per-MMU props */ + prop->pmmu.hop_table_size = prop->mmu_hop_table_size; + prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); @@ -477,6 +483,10 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + + prop->set_max_power_on_device_init = true; + return 0; } @@ -893,7 +903,7 @@ int goya_late_init(struct hl_device *hdev) goya->pm_mng_profile = PM_AUTO; - hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); + goya_set_pll_profile(hdev, PLL_LOW); schedule_delayed_work(&goya->goya_work->work_freq, usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); @@ -2700,8 +2710,7 @@ int goya_mmu_init(struct hl_device *hdev) WREG32_AND(mmSTLB_STLB_FEATURE_EN, (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK)); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, - MMU_OP_USERPTR | MMU_OP_PHYS_PACK); + hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK); WREG32(mmMMU_MMU_ENABLE, 1); WREG32(mmMMU_SPI_MASK, 0xF); @@ -5341,7 +5350,7 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev, /* Treat as invalidate all because there is no range invalidation * in Goya */ - return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); + return hl_mmu_invalidate_cache(hdev, is_hard, flags); } int goya_send_heartbeat(struct hl_device *hdev) @@ -5391,16 +5400,6 @@ int goya_cpucp_info_get(struct hl_device *hdev) return 0; } -static void goya_set_clock_gating(struct hl_device *hdev) -{ - /* clock gating not supported in Goya */ -} - -static void goya_disable_clock_gating(struct hl_device *hdev) -{ - /* clock gating not supported in Goya */ -} - static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, struct seq_file *s) { @@ -5564,16 +5563,7 @@ static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group) static void goya_set_dma_mask_from_fw(struct hl_device *hdev) { - if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == - HL_POWER9_HOST_MAGIC) { - dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n"); - hdev->power9_64bit_dma_enable = 1; - hdev->dma_mask = 64; - } else { - dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n"); - hdev->power9_64bit_dma_enable = 0; - hdev->dma_mask = 48; - } + hdev->dma_mask = 48; } u64 goya_get_device_time(struct hl_device *hdev) @@ -5727,15 +5717,12 @@ static const struct hl_asic_funcs goya_funcs = { .debugfs_read_dma = goya_debugfs_read_dma, .add_device_attr = goya_add_device_attr, .handle_eqe = goya_handle_eqe, - .set_pll_profile = goya_set_pll_profile, .get_events_stat = goya_get_events_stat, .read_pte = goya_read_pte, .write_pte = goya_write_pte, .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, .send_heartbeat = goya_send_heartbeat, - .set_clock_gating = goya_set_clock_gating, - .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, .non_hard_reset_late_init = goya_non_hard_reset_late_init, @@ -5751,7 +5738,6 @@ static const struct hl_asic_funcs goya_funcs = { .halt_coresight = goya_halt_coresight, .ctx_init = goya_ctx_init, .ctx_fini = goya_ctx_fini, - .get_clk_rate = hl_get_clk_rate, .get_queue_id_for_cq = goya_get_queue_id_for_cq, .load_firmware_to_device = goya_load_firmware_to_device, .load_boot_fit_to_device = goya_load_boot_fit_to_device, @@ -5778,6 +5764,7 @@ static const struct hl_asic_funcs goya_funcs = { .get_sob_addr = &goya_get_sob_addr, .set_pci_memory_regions = goya_set_pci_memory_regions, .get_stream_master_qid_arr = goya_get_stream_master_qid_arr, + .is_valid_dram_page_size = NULL }; /* diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 3740fd25bf84..647f57402616 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -217,8 +217,8 @@ u64 goya_get_max_power(struct hl_device *hdev); void goya_set_max_power(struct hl_device *hdev, u64 value); void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); -void goya_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); +void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp); int goya_cpucp_info_get(struct hl_device *hdev); int goya_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data); void goya_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx); diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index 76b47749affe..6580fc6a486a 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2021 HabanaLabs, Ltd. + * Copyright 2016-2022 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -11,21 +11,24 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) { struct goya_device *goya = hdev->asic_specific; + if (!hdev->pdev) + return; + switch (freq) { case PLL_HIGH: - hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll); - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll); - hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll); break; case PLL_LOW: - hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW); break; case PLL_LAST: - hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk); - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk); - hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk); break; default: dev_err(hdev->dev, "unknown frequency setting\n"); @@ -41,7 +44,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false); + value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, false); if (value < 0) return value; @@ -74,7 +77,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, HL_GOYA_MME_PLL, value); + hl_fw_set_frequency(hdev, HL_GOYA_MME_PLL, value); goya->mme_clk = value; fail: @@ -90,7 +93,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false); + value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, false); if (value < 0) return value; @@ -123,7 +126,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value); + hl_fw_set_frequency(hdev, HL_GOYA_TPC_PLL, value); goya->tpc_clk = value; fail: @@ -139,7 +142,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false); + value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, false); if (value < 0) return value; @@ -172,7 +175,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, HL_GOYA_IC_PLL, value); + hl_fw_set_frequency(hdev, HL_GOYA_IC_PLL, value); goya->ic_clk = value; fail: @@ -188,7 +191,7 @@ static ssize_t mme_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true); + value = hl_fw_get_frequency(hdev, HL_GOYA_MME_PLL, true); if (value < 0) return value; @@ -205,7 +208,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true); + value = hl_fw_get_frequency(hdev, HL_GOYA_TPC_PLL, true); if (value < 0) return value; @@ -222,7 +225,7 @@ static ssize_t ic_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true); + value = hl_fw_get_frequency(hdev, HL_GOYA_IC_PLL, true); if (value < 0) return value; @@ -347,7 +350,7 @@ static DEVICE_ATTR_RW(pm_mng_profile); static DEVICE_ATTR_RW(tpc_clk); static DEVICE_ATTR_RO(tpc_clk_curr); -static struct attribute *goya_dev_attrs[] = { +static struct attribute *goya_clk_dev_attrs[] = { &dev_attr_high_pll.attr, &dev_attr_ic_clk.attr, &dev_attr_ic_clk_curr.attr, @@ -356,11 +359,27 @@ static struct attribute *goya_dev_attrs[] = { &dev_attr_pm_mng_profile.attr, &dev_attr_tpc_clk.attr, &dev_attr_tpc_clk_curr.attr, - NULL, }; -void goya_add_device_attr(struct hl_device *hdev, - struct attribute_group *dev_attr_grp) +static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + struct cpucp_info *cpucp_info; + + cpucp_info = &hdev->asic_prop.cpucp_info; + + return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); +} + +static DEVICE_ATTR_RO(infineon_ver); + +static struct attribute *goya_vrm_dev_attrs[] = { + &dev_attr_infineon_ver.attr, +}; + +void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, + struct attribute_group *dev_vrm_attr_grp) { - dev_attr_grp->attrs = goya_dev_attrs; + dev_clk_attr_grp->attrs = goya_clk_dev_attrs; + dev_vrm_attr_grp->attrs = goya_vrm_dev_attrs; } diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index f9c4acc9bf5a..65668dac6a5f 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -780,6 +780,7 @@ struct cpucp_security_info { * (0 = functional 1 = binned) * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance * (0 = functional 1 = binned) + * @fw_os_version: Firmware OS Version */ struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; @@ -807,6 +808,7 @@ struct cpucp_info { __le32 reserved6; __u8 pll_map[PLL_MAP_LEN]; __le64 mme_binning_mask; + __u8 fw_os_version[VERSION_MAX_LEN]; }; struct cpucp_mac_addr { diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 135e21d6edc9..15f91ae9de6e 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -33,6 +33,7 @@ enum cpu_boot_err { CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, CPU_BOOT_ERR_BINNING_FAIL = 19, CPU_BOOT_ERR_TPM_FAIL = 20, + CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, CPU_BOOT_ERR_ENABLED = 31, CPU_BOOT_ERR_SCND_EN = 63, CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ @@ -111,6 +112,9 @@ enum cpu_boot_err { * * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. * + * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature + * sensor. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -134,6 +138,7 @@ enum cpu_boot_err { #define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) #define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) +#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index d966bd4dfea6..c07ed4ed304c 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -311,6 +311,16 @@ enum gaudi_async_event_id { GAUDI_EVENT_FW_ALIVE_S = 645, GAUDI_EVENT_DEV_RESET_REQ = 646, GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647, + GAUDI_EVENT_STATUS_NIC0_ENG0 = 648, + GAUDI_EVENT_STATUS_NIC0_ENG1 = 649, + GAUDI_EVENT_STATUS_NIC1_ENG0 = 650, + GAUDI_EVENT_STATUS_NIC1_ENG1 = 651, + GAUDI_EVENT_STATUS_NIC2_ENG0 = 652, + GAUDI_EVENT_STATUS_NIC2_ENG1 = 653, + GAUDI_EVENT_STATUS_NIC3_ENG0 = 654, + GAUDI_EVENT_STATUS_NIC3_ENG1 = 655, + GAUDI_EVENT_STATUS_NIC4_ENG0 = 656, + GAUDI_EVENT_STATUS_NIC4_ENG1 = 657, GAUDI_EVENT_FIX_POWER_ENV_S = 658, GAUDI_EVENT_FIX_POWER_ENV_E = 659, GAUDI_EVENT_FIX_THERMAL_ENV_S = 660, |