diff options
Diffstat (limited to 'drivers/accel/ivpu')
27 files changed, 8477 insertions, 0 deletions
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig new file mode 100644 index 000000000000..9bdf168bf1d0 --- /dev/null +++ b/drivers/accel/ivpu/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_ACCEL_IVPU + tristate "Intel VPU for Meteor Lake and newer" + depends on DRM_ACCEL + depends on X86_64 && !UML + depends on PCI && PCI_MSI + select FW_LOADER + select SHMEM + help + Choose this option if you have a system that has an 14th generation Intel CPU + or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated + inference accelerator for Computer Vision and Deep Learning applications. + + If "M" is selected, the module will be called intel_vpu. diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile new file mode 100644 index 000000000000..80f1fb3548ae --- /dev/null +++ b/drivers/accel/ivpu/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2023 Intel Corporation + +intel_vpu-y := \ + ivpu_drv.o \ + ivpu_fw.o \ + ivpu_gem.o \ + ivpu_hw_mtl.o \ + ivpu_ipc.o \ + ivpu_job.o \ + ivpu_jsm_msg.o \ + ivpu_mmu.o \ + ivpu_mmu_context.o \ + ivpu_pm.o + +obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
\ No newline at end of file diff --git a/drivers/accel/ivpu/TODO b/drivers/accel/ivpu/TODO new file mode 100644 index 000000000000..9077217ae10f --- /dev/null +++ b/drivers/accel/ivpu/TODO @@ -0,0 +1,11 @@ +- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler() +- Implement support for BLOB IDs +- Add debugfs support to improve debugging and testing +- Add tracing events for performance debugging +- Implement HW based scheduling support +- Use syncobjs for submit/sync +- Refactor IPC protocol to improve message latency +- Implement BO cache and MADVISE IOCTL +- Add support for user allocated buffers using prime import and dma-buf heaps +- Refactor struct ivpu_bo to use struct drm_gem_shmem_object +- Add driver/device documentation diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c new file mode 100644 index 000000000000..231f29bb5025 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_prime.h> + +#include "vpu_boot_api.h" +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_ipc.h" +#include "ivpu_job.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" +#include "ivpu_pm.h" + +#ifndef DRIVER_VERSION_STR +#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ + __stringify(DRM_IVPU_DRIVER_MINOR) "." +#endif + +static const struct drm_driver driver; + +static struct lock_class_key submitted_jobs_xa_lock_class_key; + +int ivpu_dbg_mask; +module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); +MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); + +int ivpu_test_mode; +module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644); +MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw"); + +u8 ivpu_pll_min_ratio; +module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); +MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency"); + +u8 ivpu_pll_max_ratio = U8_MAX; +module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644); +MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency"); + +struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) +{ + struct ivpu_device *vdev = file_priv->vdev; + + kref_get(&file_priv->ref); + + ivpu_dbg(vdev, KREF, "file_priv get: ctx %u refcount %u\n", + file_priv->ctx.id, kref_read(&file_priv->ref)); + + return file_priv; +} + +struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id) +{ + struct ivpu_file_priv *file_priv; + + xa_lock_irq(&vdev->context_xa); + file_priv = xa_load(&vdev->context_xa, id); + /* file_priv may still be in context_xa during file_priv_release() */ + if (file_priv && !kref_get_unless_zero(&file_priv->ref)) + file_priv = NULL; + xa_unlock_irq(&vdev->context_xa); + + if (file_priv) + ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n", + file_priv->ctx.id, kref_read(&file_priv->ref)); + + return file_priv; +} + +static void file_priv_release(struct kref *ref) +{ + struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); + struct ivpu_device *vdev = file_priv->vdev; + + ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); + + ivpu_cmdq_release_all(file_priv); + ivpu_bo_remove_all_bos_from_context(&file_priv->ctx); + ivpu_jsm_context_release(vdev, file_priv->ctx.id); + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); + drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); + mutex_destroy(&file_priv->lock); + kfree(file_priv); +} + +void ivpu_file_priv_put(struct ivpu_file_priv **link) +{ + struct ivpu_file_priv *file_priv = *link; + struct ivpu_device *vdev = file_priv->vdev; + + drm_WARN_ON(&vdev->drm, !file_priv); + + ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", + file_priv->ctx.id, kref_read(&file_priv->ref)); + + *link = NULL; + kref_put(&file_priv->ref, file_priv_release); +} + +static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + struct drm_ivpu_param *args = data; + int ret = 0; + + switch (args->param) { + case DRM_IVPU_PARAM_DEVICE_ID: + args->value = pdev->device; + break; + case DRM_IVPU_PARAM_DEVICE_REVISION: + args->value = pdev->revision; + break; + case DRM_IVPU_PARAM_PLATFORM_TYPE: + args->value = vdev->platform; + break; + case DRM_IVPU_PARAM_CORE_CLOCK_RATE: + args->value = ivpu_hw_reg_pll_freq_get(vdev); + break; + case DRM_IVPU_PARAM_NUM_CONTEXTS: + args->value = ivpu_get_context_count(vdev); + break; + case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: + args->value = vdev->hw->ranges.user_low.start; + break; + case DRM_IVPU_PARAM_CONTEXT_PRIORITY: + args->value = file_priv->priority; + break; + case DRM_IVPU_PARAM_CONTEXT_ID: + args->value = file_priv->ctx.id; + break; + case DRM_IVPU_PARAM_FW_API_VERSION: + if (args->index < VPU_FW_API_VER_NUM) { + struct vpu_firmware_header *fw_hdr; + + fw_hdr = (struct vpu_firmware_header *)vdev->fw->file->data; + args->value = fw_hdr->api_version[args->index]; + } else { + ret = -EINVAL; + } + break; + case DRM_IVPU_PARAM_ENGINE_HEARTBEAT: + ret = ivpu_jsm_get_heartbeat(vdev, args->index, &args->value); + break; + case DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID: + args->value = (u64)atomic64_inc_return(&vdev->unique_id_counter); + break; + case DRM_IVPU_PARAM_TILE_CONFIG: + args->value = vdev->hw->tile_fuse; + break; + case DRM_IVPU_PARAM_SKU: + args->value = vdev->hw->sku; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_param *args = data; + int ret = 0; + + switch (args->param) { + case DRM_IVPU_PARAM_CONTEXT_PRIORITY: + if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME) + file_priv->priority = args->value; + else + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int ivpu_open(struct drm_device *dev, struct drm_file *file) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct ivpu_file_priv *file_priv; + u32 ctx_id; + void *old; + int ret; + + ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL); + if (ret) { + ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); + return ret; + } + + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); + if (!file_priv) { + ret = -ENOMEM; + goto err_xa_erase; + } + + file_priv->vdev = vdev; + file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; + kref_init(&file_priv->ref); + mutex_init(&file_priv->lock); + + ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); + if (ret) + goto err_mutex_destroy; + + old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); + if (xa_is_err(old)) { + ret = xa_err(old); + ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret); + goto err_ctx_fini; + } + + ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n", + ctx_id, current->comm, task_pid_nr(current)); + + file->driver_priv = file_priv; + return 0; + +err_ctx_fini: + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); +err_mutex_destroy: + mutex_destroy(&file_priv->lock); + kfree(file_priv); +err_xa_erase: + xa_erase_irq(&vdev->context_xa, ctx_id); + return ret; +} + +static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = to_ivpu_device(dev); + + ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n", + file_priv->ctx.id, current->comm, task_pid_nr(current)); + + ivpu_file_priv_put(&file_priv); +} + +static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(IVPU_GET_PARAM, ivpu_get_param_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), +}; + +static int ivpu_wait_for_ready(struct ivpu_device *vdev) +{ + struct ivpu_ipc_consumer cons; + struct ivpu_ipc_hdr ipc_hdr; + unsigned long timeout; + int ret; + + if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST) + return 0; + + ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG); + + timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot); + while (1) { + ret = ivpu_ipc_irq_handler(vdev); + if (ret) + break; + ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0); + if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout)) + break; + + cond_resched(); + } + + ivpu_ipc_consumer_del(vdev, &cons); + + if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) { + ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n", + ipc_hdr.data_addr); + return -EIO; + } + + if (!ret) + ivpu_info(vdev, "VPU ready message received successfully\n"); + else + ivpu_hw_diagnose_failure(vdev); + + return ret; +} + +/** + * ivpu_boot() - Start VPU firmware + * @vdev: VPU device + * + * This function is paired with ivpu_shutdown() but it doesn't power up the + * VPU because power up has to be called very early in ivpu_probe(). + */ +int ivpu_boot(struct ivpu_device *vdev) +{ + int ret; + + /* Update boot params located at first 4KB of FW memory */ + ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr); + + ret = ivpu_hw_boot_fw(vdev); + if (ret) { + ivpu_err(vdev, "Failed to start the firmware: %d\n", ret); + return ret; + } + + ret = ivpu_wait_for_ready(vdev); + if (ret) { + ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); + return ret; + } + + ivpu_hw_irq_clear(vdev); + enable_irq(vdev->irq); + ivpu_hw_irq_enable(vdev); + ivpu_ipc_enable(vdev); + return 0; +} + +int ivpu_shutdown(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_irq_disable(vdev); + disable_irq(vdev->irq); + ivpu_ipc_disable(vdev); + ivpu_mmu_disable(vdev); + + ret = ivpu_hw_power_down(vdev); + if (ret) + ivpu_warn(vdev, "Failed to power down HW: %d\n", ret); + + return ret; +} + +static const struct file_operations ivpu_fops = { + .owner = THIS_MODULE, + DRM_ACCEL_FOPS, +}; + +static const struct drm_driver driver = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, + + .open = ivpu_open, + .postclose = ivpu_postclose, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = ivpu_gem_prime_import, + .gem_prime_mmap = drm_gem_prime_mmap, + + .ioctls = ivpu_drm_ioctls, + .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), + .fops = &ivpu_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRM_IVPU_DRIVER_MAJOR, + .minor = DRM_IVPU_DRIVER_MINOR, +}; + +static int ivpu_irq_init(struct ivpu_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate a MSI IRQ: %d\n", ret); + return ret; + } + + vdev->irq = pci_irq_vector(pdev, 0); + + ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler, + IRQF_NO_AUTOEN, DRIVER_NAME, vdev); + if (ret) + ivpu_err(vdev, "Failed to request an IRQ %d\n", ret); + + return ret; +} + +static int ivpu_pci_init(struct ivpu_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + struct resource *bar0 = &pdev->resource[0]; + struct resource *bar4 = &pdev->resource[4]; + int ret; + + ivpu_dbg(vdev, MISC, "Mapping BAR0 (RegV) %pR\n", bar0); + vdev->regv = devm_ioremap_resource(vdev->drm.dev, bar0); + if (IS_ERR(vdev->regv)) { + ivpu_err(vdev, "Failed to map bar 0: %pe\n", vdev->regv); + return PTR_ERR(vdev->regv); + } + + ivpu_dbg(vdev, MISC, "Mapping BAR4 (RegB) %pR\n", bar4); + vdev->regb = devm_ioremap_resource(vdev->drm.dev, bar4); + if (IS_ERR(vdev->regb)) { + ivpu_err(vdev, "Failed to map bar 4: %pe\n", vdev->regb); + return PTR_ERR(vdev->regb); + } + + ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(38)); + if (ret) { + ivpu_err(vdev, "Failed to set DMA mask: %d\n", ret); + return ret; + } + dma_set_max_seg_size(vdev->drm.dev, UINT_MAX); + + /* Clear any pending errors */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); + + ret = pcim_enable_device(pdev); + if (ret) { + ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int ivpu_dev_init(struct ivpu_device *vdev) +{ + int ret; + + vdev->hw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->hw), GFP_KERNEL); + if (!vdev->hw) + return -ENOMEM; + + vdev->mmu = drmm_kzalloc(&vdev->drm, sizeof(*vdev->mmu), GFP_KERNEL); + if (!vdev->mmu) + return -ENOMEM; + + vdev->fw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->fw), GFP_KERNEL); + if (!vdev->fw) + return -ENOMEM; + + vdev->ipc = drmm_kzalloc(&vdev->drm, sizeof(*vdev->ipc), GFP_KERNEL); + if (!vdev->ipc) + return -ENOMEM; + + vdev->pm = drmm_kzalloc(&vdev->drm, sizeof(*vdev->pm), GFP_KERNEL); + if (!vdev->pm) + return -ENOMEM; + + vdev->hw->ops = &ivpu_hw_mtl_ops; + vdev->platform = IVPU_PLATFORM_INVALID; + vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; + vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; + atomic64_set(&vdev->unique_id_counter, 0); + xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); + lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); + + ret = ivpu_pci_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret); + goto err_xa_destroy; + } + + ret = ivpu_irq_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret); + goto err_xa_destroy; + } + + /* Init basic HW info based on buttress registers which are accessible before power up */ + ret = ivpu_hw_info_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret); + goto err_xa_destroy; + } + + /* Power up early so the rest of init code can access VPU registers */ + ret = ivpu_hw_power_up(vdev); + if (ret) { + ivpu_err(vdev, "Failed to power up HW: %d\n", ret); + goto err_xa_destroy; + } + + ret = ivpu_mmu_global_context_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret); + goto err_power_down; + } + + ret = ivpu_mmu_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret); + goto err_mmu_gctx_fini; + } + + ret = ivpu_fw_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret); + goto err_mmu_gctx_fini; + } + + ret = ivpu_ipc_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret); + goto err_fw_fini; + } + + ret = ivpu_pm_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize PM: %d\n", ret); + goto err_ipc_fini; + } + + ret = ivpu_job_done_thread_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); + goto err_ipc_fini; + } + + ret = ivpu_fw_load(vdev); + if (ret) { + ivpu_err(vdev, "Failed to load firmware: %d\n", ret); + goto err_job_done_thread_fini; + } + + ret = ivpu_boot(vdev); + if (ret) { + ivpu_err(vdev, "Failed to boot: %d\n", ret); + goto err_job_done_thread_fini; + } + + ivpu_pm_enable(vdev); + + return 0; + +err_job_done_thread_fini: + ivpu_job_done_thread_fini(vdev); +err_ipc_fini: + ivpu_ipc_fini(vdev); +err_fw_fini: + ivpu_fw_fini(vdev); +err_mmu_gctx_fini: + ivpu_mmu_global_context_fini(vdev); +err_power_down: + ivpu_hw_power_down(vdev); +err_xa_destroy: + xa_destroy(&vdev->submitted_jobs_xa); + xa_destroy(&vdev->context_xa); + return ret; +} + +static void ivpu_dev_fini(struct ivpu_device *vdev) +{ + ivpu_pm_disable(vdev); + ivpu_shutdown(vdev); + ivpu_job_done_thread_fini(vdev); + ivpu_ipc_fini(vdev); + ivpu_fw_fini(vdev); + ivpu_mmu_global_context_fini(vdev); + + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + xa_destroy(&vdev->submitted_jobs_xa); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); + xa_destroy(&vdev->context_xa); +} + +static struct pci_device_id ivpu_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) }, + { } +}; +MODULE_DEVICE_TABLE(pci, ivpu_pci_ids); + +static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct ivpu_device *vdev; + int ret; + + vdev = devm_drm_dev_alloc(&pdev->dev, &driver, struct ivpu_device, drm); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + pci_set_drvdata(pdev, vdev); + + ret = ivpu_dev_init(vdev); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret); + return ret; + } + + ret = drm_dev_register(&vdev->drm, 0); + if (ret) { + dev_err(&pdev->dev, "Failed to register DRM device: %d\n", ret); + ivpu_dev_fini(vdev); + } + + return ret; +} + +static void ivpu_remove(struct pci_dev *pdev) +{ + struct ivpu_device *vdev = pci_get_drvdata(pdev); + + drm_dev_unregister(&vdev->drm); + ivpu_dev_fini(vdev); +} + +static const struct dev_pm_ops ivpu_drv_pci_pm = { + SET_SYSTEM_SLEEP_PM_OPS(ivpu_pm_suspend_cb, ivpu_pm_resume_cb) + SET_RUNTIME_PM_OPS(ivpu_pm_runtime_suspend_cb, ivpu_pm_runtime_resume_cb, NULL) +}; + +static const struct pci_error_handlers ivpu_drv_pci_err = { + .reset_prepare = ivpu_pm_reset_prepare_cb, + .reset_done = ivpu_pm_reset_done_cb, +}; + +static struct pci_driver ivpu_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = ivpu_pci_ids, + .probe = ivpu_probe, + .remove = ivpu_remove, + .driver = { + .pm = &ivpu_drv_pci_pm, + }, + .err_handler = &ivpu_drv_pci_err, +}; + +module_pci_driver(ivpu_pci_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); +MODULE_VERSION(DRIVER_VERSION_STR); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h new file mode 100644 index 000000000000..f47b4965db2e --- /dev/null +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_DRV_H__ +#define __IVPU_DRV_H__ + +#include <drm/drm_device.h> +#include <drm/drm_managed.h> +#include <drm/drm_mm.h> +#include <drm/drm_print.h> + +#include <linux/pci.h> +#include <linux/xarray.h> +#include <uapi/drm/ivpu_accel.h> + +#include "ivpu_mmu_context.h" + +#define DRIVER_NAME "intel_vpu" +#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)" +#define DRIVER_DATE "20230117" + +#define PCI_DEVICE_ID_MTL 0x7d1d + +#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 +#define IVPU_CONTEXT_LIMIT 64 +#define IVPU_NUM_ENGINES 2 + +#define IVPU_PLATFORM_SILICON 0 +#define IVPU_PLATFORM_SIMICS 2 +#define IVPU_PLATFORM_FPGA 3 +#define IVPU_PLATFORM_INVALID 8 + +#define IVPU_DBG_REG BIT(0) +#define IVPU_DBG_IRQ BIT(1) +#define IVPU_DBG_MMU BIT(2) +#define IVPU_DBG_FILE BIT(3) +#define IVPU_DBG_MISC BIT(4) +#define IVPU_DBG_FW_BOOT BIT(5) +#define IVPU_DBG_PM BIT(6) +#define IVPU_DBG_IPC BIT(7) +#define IVPU_DBG_BO BIT(8) +#define IVPU_DBG_JOB BIT(9) +#define IVPU_DBG_JSM BIT(10) +#define IVPU_DBG_KREF BIT(11) +#define IVPU_DBG_RPM BIT(12) + +#define ivpu_err(vdev, fmt, ...) \ + drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_err_ratelimited(vdev, fmt, ...) \ + drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_warn(vdev, fmt, ...) \ + drm_warn(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_warn_ratelimited(vdev, fmt, ...) \ + drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_info(vdev, fmt, ...) drm_info(&(vdev)->drm, fmt, ##__VA_ARGS__) + +#define ivpu_dbg(vdev, type, fmt, args...) do { \ + if (unlikely(IVPU_DBG_##type & ivpu_dbg_mask)) \ + dev_dbg((vdev)->drm.dev, "[%s] " fmt, #type, ##args); \ +} while (0) + +#define IVPU_WA(wa_name) (vdev->wa.wa_name) + +struct ivpu_wa_table { + bool punit_disabled; + bool clear_runtime_mem; +}; + +struct ivpu_hw_info; +struct ivpu_mmu_info; +struct ivpu_fw_info; +struct ivpu_ipc_info; +struct ivpu_pm_info; + +struct ivpu_device { + struct drm_device drm; + void __iomem *regb; + void __iomem *regv; + u32 platform; + u32 irq; + + struct ivpu_wa_table wa; + struct ivpu_hw_info *hw; + struct ivpu_mmu_info *mmu; + struct ivpu_fw_info *fw; + struct ivpu_ipc_info *ipc; + struct ivpu_pm_info *pm; + + struct ivpu_mmu_context gctx; + struct xarray context_xa; + struct xa_limit context_xa_limit; + + struct xarray submitted_jobs_xa; + struct task_struct *job_done_thread; + + atomic64_t unique_id_counter; + + struct { + int boot; + int jsm; + int tdr; + int reschedule_suspend; + } timeout; +}; + +/* + * file_priv has its own refcount (ref) that allows user space to close the fd + * without blocking even if VPU is still processing some jobs. + */ +struct ivpu_file_priv { + struct kref ref; + struct ivpu_device *vdev; + struct mutex lock; /* Protects cmdq */ + struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; + struct ivpu_mmu_context ctx; + u32 priority; + bool has_mmu_faults; +}; + +extern int ivpu_dbg_mask; +extern u8 ivpu_pll_min_ratio; +extern u8 ivpu_pll_max_ratio; + +#define IVPU_TEST_MODE_DISABLED 0 +#define IVPU_TEST_MODE_FW_TEST 1 +#define IVPU_TEST_MODE_NULL_HW 2 +extern int ivpu_test_mode; + +struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); +struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id); +void ivpu_file_priv_put(struct ivpu_file_priv **link); + +int ivpu_boot(struct ivpu_device *vdev); +int ivpu_shutdown(struct ivpu_device *vdev); + +static inline bool ivpu_is_mtl(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->device == PCI_DEVICE_ID_MTL; +} + +static inline u8 ivpu_revision(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->revision; +} + +static inline u16 ivpu_device_id(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->device; +} + +static inline struct ivpu_device *to_ivpu_device(struct drm_device *dev) +{ + return container_of(dev, struct ivpu_device, drm); +} + +static inline u32 ivpu_get_context_count(struct ivpu_device *vdev) +{ + struct xa_limit ctx_limit = vdev->context_xa_limit; + + return (ctx_limit.max - ctx_limit.min + 1); +} + +static inline u32 ivpu_get_platform(struct ivpu_device *vdev) +{ + WARN_ON_ONCE(vdev->platform == IVPU_PLATFORM_INVALID); + return vdev->platform; +} + +static inline bool ivpu_is_silicon(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_SILICON; +} + +static inline bool ivpu_is_simics(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_SIMICS; +} + +static inline bool ivpu_is_fpga(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA; +} + +#endif /* __IVPU_DRV_H__ */ diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c new file mode 100644 index 000000000000..f58951a0d81b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/firmware.h> +#include <linux/highmem.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> + +#include "vpu_boot_api.h" +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_ipc.h" +#include "ivpu_pm.h" + +#define FW_GLOBAL_MEM_START (2ull * SZ_1G) +#define FW_GLOBAL_MEM_END (3ull * SZ_1G) +#define FW_SHARED_MEM_SIZE SZ_256M /* Must be aligned to FW_SHARED_MEM_ALIGNMENT */ +#define FW_SHARED_MEM_ALIGNMENT SZ_128K /* VPU MTRR limitation */ +#define FW_RUNTIME_MAX_SIZE SZ_512M +#define FW_SHAVE_NN_MAX_SIZE SZ_2M +#define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START) +#define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE) +#define FW_VERSION_HEADER_SIZE SZ_4K +#define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE) + +#define WATCHDOG_MSS_REDIRECT 32 +#define WATCHDOG_NCE_REDIRECT 33 + +#define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31) + +#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \ + ivpu_fw_check_api(vdev, fw_hdr, #name, \ + VPU_##name##_API_VER_INDEX, \ + VPU_##name##_API_VER_MAJOR, \ + VPU_##name##_API_VER_MINOR, min_major) + +static char *ivpu_firmware; +module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644); +MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/.."); + +static int ivpu_fw_request(struct ivpu_device *vdev) +{ + static const char * const fw_names[] = { + "mtl_vpu.bin", + "intel/vpu/mtl_vpu_v0.0.bin" + }; + int ret = -ENOENT; + int i; + + if (ivpu_firmware) + return request_firmware(&vdev->fw->file, ivpu_firmware, vdev->drm.dev); + + for (i = 0; i < ARRAY_SIZE(fw_names); i++) { + ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i], vdev->drm.dev); + if (!ret) + return 0; + } + + ivpu_err(vdev, "Failed to request firmware: %d\n", ret); + return ret; +} + +static int +ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr, + const char *str, int index, u16 expected_major, u16 expected_minor, + u16 min_major) +{ + u16 major = (u16)(fw_hdr->api_version[index] >> 16); + u16 minor = (u16)(fw_hdr->api_version[index]); + + if (major < min_major) { + ivpu_err(vdev, "Incompatible FW %s API version: %d.%d, required %d.0 or later\n", + str, major, minor, min_major); + return -EINVAL; + } + if (major != expected_major) { + ivpu_warn(vdev, "Major FW %s API version different: %d.%d (expected %d.%d)\n", + str, major, minor, expected_major, expected_minor); + } + ivpu_dbg(vdev, FW_BOOT, "FW %s API version: %d.%d (expected %d.%d)\n", + str, major, minor, expected_major, expected_minor); + + return 0; +} + +static int ivpu_fw_parse(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + const struct vpu_firmware_header *fw_hdr = (const void *)fw->file->data; + u64 runtime_addr, image_load_addr, runtime_size, image_size; + + if (fw->file->size <= FW_FILE_IMAGE_OFFSET) { + ivpu_err(vdev, "Firmware file is too small: %zu\n", fw->file->size); + return -EINVAL; + } + + if (fw_hdr->header_version != VPU_FW_HEADER_VERSION) { + ivpu_err(vdev, "Invalid firmware header version: %u\n", fw_hdr->header_version); + return -EINVAL; + } + + runtime_addr = fw_hdr->boot_params_load_address; + runtime_size = fw_hdr->runtime_size; + image_load_addr = fw_hdr->image_load_address; + image_size = fw_hdr->image_size; + + if (runtime_addr < FW_RUNTIME_MIN_ADDR || runtime_addr > FW_RUNTIME_MAX_ADDR) { + ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx\n", runtime_addr); + return -EINVAL; + } + + if (runtime_size < fw->file->size || runtime_size > FW_RUNTIME_MAX_SIZE) { + ivpu_err(vdev, "Invalid firmware runtime size: %llu\n", runtime_size); + return -EINVAL; + } + + if (FW_FILE_IMAGE_OFFSET + image_size > fw->file->size) { + ivpu_err(vdev, "Invalid image size: %llu\n", image_size); + return -EINVAL; + } + + if (image_load_addr < runtime_addr || + image_load_addr + image_size > runtime_addr + runtime_size) { + ivpu_err(vdev, "Invalid firmware load address size: 0x%llx and size %llu\n", + image_load_addr, image_size); + return -EINVAL; + } + + if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) { + ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size); + return -EINVAL; + } + + if (fw_hdr->entry_point < image_load_addr || + fw_hdr->entry_point >= image_load_addr + image_size) { + ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point); + return -EINVAL; + } + ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", + fw_hdr->header_version, fw_hdr->image_format); + ivpu_dbg(vdev, FW_BOOT, "FW version: %s\n", (char *)fw_hdr + VPU_FW_HEADER_SIZE); + + if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3)) + return -EINVAL; + if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3)) + return -EINVAL; + + fw->runtime_addr = runtime_addr; + fw->runtime_size = runtime_size; + fw->image_load_offset = image_load_addr - runtime_addr; + fw->image_size = image_size; + fw->shave_nn_size = PAGE_ALIGN(fw_hdr->shave_nn_fw_size); + + fw->cold_boot_entry_point = fw_hdr->entry_point; + fw->entry_point = fw->cold_boot_entry_point; + + ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n", + fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size); + ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n", + fw->runtime_addr, image_load_addr, fw->entry_point); + + return 0; +} + +static void ivpu_fw_release(struct ivpu_device *vdev) +{ + release_firmware(vdev->fw->file); +} + +static int ivpu_fw_update_global_range(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + u64 start = ALIGN(fw->runtime_addr + fw->runtime_size, FW_SHARED_MEM_ALIGNMENT); + u64 size = FW_SHARED_MEM_SIZE; + + if (start + size > FW_GLOBAL_MEM_END) { + ivpu_err(vdev, "No space for shared region, start %lld, size %lld\n", start, size); + return -EINVAL; + } + + ivpu_hw_init_range(&vdev->hw->ranges.global_low, start, size); + return 0; +} + +static int ivpu_fw_mem_init(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + int ret; + + ret = ivpu_fw_update_global_range(vdev); + if (ret) + return ret; + + fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC); + if (!fw->mem) { + ivpu_err(vdev, "Failed to allocate firmware runtime memory\n"); + return -ENOMEM; + } + + if (fw->shave_nn_size) { + fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.global_high.start, + fw->shave_nn_size, DRM_IVPU_BO_UNCACHED); + if (!fw->mem_shave_nn) { + ivpu_err(vdev, "Failed to allocate shavenn buffer\n"); + ivpu_bo_free_internal(fw->mem); + return -ENOMEM; + } + } + + return 0; +} + +static void ivpu_fw_mem_fini(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + + if (fw->mem_shave_nn) { + ivpu_bo_free_internal(fw->mem_shave_nn); + fw->mem_shave_nn = NULL; + } + + ivpu_bo_free_internal(fw->mem); + fw->mem = NULL; +} + +int ivpu_fw_init(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_fw_request(vdev); + if (ret) + return ret; + + ret = ivpu_fw_parse(vdev); + if (ret) + goto err_fw_release; + + ret = ivpu_fw_mem_init(vdev); + if (ret) + goto err_fw_release; + + return 0; + +err_fw_release: + ivpu_fw_release(vdev); + return ret; +} + +void ivpu_fw_fini(struct ivpu_device *vdev) +{ + ivpu_fw_mem_fini(vdev); + ivpu_fw_release(vdev); +} + +int ivpu_fw_load(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + u64 image_end_offset = fw->image_load_offset + fw->image_size; + + memset(fw->mem->kvaddr, 0, fw->image_load_offset); + memcpy(fw->mem->kvaddr + fw->image_load_offset, + fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size); + + if (IVPU_WA(clear_runtime_mem)) { + u8 *start = fw->mem->kvaddr + image_end_offset; + u64 size = fw->mem->base.size - image_end_offset; + + memset(start, 0, size); + } + + wmb(); /* Flush WC buffers after writing fw->mem */ + + return 0; +} + +static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) +{ + ivpu_dbg(vdev, FW_BOOT, "boot_params.magic = 0x%x\n", + boot_params->magic); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_id = 0x%x\n", + boot_params->vpu_id); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_count = 0x%x\n", + boot_params->vpu_count); + ivpu_dbg(vdev, FW_BOOT, "boot_params.frequency = %u\n", + boot_params->frequency); + ivpu_dbg(vdev, FW_BOOT, "boot_params.perf_clk_frequency = %u\n", + boot_params->perf_clk_frequency); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_start = 0x%llx\n", + boot_params->ipc_header_area_start); + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_size = 0x%x\n", + boot_params->ipc_header_area_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_base = 0x%llx\n", + boot_params->shared_region_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_size = 0x%x\n", + boot_params->shared_region_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_start = 0x%llx\n", + boot_params->ipc_payload_area_start); + ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_size = 0x%x\n", + boot_params->ipc_payload_area_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_base = 0x%llx\n", + boot_params->global_aliased_pio_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_size = 0x%x\n", + boot_params->global_aliased_pio_size); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.autoconfig = 0x%x\n", + boot_params->autoconfig); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 0x%x\n", + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use); + ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = 0x%x\n", + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_base = 0x%llx\n", + boot_params->global_memory_allocator_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_size = 0x%x\n", + boot_params->global_memory_allocator_size); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.shave_nn_fw_base = 0x%llx\n", + boot_params->shave_nn_fw_base); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_mss = 0x%x\n", + boot_params->watchdog_irq_mss); + ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_nce = 0x%x\n", + boot_params->watchdog_irq_nce); + ivpu_dbg(vdev, FW_BOOT, "boot_params.host_to_vpu_irq = 0x%x\n", + boot_params->host_to_vpu_irq); + ivpu_dbg(vdev, FW_BOOT, "boot_params.job_done_irq = 0x%x\n", + boot_params->job_done_irq); + + ivpu_dbg(vdev, FW_BOOT, "boot_params.host_version_id = 0x%x\n", + boot_params->host_version_id); + ivpu_dbg(vdev, FW_BOOT, "boot_params.si_stepping = 0x%x\n", + boot_params->si_stepping); + ivpu_dbg(vdev, FW_BOOT, "boot_params.device_id = 0x%llx\n", + boot_params->device_id); + ivpu_dbg(vdev, FW_BOOT, "boot_params.feature_exclusion = 0x%llx\n", + boot_params->feature_exclusion); + ivpu_dbg(vdev, FW_BOOT, "boot_params.sku = 0x%llx\n", + boot_params->sku); + ivpu_dbg(vdev, FW_BOOT, "boot_params.min_freq_pll_ratio = 0x%x\n", + boot_params->min_freq_pll_ratio); + ivpu_dbg(vdev, FW_BOOT, "boot_params.pn_freq_pll_ratio = 0x%x\n", + boot_params->pn_freq_pll_ratio); + ivpu_dbg(vdev, FW_BOOT, "boot_params.max_freq_pll_ratio = 0x%x\n", + boot_params->max_freq_pll_ratio); + ivpu_dbg(vdev, FW_BOOT, "boot_params.default_trace_level = 0x%x\n", + boot_params->default_trace_level); + ivpu_dbg(vdev, FW_BOOT, "boot_params.tracing_buff_message_format_mask = 0x%llx\n", + boot_params->tracing_buff_message_format_mask); + ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_destination_mask = 0x%x\n", + boot_params->trace_destination_mask); + ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_hw_component_mask = 0x%llx\n", + boot_params->trace_hw_component_mask); + ivpu_dbg(vdev, FW_BOOT, "boot_params.boot_type = 0x%x\n", + boot_params->boot_type); + ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_base = 0x%llx\n", + boot_params->punit_telemetry_sram_base); + ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_size = 0x%llx\n", + boot_params->punit_telemetry_sram_size); + ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n", + boot_params->vpu_telemetry_enable); +} + +void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) +{ + struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx; + + /* In case of warm boot we only have to reset the entrypoint addr */ + if (!ivpu_fw_is_cold_boot(vdev)) { + boot_params->save_restore_ret_address = 0; + vdev->pm->is_warmboot = true; + return; + } + + vdev->pm->is_warmboot = false; + + boot_params->magic = VPU_BOOT_PARAMS_MAGIC; + boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; + boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev); + + /* + * Uncached region of VPU address space, covers IPC buffers, job queues + * and log buffers, programmable to L2$ Uncached by VPU MTRR + */ + boot_params->shared_region_base = vdev->hw->ranges.global_low.start; + boot_params->shared_region_size = vdev->hw->ranges.global_low.end - + vdev->hw->ranges.global_low.start; + + boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr; + boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2; + + boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2; + boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2; + + boot_params->global_aliased_pio_base = + vdev->hw->ranges.global_aliased_pio.start; + boot_params->global_aliased_pio_size = + ivpu_hw_range_size(&vdev->hw->ranges.global_aliased_pio); + + /* Allow configuration for L2C_PAGE_TABLE with boot param value */ + boot_params->autoconfig = 1; + + /* Enable L2 cache for first 2GB of high memory */ + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 1; + boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = + ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.global_high.start); + + if (vdev->fw->mem_shave_nn) + boot_params->shave_nn_fw_base = vdev->fw->mem_shave_nn->vpu_addr; + + boot_params->watchdog_irq_mss = WATCHDOG_MSS_REDIRECT; + boot_params->watchdog_irq_nce = WATCHDOG_NCE_REDIRECT; + boot_params->si_stepping = ivpu_revision(vdev); + boot_params->device_id = ivpu_device_id(vdev); + boot_params->feature_exclusion = vdev->hw->tile_fuse; + boot_params->sku = vdev->hw->sku; + + boot_params->min_freq_pll_ratio = vdev->hw->pll.min_ratio; + boot_params->pn_freq_pll_ratio = vdev->hw->pll.pn_ratio; + boot_params->max_freq_pll_ratio = vdev->hw->pll.max_ratio; + + boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev); + boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); + boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev); + + wmb(); /* Flush WC buffers after writing bootparams */ + + ivpu_fw_boot_params_print(vdev, boot_params); +} diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h new file mode 100644 index 000000000000..8d275c802d1c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_FW_H__ +#define __IVPU_FW_H__ + +struct ivpu_device; +struct ivpu_bo; +struct vpu_boot_params; + +struct ivpu_fw_info { + const struct firmware *file; + struct ivpu_bo *mem; + struct ivpu_bo *mem_shave_nn; + struct ivpu_bo *mem_log_crit; + struct ivpu_bo *mem_log_verb; + u64 runtime_addr; + u32 runtime_size; + u64 image_load_offset; + u32 image_size; + u32 shave_nn_size; + u64 entry_point; /* Cold or warm boot entry point for next boot */ + u64 cold_boot_entry_point; +}; + +int ivpu_fw_init(struct ivpu_device *vdev); +void ivpu_fw_fini(struct ivpu_device *vdev); +int ivpu_fw_load(struct ivpu_device *vdev); +void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp); + +static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev) +{ + return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point; +} + +#endif /* __IVPU_FW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c new file mode 100644 index 000000000000..01d47d3bad5b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -0,0 +1,749 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/dma-buf.h> +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/set_memory.h> +#include <linux/xarray.h> + +#include <drm/drm_cache.h> +#include <drm/drm_debugfs.h> +#include <drm/drm_file.h> +#include <drm/drm_utils.h> + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" + +MODULE_IMPORT_NS(DMA_BUF); + +static const struct drm_gem_object_funcs ivpu_gem_funcs; + +static struct lock_class_key prime_bo_lock_class_key; + +static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo) +{ + /* Pages are managed by the underlying dma-buf */ + return 0; +} + +static void prime_free_pages_locked(struct ivpu_bo *bo) +{ + /* Pages are managed by the underlying dma-buf */ +} + +static int prime_map_pages_locked(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct sg_table *sgt; + + sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) { + ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt)); + return PTR_ERR(sgt); + } + + bo->sgt = sgt; + return 0; +} + +static void prime_unmap_pages_locked(struct ivpu_bo *bo) +{ + dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL); + bo->sgt = NULL; +} + +static const struct ivpu_bo_ops prime_ops = { + .type = IVPU_BO_TYPE_PRIME, + .name = "prime", + .alloc_pages = prime_alloc_pages_locked, + .free_pages = prime_free_pages_locked, + .map_pages = prime_map_pages_locked, + .unmap_pages = prime_unmap_pages_locked, +}; + +static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo) +{ + int npages = bo->base.size >> PAGE_SHIFT; + struct page **pages; + + pages = drm_gem_get_pages(&bo->base); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + if (bo->flags & DRM_IVPU_BO_WC) + set_pages_array_wc(pages, npages); + else if (bo->flags & DRM_IVPU_BO_UNCACHED) + set_pages_array_uc(pages, npages); + + bo->pages = pages; + return 0; +} + +static void shmem_free_pages_locked(struct ivpu_bo *bo) +{ + if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) + set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT); + + drm_gem_put_pages(&bo->base, bo->pages, true, false); + bo->pages = NULL; +} + +static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo) +{ + int npages = bo->base.size >> PAGE_SHIFT; + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct sg_table *sgt; + int ret; + + sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages); + if (IS_ERR(sgt)) { + ivpu_err(vdev, "Failed to allocate sgtable\n"); + return PTR_ERR(sgt); + } + + ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0); + if (ret) { + ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); + goto err_free_sgt; + } + + bo->sgt = sgt; + return 0; + +err_free_sgt: + kfree(sgt); + return ret; +} + +static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + + dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0); + sg_free_table(bo->sgt); + kfree(bo->sgt); + bo->sgt = NULL; +} + +static const struct ivpu_bo_ops shmem_ops = { + .type = IVPU_BO_TYPE_SHMEM, + .name = "shmem", + .alloc_pages = shmem_alloc_pages_locked, + .free_pages = shmem_free_pages_locked, + .map_pages = ivpu_bo_map_pages_locked, + .unmap_pages = ivpu_bo_unmap_pages_locked, +}; + +static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo) +{ + unsigned int i, npages = bo->base.size >> PAGE_SHIFT; + struct page **pages; + int ret; + + pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < npages; i++) { + pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (!pages[i]) { + ret = -ENOMEM; + goto err_free_pages; + } + cond_resched(); + } + + bo->pages = pages; + return 0; + +err_free_pages: + while (i--) + put_page(pages[i]); + kvfree(pages); + return ret; +} + +static void internal_free_pages_locked(struct ivpu_bo *bo) +{ + unsigned int i, npages = bo->base.size >> PAGE_SHIFT; + + for (i = 0; i < npages; i++) + put_page(bo->pages[i]); + + kvfree(bo->pages); + bo->pages = NULL; +} + +static const struct ivpu_bo_ops internal_ops = { + .type = IVPU_BO_TYPE_INTERNAL, + .name = "internal", + .alloc_pages = internal_alloc_pages_locked, + .free_pages = internal_free_pages_locked, + .map_pages = ivpu_bo_map_pages_locked, + .unmap_pages = ivpu_bo_unmap_pages_locked, +}; + +static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + int ret; + + lockdep_assert_held(&bo->lock); + drm_WARN_ON(&vdev->drm, bo->sgt); + + ret = bo->ops->alloc_pages(bo); + if (ret) { + ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret); + return ret; + } + + ret = bo->ops->map_pages(bo); + if (ret) { + ivpu_err(vdev, "Failed to map pages for BO: %d", ret); + goto err_free_pages; + } + return ret; + +err_free_pages: + bo->ops->free_pages(bo); + return ret; +} + +static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo) +{ + mutex_lock(&bo->lock); + + WARN_ON(!bo->sgt); + bo->ops->unmap_pages(bo); + WARN_ON(bo->sgt); + bo->ops->free_pages(bo); + WARN_ON(bo->pages); + + mutex_unlock(&bo->lock); +} + +/* + * ivpu_bo_pin() - pin the backing physical pages and map them to VPU. + * + * This function pins physical memory pages, then maps the physical pages + * to IOMMU address space and finally updates the VPU MMU page tables + * to allow the VPU to translate VPU address to IOMMU address. + */ +int __must_check ivpu_bo_pin(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + int ret = 0; + + mutex_lock(&bo->lock); + + if (!bo->vpu_addr) { + ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n", + bo->ctx->id, bo->handle); + ret = -EINVAL; + goto unlock; + } + + if (!bo->sgt) { + ret = ivpu_bo_alloc_and_map_pages_locked(bo); + if (ret) + goto unlock; + } + + if (!bo->mmu_mapped) { + ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt, + ivpu_bo_is_snooped(bo)); + if (ret) { + ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret); + goto unlock; + } + bo->mmu_mapped = true; + } + +unlock: + mutex_unlock(&bo->lock); + + return ret; +} + +static int +ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, + const struct ivpu_addr_range *range) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + int ret; + + if (!range) { + if (bo->flags & DRM_IVPU_BO_HIGH_MEM) + range = &vdev->hw->ranges.user_high; + else + range = &vdev->hw->ranges.user_low; + } + + mutex_lock(&ctx->lock); + ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node); + if (!ret) { + bo->ctx = ctx; + bo->vpu_addr = bo->mm_node.start; + list_add_tail(&bo->ctx_node, &ctx->bo_list); + } + mutex_unlock(&ctx->lock); + + return ret; +} + +static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo) +{ + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + struct ivpu_mmu_context *ctx = bo->ctx; + + ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", + ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); + + mutex_lock(&bo->lock); + + if (bo->mmu_mapped) { + drm_WARN_ON(&vdev->drm, !bo->sgt); + ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt); + bo->mmu_mapped = false; + } + + mutex_lock(&ctx->lock); + list_del(&bo->ctx_node); + bo->vpu_addr = 0; + bo->ctx = NULL; + ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node); + mutex_unlock(&ctx->lock); + + mutex_unlock(&bo->lock); +} + +void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx) +{ + struct ivpu_bo *bo, *tmp; + + list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node) + ivpu_bo_free_vpu_addr(bo); +} + +static struct ivpu_bo * +ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context, + u64 size, u32 flags, const struct ivpu_bo_ops *ops, + const struct ivpu_addr_range *range, u64 user_ptr) +{ + struct ivpu_bo *bo; + int ret = 0; + + if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size))) + return ERR_PTR(-EINVAL); + + switch (flags & DRM_IVPU_BO_CACHE_MASK) { + case DRM_IVPU_BO_CACHED: + case DRM_IVPU_BO_UNCACHED: + case DRM_IVPU_BO_WC: + break; + default: + return ERR_PTR(-EINVAL); + } + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + mutex_init(&bo->lock); + bo->base.funcs = &ivpu_gem_funcs; + bo->flags = flags; + bo->ops = ops; + bo->user_ptr = user_ptr; + + if (ops->type == IVPU_BO_TYPE_SHMEM) + ret = drm_gem_object_init(&vdev->drm, &bo->base, size); + else + drm_gem_private_object_init(&vdev->drm, &bo->base, size); + + if (ret) { + ivpu_err(vdev, "Failed to initialize drm object\n"); + goto err_free; + } + + if (flags & DRM_IVPU_BO_MAPPABLE) { + ret = drm_gem_create_mmap_offset(&bo->base); + if (ret) { + ivpu_err(vdev, "Failed to allocate mmap offset\n"); + goto err_release; + } + } + + if (mmu_context) { + ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range); + if (ret) { + ivpu_err(vdev, "Failed to add BO to context: %d\n", ret); + goto err_release; + } + } + + return bo; + +err_release: + drm_gem_object_release(&bo->base); +err_free: + kfree(bo); + return ERR_PTR(ret); +} + +static void ivpu_bo_free(struct drm_gem_object *obj) +{ + struct ivpu_bo *bo = to_ivpu_bo(obj); + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + + if (bo->ctx) + ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", + bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); + else + ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n", + (bool)bo->sgt, bo->mmu_mapped); + + drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); + + vunmap(bo->kvaddr); + + if (bo->ctx) + ivpu_bo_free_vpu_addr(bo); + + if (bo->sgt) + ivpu_bo_unmap_and_free_pages(bo); + + if (bo->base.import_attach) + drm_prime_gem_destroy(&bo->base, bo->sgt); + + drm_gem_object_release(&bo->base); + + mutex_destroy(&bo->lock); + kfree(bo); +} + +static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct ivpu_bo *bo = to_ivpu_bo(obj); + struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); + + ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s", + bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name); + + if (obj->import_attach) { + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + drm_gem_object_put(obj); + vma->vm_private_data = NULL; + return dma_buf_mmap(obj->dma_buf, vma, 0); + } + + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND; + vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags)); + + return 0; +} + +static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj) +{ + struct ivpu_bo *bo = to_ivpu_bo(obj); + loff_t npages = obj->size >> PAGE_SHIFT; + int ret = 0; + + mutex_lock(&bo->lock); + + if (!bo->sgt) + ret = ivpu_bo_alloc_and_map_pages_locked(bo); + + mutex_unlock(&bo->lock); + + if (ret) + return ERR_PTR(ret); + + return drm_prime_pages_to_sg(obj->dev, bo->pages, npages); +} + +static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct ivpu_bo *bo = to_ivpu_bo(obj); + loff_t npages = obj->size >> PAGE_SHIFT; + pgoff_t page_offset; + struct page *page; + vm_fault_t ret; + int err; + + mutex_lock(&bo->lock); + + if (!bo->sgt) { + err = ivpu_bo_alloc_and_map_pages_locked(bo); + if (err) { + ret = vmf_error(err); + goto unlock; + } + } + + /* We don't use vmf->pgoff since that has the fake offset */ + page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + if (page_offset >= npages) { + ret = VM_FAULT_SIGBUS; + } else { + page = bo->pages[page_offset]; + ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); + } + +unlock: + mutex_unlock(&bo->lock); + + return ret; +} + +static const struct vm_operations_struct ivpu_vm_ops = { + .fault = ivpu_vm_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static const struct drm_gem_object_funcs ivpu_gem_funcs = { + .free = ivpu_bo_free, + .mmap = ivpu_bo_mmap, + .vm_ops = &ivpu_vm_ops, + .get_sg_table = ivpu_bo_get_sg_table, +}; + +int +ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_bo_create *args = data; + u64 size = PAGE_ALIGN(args->size); + struct ivpu_bo *bo; + int ret; + + if (args->flags & ~DRM_IVPU_BO_FLAGS) + return -EINVAL; + + if (size == 0) + return -EINVAL; + + bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0); + if (IS_ERR(bo)) { + ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)", + bo, file_priv->ctx.id, args->size, args->flags); + return PTR_ERR(bo); + } + + ret = drm_gem_handle_create(file, &bo->base, &bo->handle); + if (!ret) { + args->vpu_addr = bo->vpu_addr; + args->handle = bo->handle; + } + + drm_gem_object_put(&bo->base); + + ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n", + file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags); + + return ret; +} + +struct ivpu_bo * +ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags) +{ + const struct ivpu_addr_range *range; + struct ivpu_addr_range fixed_range; + struct ivpu_bo *bo; + pgprot_t prot; + int ret; + + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr)); + drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size)); + + if (vpu_addr) { + fixed_range.start = vpu_addr; + fixed_range.end = vpu_addr + size; + range = &fixed_range; + } else { + range = &vdev->hw->ranges.global_low; + } + + bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0); + if (IS_ERR(bo)) { + ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", + bo, vpu_addr, size, flags); + return NULL; + } + + ret = ivpu_bo_pin(bo); + if (ret) + goto err_put; + + if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) + drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT); + + prot = ivpu_bo_pgprot(bo, PAGE_KERNEL); + bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot); + if (!bo->kvaddr) { + ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n"); + goto err_put; + } + + ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n", + bo->vpu_addr, bo->base.size, flags); + + return bo; + +err_put: + drm_gem_object_put(&bo->base); + return NULL; +} + +void ivpu_bo_free_internal(struct ivpu_bo *bo) +{ + drm_gem_object_put(&bo->base); +} + +struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct dma_buf_attachment *attach; + struct ivpu_bo *bo; + + attach = dma_buf_attach(buf, dev->dev); + if (IS_ERR(attach)) + return ERR_CAST(attach); + + get_dma_buf(buf); + + bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0); + if (IS_ERR(bo)) { + ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size); + goto err_detach; + } + + lockdep_set_class(&bo->lock, &prime_bo_lock_class_key); + + bo->base.import_attach = attach; + + return &bo->base; + +err_detach: + dma_buf_detach(buf, attach); + dma_buf_put(buf); + return ERR_CAST(bo); +} + +int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = to_ivpu_device(dev); + struct drm_ivpu_bo_info *args = data; + struct drm_gem_object *obj; + struct ivpu_bo *bo; + int ret = 0; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + bo = to_ivpu_bo(obj); + + mutex_lock(&bo->lock); + + if (!bo->ctx) { + ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL); + if (ret) { + ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret); + goto unlock; + } + } + + args->flags = bo->flags; + args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node); + args->vpu_addr = bo->vpu_addr; + args->size = obj->size; +unlock: + mutex_unlock(&bo->lock); + drm_gem_object_put(obj); + return ret; +} + +int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_bo_wait *args = data; + struct drm_gem_object *obj; + unsigned long timeout; + long ret; + + timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -EINVAL; + + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout); + if (ret == 0) { + ret = -ETIMEDOUT; + } else if (ret > 0) { + ret = 0; + args->job_status = to_ivpu_bo(obj)->job_status; + } + + drm_gem_object_put(obj); + + return ret; +} + +static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) +{ + unsigned long dma_refcount = 0; + + if (bo->base.dma_buf && bo->base.dma_buf->file) + dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count); + + drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n", + bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, + kref_read(&bo->base.refcount), dma_refcount, bo->ops->name); +} + +void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + struct ivpu_bo *bo; + + drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n", + "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type"); + + mutex_lock(&vdev->gctx.lock); + list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node) + ivpu_bo_print_info(bo, p); + mutex_unlock(&vdev->gctx.lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); + if (!file_priv) + continue; + + mutex_lock(&file_priv->ctx.lock); + list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node) + ivpu_bo_print_info(bo, p); + mutex_unlock(&file_priv->ctx.lock); + + ivpu_file_priv_put(&file_priv); + } +} + +void ivpu_bo_list_print(struct drm_device *dev) +{ + struct drm_printer p = drm_info_printer(dev->dev); + + ivpu_bo_list(dev, &p); +} diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h new file mode 100644 index 000000000000..6b0ceda5f253 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ +#ifndef __IVPU_GEM_H__ +#define __IVPU_GEM_H__ + +#include <drm/drm_gem.h> +#include <drm/drm_mm.h> + +struct dma_buf; +struct ivpu_bo_ops; +struct ivpu_file_priv; + +struct ivpu_bo { + struct drm_gem_object base; + const struct ivpu_bo_ops *ops; + + struct ivpu_mmu_context *ctx; + struct list_head ctx_node; + struct drm_mm_node mm_node; + + struct mutex lock; /* Protects: pages, sgt, mmu_mapped */ + struct sg_table *sgt; + struct page **pages; + bool mmu_mapped; + + void *kvaddr; + u64 vpu_addr; + u32 handle; + u32 flags; + uintptr_t user_ptr; + u32 job_status; +}; + +enum ivpu_bo_type { + IVPU_BO_TYPE_SHMEM = 1, + IVPU_BO_TYPE_INTERNAL, + IVPU_BO_TYPE_PRIME, +}; + +struct ivpu_bo_ops { + enum ivpu_bo_type type; + const char *name; + int (*alloc_pages)(struct ivpu_bo *bo); + void (*free_pages)(struct ivpu_bo *bo); + int (*map_pages)(struct ivpu_bo *bo); + void (*unmap_pages)(struct ivpu_bo *bo); +}; + +int ivpu_bo_pin(struct ivpu_bo *bo); +void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx); +void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p); +void ivpu_bo_list_print(struct drm_device *dev); + +struct ivpu_bo * +ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); +void ivpu_bo_free_internal(struct ivpu_bo *bo); +struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); +void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo); + +int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj) +{ + return container_of(obj, struct ivpu_bo, base); +} + +static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset) +{ + if (offset > bo->base.size || !bo->pages) + return NULL; + + return bo->pages[offset / PAGE_SIZE]; +} + +static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) +{ + return bo->flags & DRM_IVPU_BO_CACHE_MASK; +} + +static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) +{ + return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; +} + +static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot) +{ + if (bo->flags & DRM_IVPU_BO_WC) + return pgprot_writecombine(prot); + + if (bo->flags & DRM_IVPU_BO_UNCACHED) + return pgprot_noncached(prot); + + return prot; +} + +static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) +{ + return to_ivpu_device(bo->base.dev); +} + +static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) +{ + if (vpu_addr < bo->vpu_addr) + return NULL; + + if (vpu_addr >= (bo->vpu_addr + bo->base.size)) + return NULL; + + return bo->kvaddr + (vpu_addr - bo->vpu_addr); +} + +static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr) +{ + if (cpu_addr < bo->kvaddr) + return 0; + + if (cpu_addr >= (bo->kvaddr + bo->base.size)) + return 0; + + return bo->vpu_addr + (cpu_addr - bo->kvaddr); +} + +#endif /* __IVPU_GEM_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h new file mode 100644 index 000000000000..50a9304ab09c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_H__ +#define __IVPU_HW_H__ + +#include "ivpu_drv.h" + +struct ivpu_hw_ops { + int (*info_init)(struct ivpu_device *vdev); + int (*power_up)(struct ivpu_device *vdev); + int (*boot_fw)(struct ivpu_device *vdev); + int (*power_down)(struct ivpu_device *vdev); + bool (*is_idle)(struct ivpu_device *vdev); + void (*wdt_disable)(struct ivpu_device *vdev); + void (*diagnose_failure)(struct ivpu_device *vdev); + u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev); + void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id); + u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev); + u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev); + void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr); + void (*irq_clear)(struct ivpu_device *vdev); + void (*irq_enable)(struct ivpu_device *vdev); + void (*irq_disable)(struct ivpu_device *vdev); + irqreturn_t (*irq_handler)(int irq, void *ptr); +}; + +struct ivpu_addr_range { + resource_size_t start; + resource_size_t end; +}; + +struct ivpu_hw_info { + const struct ivpu_hw_ops *ops; + struct { + struct ivpu_addr_range global_low; + struct ivpu_addr_range global_high; + struct ivpu_addr_range user_low; + struct ivpu_addr_range user_high; + struct ivpu_addr_range global_aliased_pio; + } ranges; + struct { + u8 min_ratio; + u8 max_ratio; + /* + * Pll ratio for the efficiency frequency. The VPU has optimum + * performance to power ratio at this frequency. + */ + u8 pn_ratio; + u32 profiling_freq; + } pll; + u32 tile_fuse; + u32 sku; + u16 config; +}; + +extern const struct ivpu_hw_ops ivpu_hw_mtl_ops; + +static inline int ivpu_hw_info_init(struct ivpu_device *vdev) +{ + return vdev->hw->ops->info_init(vdev); +}; + +static inline int ivpu_hw_power_up(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, PM, "HW power up\n"); + + return vdev->hw->ops->power_up(vdev); +}; + +static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev) +{ + return vdev->hw->ops->boot_fw(vdev); +}; + +static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) +{ + return vdev->hw->ops->is_idle(vdev); +}; + +static inline int ivpu_hw_power_down(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, PM, "HW power down\n"); + + return vdev->hw->ops->power_down(vdev); +}; + +static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) +{ + vdev->hw->ops->wdt_disable(vdev); +}; + +/* Register indirect accesses */ +static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_pll_freq_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_offset_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_size_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_enable_get(vdev); +}; + +static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id) +{ + vdev->hw->ops->reg_db_set(vdev, db_id); +}; + +static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_ipc_rx_addr_get(vdev); +}; + +static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_ipc_rx_count_get(vdev); +}; + +static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +{ + vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr); +}; + +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_clear(vdev); +}; + +static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_enable(vdev); +}; + +static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_disable(vdev); +}; + +static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size) +{ + range->start = start; + range->end = start + size; +} + +static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) +{ + return range->end - range->start; +} + +static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) +{ + vdev->hw->ops->diagnose_failure(vdev); +} + +#endif /* __IVPU_HW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c new file mode 100644 index 000000000000..62bfaa9081c4 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -0,0 +1,1084 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_fw.h" +#include "ivpu_hw_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_hw.h" +#include "ivpu_ipc.h" +#include "ivpu_mmu.h" +#include "ivpu_pm.h" + +#define TILE_FUSE_ENABLE_BOTH 0x0 +#define TILE_FUSE_ENABLE_UPPER 0x1 +#define TILE_FUSE_ENABLE_LOWER 0x2 + +#define TILE_SKU_BOTH_MTL 0x3630 +#define TILE_SKU_LOWER_MTL 0x3631 +#define TILE_SKU_UPPER_MTL 0x3632 + +/* Work point configuration values */ +#define WP_CONFIG_1_TILE_5_3_RATIO 0x0101 +#define WP_CONFIG_1_TILE_4_3_RATIO 0x0102 +#define WP_CONFIG_2_TILE_5_3_RATIO 0x0201 +#define WP_CONFIG_2_TILE_4_3_RATIO 0x0202 +#define WP_CONFIG_0_TILE_PLL_OFF 0x0000 + +#define PLL_REF_CLK_FREQ (50 * 1000000) +#define PLL_SIMULATION_FREQ (10 * 1000000) +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) +#define PLL_DEFAULT_EPP_VALUE 0x80 + +#define TIM_SAFE_ENABLE 0xf1d0dead +#define TIM_WATCHDOG_RESET_VALUE 0xffffffff + +#define TIMEOUT_US (150 * USEC_PER_MSEC) +#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) +#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC) + +#define ICB_0_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) + +#define BUTTRESS_IRQ_MASK ((REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ + (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) + +#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) +#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) + +#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +static char *ivpu_platform_to_str(u32 platform) +{ + switch (platform) { + case IVPU_PLATFORM_SILICON: + return "IVPU_PLATFORM_SILICON"; + case IVPU_PLATFORM_SIMICS: + return "IVPU_PLATFORM_SIMICS"; + case IVPU_PLATFORM_FPGA: + return "IVPU_PLATFORM_FPGA"; + default: + return "Invalid platform"; + } +} + +static void ivpu_hw_read_platform(struct ivpu_device *vdev) +{ + u32 gen_ctrl = REGV_RD32(MTL_VPU_HOST_SS_GEN_CTRL); + u32 platform = REG_GET_FLD(MTL_VPU_HOST_SS_GEN_CTRL, PS, gen_ctrl); + + if (platform == IVPU_PLATFORM_SIMICS || platform == IVPU_PLATFORM_FPGA) + vdev->platform = platform; + else + vdev->platform = IVPU_PLATFORM_SILICON; + + ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", + ivpu_platform_to_str(vdev->platform), vdev->platform); +} + +static void ivpu_hw_wa_init(struct ivpu_device *vdev) +{ + vdev->wa.punit_disabled = ivpu_is_fpga(vdev); + vdev->wa.clear_runtime_mem = false; +} + +static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) +{ + if (ivpu_is_simics(vdev) || ivpu_is_fpga(vdev)) { + vdev->timeout.boot = 100000; + vdev->timeout.jsm = 50000; + vdev->timeout.tdr = 2000000; + vdev->timeout.reschedule_suspend = 1000; + } else { + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 2000; + vdev->timeout.reschedule_suspend = 10; + } +} + +static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) +{ + return REGB_POLL_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); +} + +/* Send KMD initiated workpoint change */ +static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, + u16 target_ratio, u16 config) +{ + int ret; + u32 val; + + ret = ivpu_pll_wait_for_cmd_send(vdev); + if (ret) { + ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); + return ret; + } + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_CMD); + val = REG_SET_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_CMD, val); + + ret = ivpu_pll_wait_for_cmd_send(vdev); + if (ret) + ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); + + return ret; +} + +static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(MTL_BUTTRESS_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); +} + +static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) +{ + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(MTL_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); +} + +static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u8 fuse_min_ratio, fuse_max_ratio, fuse_pn_ratio; + u32 fmin_fuse, fmax_fuse; + + fmin_fuse = REGB_RD32(MTL_BUTTRESS_FMIN_FUSE); + fuse_min_ratio = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, MIN_RATIO, fmin_fuse); + fuse_pn_ratio = REG_GET_FLD(MTL_BUTTRESS_FMIN_FUSE, PN_RATIO, fmin_fuse); + + fmax_fuse = REGB_RD32(MTL_BUTTRESS_FMAX_FUSE); + fuse_max_ratio = REG_GET_FLD(MTL_BUTTRESS_FMAX_FUSE, MAX_RATIO, fmax_fuse); + + hw->pll.min_ratio = clamp_t(u8, ivpu_pll_min_ratio, fuse_min_ratio, fuse_max_ratio); + hw->pll.max_ratio = clamp_t(u8, ivpu_pll_max_ratio, hw->pll.min_ratio, fuse_max_ratio); + hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); +} + +static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) +{ + struct ivpu_hw_info *hw = vdev->hw; + u16 target_ratio; + u16 config; + int ret; + + if (IVPU_WA(punit_disabled)) { + ivpu_dbg(vdev, PM, "Skipping PLL request on %s\n", + ivpu_platform_to_str(vdev->platform)); + return 0; + } + + if (enable) { + target_ratio = hw->pll.pn_ratio; + config = hw->config; + } else { + target_ratio = 0; + config = 0; + } + + ivpu_dbg(vdev, PM, "PLL workpoint request: %d Hz\n", PLL_RATIO_TO_FREQ(target_ratio)); + + ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config); + if (ret) { + ivpu_err(vdev, "Failed to send PLL workpoint request: %d\n", ret); + return ret; + } + + ret = ivpu_pll_wait_for_lock(vdev, enable); + if (ret) { + ivpu_err(vdev, "Timed out waiting for PLL lock\n"); + return ret; + } + + if (enable) { + ret = ivpu_pll_wait_for_status_ready(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); + return ret; + } + } + + return 0; +} + +static int ivpu_pll_enable(struct ivpu_device *vdev) +{ + return ivpu_pll_drive(vdev, true); +} + +static int ivpu_pll_disable(struct ivpu_device *vdev) +{ + return ivpu_pll_drive(vdev, false); +} + +static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_CLR); + + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, TOP_NOC, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, DSS_MAS, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, MSS_MAS, val); + + REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_CLR, val); +} + +static void ivpu_boot_host_ss_rst_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_SET); + + if (enable) { + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, TOP_NOC, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, DSS_MAS, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_RST_SET, MSS_MAS, val); + } + + REGV_WR32(MTL_VPU_HOST_SS_CPR_RST_SET, val); +} + +static void ivpu_boot_host_ss_clk_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_CLK_SET); + + if (enable) { + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } else { + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, TOP_NOC, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, DSS_MAS, val); + val = REG_CLR_FLD(MTL_VPU_HOST_SS_CPR_CLK_SET, MSS_MAS, val); + } + + REGV_WR32(MTL_VPU_HOST_SS_CPR_CLK_SET, val); +} + +static int ivpu_boot_noc_qreqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QACCEPTN, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(MTL_VPU_HOST_SS_NOC_QDENY, TOP_SOCMMIO, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN); + + if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) +{ + u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QDENY); + + if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + return -EIO; + + return 0; +} + +static int ivpu_boot_host_ss_configure(struct ivpu_device *vdev) +{ + ivpu_boot_host_ss_rst_clr_assert(vdev); + + return ivpu_boot_noc_qreqn_check(vdev, 0x0); +} + +static void ivpu_boot_vpu_idle_gen_disable(struct ivpu_device *vdev) +{ + REGV_WR32(MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN, 0x0); +} + +static int ivpu_boot_host_ss_axi_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + val = REGV_RD32(MTL_VPU_HOST_SS_NOC_QREQN); + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_NOC_QREQN, TOP_SOCMMIO, val); + REGV_WR32(MTL_VPU_HOST_SS_NOC_QREQN, val); + + ret = ivpu_boot_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = ivpu_boot_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_axi_drive(vdev, true); +} + +static int ivpu_boot_host_ss_axi_disable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_axi_drive(vdev, false); +} + +static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); + if (enable) { + val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } else { + val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + } + REGV_WR32(MTL_VPU_TOP_NOC_QREQN, val); + + ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); + if (ret) { + ivpu_err(vdev, "Failed qacceptn check: %d\n", ret); + return ret; + } + + ret = ivpu_boot_top_noc_qdeny_check(vdev, 0x0); + if (ret) + ivpu_err(vdev, "Failed qdeny check: %d\n", ret); + + return ret; +} + +static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_top_noc_drive(vdev, true); +} + +static int ivpu_boot_host_ss_top_noc_disable(struct ivpu_device *vdev) +{ + return ivpu_boot_host_ss_top_noc_drive(vdev, false); +} + +static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, MSS_CPU, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); +} + +static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, MSS_CPU, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0, val); +} + +static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) +{ + /* FPGA model (UPF) is not power aware, skipped Power Island polling */ + if (ivpu_is_fpga(vdev)) + return 0; + + return REGV_POLL_FLD(MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, + exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); +} + +static void ivpu_boot_pwr_island_isolation_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, MSS_CPU, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_PWR_ISO_EN0, val); +} + +static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE); + + if (enable) + val = REG_SET_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + else + val = REG_CLR_FLD(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, DPU_ACTIVE, val); + + REGV_WR32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, val); +} + +static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) +{ + ivpu_boot_dpu_active_drive(vdev, false); + ivpu_boot_pwr_island_isolation_drive(vdev, true); + ivpu_boot_pwr_island_trickle_drive(vdev, false); + ivpu_boot_pwr_island_drive(vdev, false); + + return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); +} + +static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) +{ + int ret; + + ivpu_boot_pwr_island_trickle_drive(vdev, true); + ivpu_boot_pwr_island_drive(vdev, true); + + ret = ivpu_boot_wait_for_pwr_island_status(vdev, 0x1); + if (ret) { + ivpu_err(vdev, "Timed out waiting for power island status\n"); + return ret; + } + + ret = ivpu_boot_top_noc_qrenqn_check(vdev, 0x0); + if (ret) { + ivpu_err(vdev, "Failed qrenqn check %d\n", ret); + return ret; + } + + ivpu_boot_host_ss_clk_drive(vdev, true); + ivpu_boot_pwr_island_isolation_drive(vdev, false); + ivpu_boot_host_ss_rst_drive(vdev, true); + ivpu_boot_dpu_active_drive(vdev, true); + + return ret; +} + +static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES); + + val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); + + REGV_WR32(MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES, val); +} + +static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev) +{ + u32 val = REGV_RD32(MTL_VPU_HOST_IF_TBU_MMUSSIDV); + + if (ivpu_is_fpga(vdev)) { + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + } else { + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_AWMMUSSIDV, val); + val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_ARMMUSSIDV, val); + } + + REGV_WR32(MTL_VPU_HOST_IF_TBU_MMUSSIDV, val); +} + +static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) +{ + u32 val; + + val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); + val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); + + val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); + REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + + val = vdev->fw->entry_point >> 9; + REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val); + + val = REG_SET_FLD(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, DONE, val); + REGV_WR32(MTL_VPU_HOST_SS_LOADING_ADDRESS_LO, val); + + ivpu_dbg(vdev, PM, "Booting firmware, mode: %s\n", + vdev->fw->entry_point == vdev->fw->cold_boot_entry_point ? "cold boot" : "resume"); +} + +static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) +{ + int ret; + u32 val; + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Failed to sync before D0i3 transition: %d\n", ret); + return ret; + } + + val = REGB_RD32(MTL_BUTTRESS_VPU_D0I3_CONTROL); + if (enable) + val = REG_SET_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, val); + else + val = REG_CLR_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, I3, val); + REGB_WR32(MTL_BUTTRESS_VPU_D0I3_CONTROL, val); + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_D0I3_CONTROL, INPROGRESS, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Failed to sync after D0i3 transition: %d\n", ret); + + return ret; +} + +static int ivpu_hw_mtl_info_init(struct ivpu_device *vdev) +{ + struct ivpu_hw_info *hw = vdev->hw; + u32 tile_fuse; + + tile_fuse = REGB_RD32(MTL_BUTTRESS_TILE_FUSE); + if (!REG_TEST_FLD(MTL_BUTTRESS_TILE_FUSE, VALID, tile_fuse)) + ivpu_warn(vdev, "Tile Fuse: Invalid (0x%x)\n", tile_fuse); + + hw->tile_fuse = REG_GET_FLD(MTL_BUTTRESS_TILE_FUSE, SKU, tile_fuse); + switch (hw->tile_fuse) { + case TILE_FUSE_ENABLE_LOWER: + hw->sku = TILE_SKU_LOWER_MTL; + hw->config = WP_CONFIG_1_TILE_5_3_RATIO; + ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Lower\n"); + break; + case TILE_FUSE_ENABLE_UPPER: + hw->sku = TILE_SKU_UPPER_MTL; + hw->config = WP_CONFIG_1_TILE_4_3_RATIO; + ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Upper\n"); + break; + case TILE_FUSE_ENABLE_BOTH: + hw->sku = TILE_SKU_BOTH_MTL; + hw->config = WP_CONFIG_2_TILE_5_3_RATIO; + ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Both\n"); + break; + default: + hw->config = WP_CONFIG_0_TILE_PLL_OFF; + ivpu_dbg(vdev, MISC, "Tile Fuse: Disable\n"); + break; + } + + ivpu_pll_init_frequency_ratios(vdev); + + ivpu_hw_init_range(&hw->ranges.global_low, 0x80000000, SZ_512M); + ivpu_hw_init_range(&hw->ranges.global_high, 0x180000000, SZ_2M); + ivpu_hw_init_range(&hw->ranges.user_low, 0xc0000000, 255 * SZ_1M); + ivpu_hw_init_range(&hw->ranges.user_high, 0x180000000, SZ_2G); + hw->ranges.global_aliased_pio = hw->ranges.user_low; + + return 0; +} + +static int ivpu_hw_mtl_reset(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); + return ret; + } + + val = REGB_RD32(MTL_BUTTRESS_VPU_IP_RESET); + val = REG_SET_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, val); + REGB_WR32(MTL_BUTTRESS_VPU_IP_RESET, val); + + ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + +static int ivpu_hw_mtl_d0i3_enable(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_boot_d0i3_drive(vdev, true); + if (ret) + ivpu_err(vdev, "Failed to enable D0i3: %d\n", ret); + + udelay(5); /* VPU requires 5 us to complete the transition */ + + return ret; +} + +static int ivpu_hw_mtl_d0i3_disable(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_boot_d0i3_drive(vdev, false); + if (ret) + ivpu_err(vdev, "Failed to disable D0i3: %d\n", ret); + + return ret; +} + +static int ivpu_hw_mtl_power_up(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_read_platform(vdev); + ivpu_hw_wa_init(vdev); + ivpu_hw_timeouts_init(vdev); + + ret = ivpu_hw_mtl_reset(vdev); + if (ret) + ivpu_warn(vdev, "Failed to reset HW: %d\n", ret); + + ret = ivpu_hw_mtl_d0i3_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); + + ret = ivpu_pll_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable PLL: %d\n", ret); + return ret; + } + + ret = ivpu_boot_host_ss_configure(vdev); + if (ret) { + ivpu_err(vdev, "Failed to configure host SS: %d\n", ret); + return ret; + } + + /* + * The control circuitry for vpu_idle indication logic powers up active. + * To ensure unnecessary low power mode signal from LRT during bring up, + * KMD disables the circuitry prior to bringing up the Main Power island. + */ + ivpu_boot_vpu_idle_gen_disable(vdev); + + ret = ivpu_boot_pwr_domain_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable power domain: %d\n", ret); + return ret; + } + + ret = ivpu_boot_host_ss_axi_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to enable AXI: %d\n", ret); + return ret; + } + + ret = ivpu_boot_host_ss_top_noc_enable(vdev); + if (ret) + ivpu_err(vdev, "Failed to enable TOP NOC: %d\n", ret); + + return ret; +} + +static int ivpu_hw_mtl_boot_fw(struct ivpu_device *vdev) +{ + ivpu_boot_no_snoop_enable(vdev); + ivpu_boot_tbu_mmu_enable(vdev); + ivpu_boot_soc_cpu_boot(vdev); + + return 0; +} + +static bool ivpu_hw_mtl_is_idle(struct ivpu_device *vdev) +{ + u32 val; + + if (IVPU_WA(punit_disabled)) + return true; + + val = REGB_RD32(MTL_BUTTRESS_VPU_STATUS); + return REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, READY, val) && + REG_TEST_FLD(MTL_BUTTRESS_VPU_STATUS, IDLE, val); +} + +static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev) +{ + int ret = 0; + + /* FPGA requires manual clearing of IP_Reset bit by enabling quiescent state */ + if (ivpu_is_fpga(vdev)) { + if (ivpu_boot_host_ss_top_noc_disable(vdev)) { + ivpu_err(vdev, "Failed to disable TOP NOC\n"); + ret = -EIO; + } + + if (ivpu_boot_host_ss_axi_disable(vdev)) { + ivpu_err(vdev, "Failed to disable AXI\n"); + ret = -EIO; + } + } + + if (ivpu_boot_pwr_domain_disable(vdev)) { + ivpu_err(vdev, "Failed to disable power domain\n"); + ret = -EIO; + } + + if (ivpu_pll_disable(vdev)) { + ivpu_err(vdev, "Failed to disable PLL\n"); + ret = -EIO; + } + + if (ivpu_hw_mtl_d0i3_enable(vdev)) + ivpu_warn(vdev, "Failed to enable D0I3\n"); + + return ret; +} + +static void ivpu_hw_mtl_wdt_disable(struct ivpu_device *vdev) +{ + u32 val; + + /* Enable writing and set non-zero WDT value */ + REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + + /* Enable writing and disable watchdog timer */ + REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0); + + /* Now clear the timeout interrupt */ + val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val); +} + +/* Register indirect accesses */ +static u32 ivpu_hw_mtl_reg_pll_freq_get(struct ivpu_device *vdev) +{ + u32 pll_curr_ratio; + + pll_curr_ratio = REGB_RD32(MTL_BUTTRESS_CURRENT_PLL); + pll_curr_ratio &= MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK; + + if (!ivpu_is_silicon(vdev)) + return PLL_SIMULATION_FREQ; + + return PLL_RATIO_TO_FREQ(pll_curr_ratio); +} + +static u32 ivpu_hw_mtl_reg_telemetry_offset_get(struct ivpu_device *vdev) +{ + return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_OFFSET); +} + +static u32 ivpu_hw_mtl_reg_telemetry_size_get(struct ivpu_device *vdev) +{ + return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_SIZE); +} + +static u32 ivpu_hw_mtl_reg_telemetry_enable_get(struct ivpu_device *vdev) +{ + return REGB_RD32(MTL_BUTTRESS_VPU_TELEMETRY_ENABLE); +} + +static void ivpu_hw_mtl_reg_db_set(struct ivpu_device *vdev, u32 db_id) +{ + u32 reg_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0; + u32 val = REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET); + + REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, reg_stride, db_id, val); +} + +static u32 ivpu_hw_mtl_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +{ + return REGV_RD32(MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM); +} + +static u32 ivpu_hw_mtl_reg_ipc_rx_count_get(struct ivpu_device *vdev) +{ + u32 count = REGV_RD32_SILENT(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT); + + return REG_GET_FLD(MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count); +} + +static void ivpu_hw_mtl_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +{ + REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr); +} + +static void ivpu_hw_mtl_irq_clear(struct ivpu_device *vdev) +{ + REGV_WR64(MTL_VPU_HOST_SS_ICB_CLEAR_0, ICB_0_1_IRQ_MASK); +} + +static void ivpu_hw_mtl_irq_enable(struct ivpu_device *vdev) +{ + REGV_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, ITF_FIREWALL_VIOLATION_MASK); + REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, ICB_0_1_IRQ_MASK); + REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_ENABLE_MASK); + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0); +} + +static void ivpu_hw_mtl_irq_disable(struct ivpu_device *vdev) +{ + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1); + REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK); + REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, 0x0ull); + REGB_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, 0x0); +} + +static void ivpu_hw_mtl_irq_wdt_nce_handler(struct ivpu_device *vdev) +{ + ivpu_err_ratelimited(vdev, "WDT NCE irq\n"); + + ivpu_pm_schedule_recovery(vdev); +} + +static void ivpu_hw_mtl_irq_wdt_mss_handler(struct ivpu_device *vdev) +{ + ivpu_err_ratelimited(vdev, "WDT MSS irq\n"); + + ivpu_hw_wdt_disable(vdev); + ivpu_pm_schedule_recovery(vdev); +} + +static void ivpu_hw_mtl_irq_noc_firewall_handler(struct ivpu_device *vdev) +{ + ivpu_err_ratelimited(vdev, "NOC Firewall irq\n"); + + ivpu_pm_schedule_recovery(vdev); +} + +/* Handler for IRQs from VPU core (irqV) */ +static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq) +{ + u32 status = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; + + REGV_WR32(MTL_VPU_HOST_SS_ICB_CLEAR_0, status); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) + ivpu_mmu_irq_evtq_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) + ivpu_ipc_irq_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) + ivpu_mmu_irq_gerr_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) + ivpu_hw_mtl_irq_wdt_mss_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, status)) + ivpu_hw_mtl_irq_wdt_nce_handler(vdev); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) + ivpu_hw_mtl_irq_noc_firewall_handler(vdev); + + return status; +} + +/* Handler for IRQs from Buttress core (irqB) */ +static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq) +{ + u32 status = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; + bool schedule_recovery = false; + + if (status == 0) + return 0; + + /* Disable global interrupt before handling local buttress interrupts */ + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(MTL_BUTTRESS_CURRENT_PLL)); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, status)) { + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0)); + REGB_WR32(MTL_BUTTRESS_ATS_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, status)) { + u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); + REGB_WR32(MTL_BUTTRESS_UFI_ERR_CLEAR, 0x1); + schedule_recovery = true; + } + + /* + * Clear local interrupt status by writing 0 to all bits. + * This must be done after interrupts are cleared at the source. + * Writing 1 triggers an interrupt, so we can't perform read update write. + */ + REGB_WR32(MTL_BUTTRESS_INTERRUPT_STAT, 0x0); + + /* Re-enable global interrupt */ + REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0); + + if (schedule_recovery) + ivpu_pm_schedule_recovery(vdev); + + return status; +} + +static irqreturn_t ivpu_hw_mtl_irq_handler(int irq, void *ptr) +{ + struct ivpu_device *vdev = ptr; + u32 ret_irqv, ret_irqb; + + ret_irqv = ivpu_hw_mtl_irqv_handler(vdev, irq); + ret_irqb = ivpu_hw_mtl_irqb_handler(vdev, irq); + + return IRQ_RETVAL(ret_irqb | ret_irqv); +} + +static void ivpu_hw_mtl_diagnose_failure(struct ivpu_device *vdev) +{ + u32 irqv = REGV_RD32(MTL_VPU_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; + u32 irqb = REGB_RD32(MTL_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; + + if (ivpu_hw_mtl_reg_ipc_rx_count_get(vdev)) + ivpu_err(vdev, "IPC FIFO queue not empty, missed IPC IRQ"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, irqv)) + ivpu_err(vdev, "WDT MSS timeout detected\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT, irqv)) + ivpu_err(vdev, "WDT NCE timeout detected\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, irqv)) + ivpu_err(vdev, "NOC Firewall irq detected\n"); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR, irqb)) + ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(MTL_BUTTRESS_ATS_ERR_LOG_0)); + + if (REG_TEST_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR, irqb)) { + u32 ufi_log = REGB_RD32(MTL_BUTTRESS_UFI_ERR_LOG); + + ivpu_err(vdev, "UFI_ERR irq (0x%08x) opcode: 0x%02lx axi_id: 0x%02lx cq_id: 0x%03lx", + ufi_log, REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, OPCODE, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, AXI_ID, ufi_log), + REG_GET_FLD(MTL_BUTTRESS_UFI_ERR_LOG, CQ_ID, ufi_log)); + } +} + +const struct ivpu_hw_ops ivpu_hw_mtl_ops = { + .info_init = ivpu_hw_mtl_info_init, + .power_up = ivpu_hw_mtl_power_up, + .is_idle = ivpu_hw_mtl_is_idle, + .power_down = ivpu_hw_mtl_power_down, + .boot_fw = ivpu_hw_mtl_boot_fw, + .wdt_disable = ivpu_hw_mtl_wdt_disable, + .diagnose_failure = ivpu_hw_mtl_diagnose_failure, + .reg_pll_freq_get = ivpu_hw_mtl_reg_pll_freq_get, + .reg_telemetry_offset_get = ivpu_hw_mtl_reg_telemetry_offset_get, + .reg_telemetry_size_get = ivpu_hw_mtl_reg_telemetry_size_get, + .reg_telemetry_enable_get = ivpu_hw_mtl_reg_telemetry_enable_get, + .reg_db_set = ivpu_hw_mtl_reg_db_set, + .reg_ipc_rx_addr_get = ivpu_hw_mtl_reg_ipc_rx_addr_get, + .reg_ipc_rx_count_get = ivpu_hw_mtl_reg_ipc_rx_count_get, + .reg_ipc_tx_set = ivpu_hw_mtl_reg_ipc_tx_set, + .irq_clear = ivpu_hw_mtl_irq_clear, + .irq_enable = ivpu_hw_mtl_irq_enable, + .irq_disable = ivpu_hw_mtl_irq_disable, + .irq_handler = ivpu_hw_mtl_irq_handler, +}; diff --git a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h new file mode 100644 index 000000000000..d83ccfd9a871 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_MTL_REG_H__ +#define __IVPU_HW_MTL_REG_H__ + +#include <linux/bits.h> + +#define MTL_BUTTRESS_INTERRUPT_TYPE 0x00000000u + +#define MTL_BUTTRESS_INTERRUPT_STAT 0x00000004u +#define MTL_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define MTL_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) +#define MTL_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) + +#define MTL_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u +#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) + +#define MTL_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu +#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) + +#define MTL_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u +#define MTL_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) + +#define MTL_BUTTRESS_WP_REQ_CMD 0x00000014u +#define MTL_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) + +#define MTL_BUTTRESS_WP_DOWNLOAD 0x00000018u +#define MTL_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) + +#define MTL_BUTTRESS_CURRENT_PLL 0x0000001cu +#define MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) + +#define MTL_BUTTRESS_PLL_ENABLE 0x00000020u + +#define MTL_BUTTRESS_FMIN_FUSE 0x00000024u +#define MTL_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define MTL_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) + +#define MTL_BUTTRESS_FMAX_FUSE 0x00000028u +#define MTL_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) + +#define MTL_BUTTRESS_TILE_FUSE 0x0000002cu +#define MTL_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) +#define MTL_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2) + +#define MTL_BUTTRESS_LOCAL_INT_MASK 0x00000030u +#define MTL_BUTTRESS_GLOBAL_INT_MASK 0x00000034u + +#define MTL_BUTTRESS_PLL_STATUS 0x00000040u +#define MTL_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1) + +#define MTL_BUTTRESS_VPU_STATUS 0x00000044u +#define MTL_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) +#define MTL_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) + +#define MTL_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u +#define MTL_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define MTL_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) + +#define MTL_BUTTRESS_VPU_IP_RESET 0x00000050u +#define MTL_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) + +#define MTL_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u +#define MTL_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u +#define MTL_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u + +#define MTL_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u +#define MTL_BUTTRESS_ATS_ERR_LOG_1 0x000000a4u +#define MTL_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u + +#define MTL_BUTTRESS_UFI_ERR_LOG 0x000000b0u +#define MTL_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) +#define MTL_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) +#define MTL_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) + +#define MTL_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u + +#define MTL_VPU_HOST_SS_CPR_CLK_SET 0x00000084u +#define MTL_VPU_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10) +#define MTL_VPU_HOST_SS_CPR_CLK_SET_MSS_MAS_MASK BIT_MASK(11) + +#define MTL_VPU_HOST_SS_CPR_RST_SET 0x00000094u +#define MTL_VPU_HOST_SS_CPR_RST_SET_TOP_NOC_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_CPR_RST_SET_DSS_MAS_MASK BIT_MASK(10) +#define MTL_VPU_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11) + +#define MTL_VPU_HOST_SS_CPR_RST_CLR 0x00000098u +#define MTL_VPU_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10) +#define MTL_VPU_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11) + +#define MTL_VPU_HOST_SS_HW_VERSION 0x00000108u +#define MTL_VPU_HOST_SS_HW_VERSION_SOC_REVISION_MASK GENMASK(7, 0) +#define MTL_VPU_HOST_SS_HW_VERSION_SOC_NUMBER_MASK GENMASK(15, 8) +#define MTL_VPU_HOST_SS_HW_VERSION_VPU_GENERATION_MASK GENMASK(23, 16) + +#define MTL_VPU_HOST_SS_GEN_CTRL 0x00000118u +#define MTL_VPU_HOST_SS_GEN_CTRL_PS_MASK GENMASK(31, 29) + +#define MTL_VPU_HOST_SS_NOC_QREQN 0x00000154u +#define MTL_VPU_HOST_SS_NOC_QREQN_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_NOC_QACCEPTN 0x00000158u +#define MTL_VPU_HOST_SS_NOC_QACCEPTN_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_NOC_QDENY 0x0000015cu +#define MTL_VPU_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK BIT_MASK(0) + +#define MTL_VPU_TOP_NOC_QREQN 0x00000160u +#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0) +#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define MTL_VPU_TOP_NOC_QACCEPTN 0x00000164u +#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0) +#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define MTL_VPU_TOP_NOC_QDENY 0x00000168u +#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0) +#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(1) + +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN 0x00000170u +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_DBG_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_CTRL_MASK BIT_MASK(2) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_DEC400_MASK BIT_MASK(3) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_NCE_MASK BIT_MASK(4) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_MASK BIT_MASK(5) +#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_CMX_MASK BIT_MASK(6) + +#define MTL_VPU_HOST_SS_ICB_STATUS_0 0x00010210u +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_0_INT_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_1_INT_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK BIT_MASK(2) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK BIT_MASK(3) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK BIT_MASK(4) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK BIT_MASK(8) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK BIT_MASK(30) +#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK BIT_MASK(31) + +#define MTL_VPU_HOST_SS_ICB_STATUS_1 0x00010214u +#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_2_INT_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_3_INT_MASK BIT_MASK(1) +#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_4_INT_MASK BIT_MASK(2) + +#define MTL_VPU_HOST_SS_ICB_CLEAR_0 0x00010220u +#define MTL_VPU_HOST_SS_ICB_CLEAR_1 0x00010224u +#define MTL_VPU_HOST_SS_ICB_ENABLE_0 0x00010240u + +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM 0x000200f4u + +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT 0x000200fcu +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_READ_POINTER_MASK GENMASK(7, 0) +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_WRITE_POINTER_MASK GENMASK(15, 8) +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK GENMASK(23, 16) +#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK GENMASK(31, 24) + +#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0 0x00030020u +#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0 0x00030024u +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0 0x00030028u +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu +#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK BIT_MASK(3) + +#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN 0x00030200u +#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN_EN_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE 0x00030204u +#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0) + +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO 0x00041040u +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_DONE_MASK BIT_MASK(0) +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) +#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK GENMASK(31, 3) + +#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u +#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK GENMASK(15, 0) +#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK GENMASK(31, 16) + +#define MTL_VPU_HOST_MMU_IDR0 0x00200000u +#define MTL_VPU_HOST_MMU_IDR1 0x00200004u +#define MTL_VPU_HOST_MMU_IDR3 0x0020000cu +#define MTL_VPU_HOST_MMU_IDR5 0x00200014u +#define MTL_VPU_HOST_MMU_CR0 0x00200020u +#define MTL_VPU_HOST_MMU_CR0ACK 0x00200024u +#define MTL_VPU_HOST_MMU_CR1 0x00200028u +#define MTL_VPU_HOST_MMU_CR2 0x0020002cu +#define MTL_VPU_HOST_MMU_IRQ_CTRL 0x00200050u +#define MTL_VPU_HOST_MMU_IRQ_CTRLACK 0x00200054u + +#define MTL_VPU_HOST_MMU_GERROR 0x00200060u +#define MTL_VPU_HOST_MMU_GERROR_CMDQ_MASK BIT_MASK(0) +#define MTL_VPU_HOST_MMU_GERROR_EVTQ_ABT_MASK BIT_MASK(2) +#define MTL_VPU_HOST_MMU_GERROR_PRIQ_ABT_MASK BIT_MASK(3) +#define MTL_VPU_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4) +#define MTL_VPU_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5) +#define MTL_VPU_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6) +#define MTL_VPU_HOST_MMU_GERROR_MSI_ABT_MASK BIT_MASK(7) + +#define MTL_VPU_HOST_MMU_GERRORN 0x00200064u + +#define MTL_VPU_HOST_MMU_STRTAB_BASE 0x00200080u +#define MTL_VPU_HOST_MMU_STRTAB_BASE_CFG 0x00200088u +#define MTL_VPU_HOST_MMU_CMDQ_BASE 0x00200090u +#define MTL_VPU_HOST_MMU_CMDQ_PROD 0x00200098u +#define MTL_VPU_HOST_MMU_CMDQ_CONS 0x0020009cu +#define MTL_VPU_HOST_MMU_EVTQ_BASE 0x002000a0u +#define MTL_VPU_HOST_MMU_EVTQ_PROD 0x002000a8u +#define MTL_VPU_HOST_MMU_EVTQ_CONS 0x002000acu +#define MTL_VPU_HOST_MMU_EVTQ_PROD_SEC (0x002000a8u + SZ_64K) +#define MTL_VPU_HOST_MMU_EVTQ_CONS_SEC (0x002000acu + SZ_64K) + +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES 0x00360000u +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK BIT_MASK(0) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK BIT_MASK(2) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK BIT_MASK(3) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK BIT_MASK(4) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK BIT_MASK(5) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AW_CONTEXT_FLAG_MASK GENMASK(10, 6) +#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AR_CONTEXT_FLAG_MASK GENMASK(15, 11) + +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV 0x00360004u +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_AWMMUSSIDV_MASK BIT_MASK(0) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_ARMMUSSIDV_MASK BIT_MASK(1) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_AWMMUSSIDV_MASK BIT_MASK(2) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_ARMMUSSIDV_MASK BIT_MASK(3) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_AWMMUSSIDV_MASK BIT_MASK(4) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_ARMMUSSIDV_MASK BIT_MASK(5) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_AWMMUSSIDV_MASK BIT_MASK(6) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_ARMMUSSIDV_MASK BIT_MASK(7) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK BIT_MASK(8) +#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK BIT_MASK(9) + +#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE 0x04000000u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u +#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u + +#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET 0x06010004u +#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK BIT_MASK(1) + +#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR 0x06010018u +#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK BIT_MASK(1) + +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC 0x06010040u +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK BIT_MASK(0) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK BIT_MASK(1) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK BIT_MASK(2) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK BIT_MASK(3) +#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK GENMASK(31, 4) + +#define MTL_VPU_CPU_SS_TIM_WATCHDOG 0x0602009cu +#define MTL_VPU_CPU_SS_TIM_WDOG_EN 0x060200a4u +#define MTL_VPU_CPU_SS_TIM_SAFE 0x060200a8u +#define MTL_VPU_CPU_SS_TIM_IPC_FIFO 0x060200f0u + +#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG 0x06021008u +#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) + +#define MTL_VPU_CPU_SS_DOORBELL_0 0x06300000u +#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) + +#define MTL_VPU_CPU_SS_DOORBELL_1 0x06301000u + +#endif /* __IVPU_HW_MTL_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h new file mode 100644 index 000000000000..43c2c0c2d050 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_REG_IO_H__ +#define __IVPU_HW_REG_IO_H__ + +#include <linux/bitfield.h> +#include <linux/io.h> +#include <linux/iopoll.h> + +#include "ivpu_drv.h" + +#define REG_POLL_SLEEP_US 50 +#define REG_IO_ERROR 0xffffffff + +#define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__) +#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg)) +#define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__) +#define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__) +#define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__) + +#define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regv, (reg), #reg, __func__) +#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg)) +#define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__) +#define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__) +#define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__) + +#define REGV_WR32I(reg, stride, index, val) \ + ivpu_hw_reg_wr32_index(vdev, vdev->regv, (reg), (stride), (index), (val), #reg, __func__) + +#define REG_FLD(REG, FLD) \ + (REG##_##FLD##_MASK) +#define REG_FLD_NUM(REG, FLD, num) \ + FIELD_PREP(REG##_##FLD##_MASK, num) +#define REG_GET_FLD(REG, FLD, val) \ + FIELD_GET(REG##_##FLD##_MASK, val) +#define REG_CLR_FLD(REG, FLD, val) \ + ((val) & ~(REG##_##FLD##_MASK)) +#define REG_SET_FLD(REG, FLD, val) \ + ((val) | (REG##_##FLD##_MASK)) +#define REG_SET_FLD_NUM(REG, FLD, num, val) \ + (((val) & ~(REG##_##FLD##_MASK)) | FIELD_PREP(REG##_##FLD##_MASK, num)) +#define REG_TEST_FLD(REG, FLD, val) \ + ((REG##_##FLD##_MASK) == ((val) & (REG##_##FLD##_MASK))) +#define REG_TEST_FLD_NUM(REG, FLD, num, val) \ + ((num) == FIELD_GET(REG##_##FLD##_MASK, val)) + +#define REGB_POLL(reg, var, cond, timeout_us) \ + read_poll_timeout(REGB_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) + +#define REGV_POLL(reg, var, cond, timeout_us) \ + read_poll_timeout(REGV_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) + +#define REGB_POLL_FLD(reg, fld, val, timeout_us) \ +({ \ + u32 var; \ + REGB_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ +}) + +#define REGV_POLL_FLD(reg, fld, val, timeout_us) \ +({ \ + u32 var; \ + REGV_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ +}) + +static inline u32 +ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg, + const char *name, const char *func) +{ + u32 val = readl(base + reg); + + ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%08x\n", func, name, reg, val); + return val; +} + +static inline u64 +ivpu_hw_reg_rd64(struct ivpu_device *vdev, void __iomem *base, u32 reg, + const char *name, const char *func) +{ + u64 val = readq(base + reg); + + ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%016llx\n", func, name, reg, val); + return val; +} + +static inline void +ivpu_hw_reg_wr32(struct ivpu_device *vdev, void __iomem *base, u32 reg, u32 val, + const char *name, const char *func) +{ + ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%08x\n", func, name, reg, val); + writel(val, base + reg); +} + +static inline void +ivpu_hw_reg_wr64(struct ivpu_device *vdev, void __iomem *base, u32 reg, u64 val, + const char *name, const char *func) +{ + ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%016llx\n", func, name, reg, val); + writeq(val, base + reg); +} + +static inline void +ivpu_hw_reg_wr32_index(struct ivpu_device *vdev, void __iomem *base, u32 reg, + u32 stride, u32 index, u32 val, const char *name, + const char *func) +{ + reg += index * stride; + + ivpu_dbg(vdev, REG, "%s WR: %s_%d (0x%08x) <= 0x%08x\n", func, name, index, reg, val); + writel(val, base + reg); +} + +#endif /* __IVPU_HW_REG_IO_H__ */ diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c new file mode 100644 index 000000000000..3adcfa80fc0e --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -0,0 +1,510 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/genalloc.h> +#include <linux/highmem.h> +#include <linux/kthread.h> +#include <linux/wait.h> + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_hw.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_ipc.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_pm.h" + +#define IPC_MAX_RX_MSG 128 +#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD) + +struct ivpu_ipc_tx_buf { + struct ivpu_ipc_hdr ipc; + struct vpu_jsm_msg jsm; +}; + +struct ivpu_ipc_rx_msg { + struct list_head link; + struct ivpu_ipc_hdr *ipc_hdr; + struct vpu_jsm_msg *jsm_msg; +}; + +static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c, + struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr) +{ + ivpu_dbg(vdev, IPC, + "%s: vpu:0x%x (data_addr:0x%08x, data_size:0x%x, channel:0x%x, src_node:0x%x, dst_node:0x%x, status:0x%x)", + c, vpu_addr, ipc_hdr->data_addr, ipc_hdr->data_size, ipc_hdr->channel, + ipc_hdr->src_node, ipc_hdr->dst_node, ipc_hdr->status); +} + +static void ivpu_jsm_msg_dump(struct ivpu_device *vdev, char *c, + struct vpu_jsm_msg *jsm_msg, u32 vpu_addr) +{ + u32 *payload = (u32 *)&jsm_msg->payload; + + ivpu_dbg(vdev, JSM, + "%s: vpu:0x%08x (type:0x%x, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n", + c, vpu_addr, jsm_msg->type, jsm_msg->status, jsm_msg->request_id, jsm_msg->result, + payload[0], payload[1], payload[2], payload[3], payload[4]); +} + +static void +ivpu_ipc_rx_mark_free(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, + struct vpu_jsm_msg *jsm_msg) +{ + ipc_hdr->status = IVPU_IPC_HDR_FREE; + if (jsm_msg) + jsm_msg->status = VPU_JSM_MSG_FREE; + wmb(); /* Flush WC buffers for message statuses */ +} + +static void ivpu_ipc_mem_fini(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + ivpu_bo_free_internal(ipc->mem_rx); + ivpu_bo_free_internal(ipc->mem_tx); +} + +static int +ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct vpu_jsm_msg *req) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_tx_buf *tx_buf; + u32 tx_buf_vpu_addr; + u32 jsm_vpu_addr; + + tx_buf_vpu_addr = gen_pool_alloc(ipc->mm_tx, sizeof(*tx_buf)); + if (!tx_buf_vpu_addr) { + ivpu_err(vdev, "Failed to reserve IPC buffer, size %ld\n", + sizeof(*tx_buf)); + return -ENOMEM; + } + + tx_buf = ivpu_to_cpu_addr(ipc->mem_tx, tx_buf_vpu_addr); + if (drm_WARN_ON(&vdev->drm, !tx_buf)) { + gen_pool_free(ipc->mm_tx, tx_buf_vpu_addr, sizeof(*tx_buf)); + return -EIO; + } + + jsm_vpu_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm); + + if (tx_buf->ipc.status != IVPU_IPC_HDR_FREE) + ivpu_warn(vdev, "IPC message vpu:0x%x not released by firmware\n", + tx_buf_vpu_addr); + + if (tx_buf->jsm.status != VPU_JSM_MSG_FREE) + ivpu_warn(vdev, "JSM message vpu:0x%x not released by firmware\n", + jsm_vpu_addr); + + memset(tx_buf, 0, sizeof(*tx_buf)); + tx_buf->ipc.data_addr = jsm_vpu_addr; + /* TODO: Set data_size to actual JSM message size, not union of all messages */ + tx_buf->ipc.data_size = sizeof(*req); + tx_buf->ipc.channel = cons->channel; + tx_buf->ipc.src_node = 0; + tx_buf->ipc.dst_node = 1; + tx_buf->ipc.status = IVPU_IPC_HDR_ALLOCATED; + tx_buf->jsm.type = req->type; + tx_buf->jsm.status = VPU_JSM_MSG_ALLOCATED; + tx_buf->jsm.payload = req->payload; + + req->request_id = atomic_inc_return(&ipc->request_id); + tx_buf->jsm.request_id = req->request_id; + cons->request_id = req->request_id; + wmb(); /* Flush WC buffers for IPC, JSM msgs */ + + cons->tx_vpu_addr = tx_buf_vpu_addr; + + ivpu_jsm_msg_dump(vdev, "TX", &tx_buf->jsm, jsm_vpu_addr); + ivpu_ipc_msg_dump(vdev, "TX", &tx_buf->ipc, tx_buf_vpu_addr); + + return 0; +} + +static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + if (vpu_addr) + gen_pool_free(ipc->mm_tx, vpu_addr, sizeof(struct ivpu_ipc_tx_buf)); +} + +static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr) +{ + ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr); +} + +void +ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + INIT_LIST_HEAD(&cons->link); + cons->channel = channel; + cons->tx_vpu_addr = 0; + cons->request_id = 0; + spin_lock_init(&cons->rx_msg_lock); + INIT_LIST_HEAD(&cons->rx_msg_list); + init_waitqueue_head(&cons->rx_msg_wq); + + spin_lock_irq(&ipc->cons_list_lock); + list_add_tail(&cons->link, &ipc->cons_list); + spin_unlock_irq(&ipc->cons_list_lock); +} + +void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg, *r; + + spin_lock_irq(&ipc->cons_list_lock); + list_del(&cons->link); + spin_unlock_irq(&ipc->cons_list_lock); + + spin_lock_irq(&cons->rx_msg_lock); + list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) { + list_del(&rx_msg->link); + ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + atomic_dec(&ipc->rx_msg_count); + kfree(rx_msg); + } + spin_unlock_irq(&cons->rx_msg_lock); + + ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr); +} + +static int +ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + int ret; + + ret = mutex_lock_interruptible(&ipc->lock); + if (ret) + return ret; + + if (!ipc->on) { + ret = -EAGAIN; + goto unlock; + } + + ret = ivpu_ipc_tx_prepare(vdev, cons, req); + if (ret) + goto unlock; + + ivpu_ipc_tx(vdev, cons->tx_vpu_addr); + +unlock: + mutex_unlock(&ipc->lock); + return ret; +} + +int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_buf, + struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg; + int wait_ret, ret = 0; + + wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq, + (IS_KTHREAD() && kthread_should_stop()) || + !list_empty(&cons->rx_msg_list), + msecs_to_jiffies(timeout_ms)); + + if (IS_KTHREAD() && kthread_should_stop()) + return -EINTR; + + if (wait_ret == 0) + return -ETIMEDOUT; + + if (wait_ret < 0) + return -ERESTARTSYS; + + spin_lock_irq(&cons->rx_msg_lock); + rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link); + if (!rx_msg) { + spin_unlock_irq(&cons->rx_msg_lock); + return -EAGAIN; + } + list_del(&rx_msg->link); + spin_unlock_irq(&cons->rx_msg_lock); + + if (ipc_buf) + memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf)); + if (rx_msg->jsm_msg) { + u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload)); + + if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { + ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); + ret = -EBADMSG; + } + + if (ipc_payload) + memcpy(ipc_payload, rx_msg->jsm_msg, size); + } + + ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + atomic_dec(&ipc->rx_msg_count); + kfree(rx_msg); + + return ret; +} + +static int +ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, + unsigned long timeout_ms) +{ + struct ivpu_ipc_consumer cons; + int ret; + + ivpu_ipc_consumer_add(vdev, &cons, channel); + + ret = ivpu_ipc_send(vdev, &cons, req); + if (ret) { + ivpu_warn(vdev, "IPC send failed: %d\n", ret); + goto consumer_del; + } + + ret = ivpu_ipc_receive(vdev, &cons, NULL, resp, timeout_ms); + if (ret) { + ivpu_warn(vdev, "IPC receive failed: type 0x%x, ret %d\n", req->type, ret); + goto consumer_del; + } + + if (resp->type != expected_resp_type) { + ivpu_warn(vdev, "Invalid JSM response type: 0x%x\n", resp->type); + ret = -EBADE; + } + +consumer_del: + ivpu_ipc_consumer_del(vdev, &cons); + return ret; +} + +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, + unsigned long timeout_ms) +{ + struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; + struct vpu_jsm_msg hb_resp; + int ret, hb_ret; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp, + channel, timeout_ms); + if (ret != -ETIMEDOUT) + goto rpm_put; + + hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, + &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, + vdev->timeout.jsm); + if (hb_ret == -ETIMEDOUT) { + ivpu_hw_diagnose_failure(vdev); + ivpu_pm_schedule_recovery(vdev); + } + +rpm_put: + ivpu_rpm_put(vdev); + return ret; +} + +static bool +ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) +{ + if (cons->channel != ipc_hdr->channel) + return false; + + if (!jsm_msg || jsm_msg->request_id == cons->request_id) + return true; + + return false; +} + +static void +ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg; + unsigned long flags; + + lockdep_assert_held(&ipc->cons_list_lock); + + rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC); + if (!rx_msg) { + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + return; + } + + atomic_inc(&ipc->rx_msg_count); + + rx_msg->ipc_hdr = ipc_hdr; + rx_msg->jsm_msg = jsm_msg; + + spin_lock_irqsave(&cons->rx_msg_lock, flags); + list_add_tail(&rx_msg->link, &cons->rx_msg_list); + spin_unlock_irqrestore(&cons->rx_msg_lock, flags); + + wake_up(&cons->rx_msg_wq); +} + +int ivpu_ipc_irq_handler(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_consumer *cons; + struct ivpu_ipc_hdr *ipc_hdr; + struct vpu_jsm_msg *jsm_msg; + unsigned long flags; + bool dispatched; + u32 vpu_addr; + + /* + * Driver needs to purge all messages from IPC FIFO to clear IPC interrupt. + * Without purge IPC FIFO to 0 next IPC interrupts won't be generated. + */ + while (ivpu_hw_reg_ipc_rx_count_get(vdev)) { + vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev); + if (vpu_addr == REG_IO_ERROR) { + ivpu_err(vdev, "Failed to read IPC rx addr register\n"); + return -EIO; + } + + ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr); + if (!ipc_hdr) { + ivpu_warn(vdev, "IPC msg 0x%x out of range\n", vpu_addr); + continue; + } + ivpu_ipc_msg_dump(vdev, "RX", ipc_hdr, vpu_addr); + + jsm_msg = NULL; + if (ipc_hdr->channel != IVPU_IPC_CHAN_BOOT_MSG) { + jsm_msg = ivpu_to_cpu_addr(ipc->mem_rx, ipc_hdr->data_addr); + if (!jsm_msg) { + ivpu_warn(vdev, "JSM msg 0x%x out of range\n", ipc_hdr->data_addr); + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, NULL); + continue; + } + ivpu_jsm_msg_dump(vdev, "RX", jsm_msg, ipc_hdr->data_addr); + } + + if (atomic_read(&ipc->rx_msg_count) > IPC_MAX_RX_MSG) { + ivpu_warn(vdev, "IPC RX msg dropped, msg count %d\n", IPC_MAX_RX_MSG); + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + continue; + } + + dispatched = false; + spin_lock_irqsave(&ipc->cons_list_lock, flags); + list_for_each_entry(cons, &ipc->cons_list, link) { + if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) { + ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg); + dispatched = true; + break; + } + } + spin_unlock_irqrestore(&ipc->cons_list_lock, flags); + + if (!dispatched) { + ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr); + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + } + } + + return 0; +} + +int ivpu_ipc_init(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + int ret = -ENOMEM; + + ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + if (!ipc->mem_tx) + return ret; + + ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); + if (!ipc->mem_rx) + goto err_free_tx; + + ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT), + -1, "TX_IPC_JSM"); + if (IS_ERR(ipc->mm_tx)) { + ret = PTR_ERR(ipc->mm_tx); + ivpu_err(vdev, "Failed to create gen pool, %pe\n", ipc->mm_tx); + goto err_free_rx; + } + + ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ipc->mem_tx->base.size, -1); + if (ret) { + ivpu_err(vdev, "gen_pool_add failed, ret %d\n", ret); + goto err_free_rx; + } + + INIT_LIST_HEAD(&ipc->cons_list); + spin_lock_init(&ipc->cons_list_lock); + drmm_mutex_init(&vdev->drm, &ipc->lock); + + ivpu_ipc_reset(vdev); + return 0; + +err_free_rx: + ivpu_bo_free_internal(ipc->mem_rx); +err_free_tx: + ivpu_bo_free_internal(ipc->mem_tx); + return ret; +} + +void ivpu_ipc_fini(struct ivpu_device *vdev) +{ + ivpu_ipc_mem_fini(vdev); +} + +void ivpu_ipc_enable(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + mutex_lock(&ipc->lock); + ipc->on = true; + mutex_unlock(&ipc->lock); +} + +void ivpu_ipc_disable(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_consumer *cons, *c; + unsigned long flags; + + mutex_lock(&ipc->lock); + ipc->on = false; + mutex_unlock(&ipc->lock); + + spin_lock_irqsave(&ipc->cons_list_lock, flags); + list_for_each_entry_safe(cons, c, &ipc->cons_list, link) + wake_up(&cons->rx_msg_wq); + spin_unlock_irqrestore(&ipc->cons_list_lock, flags); +} + +void ivpu_ipc_reset(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + + mutex_lock(&ipc->lock); + + memset(ipc->mem_tx->kvaddr, 0, ipc->mem_tx->base.size); + memset(ipc->mem_rx->kvaddr, 0, ipc->mem_rx->base.size); + wmb(); /* Flush WC buffers for TX and RX rings */ + + mutex_unlock(&ipc->lock); +} diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h new file mode 100644 index 000000000000..9838202ecfad --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_IPC_H__ +#define __IVPU_IPC_H__ + +#include <linux/interrupt.h> +#include <linux/spinlock.h> + +#include "vpu_jsm_api.h" + +struct ivpu_bo; + +/* VPU FW boot notification */ +#define IVPU_IPC_CHAN_BOOT_MSG 0x3ff +#define IVPU_IPC_BOOT_MSG_DATA_ADDR 0x424f4f54 + +/* The alignment to be used for IPC Buffers and IPC Data. */ +#define IVPU_IPC_ALIGNMENT 64 + +#define IVPU_IPC_HDR_FREE 0 +#define IVPU_IPC_HDR_ALLOCATED 0 + +/** + * struct ivpu_ipc_hdr - The IPC message header structure, exchanged + * with the VPU device firmware. + * @data_addr: The VPU address of the payload (JSM message) + * @data_size: The size of the payload. + * @channel: The channel used. + * @src_node: The Node ID of the sender. + * @dst_node: The Node ID of the intended receiver. + * @status: IPC buffer usage status + */ +struct ivpu_ipc_hdr { + u32 data_addr; + u32 data_size; + u16 channel; + u8 src_node; + u8 dst_node; + u8 status; +} __packed __aligned(IVPU_IPC_ALIGNMENT); + +struct ivpu_ipc_consumer { + struct list_head link; + u32 channel; + u32 tx_vpu_addr; + u32 request_id; + + spinlock_t rx_msg_lock; /* Protects rx_msg_list */ + struct list_head rx_msg_list; + wait_queue_head_t rx_msg_wq; +}; + +struct ivpu_ipc_info { + struct gen_pool *mm_tx; + struct ivpu_bo *mem_tx; + struct ivpu_bo *mem_rx; + + atomic_t rx_msg_count; + + spinlock_t cons_list_lock; /* Protects cons_list */ + struct list_head cons_list; + + atomic_t request_id; + struct mutex lock; /* Lock on status */ + bool on; +}; + +int ivpu_ipc_init(struct ivpu_device *vdev); +void ivpu_ipc_fini(struct ivpu_device *vdev); + +void ivpu_ipc_enable(struct ivpu_device *vdev); +void ivpu_ipc_disable(struct ivpu_device *vdev); +void ivpu_ipc_reset(struct ivpu_device *vdev); + +int ivpu_ipc_irq_handler(struct ivpu_device *vdev); + +void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + u32 channel); +void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons); + +int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload, + unsigned long timeout_ms); + +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, + unsigned long timeout_ms); + +#endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c new file mode 100644 index 000000000000..94068aedf97c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_job.c @@ -0,0 +1,615 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <drm/drm_file.h> + +#include <linux/bitfield.h> +#include <linux/highmem.h> +#include <linux/kthread.h> +#include <linux/pci.h> +#include <linux/module.h> +#include <uapi/drm/ivpu_accel.h> + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_ipc.h" +#include "ivpu_job.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_pm.h" + +#define CMD_BUF_IDX 0 +#define JOB_ID_JOB_MASK GENMASK(7, 0) +#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) +#define JOB_MAX_BUFFER_COUNT 65535 + +static unsigned int ivpu_tdr_timeout_ms; +module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644); +MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); + +static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) +{ + ivpu_hw_reg_db_set(vdev, cmdq->db_id); +} + +static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct vpu_job_queue_header *jobq_header; + struct ivpu_cmdq *cmdq; + + cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); + if (!cmdq) + return NULL; + + cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); + if (!cmdq->mem) + goto cmdq_free; + + cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); + cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) / + sizeof(struct vpu_job_queue_entry)); + + cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr; + jobq_header = &cmdq->jobq->header; + jobq_header->engine_idx = engine; + jobq_header->head = 0; + jobq_header->tail = 0; + wmb(); /* Flush WC buffer for jobq->header */ + + return cmdq; + +cmdq_free: + kfree(cmdq); + return NULL; +} + +static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + if (!cmdq) + return; + + ivpu_bo_free_internal(cmdq->mem); + kfree(cmdq); +} + +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + int ret; + + lockdep_assert_held(&file_priv->lock); + + if (!cmdq) { + cmdq = ivpu_cmdq_alloc(file_priv, engine); + if (!cmdq) + return NULL; + file_priv->cmdq[engine] = cmdq; + } + + if (cmdq->db_registered) + return cmdq; + + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, + cmdq->mem->vpu_addr, cmdq->mem->base.size); + if (ret) + return NULL; + + cmdq->db_registered = true; + + return cmdq; +} + +static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + + lockdep_assert_held(&file_priv->lock); + + if (cmdq) { + file_priv->cmdq[engine] = NULL; + if (cmdq->db_registered) + ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); + + ivpu_cmdq_free(file_priv, cmdq); + } +} + +void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) +{ + int i; + + mutex_lock(&file_priv->lock); + + for (i = 0; i < IVPU_NUM_ENGINES; i++) + ivpu_cmdq_release_locked(file_priv, i); + + mutex_unlock(&file_priv->lock); +} + +/* + * Mark the doorbell as unregistered and reset job queue pointers. + * This function needs to be called when the VPU hardware is restarted + * and FW looses job queue state. The next time job queue is used it + * will be registered again. + */ +static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + + lockdep_assert_held(&file_priv->lock); + + if (cmdq) { + cmdq->db_registered = false; + cmdq->jobq->header.head = 0; + cmdq->jobq->header.tail = 0; + wmb(); /* Flush WC buffer for jobq header */ + } +} + +static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) +{ + int i; + + mutex_lock(&file_priv->lock); + + for (i = 0; i < IVPU_NUM_ENGINES; i++) + ivpu_cmdq_reset_locked(file_priv, i); + + mutex_unlock(&file_priv->lock); +} + +void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); + if (!file_priv) + continue; + + ivpu_cmdq_reset_all(file_priv); + + ivpu_file_priv_put(&file_priv); + } +} + +static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) +{ + struct ivpu_device *vdev = job->vdev; + struct vpu_job_queue_header *header = &cmdq->jobq->header; + struct vpu_job_queue_entry *entry; + u32 tail = READ_ONCE(header->tail); + u32 next_entry = (tail + 1) % cmdq->entry_count; + + /* Check if there is space left in job queue */ + if (next_entry == header->head) { + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", + job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); + return -EBUSY; + } + + entry = &cmdq->jobq->job[tail]; + entry->batch_buf_addr = job->cmd_buf_vpu_addr; + entry->job_id = job->job_id; + entry->flags = 0; + wmb(); /* Ensure that tail is updated after filling entry */ + header->tail = next_entry; + wmb(); /* Flush WC buffer for jobq header */ + + return 0; +} + +struct ivpu_fence { + struct dma_fence base; + spinlock_t lock; /* protects base */ + struct ivpu_device *vdev; +}; + +static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence) +{ + return container_of(fence, struct ivpu_fence, base); +} + +static const char *ivpu_fence_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence) +{ + struct ivpu_fence *ivpu_fence = to_vpu_fence(fence); + + return dev_name(ivpu_fence->vdev->drm.dev); +} + +static const struct dma_fence_ops ivpu_fence_ops = { + .get_driver_name = ivpu_fence_get_driver_name, + .get_timeline_name = ivpu_fence_get_timeline_name, +}; + +static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) +{ + struct ivpu_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + fence->vdev = vdev; + spin_lock_init(&fence->lock); + dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1); + + return &fence->base; +} + +static void job_get(struct ivpu_job *job, struct ivpu_job **link) +{ + struct ivpu_device *vdev = job->vdev; + + kref_get(&job->ref); + *link = job; + + ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); +} + +static void job_release(struct kref *ref) +{ + struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); + struct ivpu_device *vdev = job->vdev; + u32 i; + + for (i = 0; i < job->bo_count; i++) + if (job->bos[i]) + drm_gem_object_put(&job->bos[i]->base); + + dma_fence_put(job->done_fence); + ivpu_file_priv_put(&job->file_priv); + + ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); + kfree(job); + + /* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */ + ivpu_rpm_put(vdev); +} + +static void job_put(struct ivpu_job *job) +{ + struct ivpu_device *vdev = job->vdev; + + ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); + kref_put(&job->ref, job_release); +} + +static struct ivpu_job * +ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_job *job; + size_t buf_size; + int ret; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return NULL; + + buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); + job = kzalloc(buf_size, GFP_KERNEL); + if (!job) + goto err_rpm_put; + + kref_init(&job->ref); + + job->vdev = vdev; + job->engine_idx = engine_idx; + job->bo_count = bo_count; + job->done_fence = ivpu_fence_create(vdev); + if (!job->done_fence) { + ivpu_warn_ratelimited(vdev, "Failed to create a fence\n"); + goto err_free_job; + } + + job->file_priv = ivpu_file_priv_get(file_priv); + + ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); + + return job; + +err_free_job: + kfree(job); +err_rpm_put: + ivpu_rpm_put(vdev); + return NULL; +} + +static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) +{ + struct ivpu_job *job; + + job = xa_erase(&vdev->submitted_jobs_xa, job_id); + if (!job) + return -ENOENT; + + if (job->file_priv->has_mmu_faults) + job_status = VPU_JSM_STATUS_ABORTED; + + job->bos[CMD_BUF_IDX]->job_status = job_status; + dma_fence_signal(job->done_fence); + + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + + job_put(job); + return 0; +} + +static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg) +{ + struct vpu_ipc_msg_payload_job_done *payload; + struct vpu_jsm_msg *job_ret_msg = msg; + int ret; + + payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload; + + ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); + if (ret) + ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret); +} + +void ivpu_jobs_abort_all(struct ivpu_device *vdev) +{ + struct ivpu_job *job; + unsigned long id; + + xa_for_each(&vdev->submitted_jobs_xa, id, job) + ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); +} + +static int ivpu_direct_job_submission(struct ivpu_job *job) +{ + struct ivpu_file_priv *file_priv = job->file_priv; + struct ivpu_device *vdev = job->vdev; + struct xa_limit job_id_range; + struct ivpu_cmdq *cmdq; + int ret; + + mutex_lock(&file_priv->lock); + + cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); + if (!cmdq) { + ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", + file_priv->ctx.id, job->engine_idx); + ret = -EINVAL; + goto err_unlock; + } + + job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; + + job_get(job, &job); + ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); + goto err_job_put; + } + + ret = ivpu_cmdq_push_job(cmdq, job); + if (ret) + goto err_xa_erase; + + ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n", + job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id, + job->engine_idx, cmdq->jobq->header.tail); + + if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) { + ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + cmdq->jobq->header.head = cmdq->jobq->header.tail; + wmb(); /* Flush WC buffer for jobq header */ + } else { + ivpu_cmdq_ring_db(vdev, cmdq); + } + + mutex_unlock(&file_priv->lock); + return 0; + +err_xa_erase: + xa_erase(&vdev->submitted_jobs_xa, job->job_id); +err_job_put: + job_put(job); +err_unlock: + mutex_unlock(&file_priv->lock); + return ret; +} + +static int +ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles, + u32 buf_count, u32 commands_offset) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ww_acquire_ctx acquire_ctx; + struct ivpu_bo *bo; + int ret; + u32 i; + + for (i = 0; i < buf_count; i++) { + struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]); + + if (!obj) + return -ENOENT; + + job->bos[i] = to_ivpu_bo(obj); + + ret = ivpu_bo_pin(job->bos[i]); + if (ret) + return ret; + } + + bo = job->bos[CMD_BUF_IDX]; + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) { + ivpu_warn(vdev, "Buffer is already in use\n"); + return -EBUSY; + } + + if (commands_offset >= bo->base.size) { + ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset); + return -EINVAL; + } + + job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; + + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, + &acquire_ctx); + if (ret) { + ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); + return ret; + } + + for (i = 0; i < buf_count; i++) { + ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); + if (ret) { + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); + goto unlock_reservations; + } + } + + for (i = 0; i < buf_count; i++) + dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); + +unlock_reservations: + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); + + wmb(); /* Flush write combining buffers */ + + return ret; +} + +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + int ret = 0; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_submit *params = data; + struct ivpu_job *job; + u32 *buf_handles; + + if (params->engine > DRM_IVPU_ENGINE_COPY) + return -EINVAL; + + if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) + return -EINVAL; + + if (!IS_ALIGNED(params->commands_offset, 8)) + return -EINVAL; + + if (!file_priv->ctx.id) + return -EINVAL; + + if (file_priv->has_mmu_faults) + return -EBADFD; + + buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL); + if (!buf_handles) + return -ENOMEM; + + ret = copy_from_user(buf_handles, + (void __user *)params->buffers_ptr, + params->buffer_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto free_handles; + } + + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", + file_priv->ctx.id, params->buffer_count); + + job = ivpu_create_job(file_priv, params->engine, params->buffer_count); + if (!job) { + ivpu_err(vdev, "Failed to create job\n"); + ret = -ENOMEM; + goto free_handles; + } + + ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, + params->commands_offset); + if (ret) { + ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); + goto job_put; + } + + ret = ivpu_direct_job_submission(job); + if (ret) { + dma_fence_signal(job->done_fence); + ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); + } + +job_put: + job_put(job); +free_handles: + kfree(buf_handles); + + return ret; +} + +static int ivpu_job_done_thread(void *arg) +{ + struct ivpu_device *vdev = (struct ivpu_device *)arg; + struct ivpu_ipc_consumer cons; + struct vpu_jsm_msg jsm_msg; + bool jobs_submitted; + unsigned int timeout; + int ret; + + ivpu_dbg(vdev, JOB, "Started %s\n", __func__); + + ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET); + + while (!kthread_should_stop()) { + timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); + ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout); + if (!ret) { + ivpu_job_done_message(vdev, &jsm_msg); + } else if (ret == -ETIMEDOUT) { + if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { + ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); + ivpu_hw_diagnose_failure(vdev); + ivpu_pm_schedule_recovery(vdev); + } + } + } + + ivpu_ipc_consumer_del(vdev, &cons); + + ivpu_jobs_abort_all(vdev); + + ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__); + return 0; +} + +int ivpu_job_done_thread_init(struct ivpu_device *vdev) +{ + struct task_struct *thread; + + thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread"); + if (IS_ERR(thread)) { + ivpu_err(vdev, "Failed to start job completion thread\n"); + return -EIO; + } + + get_task_struct(thread); + wake_up_process(thread); + + vdev->job_done_thread = thread; + + return 0; +} + +void ivpu_job_done_thread_fini(struct ivpu_device *vdev) +{ + kthread_stop(vdev->job_done_thread); + put_task_struct(vdev->job_done_thread); +} diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h new file mode 100644 index 000000000000..aa1f0b9479b0 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_job.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_JOB_H__ +#define __IVPU_JOB_H__ + +#include <linux/kref.h> +#include <linux/idr.h> + +#include "ivpu_gem.h" + +struct ivpu_device; +struct ivpu_file_priv; + +/** + * struct ivpu_cmdq - Object representing device queue used to send jobs. + * @jobq: Pointer to job queue memory shared with the device + * @mem: Memory allocated for the job queue, shared with device + * @entry_count Number of job entries in the queue + * @db_id: Doorbell assigned to this job queue + * @db_registered: True if doorbell is registered in device + */ +struct ivpu_cmdq { + struct vpu_job_queue *jobq; + struct ivpu_bo *mem; + u32 entry_count; + u32 db_id; + bool db_registered; +}; + +/** + * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer. + * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW. + * This is a unit of execution, and be tracked by the job_id for + * any status reporting from VPU FW through IPC JOB RET/DONE message. + * @file_priv: The client that submitted this job + * @job_id: Job ID for KMD tracking and job status reporting from VPU FW + * @status: Status of the Job from IPC JOB RET/DONE message + * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job + * @submit_status_offset: Offset within batch buffer where job completion handler + will update the job status + */ +struct ivpu_job { + struct kref ref; + struct ivpu_device *vdev; + struct ivpu_file_priv *file_priv; + struct dma_fence *done_fence; + u64 cmd_buf_vpu_addr; + u32 job_id; + u32 engine_idx; + size_t bo_count; + struct ivpu_bo *bos[]; +}; + +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); +void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); + +int ivpu_job_done_thread_init(struct ivpu_device *vdev); +void ivpu_job_done_thread_fini(struct ivpu_device *vdev); + +void ivpu_jobs_abort_all(struct ivpu_device *vdev); + +#endif /* __IVPU_JOB_H__ */ diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c new file mode 100644 index 000000000000..831bfd2b2d39 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_ipc.h" +#include "ivpu_jsm_msg.h" + +int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, + u64 jobq_base, u32 jobq_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_REGISTER_DB }; + struct vpu_jsm_msg resp; + int ret = 0; + + req.payload.register_db.db_idx = db_id; + req.payload.register_db.jobq_base = jobq_base; + req.payload.register_db.jobq_size = jobq_size; + req.payload.register_db.host_ssid = ctx_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_err(vdev, "Failed to register doorbell %d: %d\n", db_id, ret); + return ret; + } + + ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id); + + return 0; +} + +int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_UNREGISTER_DB }; + struct vpu_jsm_msg resp; + int ret = 0; + + req.payload.unregister_db.db_idx = db_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret); + return ret; + } + + ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id); + + return 0; +} + +int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; + struct vpu_jsm_msg resp; + int ret; + + if (engine > VPU_ENGINE_COPY) + return -EINVAL; + + req.payload.query_engine_hb.engine_idx = engine; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_err(vdev, "Failed to get heartbeat from engine %d: %d\n", engine, ret); + return ret; + } + + *heartbeat = resp.payload.query_engine_hb_done.heartbeat; + return ret; +} + +int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_RESET }; + struct vpu_jsm_msg resp; + int ret; + + if (engine > VPU_ENGINE_COPY) + return -EINVAL; + + req.payload.engine_reset.engine_idx = engine; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err(vdev, "Failed to reset engine %d: %d\n", engine, ret); + + return ret; +} + +int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_PREEMPT }; + struct vpu_jsm_msg resp; + int ret; + + if (engine > VPU_ENGINE_COPY) + return -EINVAL; + + req.payload.engine_preempt.engine_idx = engine; + req.payload.engine_preempt.preempt_id = preempt_id; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_PREEMPT_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_err(vdev, "Failed to preempt engine %d: %d\n", engine, ret); + + return ret; +} + +int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DYNDBG_CONTROL }; + struct vpu_jsm_msg resp; + int ret; + + if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1)) + return -ENOMEM; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); + + return ret; +} + +int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask, + u64 *trace_hw_component_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_GET_CAPABILITY }; + struct vpu_jsm_msg resp; + int ret; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn(vdev, "Failed to get trace capability: %d\n", ret); + return ret; + } + + *trace_destination_mask = resp.payload.trace_capability.trace_destination_mask; + *trace_hw_component_mask = resp.payload.trace_capability.trace_hw_component_mask; + + return ret; +} + +int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask, + u64 trace_hw_component_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_SET_CONFIG }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.trace_config.trace_level = trace_level; + req.payload.trace_config.trace_destination_mask = trace_destination_mask; + req.payload.trace_config.trace_hw_component_mask = trace_hw_component_mask; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn(vdev, "Failed to set config: %d\n", ret); + + return ret; +} + +int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_SSID_RELEASE }; + struct vpu_jsm_msg resp; + + req.payload.ssid_release.host_ssid = host_ssid; + + return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h new file mode 100644 index 000000000000..ab50d7b017c1 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_JSM_MSG_H__ +#define __IVPU_JSM_MSG_H__ + +#include "vpu_jsm_api.h" + +int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, + u64 jobq_base, u32 jobq_size); +int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id); +int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat); +int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine); +int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id); +int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size); +int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask, + u64 *trace_hw_component_mask); +int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask, + u64 trace_hw_component_mask); +int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid); +#endif diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c new file mode 100644 index 000000000000..694e978aba66 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -0,0 +1,883 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/circ_buf.h> +#include <linux/highmem.h> + +#include "ivpu_drv.h" +#include "ivpu_hw_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" +#include "ivpu_pm.h" + +#define IVPU_MMU_IDR0_REF 0x080f3e0f +#define IVPU_MMU_IDR0_REF_SIMICS 0x080f3e1f +#define IVPU_MMU_IDR1_REF 0x0e739d18 +#define IVPU_MMU_IDR3_REF 0x0000003c +#define IVPU_MMU_IDR5_REF 0x00040070 +#define IVPU_MMU_IDR5_REF_SIMICS 0x00000075 +#define IVPU_MMU_IDR5_REF_FPGA 0x00800075 + +#define IVPU_MMU_CDTAB_ENT_SIZE 64 +#define IVPU_MMU_CDTAB_ENT_COUNT_LOG2 8 /* 256 entries */ +#define IVPU_MMU_CDTAB_ENT_COUNT ((u32)1 << IVPU_MMU_CDTAB_ENT_COUNT_LOG2) + +#define IVPU_MMU_STREAM_ID0 0 +#define IVPU_MMU_STREAM_ID3 3 + +#define IVPU_MMU_STRTAB_ENT_SIZE 64 +#define IVPU_MMU_STRTAB_ENT_COUNT 4 +#define IVPU_MMU_STRTAB_CFG_LOG2SIZE 2 +#define IVPU_MMU_STRTAB_CFG IVPU_MMU_STRTAB_CFG_LOG2SIZE + +#define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */ +#define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2) +#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1) +#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1) +#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1) +#define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK) + +#define IVPU_MMU_CMDQ_CMD_SIZE 16 +#define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE) + +#define IVPU_MMU_EVTQ_CMD_SIZE 32 +#define IVPU_MMU_EVTQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_EVTQ_CMD_SIZE) + +#define IVPU_MMU_CMD_OPCODE GENMASK(7, 0) + +#define IVPU_MMU_CMD_SYNC_0_CS GENMASK(13, 12) +#define IVPU_MMU_CMD_SYNC_0_MSH GENMASK(23, 22) +#define IVPU_MMU_CMD_SYNC_0_MSI_ATTR GENMASK(27, 24) +#define IVPU_MMU_CMD_SYNC_0_MSI_ATTR GENMASK(27, 24) +#define IVPU_MMU_CMD_SYNC_0_MSI_DATA GENMASK(63, 32) + +#define IVPU_MMU_CMD_CFGI_0_SSEC BIT(10) +#define IVPU_MMU_CMD_CFGI_0_SSV BIT(11) +#define IVPU_MMU_CMD_CFGI_0_SSID GENMASK(31, 12) +#define IVPU_MMU_CMD_CFGI_0_SID GENMASK(63, 32) +#define IVPU_MMU_CMD_CFGI_1_RANGE GENMASK(4, 0) + +#define IVPU_MMU_CMD_TLBI_0_ASID GENMASK(63, 48) +#define IVPU_MMU_CMD_TLBI_0_VMID GENMASK(47, 32) + +#define CMD_PREFETCH_CFG 0x1 +#define CMD_CFGI_STE 0x3 +#define CMD_CFGI_ALL 0x4 +#define CMD_CFGI_CD 0x5 +#define CMD_CFGI_CD_ALL 0x6 +#define CMD_TLBI_NH_ASID 0x11 +#define CMD_TLBI_EL2_ALL 0x20 +#define CMD_TLBI_NSNH_ALL 0x30 +#define CMD_SYNC 0x46 + +#define IVPU_MMU_EVT_F_UUT 0x01 +#define IVPU_MMU_EVT_C_BAD_STREAMID 0x02 +#define IVPU_MMU_EVT_F_STE_FETCH 0x03 +#define IVPU_MMU_EVT_C_BAD_STE 0x04 +#define IVPU_MMU_EVT_F_BAD_ATS_TREQ 0x05 +#define IVPU_MMU_EVT_F_STREAM_DISABLED 0x06 +#define IVPU_MMU_EVT_F_TRANSL_FORBIDDEN 0x07 +#define IVPU_MMU_EVT_C_BAD_SUBSTREAMID 0x08 +#define IVPU_MMU_EVT_F_CD_FETCH 0x09 +#define IVPU_MMU_EVT_C_BAD_CD 0x0a +#define IVPU_MMU_EVT_F_WALK_EABT 0x0b +#define IVPU_MMU_EVT_F_TRANSLATION 0x10 +#define IVPU_MMU_EVT_F_ADDR_SIZE 0x11 +#define IVPU_MMU_EVT_F_ACCESS 0x12 +#define IVPU_MMU_EVT_F_PERMISSION 0x13 +#define IVPU_MMU_EVT_F_TLB_CONFLICT 0x20 +#define IVPU_MMU_EVT_F_CFG_CONFLICT 0x21 +#define IVPU_MMU_EVT_E_PAGE_REQUEST 0x24 +#define IVPU_MMU_EVT_F_VMS_FETCH 0x25 + +#define IVPU_MMU_EVT_OP_MASK GENMASK_ULL(7, 0) +#define IVPU_MMU_EVT_SSID_MASK GENMASK_ULL(31, 12) + +#define IVPU_MMU_Q_BASE_RWA BIT(62) +#define IVPU_MMU_Q_BASE_ADDR_MASK GENMASK_ULL(51, 5) +#define IVPU_MMU_STRTAB_BASE_RA BIT(62) +#define IVPU_MMU_STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6) + +#define IVPU_MMU_IRQ_EVTQ_EN BIT(2) +#define IVPU_MMU_IRQ_GERROR_EN BIT(0) + +#define IVPU_MMU_CR0_ATSCHK BIT(4) +#define IVPU_MMU_CR0_CMDQEN BIT(3) +#define IVPU_MMU_CR0_EVTQEN BIT(2) +#define IVPU_MMU_CR0_PRIQEN BIT(1) +#define IVPU_MMU_CR0_SMMUEN BIT(0) + +#define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10) +#define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8) +#define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6) +#define IVPU_MMU_CR1_QUEUE_SH GENMASK(5, 4) +#define IVPU_MMU_CR1_QUEUE_OC GENMASK(3, 2) +#define IVPU_MMU_CR1_QUEUE_IC GENMASK(1, 0) +#define IVPU_MMU_CACHE_NC 0 +#define IVPU_MMU_CACHE_WB 1 +#define IVPU_MMU_CACHE_WT 2 +#define IVPU_MMU_SH_NSH 0 +#define IVPU_MMU_SH_OSH 2 +#define IVPU_MMU_SH_ISH 3 + +#define IVPU_MMU_CMDQ_OP GENMASK_ULL(7, 0) + +#define IVPU_MMU_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) +#define IVPU_MMU_CD_0_TCR_TG0 GENMASK_ULL(7, 6) +#define IVPU_MMU_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) +#define IVPU_MMU_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10) +#define IVPU_MMU_CD_0_TCR_SH0 GENMASK_ULL(13, 12) +#define IVPU_MMU_CD_0_TCR_EPD0 BIT_ULL(14) +#define IVPU_MMU_CD_0_TCR_EPD1 BIT_ULL(30) +#define IVPU_MMU_CD_0_ENDI BIT(15) +#define IVPU_MMU_CD_0_V BIT(31) +#define IVPU_MMU_CD_0_TCR_IPS GENMASK_ULL(34, 32) +#define IVPU_MMU_CD_0_TCR_TBI0 BIT_ULL(38) +#define IVPU_MMU_CD_0_AA64 BIT(41) +#define IVPU_MMU_CD_0_S BIT(44) +#define IVPU_MMU_CD_0_R BIT(45) +#define IVPU_MMU_CD_0_A BIT(46) +#define IVPU_MMU_CD_0_ASET BIT(47) +#define IVPU_MMU_CD_0_ASID GENMASK_ULL(63, 48) + +#define IVPU_MMU_CD_1_TTB0_MASK GENMASK_ULL(51, 4) + +#define IVPU_MMU_STE_0_S1CDMAX GENMASK_ULL(63, 59) +#define IVPU_MMU_STE_0_S1FMT GENMASK_ULL(5, 4) +#define IVPU_MMU_STE_0_S1FMT_LINEAR 0 +#define IVPU_MMU_STE_DWORDS 8 +#define IVPU_MMU_STE_0_CFG_S1_TRANS 5 +#define IVPU_MMU_STE_0_CFG GENMASK_ULL(3, 1) +#define IVPU_MMU_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) +#define IVPU_MMU_STE_0_V BIT(0) + +#define IVPU_MMU_STE_1_STRW_NSEL1 0ul +#define IVPU_MMU_STE_1_CONT GENMASK_ULL(16, 13) +#define IVPU_MMU_STE_1_STRW GENMASK_ULL(31, 30) +#define IVPU_MMU_STE_1_PRIVCFG GENMASK_ULL(49, 48) +#define IVPU_MMU_STE_1_PRIVCFG_UNPRIV 2ul +#define IVPU_MMU_STE_1_INSTCFG GENMASK_ULL(51, 50) +#define IVPU_MMU_STE_1_INSTCFG_DATA 2ul +#define IVPU_MMU_STE_1_MEV BIT(19) +#define IVPU_MMU_STE_1_S1STALLD BIT(27) +#define IVPU_MMU_STE_1_S1C_CACHE_NC 0ul +#define IVPU_MMU_STE_1_S1C_CACHE_WBRA 1ul +#define IVPU_MMU_STE_1_S1C_CACHE_WT 2ul +#define IVPU_MMU_STE_1_S1C_CACHE_WB 3ul +#define IVPU_MMU_STE_1_S1CIR GENMASK_ULL(3, 2) +#define IVPU_MMU_STE_1_S1COR GENMASK_ULL(5, 4) +#define IVPU_MMU_STE_1_S1CSH GENMASK_ULL(7, 6) +#define IVPU_MMU_STE_1_S1DSS GENMASK_ULL(1, 0) +#define IVPU_MMU_STE_1_S1DSS_TERMINATE 0x0 + +#define IVPU_MMU_REG_TIMEOUT_US (10 * USEC_PER_MSEC) +#define IVPU_MMU_QUEUE_TIMEOUT_US (100 * USEC_PER_MSEC) + +#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \ + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT))) + +static char *ivpu_mmu_event_to_str(u32 cmd) +{ + switch (cmd) { + case IVPU_MMU_EVT_F_UUT: + return "Unsupported Upstream Transaction"; + case IVPU_MMU_EVT_C_BAD_STREAMID: + return "Transaction StreamID out of range"; + case IVPU_MMU_EVT_F_STE_FETCH: + return "Fetch of STE caused external abort"; + case IVPU_MMU_EVT_C_BAD_STE: + return "Used STE invalid"; + case IVPU_MMU_EVT_F_BAD_ATS_TREQ: + return "Address Request disallowed for a StreamID"; + case IVPU_MMU_EVT_F_STREAM_DISABLED: + return "Transaction marks non-substream disabled"; + case IVPU_MMU_EVT_F_TRANSL_FORBIDDEN: + return "MMU bypass is disallowed for this StreamID"; + case IVPU_MMU_EVT_C_BAD_SUBSTREAMID: + return "Invalid StreamID"; + case IVPU_MMU_EVT_F_CD_FETCH: + return "Fetch of CD caused external abort"; + case IVPU_MMU_EVT_C_BAD_CD: + return "Fetched CD invalid"; + case IVPU_MMU_EVT_F_WALK_EABT: + return " An external abort occurred fetching a TLB"; + case IVPU_MMU_EVT_F_TRANSLATION: + return "Translation fault"; + case IVPU_MMU_EVT_F_ADDR_SIZE: + return " Output address caused address size fault"; + case IVPU_MMU_EVT_F_ACCESS: + return "Access flag fault"; + case IVPU_MMU_EVT_F_PERMISSION: + return "Permission fault occurred on page access"; + case IVPU_MMU_EVT_F_TLB_CONFLICT: + return "A TLB conflict"; + case IVPU_MMU_EVT_F_CFG_CONFLICT: + return "A configuration cache conflict"; + case IVPU_MMU_EVT_E_PAGE_REQUEST: + return "Page request hint from a client device"; + case IVPU_MMU_EVT_F_VMS_FETCH: + return "Fetch of VMS caused external abort"; + default: + return "Unknown CMDQ command"; + } +} + +static void ivpu_mmu_config_check(struct ivpu_device *vdev) +{ + u32 val_ref; + u32 val; + + if (ivpu_is_simics(vdev)) + val_ref = IVPU_MMU_IDR0_REF_SIMICS; + else + val_ref = IVPU_MMU_IDR0_REF; + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR0); + if (val != val_ref) + ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref); + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR1); + if (val != IVPU_MMU_IDR1_REF) + ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF); + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR3); + if (val != IVPU_MMU_IDR3_REF) + ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF); + + if (ivpu_is_simics(vdev)) + val_ref = IVPU_MMU_IDR5_REF_SIMICS; + else if (ivpu_is_fpga(vdev)) + val_ref = IVPU_MMU_IDR5_REF_FPGA; + else + val_ref = IVPU_MMU_IDR5_REF; + + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR5); + if (val != val_ref) + ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref); +} + +static int ivpu_mmu_cdtab_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + size_t size = IVPU_MMU_CDTAB_ENT_COUNT * IVPU_MMU_CDTAB_ENT_SIZE; + + cdtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &cdtab->dma, GFP_KERNEL); + if (!cdtab->base) + return -ENOMEM; + + ivpu_dbg(vdev, MMU, "CDTAB alloc: dma=%pad size=%zu\n", &cdtab->dma, size); + + return 0; +} + +static int ivpu_mmu_strtab_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_strtab *strtab = &mmu->strtab; + size_t size = IVPU_MMU_STRTAB_ENT_COUNT * IVPU_MMU_STRTAB_ENT_SIZE; + + strtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &strtab->dma, GFP_KERNEL); + if (!strtab->base) + return -ENOMEM; + + strtab->base_cfg = IVPU_MMU_STRTAB_CFG; + strtab->dma_q = IVPU_MMU_STRTAB_BASE_RA; + strtab->dma_q |= strtab->dma & IVPU_MMU_STRTAB_BASE_ADDR_MASK; + + ivpu_dbg(vdev, MMU, "STRTAB alloc: dma=%pad dma_q=%pad size=%zu\n", + &strtab->dma, &strtab->dma_q, size); + + return 0; +} + +static int ivpu_mmu_cmdq_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_queue *q = &mmu->cmdq; + + q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_CMDQ_SIZE, &q->dma, GFP_KERNEL); + if (!q->base) + return -ENOMEM; + + q->dma_q = IVPU_MMU_Q_BASE_RWA; + q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK; + q->dma_q |= IVPU_MMU_Q_COUNT_LOG2; + + ivpu_dbg(vdev, MMU, "CMDQ alloc: dma=%pad dma_q=%pad size=%u\n", + &q->dma, &q->dma_q, IVPU_MMU_CMDQ_SIZE); + + return 0; +} + +static int ivpu_mmu_evtq_alloc(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_queue *q = &mmu->evtq; + + q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_EVTQ_SIZE, &q->dma, GFP_KERNEL); + if (!q->base) + return -ENOMEM; + + q->dma_q = IVPU_MMU_Q_BASE_RWA; + q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK; + q->dma_q |= IVPU_MMU_Q_COUNT_LOG2; + + ivpu_dbg(vdev, MMU, "EVTQ alloc: dma=%pad dma_q=%pad size=%u\n", + &q->dma, &q->dma_q, IVPU_MMU_EVTQ_SIZE); + + return 0; +} + +static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_mmu_cdtab_alloc(vdev); + if (ret) { + ivpu_err(vdev, "Failed to allocate cdtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_strtab_alloc(vdev); + if (ret) { + ivpu_err(vdev, "Failed to allocate strtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_cmdq_alloc(vdev); + if (ret) { + ivpu_err(vdev, "Failed to allocate cmdq: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_evtq_alloc(vdev); + if (ret) + ivpu_err(vdev, "Failed to allocate evtq: %d\n", ret); + + return ret; +} + +static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val) +{ + u32 reg_ack = reg + 4; /* ACK register is 4B after base register */ + u32 val_ack; + int ret; + + REGV_WR32(reg, val); + + ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Failed to write register 0x%x\n", reg); + + return ret; +} + +static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev) +{ + u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN; + int ret; + + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, 0); + if (ret) + return ret; + + return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, irq_ctrl); +} + +static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev) +{ + struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq; + + return REGV_POLL(MTL_VPU_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons), + IVPU_MMU_QUEUE_TIMEOUT_US); +} + +static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1) +{ + struct ivpu_mmu_queue *q = &vdev->mmu->cmdq; + u64 *queue_buffer = q->base; + int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer)); + + if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) { + ivpu_err(vdev, "Failed to write MMU CMD %s\n", name); + return -EBUSY; + } + + queue_buffer[idx] = data0; + queue_buffer[idx + 1] = data1; + q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK; + + ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1); + + return 0; +} + +static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) +{ + struct ivpu_mmu_queue *q = &vdev->mmu->cmdq; + u64 val; + int ret; + + val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) | + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) | + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) | + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf); + + ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0); + if (ret) + return ret; + + clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, q->prod); + + ret = ivpu_mmu_cmdq_wait_for_cons(vdev); + if (ret) + ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret); + + return ret; +} + +static int ivpu_mmu_cmdq_write_cfgi_all(struct ivpu_device *vdev) +{ + u64 data0 = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_CFGI_ALL); + u64 data1 = FIELD_PREP(IVPU_MMU_CMD_CFGI_1_RANGE, 0x1f); + + return ivpu_mmu_cmdq_cmd_write(vdev, "CFGI_ALL", data0, data1); +} + +static int ivpu_mmu_cmdq_write_tlbi_nh_asid(struct ivpu_device *vdev, u16 ssid) +{ + u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NH_ASID) | + FIELD_PREP(IVPU_MMU_CMD_TLBI_0_ASID, ssid); + + return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NH_ASID", val, 0); +} + +static int ivpu_mmu_cmdq_write_tlbi_nsnh_all(struct ivpu_device *vdev) +{ + u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NSNH_ALL); + + return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NSNH_ALL", val, 0); +} + +static int ivpu_mmu_reset(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + u32 val; + int ret; + + memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE); + clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE); + mmu->cmdq.prod = 0; + mmu->cmdq.cons = 0; + + memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE); + clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE); + mmu->evtq.prod = 0; + mmu->evtq.cons = 0; + + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, 0); + if (ret) + return ret; + + val = FIELD_PREP(IVPU_MMU_CR1_TABLE_SH, IVPU_MMU_SH_ISH) | + FIELD_PREP(IVPU_MMU_CR1_TABLE_OC, IVPU_MMU_CACHE_WB) | + FIELD_PREP(IVPU_MMU_CR1_TABLE_IC, IVPU_MMU_CACHE_WB) | + FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) | + FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) | + FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB); + REGV_WR32(MTL_VPU_HOST_MMU_CR1, val); + + REGV_WR64(MTL_VPU_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q); + REGV_WR32(MTL_VPU_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg); + + REGV_WR64(MTL_VPU_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q); + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, 0); + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_CONS, 0); + + val = IVPU_MMU_CR0_CMDQEN; + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + if (ret) + return ret; + + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); + if (ret) + return ret; + + ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev); + if (ret) + return ret; + + ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + return ret; + + REGV_WR64(MTL_VPU_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q); + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC, 0); + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, 0); + + val |= IVPU_MMU_CR0_EVTQEN; + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + if (ret) + return ret; + + val |= IVPU_MMU_CR0_ATSCHK; + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); + if (ret) + return ret; + + ret = ivpu_mmu_irqs_setup(vdev); + if (ret) + return ret; + + val |= IVPU_MMU_CR0_SMMUEN; + return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); +} + +static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_strtab *strtab = &mmu->strtab; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + u64 *entry = strtab->base + (sid * IVPU_MMU_STRTAB_ENT_SIZE); + u64 str[2]; + + str[0] = FIELD_PREP(IVPU_MMU_STE_0_CFG, IVPU_MMU_STE_0_CFG_S1_TRANS) | + FIELD_PREP(IVPU_MMU_STE_0_S1CDMAX, IVPU_MMU_CDTAB_ENT_COUNT_LOG2) | + FIELD_PREP(IVPU_MMU_STE_0_S1FMT, IVPU_MMU_STE_0_S1FMT_LINEAR) | + IVPU_MMU_STE_0_V | + (cdtab->dma & IVPU_MMU_STE_0_S1CTXPTR_MASK); + + str[1] = FIELD_PREP(IVPU_MMU_STE_1_S1DSS, IVPU_MMU_STE_1_S1DSS_TERMINATE) | + FIELD_PREP(IVPU_MMU_STE_1_S1CIR, IVPU_MMU_STE_1_S1C_CACHE_NC) | + FIELD_PREP(IVPU_MMU_STE_1_S1COR, IVPU_MMU_STE_1_S1C_CACHE_NC) | + FIELD_PREP(IVPU_MMU_STE_1_S1CSH, IVPU_MMU_SH_NSH) | + FIELD_PREP(IVPU_MMU_STE_1_PRIVCFG, IVPU_MMU_STE_1_PRIVCFG_UNPRIV) | + FIELD_PREP(IVPU_MMU_STE_1_INSTCFG, IVPU_MMU_STE_1_INSTCFG_DATA) | + FIELD_PREP(IVPU_MMU_STE_1_STRW, IVPU_MMU_STE_1_STRW_NSEL1) | + FIELD_PREP(IVPU_MMU_STE_1_CONT, IVPU_MMU_STRTAB_CFG_LOG2SIZE) | + IVPU_MMU_STE_1_MEV | + IVPU_MMU_STE_1_S1STALLD; + + WRITE_ONCE(entry[1], str[1]); + WRITE_ONCE(entry[0], str[0]); + + clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); + + ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]); +} + +static int ivpu_mmu_strtab_init(struct ivpu_device *vdev) +{ + ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID0); + ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID3); + + return 0; +} + +int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + int ret; + + ret = mutex_lock_interruptible(&mmu->lock); + if (ret) + return ret; + + if (!mmu->on) { + ret = 0; + goto unlock; + } + + ret = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid); + if (ret) + goto unlock; + + ret = ivpu_mmu_cmdq_sync(vdev); +unlock: + mutex_unlock(&mmu->lock); + return ret; +} + +static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; + u64 *entry; + u64 cd[4]; + int ret; + + if (ssid > IVPU_MMU_CDTAB_ENT_COUNT) + return -EINVAL; + + entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); + + if (cd_dma != 0) { + cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, 26) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, 3) | + FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | + IVPU_MMU_CD_0_TCR_EPD1 | + IVPU_MMU_CD_0_AA64 | + IVPU_MMU_CD_0_R | + IVPU_MMU_CD_0_ASET | + IVPU_MMU_CD_0_V; + cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; + cd[2] = 0; + cd[3] = 0x0000000000007444; + + /* For global context generate memory fault on VPU */ + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + cd[0] |= IVPU_MMU_CD_0_A; + } else { + memset(cd, 0, sizeof(cd)); + } + + WRITE_ONCE(entry[1], cd[1]); + WRITE_ONCE(entry[2], cd[2]); + WRITE_ONCE(entry[3], cd[3]); + WRITE_ONCE(entry[0], cd[0]); + + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); + + ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); + + ret = mutex_lock_interruptible(&mmu->lock); + if (ret) + return ret; + + if (!mmu->on) { + ret = 0; + goto unlock; + } + + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); + if (ret) + goto unlock; + + ret = ivpu_mmu_cmdq_sync(vdev); +unlock: + mutex_unlock(&mmu->lock); + return ret; +} + +static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma); + if (ret) + ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret); + + return ret; +} + +static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma) +{ + int ret; + + if (ssid == 0) { + ivpu_err(vdev, "Invalid SSID: %u\n", ssid); + return -EINVAL; + } + + ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma); + if (ret) + ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret); + + return ret; +} + +int ivpu_mmu_init(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + int ret; + + ivpu_dbg(vdev, MMU, "Init..\n"); + + drmm_mutex_init(&vdev->drm, &mmu->lock); + ivpu_mmu_config_check(vdev); + + ret = ivpu_mmu_structs_alloc(vdev); + if (ret) + return ret; + + ret = ivpu_mmu_strtab_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_cd_add_gbl(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); + return ret; + } + + ivpu_dbg(vdev, MMU, "Init done\n"); + + return 0; +} + +int ivpu_mmu_enable(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + int ret; + + mutex_lock(&mmu->lock); + + mmu->on = true; + + ret = ivpu_mmu_reset(vdev); + if (ret) { + ivpu_err(vdev, "Failed to reset MMU: %d\n", ret); + goto err; + } + + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); + if (ret) + goto err; + + ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev); + if (ret) + goto err; + + ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + goto err; + + mutex_unlock(&mmu->lock); + + return 0; +err: + mmu->on = false; + mutex_unlock(&mmu->lock); + return ret; +} + +void ivpu_mmu_disable(struct ivpu_device *vdev) +{ + struct ivpu_mmu_info *mmu = vdev->mmu; + + mutex_lock(&mmu->lock); + mmu->on = false; + mutex_unlock(&mmu->lock); +} + +static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event) +{ + u32 ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); + u32 op = FIELD_GET(IVPU_MMU_EVT_OP_MASK, event[0]); + u64 fetch_addr = ((u64)event[7]) << 32 | event[6]; + u64 in_addr = ((u64)event[5]) << 32 | event[4]; + u32 sid = event[1]; + + ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", + op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr); +} + +static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) +{ + struct ivpu_mmu_queue *evtq = &vdev->mmu->evtq; + u32 idx = IVPU_MMU_Q_IDX(evtq->cons); + u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE); + + evtq->prod = REGV_RD32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC); + if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) + return NULL; + + clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE); + + evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, evtq->cons); + + return evt; +} + +void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) +{ + bool schedule_recovery = false; + u32 *event; + u32 ssid; + + ivpu_dbg(vdev, IRQ, "MMU event queue\n"); + + while ((event = ivpu_mmu_get_event(vdev)) != NULL) { + ivpu_mmu_dump_event(vdev, event); + + ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + schedule_recovery = true; + else + ivpu_mmu_user_context_mark_invalid(vdev, ssid); + } + + if (schedule_recovery) + ivpu_pm_schedule_recovery(vdev); +} + +void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) +{ + u32 gerror_val, gerrorn_val, active; + + ivpu_dbg(vdev, IRQ, "MMU error\n"); + + gerror_val = REGV_RD32(MTL_VPU_HOST_MMU_GERROR); + gerrorn_val = REGV_RD32(MTL_VPU_HOST_MMU_GERRORN); + + active = gerror_val ^ gerrorn_val; + if (!(active & IVPU_MMU_GERROR_ERR_MASK)) + return; + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT, active)) + ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT, active)) + ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT, active)) + ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n"); + + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ, active)) + ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n"); + + REGV_WR32(MTL_VPU_HOST_MMU_GERRORN, gerror_val); +} + +int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) +{ + return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma); +} + +void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid) +{ + ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */ +} diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h new file mode 100644 index 000000000000..cb551126806b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_MMU_H__ +#define __IVPU_MMU_H__ + +struct ivpu_device; + +struct ivpu_mmu_cdtab { + void *base; + dma_addr_t dma; +}; + +struct ivpu_mmu_strtab { + void *base; + dma_addr_t dma; + u64 dma_q; + u32 base_cfg; +}; + +struct ivpu_mmu_queue { + void *base; + dma_addr_t dma; + u64 dma_q; + u32 prod; + u32 cons; +}; + +struct ivpu_mmu_info { + struct mutex lock; /* Protects cdtab, strtab, cmdq, on */ + struct ivpu_mmu_cdtab cdtab; + struct ivpu_mmu_strtab strtab; + struct ivpu_mmu_queue cmdq; + struct ivpu_mmu_queue evtq; + bool on; +}; + +int ivpu_mmu_init(struct ivpu_device *vdev); +void ivpu_mmu_disable(struct ivpu_device *vdev); +int ivpu_mmu_enable(struct ivpu_device *vdev); +int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); +void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid); +int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); + +void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); +void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev); + +#endif /* __IVPU_MMU_H__ */ diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c new file mode 100644 index 000000000000..8ce9b12ac356 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/bitfield.h> +#include <linux/highmem.h> + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_mmu.h" +#include "ivpu_mmu_context.h" + +#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30) +#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21) +#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12) +#define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0) +#define IVPU_MMU_ENTRY_FLAG_NG BIT(11) +#define IVPU_MMU_ENTRY_FLAG_AF BIT(10) +#define IVPU_MMU_ENTRY_FLAG_USER BIT(6) +#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2) +#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1) +#define IVPU_MMU_ENTRY_FLAG_VALID BIT(0) + +#define IVPU_MMU_PAGE_SIZE SZ_4K +#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE) +#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE) +#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64)) + +#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000 +#define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID) +#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK) +#define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \ + IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID) + +static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + dma_addr_t pgd_dma; + u64 *pgd; + + pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL); + if (!pgd) + return -ENOMEM; + + pgtable->pgd = pgd; + pgtable->pgd_dma = pgd_dma; + + return 0; +} + +static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + int pgd_index, pmd_index; + + for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) { + u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index]; + u64 *pmd = pgtable->pgd_entries[pgd_index]; + + if (!pmd_entries) + continue; + + for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) { + if (pmd_entries[pmd_index]) + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, + pmd_entries[pmd_index], + pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); + } + + kfree(pmd_entries); + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index], + pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); + } + + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd, + pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK); +} + +static u64* +ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index) +{ + u64 **pmd_entries; + dma_addr_t pmd_dma; + u64 *pmd; + + if (pgtable->pgd_entries[pgd_index]) + return pgtable->pgd_entries[pgd_index]; + + pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL); + if (!pmd) + return NULL; + + pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL); + if (!pmd_entries) + goto err_free_pgd; + + pgtable->pgd_entries[pgd_index] = pmd; + pgtable->pgd_cpu_entries[pgd_index] = pmd_entries; + pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID; + + return pmd; + +err_free_pgd: + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma); + return NULL; +} + +static u64* +ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, + int pgd_index, int pmd_index) +{ + dma_addr_t pte_dma; + u64 *pte; + + if (pgtable->pgd_cpu_entries[pgd_index][pmd_index]) + return pgtable->pgd_cpu_entries[pgd_index][pmd_index]; + + pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL); + if (!pte) + return NULL; + + pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte; + pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID; + + return pte; +} + +static int +ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, dma_addr_t dma_addr, int prot) +{ + u64 *pte; + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + /* Allocate PMD - second level page table if needed */ + if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index)) + return -ENOMEM; + + /* Allocate PTE - third level page table if needed */ + pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index); + if (!pte) + return -ENOMEM; + + /* Update PTE - third level page table with DMA address */ + pte[pte_index] = dma_addr | prot; + + return 0; +} + +static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr) +{ + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + + /* Update PTE with dummy physical address and clear flags */ + ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID; +} + +static void +ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) +{ + u64 end_addr = vpu_addr + size; + u64 *pgd = ctx->pgtable.pgd; + + /* Align to PMD entry (2 MB) */ + vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1); + + while (vpu_addr < end_addr) { + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); + u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE; + u64 *pmd = ctx->pgtable.pgd_entries[pgd_index]; + + while (vpu_addr < end_addr && vpu_addr < pmd_end) { + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); + u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index]; + + clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE); + vpu_addr += IVPU_MMU_PTE_MAP_SIZE; + } + clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE); + } + clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE); +} + +static int +ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot) +{ + while (size) { + int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot); + + if (ret) + return ret; + + vpu_addr += IVPU_MMU_PAGE_SIZE; + dma_addr += IVPU_MMU_PAGE_SIZE; + size -= IVPU_MMU_PAGE_SIZE; + } + + return 0; +} + +static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) +{ + while (size) { + ivpu_mmu_context_unmap_page(ctx, vpu_addr); + vpu_addr += IVPU_MMU_PAGE_SIZE; + size -= IVPU_MMU_PAGE_SIZE; + } +} + +int +ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) +{ + struct scatterlist *sg; + int prot; + int ret; + u64 i; + + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) + return -EINVAL; + /* + * VPU is only 32 bit, but DMA engine is 38 bit + * Ranges < 2 GB are reserved for VPU internal registers + * Limit range to 8 GB + */ + if (vpu_addr < SZ_2G || vpu_addr > SZ_8G) + return -EINVAL; + + prot = IVPU_MMU_ENTRY_MAPPED; + if (llc_coherent) + prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT; + + mutex_lock(&ctx->lock); + + for_each_sgtable_dma_sg(sgt, sg, i) { + u64 dma_addr = sg_dma_address(sg) - sg->offset; + size_t size = sg_dma_len(sg) + sg->offset; + + ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); + if (ret) { + ivpu_err(vdev, "Failed to map context pages\n"); + mutex_unlock(&ctx->lock); + return ret; + } + ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); + vpu_addr += size; + } + + mutex_unlock(&ctx->lock); + + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); + if (ret) + ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + return ret; +} + +void +ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt) +{ + struct scatterlist *sg; + int ret; + u64 i; + + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) + ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr); + + mutex_lock(&ctx->lock); + + for_each_sgtable_dma_sg(sgt, sg, i) { + size_t size = sg_dma_len(sg) + sg->offset; + + ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size); + ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); + vpu_addr += size; + } + + mutex_unlock(&ctx->lock); + + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); + if (ret) + ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); +} + +int +ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, + const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node) +{ + lockdep_assert_held(&ctx->lock); + + return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, + 0, range->start, range->end, DRM_MM_INSERT_BEST); +} + +void +ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node) +{ + lockdep_assert_held(&ctx->lock); + + drm_mm_remove_node(node); +} + +static int +ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) +{ + u64 start, end; + int ret; + + mutex_init(&ctx->lock); + INIT_LIST_HEAD(&ctx->bo_list); + + ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); + if (ret) + return ret; + + if (!context_id) { + start = vdev->hw->ranges.global_low.start; + end = vdev->hw->ranges.global_high.end; + } else { + start = vdev->hw->ranges.user_low.start; + end = vdev->hw->ranges.user_high.end; + } + + drm_mm_init(&ctx->mm, start, end - start); + ctx->id = context_id; + + return 0; +} + +static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +{ + drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd); + + mutex_destroy(&ctx->lock); + ivpu_mmu_pgtable_free(vdev, &ctx->pgtable); + drm_mm_takedown(&ctx->mm); +} + +int ivpu_mmu_global_context_init(struct ivpu_device *vdev) +{ + return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); +} + +void ivpu_mmu_global_context_fini(struct ivpu_device *vdev) +{ + return ivpu_mmu_context_fini(vdev, &vdev->gctx); +} + +void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) +{ + struct ivpu_file_priv *file_priv; + + xa_lock(&vdev->context_xa); + + file_priv = xa_load(&vdev->context_xa, ssid); + if (file_priv) + file_priv->has_mmu_faults = true; + + xa_unlock(&vdev->context_xa); +} + +int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id) +{ + int ret; + + drm_WARN_ON(&vdev->drm, !ctx_id); + + ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); + if (ret) { + ivpu_err(vdev, "Failed to initialize context: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set page table: %d\n", ret); + goto err_context_fini; + } + + return 0; + +err_context_fini: + ivpu_mmu_context_fini(vdev, ctx); + return ret; +} + +void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +{ + drm_WARN_ON(&vdev->drm, !ctx->id); + + ivpu_mmu_clear_pgtable(vdev, ctx->id); + ivpu_mmu_context_fini(vdev, ctx); +} diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h new file mode 100644 index 000000000000..ddf11b95023a --- /dev/null +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_MMU_CONTEXT_H__ +#define __IVPU_MMU_CONTEXT_H__ + +#include <drm/drm_mm.h> + +struct ivpu_device; +struct ivpu_file_priv; +struct ivpu_addr_range; + +#define IVPU_MMU_PGTABLE_ENTRIES 512 + +struct ivpu_mmu_pgtable { + u64 **pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES]; + u64 *pgd_entries[IVPU_MMU_PGTABLE_ENTRIES]; + u64 *pgd; + dma_addr_t pgd_dma; +}; + +struct ivpu_mmu_context { + struct mutex lock; /* protects: mm, pgtable, bo_list */ + struct drm_mm mm; + struct ivpu_mmu_pgtable pgtable; + struct list_head bo_list; + u32 id; +}; + +int ivpu_mmu_global_context_init(struct ivpu_device *vdev); +void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); + +int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); +void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); + +int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, + const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node); +void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, + struct drm_mm_node *node); + +int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); +void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, + u64 vpu_addr, struct sg_table *sgt); + +#endif /* __IVPU_MMU_CONTEXT_H__ */ diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c new file mode 100644 index 000000000000..553bcbd787b3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/highmem.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/pm_runtime.h> +#include <linux/reboot.h> + +#include "vpu_boot_api.h" +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_fw.h" +#include "ivpu_ipc.h" +#include "ivpu_job.h" +#include "ivpu_mmu.h" +#include "ivpu_pm.h" + +static bool ivpu_disable_recovery; +module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); +MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected"); + +#define PM_RESCHEDULE_LIMIT 5 + +static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + + ivpu_cmdq_reset_all_contexts(vdev); + ivpu_ipc_reset(vdev); + ivpu_fw_load(vdev); + fw->entry_point = fw->cold_boot_entry_point; +} + +static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) +{ + struct ivpu_fw_info *fw = vdev->fw; + struct vpu_boot_params *bp = fw->mem->kvaddr; + + if (!bp->save_restore_ret_address) { + ivpu_pm_prepare_cold_boot(vdev); + return; + } + + ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address); + fw->entry_point = bp->save_restore_ret_address; +} + +static int ivpu_suspend(struct ivpu_device *vdev) +{ + int ret; + + ret = ivpu_shutdown(vdev); + if (ret) { + ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); + return ret; + } + + return ret; +} + +static int ivpu_resume(struct ivpu_device *vdev) +{ + int ret; + +retry: + ret = ivpu_hw_power_up(vdev); + if (ret) { + ivpu_err(vdev, "Failed to power up HW: %d\n", ret); + return ret; + } + + ret = ivpu_mmu_enable(vdev); + if (ret) { + ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); + ivpu_hw_power_down(vdev); + return ret; + } + + ret = ivpu_boot(vdev); + if (ret) { + ivpu_mmu_disable(vdev); + ivpu_hw_power_down(vdev); + if (!ivpu_fw_is_cold_boot(vdev)) { + ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret); + ivpu_pm_prepare_cold_boot(vdev); + goto retry; + } else { + ivpu_err(vdev, "Failed to resume the FW: %d\n", ret); + } + } + + return ret; +} + +static void ivpu_pm_recovery_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); + struct ivpu_device *vdev = pm->vdev; + char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; + int ret; + + ret = pci_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); + + kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); +} + +void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) +{ + struct ivpu_pm_info *pm = vdev->pm; + + if (ivpu_disable_recovery) { + ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n"); + return; + } + + if (ivpu_is_fpga(vdev)) { + ivpu_err(vdev, "Recovery not available on FPGA\n"); + return; + } + + /* Schedule recovery if it's not in progress */ + if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) { + ivpu_hw_irq_disable(vdev); + queue_work(system_long_wq, &pm->recovery_work); + } +} + +int ivpu_pm_suspend_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Suspend..\n"); + + ret = ivpu_suspend(vdev); + if (ret && vdev->pm->suspend_reschedule_counter) { + ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", + vdev->pm->suspend_reschedule_counter); + pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); + vdev->pm->suspend_reschedule_counter--; + return -EBUSY; + } else if (!vdev->pm->suspend_reschedule_counter) { + ivpu_warn(vdev, "Failed to enter idle, force suspend\n"); + ivpu_pm_prepare_cold_boot(vdev); + } else { + ivpu_pm_prepare_warm_boot(vdev); + } + + vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + pci_save_state(to_pci_dev(dev)); + pci_set_power_state(to_pci_dev(dev), PCI_D3hot); + + ivpu_dbg(vdev, PM, "Suspend done.\n"); + + return ret; +} + +int ivpu_pm_resume_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Resume..\n"); + + pci_set_power_state(to_pci_dev(dev), PCI_D0); + pci_restore_state(to_pci_dev(dev)); + + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to resume: %d\n", ret); + + ivpu_dbg(vdev, PM, "Resume done.\n"); + + return ret; +} + +int ivpu_pm_runtime_suspend_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Runtime suspend..\n"); + + if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) { + ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", + vdev->pm->suspend_reschedule_counter); + pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); + vdev->pm->suspend_reschedule_counter--; + return -EAGAIN; + } + + ret = ivpu_suspend(vdev); + if (ret) + ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); + + if (!vdev->pm->suspend_reschedule_counter) { + ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n"); + ivpu_pm_prepare_cold_boot(vdev); + } else { + ivpu_pm_prepare_warm_boot(vdev); + } + + vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + ivpu_dbg(vdev, PM, "Runtime suspend done.\n"); + + return 0; +} + +int ivpu_pm_runtime_resume_cb(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + int ret; + + ivpu_dbg(vdev, PM, "Runtime resume..\n"); + + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); + + ivpu_dbg(vdev, PM, "Runtime resume done.\n"); + + return ret; +} + +int ivpu_rpm_get(struct ivpu_device *vdev) +{ + int ret; + + ivpu_dbg(vdev, RPM, "rpm_get count %d\n", atomic_read(&vdev->drm.dev->power.usage_count)); + + ret = pm_runtime_resume_and_get(vdev->drm.dev); + if (!drm_WARN_ON(&vdev->drm, ret < 0)) + vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + return ret; +} + +void ivpu_rpm_put(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, RPM, "rpm_put count %d\n", atomic_read(&vdev->drm.dev->power.usage_count)); + + pm_runtime_mark_last_busy(vdev->drm.dev); + pm_runtime_put_autosuspend(vdev->drm.dev); +} + +void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) +{ + struct ivpu_device *vdev = pci_get_drvdata(pdev); + + pm_runtime_get_sync(vdev->drm.dev); + + ivpu_dbg(vdev, PM, "Pre-reset..\n"); + atomic_set(&vdev->pm->in_reset, 1); + ivpu_shutdown(vdev); + ivpu_pm_prepare_cold_boot(vdev); + ivpu_jobs_abort_all(vdev); + ivpu_dbg(vdev, PM, "Pre-reset done.\n"); +} + +void ivpu_pm_reset_done_cb(struct pci_dev *pdev) +{ + struct ivpu_device *vdev = pci_get_drvdata(pdev); + int ret; + + ivpu_dbg(vdev, PM, "Post-reset..\n"); + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); + atomic_set(&vdev->pm->in_reset, 0); + ivpu_dbg(vdev, PM, "Post-reset done.\n"); + + pm_runtime_put_autosuspend(vdev->drm.dev); +} + +int ivpu_pm_init(struct ivpu_device *vdev) +{ + struct device *dev = vdev->drm.dev; + struct ivpu_pm_info *pm = vdev->pm; + + pm->vdev = vdev; + pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + + atomic_set(&pm->in_reset, 0); + INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); + + pm_runtime_use_autosuspend(dev); + + if (ivpu_disable_recovery) + pm_runtime_set_autosuspend_delay(dev, -1); + else if (ivpu_is_silicon(vdev)) + pm_runtime_set_autosuspend_delay(dev, 100); + else + pm_runtime_set_autosuspend_delay(dev, 60000); + + return 0; +} + +void ivpu_pm_enable(struct ivpu_device *vdev) +{ + struct device *dev = vdev->drm.dev; + + pm_runtime_set_active(dev); + pm_runtime_allow(dev); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + ivpu_dbg(vdev, RPM, "Enable RPM count %d\n", atomic_read(&dev->power.usage_count)); +} + +void ivpu_pm_disable(struct ivpu_device *vdev) +{ + struct device *dev = vdev->drm.dev; + + ivpu_dbg(vdev, RPM, "Disable RPM count %d\n", atomic_read(&dev->power.usage_count)); + + pm_runtime_get_noresume(vdev->drm.dev); + pm_runtime_forbid(vdev->drm.dev); +} diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h new file mode 100644 index 000000000000..dc1b3758e13f --- /dev/null +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_PM_H__ +#define __IVPU_PM_H__ + +#include <linux/types.h> + +struct ivpu_device; + +struct ivpu_pm_info { + struct ivpu_device *vdev; + struct work_struct recovery_work; + atomic_t in_reset; + bool is_warmboot; + u32 suspend_reschedule_counter; +}; + +int ivpu_pm_init(struct ivpu_device *vdev); +void ivpu_pm_enable(struct ivpu_device *vdev); +void ivpu_pm_disable(struct ivpu_device *vdev); + +int ivpu_pm_suspend_cb(struct device *dev); +int ivpu_pm_resume_cb(struct device *dev); +int ivpu_pm_runtime_suspend_cb(struct device *dev); +int ivpu_pm_runtime_resume_cb(struct device *dev); + +void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev); +void ivpu_pm_reset_done_cb(struct pci_dev *pdev); + +int __must_check ivpu_rpm_get(struct ivpu_device *vdev); +void ivpu_rpm_put(struct ivpu_device *vdev); + +void ivpu_pm_schedule_recovery(struct ivpu_device *vdev); + +#endif /* __IVPU_PM_H__ */ diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h new file mode 100644 index 000000000000..6b71be92ba65 --- /dev/null +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -0,0 +1,349 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef VPU_BOOT_API_H +#define VPU_BOOT_API_H + +/* + * =========== FW API version information beginning ================ + * The bellow values will be used to construct the version info this way: + * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | + * VPU_BOOT_API_VER_MINOR; + * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes. + */ + +/* + * Major version changes that break backward compatibility. + * Major version must start from 1 and can only be incremented. + */ +#define VPU_BOOT_API_VER_MAJOR 3 + +/* + * Minor version changes when API backward compatibility is preserved. + * Resets to 0 if Major version is incremented. + */ +#define VPU_BOOT_API_VER_MINOR 12 + +/* + * API header changed (field names, documentation, formatting) but API itself has not been changed + */ +#define VPU_BOOT_API_VER_PATCH 2 + +/* + * Index in the API version table + * Must be unique for each API + */ +#define VPU_BOOT_API_VER_INDEX 0 +/* ------------ FW API version information end ---------------------*/ + +#pragma pack(push, 1) + +/* + * Firmware image header format + */ +#define VPU_FW_HEADER_SIZE 4096 +#define VPU_FW_HEADER_VERSION 0x1 +#define VPU_FW_VERSION_SIZE 32 +#define VPU_FW_API_VER_NUM 16 + +struct vpu_firmware_header { + u32 header_version; + u32 image_format; + u64 image_load_address; + u32 image_size; + u64 entry_point; + u8 vpu_version[VPU_FW_VERSION_SIZE]; + u32 compression_type; + u64 firmware_version_load_address; + u32 firmware_version_size; + u64 boot_params_load_address; + u32 api_version[VPU_FW_API_VER_NUM]; + /* Size of memory require for firmware execution */ + u32 runtime_size; + u32 shave_nn_fw_size; +}; + +/* + * Firmware boot parameters format + */ + +#define VPU_BOOT_PLL_COUNT 3 +#define VPU_BOOT_PLL_OUT_COUNT 4 + +/** Values for boot_type field */ +#define VPU_BOOT_TYPE_COLDBOOT 0 +#define VPU_BOOT_TYPE_WARMBOOT 1 + +/** Value for magic filed */ +#define VPU_BOOT_PARAMS_MAGIC 0x10000 + +/** VPU scheduling mode. By default, OS scheduling is used. */ +#define VPU_SCHEDULING_MODE_OS 0 +#define VPU_SCHEDULING_MODE_HW 1 + +enum VPU_BOOT_L2_CACHE_CFG_TYPE { + VPU_BOOT_L2_CACHE_CFG_UPA = 0, + VPU_BOOT_L2_CACHE_CFG_NN = 1, + VPU_BOOT_L2_CACHE_CFG_NUM = 2 +}; + +/** + * Logging destinations. + * + * Logging output can be directed to different logging destinations. This enum + * defines the list of logging destinations supported by the VPU firmware (NOTE: + * a specific VPU FW binary may support only a subset of such output + * destinations, depending on the target platform and compile options). + */ +enum vpu_trace_destination { + VPU_TRACE_DESTINATION_PIPEPRINT = 0x1, + VPU_TRACE_DESTINATION_VERBOSE_TRACING = 0x2, + VPU_TRACE_DESTINATION_NORTH_PEAK = 0x4, +}; + +/* + * Processor bit shifts (for loggable HW components). + */ +#define VPU_TRACE_PROC_BIT_ARM 0 +#define VPU_TRACE_PROC_BIT_LRT 1 +#define VPU_TRACE_PROC_BIT_LNN 2 +#define VPU_TRACE_PROC_BIT_SHV_0 3 +#define VPU_TRACE_PROC_BIT_SHV_1 4 +#define VPU_TRACE_PROC_BIT_SHV_2 5 +#define VPU_TRACE_PROC_BIT_SHV_3 6 +#define VPU_TRACE_PROC_BIT_SHV_4 7 +#define VPU_TRACE_PROC_BIT_SHV_5 8 +#define VPU_TRACE_PROC_BIT_SHV_6 9 +#define VPU_TRACE_PROC_BIT_SHV_7 10 +#define VPU_TRACE_PROC_BIT_SHV_8 11 +#define VPU_TRACE_PROC_BIT_SHV_9 12 +#define VPU_TRACE_PROC_BIT_SHV_10 13 +#define VPU_TRACE_PROC_BIT_SHV_11 14 +#define VPU_TRACE_PROC_BIT_SHV_12 15 +#define VPU_TRACE_PROC_BIT_SHV_13 16 +#define VPU_TRACE_PROC_BIT_SHV_14 17 +#define VPU_TRACE_PROC_BIT_SHV_15 18 +#define VPU_TRACE_PROC_BIT_ACT_SHV_0 19 +#define VPU_TRACE_PROC_BIT_ACT_SHV_1 20 +#define VPU_TRACE_PROC_BIT_ACT_SHV_2 21 +#define VPU_TRACE_PROC_BIT_ACT_SHV_3 22 +#define VPU_TRACE_PROC_NO_OF_HW_DEVS 23 + +/* KMB HW component IDs are sequential, so define first and last IDs. */ +#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT +#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_SHV_15 + +struct vpu_boot_l2_cache_config { + u8 use; + u8 cfg; +}; + +struct vpu_warm_boot_section { + u32 src; + u32 dst; + u32 size; + u32 core_id; + u32 is_clear_op; +}; + +struct vpu_boot_params { + u32 magic; + u32 vpu_id; + u32 vpu_count; + u32 pad0[5]; + /* Clock frequencies: 0x20 - 0xFF */ + u32 frequency; + u32 pll[VPU_BOOT_PLL_COUNT][VPU_BOOT_PLL_OUT_COUNT]; + u32 perf_clk_frequency; + u32 pad1[42]; + /* Memory regions: 0x100 - 0x1FF */ + u64 ipc_header_area_start; + u32 ipc_header_area_size; + u64 shared_region_base; + u32 shared_region_size; + u64 ipc_payload_area_start; + u32 ipc_payload_area_size; + u64 global_aliased_pio_base; + u32 global_aliased_pio_size; + u32 autoconfig; + struct vpu_boot_l2_cache_config cache_defaults[VPU_BOOT_L2_CACHE_CFG_NUM]; + u64 global_memory_allocator_base; + u32 global_memory_allocator_size; + /** + * ShaveNN FW section VPU base address + * On VPU2.7 HW this address must be within 2GB range starting from L2C_PAGE_TABLE base + */ + u64 shave_nn_fw_base; + u64 save_restore_ret_address; /* stores the address of FW's restore entry point */ + u32 pad2[43]; + /* IRQ re-direct numbers: 0x200 - 0x2FF */ + s32 watchdog_irq_mss; + s32 watchdog_irq_nce; + /* ARM -> VPU doorbell interrupt. ARM is notifying VPU of async command or compute job. */ + u32 host_to_vpu_irq; + /* VPU -> ARM job done interrupt. VPU is notifying ARM of compute job completion. */ + u32 job_done_irq; + /* VPU -> ARM IRQ line to use to request MMU update. */ + u32 mmu_update_request_irq; + /* ARM -> VPU IRQ line to use to notify of MMU update completion. */ + u32 mmu_update_done_irq; + /* ARM -> VPU IRQ line to use to request power level change. */ + u32 set_power_level_irq; + /* VPU -> ARM IRQ line to use to notify of power level change completion. */ + u32 set_power_level_done_irq; + /* VPU -> ARM IRQ line to use to notify of VPU idle state change */ + u32 set_vpu_idle_update_irq; + /* VPU -> ARM IRQ line to use to request counter reset. */ + u32 metric_query_event_irq; + /* ARM -> VPU IRQ line to use to notify of counter reset completion. */ + u32 metric_query_event_done_irq; + /* VPU -> ARM IRQ line to use to notify of preemption completion. */ + u32 preemption_done_irq; + /* Padding. */ + u32 pad3[52]; + /* Silicon information: 0x300 - 0x3FF */ + u32 host_version_id; + u32 si_stepping; + u64 device_id; + u64 feature_exclusion; + u64 sku; + /** PLL ratio for minimum clock frequency */ + u32 min_freq_pll_ratio; + /** PLL ratio for maximum clock frequency */ + u32 max_freq_pll_ratio; + /** + * Initial log level threshold (messages with log level severity less than + * the threshold will not be logged); applies to every enabled logging + * destination and loggable HW component. See 'mvLog_t' enum for acceptable + * values. + */ + u32 default_trace_level; + u32 boot_type; + u64 punit_telemetry_sram_base; + u64 punit_telemetry_sram_size; + u32 vpu_telemetry_enable; + u64 crit_tracing_buff_addr; + u32 crit_tracing_buff_size; + u64 verbose_tracing_buff_addr; + u32 verbose_tracing_buff_size; + u64 verbose_tracing_sw_component_mask; /* TO BE REMOVED */ + /** + * Mask of destinations to which logging messages are delivered; bitwise OR + * of values defined in vpu_trace_destination enum. + */ + u32 trace_destination_mask; + /** + * Mask of hardware components for which logging is enabled; bitwise OR of + * bits defined by the VPU_TRACE_PROC_BIT_* macros. + */ + u64 trace_hw_component_mask; + /** Mask of trace message formats supported by the driver */ + u64 tracing_buff_message_format_mask; + u64 trace_reserved_1[2]; + /** + * Period at which the VPU reads the temp sensor values into MMIO, on + * platforms where that is necessary (in ms). 0 to disable reads. + */ + u32 temp_sensor_period_ms; + /** PLL ratio for efficient clock frequency */ + u32 pn_freq_pll_ratio; + u32 pad4[28]; + /* Warm boot information: 0x400 - 0x43F */ + u32 warm_boot_sections_count; + u32 warm_boot_start_address_reference; + u32 warm_boot_section_info_address_offset; + u32 pad5[13]; + /* Power States transitions timestamps: 0x440 - 0x46F*/ + struct { + /* VPU_IDLE -> VPU_ACTIVE transition initiated timestamp */ + u64 vpu_active_state_requested; + /* VPU_IDLE -> VPU_ACTIVE transition completed timestamp */ + u64 vpu_active_state_achieved; + /* VPU_ACTIVE -> VPU_IDLE transition initiated timestamp */ + u64 vpu_idle_state_requested; + /* VPU_ACTIVE -> VPU_IDLE transition completed timestamp */ + u64 vpu_idle_state_achieved; + /* VPU_IDLE -> VPU_STANDBY transition initiated timestamp */ + u64 vpu_standby_state_requested; + /* VPU_IDLE -> VPU_STANDBY transition completed timestamp */ + u64 vpu_standby_state_achieved; + } power_states_timestamps; + /* VPU scheduling mode. Values defined by VPU_SCHEDULING_MODE_* macros. */ + u32 vpu_scheduling_mode; + /* Present call period in milliseconds. */ + u32 vpu_focus_present_timer_ms; + /* Unused/reserved: 0x478 - 0xFFF */ + u32 pad6[738]; +}; + +/* + * Magic numbers set between host and vpu to detect corruptio of tracing init + */ + +#define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE) + +/* Tracing buffer message format definitions */ +#define VPU_TRACING_FORMAT_STRING 0 +#define VPU_TRACING_FORMAT_MIPI 2 +/* + * Header of the tracing buffer. + * The below defined header will be stored at the beginning of + * each allocated tracing buffer, followed by a series of 256b + * of ASCII trace message entries. + */ +struct vpu_tracing_buffer_header { + /** + * Magic number set by host to detect corruption + * @see VPU_TRACING_BUFFER_CANARY + */ + u32 host_canary_start; + /* offset from start of buffer for trace entries */ + u32 read_index; + u32 pad_to_cache_line_size_0[14]; + /* End of first cache line */ + + /** + * Magic number set by host to detect corruption + * @see VPU_TRACING_BUFFER_CANARY + */ + u32 vpu_canary_start; + /* offset from start of buffer from write start */ + u32 write_index; + /* counter for buffer wrapping */ + u32 wrap_count; + /* legacy field - do not use */ + u32 reserved_0; + /** + * Size of the log buffer include this header (@header_size) and space + * reserved for all messages. If @alignment` is greater that 0 the @Size + * must be multiple of @Alignment. + */ + u32 size; + /* Header version */ + u16 header_version; + /* Header size */ + u16 header_size; + /* + * Format of the messages in the trace buffer + * 0 - null terminated string + * 1 - size + null terminated string + * 2 - MIPI-SysT encoding + */ + u32 format; + /* + * Message alignment + * 0 - messages are place 1 after another + * n - every message starts and multiple on offset + */ + u32 alignment; /* 64, 128, 256 */ + /* Name of the logging entity, i.e "LRT", "LNN", "SHV0", etc */ + char name[16]; + u32 pad_to_cache_line_size_1[4]; + /* End of second cache line */ +}; + +#pragma pack(pop) + +#endif diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h new file mode 100644 index 000000000000..2949ec8365bd --- /dev/null +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -0,0 +1,1008 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +/** + * @file + * @brief JSM shared definitions + * + * @ingroup Jsm + * @brief JSM shared definitions + * @{ + */ +#ifndef VPU_JSM_API_H +#define VPU_JSM_API_H + +/* + * Major version changes that break backward compatibility + */ +#define VPU_JSM_API_VER_MAJOR 3 + +/* + * Minor version changes when API backward compatibility is preserved. + */ +#define VPU_JSM_API_VER_MINOR 0 + +/* + * API header changed (field names, documentation, formatting) but API itself has not been changed + */ +#define VPU_JSM_API_VER_PATCH 1 + +/* + * Index in the API version table + */ +#define VPU_JSM_API_VER_INDEX 4 + +/* + * Number of Priority Bands for Hardware Scheduling + * Bands: RealTime, Focus, Normal, Idle + */ +#define VPU_HWS_NUM_PRIORITY_BANDS 4 + +/* Max number of impacted contexts that can be dealt with the engine reset command */ +#define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3 + +/** Pack the API structures for now, once alignment issues are fixed this can be removed */ +#pragma pack(push, 1) + +/* + * Engine indexes. + */ +#define VPU_ENGINE_COMPUTE 0 +#define VPU_ENGINE_COPY 1 +#define VPU_ENGINE_NB 2 + +/* + * VPU status values. + */ +#define VPU_JSM_STATUS_SUCCESS 0x0U +#define VPU_JSM_STATUS_PARSING_ERR 0x1U +#define VPU_JSM_STATUS_PROCESSING_ERR 0x2U +#define VPU_JSM_STATUS_PREEMPTED 0x3U +#define VPU_JSM_STATUS_ABORTED 0x4U +#define VPU_JSM_STATUS_USER_CTX_VIOL_ERR 0x5U +#define VPU_JSM_STATUS_GLOBAL_CTX_VIOL_ERR 0x6U +#define VPU_JSM_STATUS_MVNCI_WRONG_INPUT_FORMAT 0x7U +#define VPU_JSM_STATUS_MVNCI_UNSUPPORTED_NETWORK_ELEMENT 0x8U +#define VPU_JSM_STATUS_MVNCI_INVALID_HANDLE 0x9U +#define VPU_JSM_STATUS_MVNCI_OUT_OF_RESOURCES 0xAU +#define VPU_JSM_STATUS_MVNCI_NOT_IMPLEMENTED 0xBU +#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU +/* Job status returned when the job was preempted mid-inference */ +#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU + +/* + * Host <-> VPU IPC channels. + * ASYNC commands use a high priority channel, other messages use low-priority ones. + */ +#define VPU_IPC_CHAN_ASYNC_CMD 0 +#define VPU_IPC_CHAN_GEN_CMD 10 +#define VPU_IPC_CHAN_JOB_RET 11 + +/* + * Job flags bit masks. + */ +#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 + +/* + * Sizes of the reserved areas in jobs, in bytes. + */ +#define VPU_JOB_RESERVED_BYTES 16 +/* + * Sizes of the reserved areas in job queues, in bytes. + */ +#define VPU_JOB_QUEUE_RESERVED_BYTES 52 + +/* + * Max length (including trailing NULL char) of trace entity name (e.g., the + * name of a logging destination or a loggable HW component). + */ +#define VPU_TRACE_ENTITY_NAME_MAX_LEN 32 + +/* + * Max length (including trailing NULL char) of a dyndbg command. + * + * NOTE: 96 is used so that the size of 'struct vpu_ipc_msg' in the JSM API is + * 128 bytes (multiple of 64 bytes, the cache line size). + */ +#define VPU_DYNDBG_CMD_MAX_LEN 96 + +/* + * Job format. + */ +struct vpu_job_queue_entry { + u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ + u32 job_id; /**< Job ID */ + u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ + u64 root_page_table_addr; /**< Address of root page table to use for this job */ + u64 root_page_table_update_counter; /**< Page tables update events counter */ + u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */ + u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */ + u8 reserved_0[VPU_JOB_RESERVED_BYTES]; +}; + +/* + * Job queue control registers. + */ +struct vpu_job_queue_header { + u32 engine_idx; + u32 head; + u32 tail; + u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES]; +}; + +/* + * Job queue format. + */ +struct vpu_job_queue { + struct vpu_job_queue_header header; + struct vpu_job_queue_entry job[]; +}; + +/** + * Logging entity types. + * + * This enum defines the different types of entities involved in logging. + */ +enum vpu_trace_entity_type { + /** Logging destination (entity where logs can be stored / printed). */ + VPU_TRACE_ENTITY_TYPE_DESTINATION = 1, + /** Loggable HW component (HW entity that can be logged). */ + VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2, +}; + +/* + * Host <-> VPU IPC messages types. + */ +enum vpu_ipc_msg_type { + VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, + /* IPC Host -> Device, Async commands */ + VPU_JSM_MSG_ASYNC_CMD = 0x1100, + VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, + VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, + VPU_JSM_MSG_REGISTER_DB = 0x1102, + VPU_JSM_MSG_UNREGISTER_DB = 0x1103, + VPU_JSM_MSG_QUERY_ENGINE_HB = 0x1104, + VPU_JSM_MSG_GET_POWER_LEVEL_COUNT = 0x1105, + VPU_JSM_MSG_GET_POWER_LEVEL = 0x1106, + VPU_JSM_MSG_SET_POWER_LEVEL = 0x1107, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_OPEN = 0x1108, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_CLOSE = 0x1109, + /** Configure logging (used to modify configuration passed in boot params). */ + VPU_JSM_MSG_TRACE_SET_CONFIG = 0x110a, + /** Return current logging configuration. */ + VPU_JSM_MSG_TRACE_GET_CONFIG = 0x110b, + /** + * Get masks of destinations and HW components supported by the firmware + * (may vary between HW generations and FW compile + * time configurations) + */ + VPU_JSM_MSG_TRACE_GET_CAPABILITY = 0x110c, + /** Get the name of a destination or HW component. */ + VPU_JSM_MSG_TRACE_GET_NAME = 0x110d, + /** + * Release resource associated with host ssid . All jobs that belong to the host_ssid + * aborted and removed from internal scheduling queues. All doorbells assigned + * to the host_ssid are unregistered and any internal FW resources belonging to + * the host_ssid are released. + */ + VPU_JSM_MSG_SSID_RELEASE = 0x110e, + /** + * Start collecting metric data. + * @see vpu_jsm_metric_streamer_start + */ + VPU_JSM_MSG_METRIC_STREAMER_START = 0x110f, + /** + * Stop collecting metric data. This command will return success if it is called + * for a metric stream that has already been stopped or was never started. + * @see vpu_jsm_metric_streamer_stop + */ + VPU_JSM_MSG_METRIC_STREAMER_STOP = 0x1110, + /** + * Update current and next buffer for metric data collection. This command can + * also be used to request information about the number of collected samples + * and the amount of data written to the buffer. + * @see vpu_jsm_metric_streamer_update + */ + VPU_JSM_MSG_METRIC_STREAMER_UPDATE = 0x1111, + /** + * Request description of selected metric groups and metric counters within + * each group. The VPU will write the description of groups and counters to + * the buffer specified in the command structure. + * @see vpu_jsm_metric_streamer_start + */ + VPU_JSM_MSG_METRIC_STREAMER_INFO = 0x1112, + /** Control command: Priority band setup */ + VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP = 0x1113, + /** Control command: Create command queue */ + VPU_JSM_MSG_CREATE_CMD_QUEUE = 0x1114, + /** Control command: Destroy command queue */ + VPU_JSM_MSG_DESTROY_CMD_QUEUE = 0x1115, + /** Control command: Set context scheduling properties */ + VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES = 0x1116, + /* + * Register a doorbell to notify VPU of new work. The doorbell may later be + * deallocated or reassigned to another context. + */ + VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117, + /* IPC Host -> Device, General commands */ + VPU_JSM_MSG_GENERAL_CMD = 0x1200, + VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, + /** + * Control dyndbg behavior by executing a dyndbg command; equivalent to + * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`. + */ + VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201, + /* IPC Device -> Host, Job completion */ + VPU_JSM_MSG_JOB_DONE = 0x2100, + /* IPC Device -> Host, Async command completion */ + VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, + VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, + VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201, + VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202, + VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203, + VPU_JSM_MSG_QUERY_ENGINE_HB_DONE = 0x2204, + VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE = 0x2205, + VPU_JSM_MSG_GET_POWER_LEVEL_DONE = 0x2206, + VPU_JSM_MSG_SET_POWER_LEVEL_DONE = 0x2207, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_OPEN_DONE = 0x2208, + /* @deprecated */ + VPU_JSM_MSG_METRIC_STREAMER_CLOSE_DONE = 0x2209, + /** Response to VPU_JSM_MSG_TRACE_SET_CONFIG. */ + VPU_JSM_MSG_TRACE_SET_CONFIG_RSP = 0x220a, + /** Response to VPU_JSM_MSG_TRACE_GET_CONFIG. */ + VPU_JSM_MSG_TRACE_GET_CONFIG_RSP = 0x220b, + /** Response to VPU_JSM_MSG_TRACE_GET_CAPABILITY. */ + VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP = 0x220c, + /** Response to VPU_JSM_MSG_TRACE_GET_NAME. */ + VPU_JSM_MSG_TRACE_GET_NAME_RSP = 0x220d, + /** Response to VPU_JSM_MSG_SSID_RELEASE. */ + VPU_JSM_MSG_SSID_RELEASE_DONE = 0x220e, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_START. + * VPU will return an error result if metric collection cannot be started, + * e.g. when the specified metric mask is invalid. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_START_DONE = 0x220f, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_STOP. + * Returns information about collected metric data. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE = 0x2210, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_UPDATE. + * Returns information about collected metric data. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE = 0x2211, + /** + * Response to VPU_JSM_MSG_METRIC_STREAMER_INFO. + * Returns a description of the metric groups and metric counters. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE = 0x2212, + /** + * Asynchronous event sent from the VPU to the host either when the current + * metric buffer is full or when the VPU has collected a multiple of + * @notify_sample_count samples as indicated through the start command + * (VPU_JSM_MSG_METRIC_STREAMER_START). Returns information about collected + * metric data. + * @see vpu_jsm_metric_streamer_done + */ + VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION = 0x2213, + /** Response to control command: Priority band setup */ + VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP = 0x2214, + /** Response to control command: Create command queue */ + VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP = 0x2215, + /** Response to control command: Destroy command queue */ + VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216, + /** Response to control command: Set context scheduling properties */ + VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217, + /* IPC Device -> Host, General command completion */ + VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, + VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, + /** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */ + VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301, +}; + +enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED }; + +/* + * Host <-> LRT IPC message payload definitions + */ +struct vpu_ipc_msg_payload_engine_reset { + /* Engine to be reset. */ + u32 engine_idx; + /* Reserved */ + u32 reserved_0; +}; + +struct vpu_ipc_msg_payload_engine_preempt { + /* Engine to be preempted. */ + u32 engine_idx; + /* ID of the preemption request. */ + u32 preempt_id; +}; + +/* + * @brief Register doorbell command structure. + * This structure supports doorbell registration for only OS scheduling. + * @see VPU_JSM_MSG_REGISTER_DB + */ +struct vpu_ipc_msg_payload_register_db { + /* Index of the doorbell to register. */ + u32 db_idx; + /* Reserved */ + u32 reserved_0; + /* Virtual address in Global GTT pointing to the start of job queue. */ + u64 jobq_base; + /* Size of the job queue in bytes. */ + u32 jobq_size; + /* Host sub-stream ID for the context assigned to the doorbell. */ + u32 host_ssid; +}; + +/** + * @brief Unregister doorbell command structure. + * Request structure to unregister a doorbell for both HW and OS scheduling. + * @see VPU_JSM_MSG_UNREGISTER_DB + */ +struct vpu_ipc_msg_payload_unregister_db { + /* Index of the doorbell to unregister. */ + u32 db_idx; + /* Reserved */ + u32 reserved_0; +}; + +struct vpu_ipc_msg_payload_query_engine_hb { + /* Engine to return heartbeat value. */ + u32 engine_idx; + /* Reserved */ + u32 reserved_0; +}; + +struct vpu_ipc_msg_payload_power_level { + /** + * Requested power level. The power level value is in the + * range [0, power_level_count-1] where power_level_count + * is the number of available power levels as returned by + * the get power level count command. A power level of 0 + * corresponds to the maximum possible power level, while + * power_level_count-1 corresponds to the minimum possible + * power level. Values outside of this range are not + * considered to be valid. + */ + u32 power_level; + /* Reserved */ + u32 reserved_0; +}; + +struct vpu_ipc_msg_payload_ssid_release { + /* Host sub-stream ID for the context to be released. */ + u32 host_ssid; + /* Reserved */ + u32 reserved_0; +}; + +/** + * @brief Metric streamer start command structure. + * This structure is also used with VPU_JSM_MSG_METRIC_STREAMER_INFO to request metric + * groups and metric counters description from the firmware. + * @see VPU_JSM_MSG_METRIC_STREAMER_START + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO + */ +struct vpu_jsm_metric_streamer_start { + /** + * Bitmask to select the desired metric groups. + * A metric group can belong only to one metric streamer instance at a time. + * Since each metric streamer instance has a unique set of metric groups, it + * can also identify a metric streamer instance if more than one instance was + * started. If the VPU device does not support multiple metric streamer instances, + * then VPU_JSM_MSG_METRIC_STREAMER_START will return an error even if the second + * instance has different groups to the first. + */ + u64 metric_group_mask; + /** Sampling rate in nanoseconds. */ + u64 sampling_rate; + /** + * If > 0 the VPU will send a VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION message + * after every @notify_sample_count samples is collected or dropped by the VPU. + * If set to UINT_MAX the VPU will only generate a notification when the metric + * buffer is full. If set to 0 the VPU will never generate a notification. + */ + u32 notify_sample_count; + u32 reserved_0; + /** + * Address and size of the buffer where the VPU will write metric data. The + * VPU writes all counters from enabled metric groups one after another. If + * there is no space left to write data at the next sample period the VPU + * will switch to the next buffer (@see next_buffer_addr) and will optionally + * send a notification to the host driver if @notify_sample_count is non-zero. + * If @next_buffer_addr is NULL the VPU will stop collecting metric data. + */ + u64 buffer_addr; + u64 buffer_size; + /** + * Address and size of the next buffer to write metric data to after the initial + * buffer is full. If the address is NULL the VPU will stop collecting metric + * data. + */ + u64 next_buffer_addr; + u64 next_buffer_size; +}; + +/** + * @brief Metric streamer stop command structure. + * @see VPU_JSM_MSG_METRIC_STREAMER_STOP + */ +struct vpu_jsm_metric_streamer_stop { + /** Bitmask to select the desired metric groups. */ + u64 metric_group_mask; +}; + +/** + * Provide VPU FW with buffers to write metric data. + * @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE + */ +struct vpu_jsm_metric_streamer_update { + /** Metric group mask that identifies metric streamer instance. */ + u64 metric_group_mask; + /** + * Address and size of the buffer where the VPU will write metric data. If + * the buffer address is 0 or same as the currently used buffer the VPU will + * continue writing metric data to the current buffer. In this case the + * buffer size is ignored and the size of the current buffer is unchanged. + * If the address is non-zero and differs from the current buffer address the + * VPU will immediately switch data collection to the new buffer. + */ + u64 buffer_addr; + u64 buffer_size; + /** + * Address and size of the next buffer to write metric data after the initial + * buffer is full. If the address is NULL the VPU will stop collecting metric + * data but will continue to record dropped samples. + * + * Note that there is a hazard possible if both buffer_addr and the next_buffer_addr + * are non-zero in same update request. It is the host's responsibility to ensure + * that both addresses make sense even if the VPU just switched to writing samples + * from the current to the next buffer. + */ + u64 next_buffer_addr; + u64 next_buffer_size; +}; + +struct vpu_ipc_msg_payload_blob_deinit { + /* 64-bit unique ID for the blob to be de-initialized. */ + u64 blob_id; +}; + +struct vpu_ipc_msg_payload_job_done { + /* Engine to which the job was submitted. */ + u32 engine_idx; + /* Index of the doorbell to which the job was submitted */ + u32 db_idx; + /* ID of the completed job */ + u32 job_id; + /* Status of the completed job */ + u32 job_status; + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved_0; + /* Command queue id */ + u64 cmdq_id; +}; + +struct vpu_jsm_engine_reset_context { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved_0; + /* Command queue id */ + u64 cmdq_id; + /* Flags: 0: cause of hang; 1: collateral damage of reset */ + u64 flags; +}; + +struct vpu_ipc_msg_payload_engine_reset_done { + /* Engine ordinal */ + u32 engine_idx; + /* Number of impacted contexts */ + u32 num_impacted_contexts; + /* Array of impacted command queue ids and their flags */ + struct vpu_jsm_engine_reset_context + impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS]; +}; + +struct vpu_ipc_msg_payload_engine_preempt_done { + /* Engine preempted. */ + u32 engine_idx; + /* ID of the preemption request. */ + u32 preempt_id; +}; + +/** + * Response structure for register doorbell command for both OS + * and HW scheduling. + * @see VPU_JSM_MSG_REGISTER_DB + * @see VPU_JSM_MSG_HWS_REGISTER_DB + */ +struct vpu_ipc_msg_payload_register_db_done { + /* Index of the registered doorbell. */ + u32 db_idx; + /* Reserved */ + u32 reserved_0; +}; + +/** + * Response structure for unregister doorbell command for both OS + * and HW scheduling. + * @see VPU_JSM_MSG_UNREGISTER_DB + */ +struct vpu_ipc_msg_payload_unregister_db_done { + /* Index of the unregistered doorbell. */ + u32 db_idx; + /* Reserved */ + u32 reserved_0; +}; + +struct vpu_ipc_msg_payload_query_engine_hb_done { + /* Engine returning heartbeat value. */ + u32 engine_idx; + /* Reserved */ + u32 reserved_0; + /* Heartbeat value. */ + u64 heartbeat; +}; + +struct vpu_ipc_msg_payload_get_power_level_count_done { + /** + * Number of supported power levels. The maximum possible + * value of power_level_count is 16 but this may vary across + * implementations. + */ + u32 power_level_count; + /* Reserved */ + u32 reserved_0; + /** + * Power consumption limit for each supported power level in + * [0-100%] range relative to power level 0. + */ + u8 power_limit[16]; +}; + +struct vpu_ipc_msg_payload_blob_deinit_done { + /* 64-bit unique ID for the blob de-initialized. */ + u64 blob_id; +}; + +/* HWS priority band setup request / response */ +struct vpu_ipc_msg_payload_hws_priority_band_setup { + /* + * Grace period in 100ns units when preempting another priority band for + * this priority band + */ + u32 grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + /* + * Default quantum in 100ns units for scheduling across processes + * within a priority band + */ + u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; + /* + * Default grace period in 100ns units for processes that preempt each + * other within a priority band + */ + u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + /* + * For normal priority band, specifies the target VPU percentage + * in situations when it's starved by the focus band. + */ + u32 normal_band_percentage; + /* Reserved */ + u32 reserved_0; +}; + +/* HWS create command queue request */ +struct vpu_ipc_msg_payload_hws_create_cmdq { + /* Process id */ + u64 process_id; + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; + /* Command queue base */ + u64 cmdq_base; + /* Command queue size */ + u32 cmdq_size; + /* Reserved */ + u32 reserved_0; +}; + +/* HWS create command queue response */ +struct vpu_ipc_msg_payload_hws_create_cmdq_rsp { + /* Process id */ + u64 process_id; + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; +}; + +/* HWS destroy command queue request / response */ +struct vpu_ipc_msg_payload_hws_destroy_cmdq { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved; + /* Command queue id */ + u64 cmdq_id; +}; + +/* HWS set context scheduling properties request / response */ +struct vpu_ipc_msg_payload_hws_set_context_sched_properties { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved_0; + /* Command queue id */ + u64 cmdq_id; + /* Priority band to assign to work of this context */ + u32 priority_band; + /* Inside realtime band assigns a further priority */ + u32 realtime_priority_level; + /* Priority relative to other contexts in the same process */ + u32 in_process_priority; + /* Zero padding / Reserved */ + u32 reserved_1; + /* Context quantum relative to other contexts of same priority in the same process */ + u64 context_quantum; + /* Grace period when preempting context of the same priority within the same process */ + u64 grace_period_same_priority; + /* Grace period when preempting context of a lower priority within the same process */ + u64 grace_period_lower_priority; +}; + +/* + * @brief Register doorbell command structure. + * This structure supports doorbell registration for both HW and OS scheduling. + * Note: Queue base and size are added here so that the same structure can be used for + * OS scheduling and HW scheduling. For OS scheduling, cmdq_id will be ignored + * and cmdq_base and cmdq_size will be used. For HW scheduling, cmdq_base and cmdq_size will be + * ignored and cmdq_id is used. + * @see VPU_JSM_MSG_HWS_REGISTER_DB + */ +struct vpu_jsm_hws_register_db { + /* Index of the doorbell to register. */ + u32 db_id; + /* Host sub-stream ID for the context assigned to the doorbell. */ + u32 host_ssid; + /* ID of the command queue associated with the doorbell. */ + u64 cmdq_id; + /* Virtual address pointing to the start of command queue. */ + u64 cmdq_base; + /* Size of the command queue in bytes. */ + u64 cmdq_size; +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and + * VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages. + * + * The payload is interpreted differently depending on the type of message: + * + * - For VPU_JSM_MSG_TRACE_SET_CONFIG, the payload specifies the desired + * logging configuration to be set. + * + * - For VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, the payload reports the logging + * configuration that was set after a VPU_JSM_MSG_TRACE_SET_CONFIG request. + * The host can compare this payload with the one it sent in the + * VPU_JSM_MSG_TRACE_SET_CONFIG request to check whether or not the + * configuration was set as desired. + * + * - VPU_JSM_MSG_TRACE_GET_CONFIG_RSP, the payload reports the current logging + * configuration. + */ +struct vpu_ipc_msg_payload_trace_config { + /** + * Logging level (currently set or to be set); see 'mvLog_t' enum for + * acceptable values. The specified logging level applies to all + * destinations and HW components + */ + u32 trace_level; + /** + * Bitmask of logging destinations (currently enabled or to be enabled); + * bitwise OR of values defined in logging_destination enum. + */ + u32 trace_destination_mask; + /** + * Bitmask of loggable HW components (currently enabled or to be enabled); + * bitwise OR of values defined in loggable_hw_component enum. + */ + u64 trace_hw_component_mask; + u64 reserved_0; /**< Reserved for future extensions. */ +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP messages. + */ +struct vpu_ipc_msg_payload_trace_capability_rsp { + u32 trace_destination_mask; /**< Bitmask of supported logging destinations. */ + u32 reserved_0; + u64 trace_hw_component_mask; /**< Bitmask of supported loggable HW components. */ + u64 reserved_1; /**< Reserved for future extensions. */ +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_GET_NAME requests. + */ +struct vpu_ipc_msg_payload_trace_get_name { + /** + * The type of the entity to query name for; see logging_entity_type for + * possible values. + */ + u32 entity_type; + u32 reserved_0; + /** + * The ID of the entity to query name for; possible values depends on the + * entity type. + */ + u64 entity_id; +}; + +/** + * Payload for VPU_JSM_MSG_TRACE_GET_NAME_RSP responses. + */ +struct vpu_ipc_msg_payload_trace_get_name_rsp { + /** + * The type of the entity whose name was queried; see logging_entity_type + * for possible values. + */ + u32 entity_type; + u32 reserved_0; + /** + * The ID of the entity whose name was queried; possible values depends on + * the entity type. + */ + u64 entity_id; + /** Reserved for future extensions. */ + u64 reserved_1; + /** The name of the entity. */ + char entity_name[VPU_TRACE_ENTITY_NAME_MAX_LEN]; +}; + +/** + * Data sent from the VPU to the host in all metric streamer response messages + * and in asynchronous notification. + * @see VPU_JSM_MSG_METRIC_STREAMER_START_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE + * @see VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION + */ +struct vpu_jsm_metric_streamer_done { + /** Metric group mask that identifies metric streamer instance. */ + u64 metric_group_mask; + /** + * Size in bytes of single sample - total size of all enabled counters. + * Some VPU implementations may align sample_size to more than 8 bytes. + */ + u32 sample_size; + u32 reserved_0; + /** + * Number of samples collected since the metric streamer was started. + * This will be 0 if the metric streamer was not started. + */ + u32 samples_collected; + /** + * Number of samples dropped since the metric streamer was started. This + * is incremented every time the metric streamer is not able to write + * collected samples because the current buffer is full and there is no + * next buffer to switch to. + */ + u32 samples_dropped; + /** Address of the buffer that contains the latest metric data. */ + u64 buffer_addr; + /** + * Number of bytes written into the metric data buffer. In response to the + * VPU_JSM_MSG_METRIC_STREAMER_INFO request this field contains the size of + * all group and counter descriptors. The size is updated even if the buffer + * in the request was NULL or too small to hold descriptors of all counters + */ + u64 bytes_written; +}; + +/** + * Metric group description placed in the metric buffer after successful completion + * of the VPU_JSM_MSG_METRIC_STREAMER_INFO command. This is followed by one or more + * @vpu_jsm_metric_counter_descriptor records. + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO + */ +struct vpu_jsm_metric_group_descriptor { + /** + * Offset to the next metric group (8-byte aligned). If this offset is 0 this + * is the last descriptor. The value of metric_info_size must be greater than + * or equal to sizeof(struct vpu_jsm_metric_group_descriptor) + name_string_size + * + description_string_size and must be 8-byte aligned. + */ + u32 next_metric_group_info_offset; + /** + * Offset to the first metric counter description record (8-byte aligned). + * @see vpu_jsm_metric_counter_descriptor + */ + u32 next_metric_counter_info_offset; + /** Index of the group. This corresponds to bit index in metric_group_mask. */ + u32 group_id; + /** Number of counters in the metric group. */ + u32 num_counters; + /** Data size for all counters, must be a multiple of 8 bytes.*/ + u32 metric_group_data_size; + /** + * Metric group domain number. Cannot use multiple, simultaneous metric groups + * from the same domain. + */ + u32 domain; + /** + * Counter name string size. The string must include a null termination character. + * The FW may use a fixed size name or send a different name for each counter. + * If the VPU uses fixed size strings, all characters from the end of the name + * to the of the fixed size character array must be zeroed. + */ + u32 name_string_size; + /** Counter description string size, @see name_string_size */ + u32 description_string_size; + u64 reserved_0; + /** + * Right after this structure, the VPU writes name and description of + * the metric group. + */ +}; + +/** + * Metric counter description, placed in the buffer after vpu_jsm_metric_group_descriptor. + * @see VPU_JSM_MSG_METRIC_STREAMER_INFO + */ +struct vpu_jsm_metric_counter_descriptor { + /** + * Offset to the next counter in a group (8-byte aligned). If this offset is + * 0 this is the last counter in the group. + */ + u32 next_metric_counter_info_offset; + /** + * Offset to the counter data from the start of samples in this metric group. + * Note that metric_data_offset % metric_data_size must be 0. + */ + u32 metric_data_offset; + /** Size of the metric counter data in bytes. */ + u32 metric_data_size; + /** Metric type, see Level Zero API for definitions. */ + u32 tier; + /** Metric type, see set_metric_type_t for definitions. */ + u32 metric_type; + /** Metric type, see set_value_type_t for definitions. */ + u32 metric_value_type; + /** + * Counter name string size. The string must include a null termination character. + * The FW may use a fixed size name or send a different name for each counter. + * If the VPU uses fixed size strings, all characters from the end of the name + * to the of the fixed size character array must be zeroed. + */ + u32 name_string_size; + /** Counter description string size, @see name_string_size */ + u32 description_string_size; + /** Counter component name string size, @see name_string_size */ + u32 component_string_size; + /** Counter string size, @see name_string_size */ + u32 units_string_size; + u64 reserved_0; + /** + * Right after this structure, the VPU writes name, description + * component and unit strings. + */ +}; + +/** + * Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests. + * + * VPU_JSM_MSG_DYNDBG_CONTROL are used to control the VPU FW Dynamic Debug + * feature, which allows developers to selectively enable / disable MVLOG_DEBUG + * messages. This is equivalent to the Dynamic Debug functionality provided by + * Linux + * (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html) + * The host can control Dynamic Debug behavior by sending dyndbg commands, which + * have the same syntax as Linux + * dyndbg commands. + * + * NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host + * still has to set the logging level to MVLOG_DEBUG, using the + * VPU_JSM_MSG_TRACE_SET_CONFIG command. + * + * The host can see the current dynamic debug configuration by executing a + * special 'show' command. The dyndbg configuration will be printed to the + * configured logging destination using MVLOG_INFO logging level. + */ +struct vpu_ipc_msg_payload_dyndbg_control { + /** + * Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated + * string. + */ + char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN]; +}; + +/* + * Payloads union, used to define complete message format. + */ +union vpu_ipc_msg_payload { + struct vpu_ipc_msg_payload_engine_reset engine_reset; + struct vpu_ipc_msg_payload_engine_preempt engine_preempt; + struct vpu_ipc_msg_payload_register_db register_db; + struct vpu_ipc_msg_payload_unregister_db unregister_db; + struct vpu_ipc_msg_payload_query_engine_hb query_engine_hb; + struct vpu_ipc_msg_payload_power_level power_level; + struct vpu_jsm_metric_streamer_start metric_streamer_start; + struct vpu_jsm_metric_streamer_stop metric_streamer_stop; + struct vpu_jsm_metric_streamer_update metric_streamer_update; + struct vpu_ipc_msg_payload_blob_deinit blob_deinit; + struct vpu_ipc_msg_payload_ssid_release ssid_release; + struct vpu_jsm_hws_register_db hws_register_db; + struct vpu_ipc_msg_payload_job_done job_done; + struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; + struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; + struct vpu_ipc_msg_payload_register_db_done register_db_done; + struct vpu_ipc_msg_payload_unregister_db_done unregister_db_done; + struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; + struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; + struct vpu_jsm_metric_streamer_done metric_streamer_done; + struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; + struct vpu_ipc_msg_payload_trace_config trace_config; + struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; + struct vpu_ipc_msg_payload_trace_get_name trace_get_name; + struct vpu_ipc_msg_payload_trace_get_name_rsp trace_get_name_rsp; + struct vpu_ipc_msg_payload_dyndbg_control dyndbg_control; + struct vpu_ipc_msg_payload_hws_priority_band_setup hws_priority_band_setup; + struct vpu_ipc_msg_payload_hws_create_cmdq hws_create_cmdq; + struct vpu_ipc_msg_payload_hws_create_cmdq_rsp hws_create_cmdq_rsp; + struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq; + struct vpu_ipc_msg_payload_hws_set_context_sched_properties + hws_set_context_sched_properties; +}; + +/* + * Host <-> LRT IPC message base structure. + * + * NOTE: All instances of this object must be aligned on a 64B boundary + * to allow proper handling of VPU cache operations. + */ +struct vpu_jsm_msg { + /* Reserved */ + u64 reserved_0; + /* Message type, see vpu_ipc_msg_type enum. */ + u32 type; + /* Buffer status, see vpu_ipc_msg_status enum. */ + u32 status; + /* + * Request ID, provided by the host in a request message and passed + * back by VPU in the response message. + */ + u32 request_id; + /* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */ + u32 result; + u64 reserved_1; + /* Message payload depending on message type, see vpu_ipc_msg_payload union. */ + union vpu_ipc_msg_payload payload; +}; + +#pragma pack(pop) + +#endif + +///@} |