// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gsc.h"

#include <linux/delay.h>

#include <drm/drm_managed.h>

#include <generated/xe_wa_oob.h>

#include "abi/gsc_mkhi_commands_abi.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc_pc.h"
#include "xe_huc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"
#include "regs/xe_gt_regs.h"

static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
{
	return container_of(gsc, struct xe_gt, uc.gsc);
}

static int memcpy_fw(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 fw_size = gsc->fw.size;
	void *storage;

	/*
	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
	 * a memcpy for now.
	 */
	storage = kmalloc(fw_size, GFP_KERNEL);
	if (!storage)
		return -ENOMEM;

	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0,
		      gsc->private->size - fw_size);

	kfree(storage);

	return 0;
}
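/*
 * The FW image is loaded by submitting a GSC_FW_LOAD instruction on the
 * GSCCS. As emitted below, the instruction is followed by the 64-bit GGTT
 * address of the private memory allocation and by its size in 4KB units,
 * tagged with GSC_FW_LOAD_LIMIT_VALID.
 */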
static int emit_gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	u64 offset = xe_bo_ggtt_addr(gsc->private);
	struct xe_bb *bb;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb->cs[bb->len++] = GSC_FW_LOAD;
	bb->cs[bb->len++] = lower_32_bits(offset);
	bb->cs[bb->len++] = upper_32_bits(offset);
	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;

	job = xe_bb_create_job(gsc->q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));

	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
}
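/*
 * The compatibility version is queried through the MKHI
 * GET_HOST_COMPATIBILITY_VERSION command. A single BO is used for both
 * directions: the HECI header and the request message are written at offset
 * 0, while the GSC reply is read back starting at offset GSC_VER_PKT_SZ.
 */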
#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
static int query_compatibility_version(struct xe_gsc *gsc)
{
	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 wr_offset;
	u32 rd_offset;
	u64 ggtt_offset;
	int err;

	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM |
				  XE_BO_FLAG_GGTT);
	if (IS_ERR(bo)) {
		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
		return PTR_ERR(bo);
	}

	ggtt_offset = xe_bo_ggtt_addr(bo);

	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
				       sizeof(struct gsc_get_compatibility_version_in));
	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);

	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
				       ggtt_offset + GSC_VER_PKT_SZ,
				       GSC_VER_PKT_SZ);
	if (err) {
		xe_gt_err(gt, "failed to submit GSC request for compatibility version: %d\n",
			  err);
		goto out_bo;
	}

	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
				     sizeof(struct gsc_get_compatibility_version_out),
				     &rd_offset);
	if (err) {
		xe_gt_err(gt, "invalid GSC reply for version query (err=%d)\n", err);
		goto out_bo;
	}

	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);

	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);

out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

static int gsc_fw_is_loaded(struct xe_gt *gt)
{
	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
	       HECI1_FWSTS1_INIT_COMPLETE;
}

static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}

static int gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int err;

	/* we should only be here if the init step was successful */
	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);

	if (gsc_fw_is_loaded(gt)) {
		xe_gt_err(gt, "GSC already loaded at upload time\n");
		return -EEXIST;
	}

	err = memcpy_fw(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
		return err;
	}

	/*
	 * GSC is only killed by an FLR, so we need to trigger one on unload to
	 * make sure we stop it. This is because we assign a chunk of memory to
	 * the GSC as part of the FW load, so we need to make sure it stops
	 * using it when we release it to the system on driver unload. Note
	 * that this is not a problem of the unload per se, because the GSC
	 * will not touch that memory unless there are requests for it coming
	 * from the driver; therefore, no accesses will happen while Xe is not
	 * loaded, but if we re-load the driver then the GSC might wake up and
	 * try to access that old memory location again.
	 * Given that an FLR is a very disruptive action (see the FLR function
	 * for details), we want to do it as the last action before releasing
	 * the access to the MMIO bar, which means we need to do it as part of
	 * mmio cleanup.
	 */
	xe->needs_flr_on_fini = true;

	err = emit_gsc_upload(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = gsc_fw_wait(gt);
	if (err) {
		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = query_compatibility_version(gsc);
	if (err)
		return err;

	err = xe_uc_fw_check_version_requirements(&gsc->fw);
	if (err)
		return err;

	return 0;
}

static int gsc_upload_and_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	if (XE_WA(gt, 14018094691)) {
		ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);

		/*
		 * If the forcewake fails we want to keep going, because the
		 * worst-case outcome of failing to apply the WA is that PXP
		 * won't work, which is not fatal. We still throw a warning so
		 * the issue is seen if it happens.
		 */
		xe_gt_WARN_ON(tile->primary_gt, ret);

		xe_gt_mcr_multicast_write(tile->primary_gt,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
	}

	ret = gsc_upload(gsc);

	if (XE_WA(gt, 14018094691))
		xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);

	if (ret)
		return ret;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);

	/* GSC load is done, restore expected GT frequencies */
	xe_gt_sanitize_freq(gt);

	xe_gt_dbg(gt, "GSC FW async load completed\n");

	/* HuC auth failure is not fatal */
	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);

	ret = xe_gsc_proxy_start(gsc);
	if (ret)
		return ret;

	xe_gt_dbg(gt, "GSC proxy init completed\n");

	return 0;
}

static int gsc_er_complete(struct xe_gt *gt)
{
	u32 er_status;

	if (!gsc_fw_is_loaded(gt))
		return 0;

	/*
	 * Starting on Xe2, the GSCCS engine reset is a 2-step process. When
	 * the driver or the GuC hit the GDRST register, the CS is immediately
	 * reset and a success is reported, but the GSC shim keeps resetting in
	 * the background. While the shim reset is ongoing, the CS is able to
	 * accept new context submission, but any commands that require the
	 * shim will be stalled until the reset is completed. This means that
	 * we can keep submitting to the GSCCS as long as we make sure that the
	 * preemption timeout is big enough to cover any delay introduced by
	 * the reset. When the shim reset completes, a specific CS interrupt is
	 * triggered, in response to which we need to check the
	 * GSCI_TIMER_STATUS register to see if the reset was successful or
	 * not.
	 * Note that the GSCI_TIMER_STATUS register is not power save/restored,
	 * so it gets reset on MC6 entry. However, a reset failure stops MC6,
	 * so in that scenario we're always guaranteed to find the correct
	 * value.
	 */
	er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;

	if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
		/*
		 * XXX: we should trigger an FLR here, but we don't have
		 * support for that yet.
		 */
		xe_gt_err(gt, "GSC ER timed out!\n");
		return -EIO;
	}

	return 0;
}
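/*
 * Callers latch GSC_ACTION_* bits in gsc->work_actions under gsc->lock (see
 * xe_gsc_hwe_irq_handler() and xe_gsc_load_start()) and then queue this
 * worker, which consumes all pending actions in one pass. The worker runs on
 * an ordered workqueue, so actions are always processed serially.
 */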
static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 actions;
	int ret;

	spin_lock_irq(&gsc->lock);
	actions = gsc->work_actions;
	gsc->work_actions = 0;
	spin_unlock_irq(&gsc->lock);

	xe_pm_runtime_get(xe);
	xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));

	if (actions & GSC_ACTION_ER_COMPLETE) {
		ret = gsc_er_complete(gt);
		if (ret)
			goto out;
	}

	if (actions & GSC_ACTION_FW_LOAD) {
		ret = gsc_upload_and_init(gsc);
		if (ret && ret != -EEXIST)
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
		else
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
	}

	if (actions & GSC_ACTION_SW_PROXY)
		xe_gsc_proxy_request_handler(gsc);

out:
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_pm_runtime_put(xe);
}

void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_gsc *gsc = &gt->uc.gsc;

	if (unlikely(!intr_vec))
		return;

	if (intr_vec & GSC_ER_COMPLETE) {
		spin_lock(&gsc->lock);
		gsc->work_actions |= GSC_ACTION_ER_COMPLETE;
		spin_unlock(&gsc->lock);

		queue_work(gsc->wq, &gsc->work);
	}
}

int xe_gsc_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	gsc->fw.type = XE_UC_FW_TYPE_GSC;
	INIT_WORK(&gsc->work, gsc_work);
	spin_lock_init(&gsc->lock);

	/* The GSC uC is only available on the media GT */
	if (tile->media_gt && (gt != tile->media_gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
		return 0;
	}

	/*
	 * Some platforms can have GuC but not GSC. That would cause
	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and
	 * abort all firmware loading. So check for GSC being enabled before
	 * propagating the failure back up. That way the higher level will
	 * keep going and load GuC as appropriate.
	 */
	ret = xe_uc_fw_init(&gsc->fw);
	if (!xe_uc_fw_is_enabled(&gsc->fw))
		return 0;
	else if (ret)
		goto out;

	ret = xe_gsc_proxy_init(gsc);
	if (ret && ret != -ENODEV)
		goto out;

	return 0;

out:
	xe_gt_err(gt, "GSC init failed with %d\n", ret);
	return ret;
}

static void free_resources(struct drm_device *drm, void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq) {
		destroy_workqueue(gsc->wq);
		gsc->wq = NULL;
	}

	if (gsc->q) {
		xe_exec_queue_put(gsc->q);
		gsc->q = NULL;
	}

	if (gsc->private) {
		xe_bo_unpin_map_no_vm(gsc->private);
		gsc->private = NULL;
	}
}
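/*
 * Everything needed for GSC submission only becomes available once the
 * hwconfig is loaded: a 4MB BO in stolen memory to stage the FW image, a
 * permanent kernel exec queue on the GSCCS and the ordered workqueue that
 * runs gsc_work(). All three are released by the free_resources() drmm
 * action registered below.
 */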
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_STOLEN |
				  XE_BO_FLAG_GGTT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		err = PTR_ERR(q);
		goto out_bo;
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		err = -ENOMEM;
		goto out_q;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out_q:
	xe_exec_queue_put(q);
out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
		return;

	/* GSC FW survives GT reset and D3Hot */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	spin_lock_irq(&gsc->lock);
	gsc->work_actions |= GSC_ACTION_FW_LOAD;
	spin_unlock_irq(&gsc->lock);

	queue_work(gsc->wq, &gsc->work);
}

void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
{
	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
		flush_work(&gsc->work);
}

/**
 * xe_gsc_remove() - Clean up the GSC structures before driver removal
 * @gsc: the GSC uC
 */
void xe_gsc_remove(struct xe_gsc *gsc)
{
	xe_gsc_proxy_remove(gsc);
}

/*
 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
 * GSC engine reset by writing a notification bit in the GS1 register and then
 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
 * for the FW to prepare for the reset, so we need to wait for that amount of
 * time.
 * After the reset is complete we need to then clear the GS1 register.
 */
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
{
	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;

	/* WA only applies if the GSC is loaded */
	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
		return;

	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);

	if (prep) {
		/* make sure the reset bit is clear when writing the CSR reg */
		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
			      HECI_H_CSR_RST, HECI_H_CSR_IG);
		msleep(200);
	}
}
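/*
 * Illustrative pairing for the WA above; the reset helper name is
 * hypothetical and only shown to make the expected prep/clear sequence
 * explicit:
 *
 *	xe_gsc_wa_14015076503(gt, true);   // notify the FW, sleeps 200ms
 *	do_gsccs_engine_reset(gt);         // hypothetical reset entry point
 *	xe_gsc_wa_14015076503(gt, false);  // clear the GS1 notification bit
 */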