author     Dave Airlie <airlied@redhat.com>    2024-08-30 05:41:05 +0200
committer  Dave Airlie <airlied@redhat.com>    2024-08-30 05:41:06 +0200
commit     8bdb468dd7a5d17f8556afdd4c8d046939ff965f (patch)
tree       b3c579119359e8161bf0956431d5ab33d215eecd /drivers/gpu
parent     Merge tag 'drm-misc-next-2024-08-29' of https://gitlab.freedesktop.org/drm/mi... (diff)
parent     drm/xe/bmg: Drop force_probe requirement (diff)
Merge tag 'drm-xe-next-2024-08-28' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes:
- Fix OA format masks which were breaking build with gcc-5
Cross-subsystem Changes:
Driver Changes:
- Use dma_fence_chain_free for a chain fence left unused as a sync (Matthew Brost)
- Refactor hw engine lookup and mmio access to be used in more places
(Dominik, Matt Auld, Mika Kuoppala)
- Enable priority mem read for Xe2 and later (Pallavi Mishra)
- Fix PL1 disable flow in xe_hwmon_power_max_write (Karthik)
- Fix refcount and speed up devcoredump (Matthew Brost)
- Add performance tuning changes to Xe2 (Akshata, Shekhar)
- Fix OA sysfs entry (Ashutosh)
- Add first GuC firmware support for BMG (Julia)
- Bump minimum GuC firmware for platforms under force_probe to match LNL
and BMG (Julia)
- Fix access check on user fence creation (Nirmoy)
- Add/document workarounds for Xe2 (Julia, Daniele, John, Tejas)
- Document workaround and use proper WA infra (Matt Roper)
- Fix VF configuration on media GT (Michal Wajdeczko)
- Fix VM dma-resv lock (Matthew Brost)
- Allow suspend/resume exec queue backend op to be called multiple times
(Matthew Brost)
- Add GT stats to debugfs (Nirmoy)
- Add hwconfig to debugfs (Matt Roper)
- Compile out all debugfs code with CONFIG_DEBUG_FS=n (Lucas, see the stub sketch below)
- Remove dead kunit code (Jani Nikula)
- Refactor drvdata storing to help display (Jani Nikula)
- Cleanup unused xe parameter in pte handling (Himal)
- Rename s/enable_display/probe_display/ for clarity (Lucas)
- Fix missing MCR annotation in a couple of registers (Tejas)
- Fix DGFX display suspend/resume (Maarten)
- Prepare exec_queue_kill for PXP handling (Daniele)
- Fix devm/drmm issues (Daniele, Matthew Brost)
- Fix tile and ggtt fini sequences (Matthew Brost)
- Fix crashes when probing without firmware in place (Daniele, Matthew Brost)
- Use xe_managed for kernel BOs (Daniele, Matthew Brost)
- Future-proof dss_per_group calculation by using hwconfig (Matt Roper)
- Use reserved copy engine for user binds on faulting devices
(Matthew Brost)
- Allow mixing dma-fence jobs and long-running faulting jobs (Francois)
- Cleanup redundant arg when creating user BO (Nirmoy)
- Prevent UAF around preempt fence (Auld)
- Fix display suspend/resume (Maarten)
- Use vma_pages() helper (Thorsten)
- Calculate pagefault queue size (Stuart, Matthew Auld)
- Fix missing pagefault wq destroy (Stuart)
- Fix lifetime handling of HW fence ctx (Matthew Brost)
- Fix destroy order for jobs (Matthew Brost)
- Fix TLB invalidation for media GT (Matthew Brost)
- Document GGTT (Rodrigo Vivi)
- Refactor GGTT layering and fix runtime outer protection (Rodrigo Vivi)
- Handle HPD polling on display pm runtime suspend/resume (Imre, Vinod)
- Drop unrequired NULL checks (Apoorva, Himal)
- Use separate rpm lockdep map for non-d3cold-capable devices (Thomas Hellström)
- Support "nomodeset" kernel command-line option (Thomas Zimmermann)
- Drop force_probe requirement for LNL and BMG (Lucas, Balasubramani)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/wd42jsh4i3q5zlrmi2cljejohdsrqc6hvtxf76lbxsp3ibrgmz@y54fa7wwxgsd
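The debugfs compile-out called out in the changelog follows the usual kernel pattern: the Makefile hunk below builds xe_debugfs.o, xe_gt_debugfs.o and friends only under ifeq ($(CONFIG_DEBUG_FS),y), and the headers grow empty static inline stubs for the =n case. A sketch of the stub side, modeled on the xe_debugfs.h hunk in this diff (illustrative, trimmed to one function):

    /* xe_debugfs.h: real prototype with debugfs built in, empty stub otherwise. */
    struct xe_device;

    #ifdef CONFIG_DEBUG_FS
    void xe_debugfs_register(struct xe_device *xe);
    #else
    static inline void xe_debugfs_register(struct xe_device *xe) { }
    #endif

Callers can then invoke xe_debugfs_register() unconditionally; with CONFIG_DEBUG_FS=n the stub compiles away along with the debugfs objects dropped from the Makefile.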
Diffstat (limited to 'drivers/gpu')
85 files changed, 2203 insertions(+), 834 deletions(-)
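The first hunk below (drm_print.c) lets the coredump printer run with a NULL iterator->data, so a print pass can be used purely to count bytes. The xe_devcoredump.c hunk further down relies on that for a two-pass capture; a minimal sketch of the idea, reusing names from this series (illustrative, error handling trimmed):

    /* Pass 1: NULL buffer, the NULL-tolerant printer only sizes the output. */
    ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);

    /* Pass 2: allocate once, then print again into the real buffer. */
    ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
    if (ss->read.buffer)
        __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);

    /* The devcoredump read() callback then just memcpy()s from ss->read.buffer. */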
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index cf24dfdeb6b2..0081190201a7 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -100,8 +100,9 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) copy = iterator->remain; /* Copy out the bit of the string that we need */ - memcpy(iterator->data, - str + (iterator->start - iterator->offset), copy); + if (iterator->data) + memcpy(iterator->data, + str + (iterator->start - iterator->offset), copy); iterator->offset = iterator->start + copy; iterator->remain -= copy; @@ -110,7 +111,8 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) len = min_t(ssize_t, strlen(str), iterator->remain); - memcpy(iterator->data + pos, str, len); + if (iterator->data) + memcpy(iterator->data + pos, str, len); iterator->offset += len; iterator->remain -= len; @@ -140,8 +142,9 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf) if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { ssize_t pos = iterator->offset - iterator->start; - snprintf(((char *) iterator->data) + pos, - iterator->remain, "%pV", vaf); + if (iterator->data) + snprintf(((char *) iterator->data) + pos, + iterator->remain, "%pV", vaf); iterator->offset += len; iterator->remain -= len; diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 73a1918e2537..3a6d99044828 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -317,3 +317,7 @@ void intel_dpt_destroy(struct i915_address_space *vm) i915_vm_put(&dpt->vm); } +u64 intel_dpt_offset(struct i915_vma *dpt_vma) +{ + return dpt_vma->node.start; +} diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h index ff18a525bfbe..1f88b0ee17e7 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.h +++ b/drivers/gpu/drm/i915/display/intel_dpt.h @@ -6,6 +6,8 @@ #ifndef __INTEL_DPT_H__ #define __INTEL_DPT_H__ +#include <linux/types.h> + struct drm_i915_private; struct i915_address_space; @@ -20,5 +22,6 @@ void intel_dpt_suspend(struct drm_i915_private *i915); void intel_dpt_resume(struct drm_i915_private *i915); struct i915_address_space * intel_dpt_create(struct intel_framebuffer *fb); +u64 intel_dpt_offset(struct i915_vma *dpt_vma); #endif /* __INTEL_DPT_H__ */ diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ba5a628b4757..1cf1d5c8b9dc 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -14,6 +14,7 @@ #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_types.h" +#include "intel_dpt.h" #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" @@ -1162,7 +1163,7 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, * within the DPT is always 0. 
*/ drm_WARN_ON(&i915->drm, plane_state->dpt_vma && - plane_state->dpt_vma->node.start); + intel_dpt_offset(plane_state->dpt_vma)); drm_WARN_ON(&i915->drm, offset & 0x1fffff); return offset >> 9; } else { diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 1ff9602a52f6..b9670ae09a9e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -28,7 +28,6 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ - xe_debugfs.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -46,7 +45,6 @@ xe-y += xe_bb.o \ xe_gt.o \ xe_gt_ccs_mode.o \ xe_gt_clock.o \ - xe_gt_debugfs.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ @@ -59,7 +57,6 @@ xe-y += xe_bb.o \ xe_guc_ads.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ - xe_guc_debugfs.o \ xe_guc_hwconfig.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ @@ -69,9 +66,9 @@ xe-y += xe_bb.o \ xe_heci_gsc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ + xe_hw_engine_group.o \ xe_hw_fence.o \ xe_huc.o \ - xe_huc_debugfs.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ @@ -107,7 +104,6 @@ xe-y += xe_bb.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ - xe_uc_debugfs.o \ xe_uc_fw.o \ xe_vm.o \ xe_vram.o \ @@ -124,7 +120,6 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ - xe_gt_sriov_vf_debugfs.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o @@ -133,7 +128,6 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ - xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ @@ -281,6 +275,16 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y) endif ifeq ($(CONFIG_DEBUG_FS),y) + xe-y += xe_debugfs.o \ + xe_gt_debugfs.o \ + xe_gt_sriov_vf_debugfs.o \ + xe_gt_stats.o \ + xe_guc_debugfs.o \ + xe_huc_debugfs.o \ + xe_uc_debugfs.o + + xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o + xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_debugfs.o \ i915-display/intel_display_debugfs_params.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 8f9f60b28306..6b30743a2f6c 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -351,6 +351,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index a20d2638ea7a..bdae8392e125 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -7,7 +7,8 @@ #define I915_VMA_H #include <uapi/drm/i915_drm.h> -#include <drm/drm_mm.h> + +#include "xe_ggtt_types.h" /* We don't want these from i915_drm.h in case of Xe */ #undef I915_TILING_X @@ -19,7 +20,7 @@ struct xe_bo; struct i915_vma { struct xe_bo *bo, *dpt; - struct drm_mm_node node; + struct xe_ggtt_node *node; }; #define i915_ggtt_clear_scanout(bo) do { } while (0) @@ -28,7 +29,7 @@ struct i915_vma { static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { - return vma->node.start; + return vma->node->base.start; } #endif diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 
1188ab83cfae..78a884ddd499 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -46,7 +46,7 @@ static bool has_display(struct xe_device *xe) */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return 0; return intel_display_driver_probe_defer(pdev); @@ -62,7 +62,7 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) */ void xe_display_driver_set_hooks(struct drm_driver *driver) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return; driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; @@ -104,7 +104,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_cleanup(xe); @@ -112,7 +112,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) int xe_display_init_nommio(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; /* Fake uncore lock */ @@ -129,7 +129,7 @@ static void xe_display_fini_noirq(void *arg) struct xe_device *xe = arg; struct intel_display *display = &xe->display; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_noirq(xe); @@ -141,7 +141,7 @@ int xe_display_init_noirq(struct xe_device *xe) struct intel_display *display = &xe->display; int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; intel_display_driver_early_probe(xe); @@ -172,7 +172,7 @@ static void xe_display_fini_noaccel(void *arg) { struct xe_device *xe = arg; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_nogem(xe); @@ -182,7 +182,7 @@ int xe_display_init_noaccel(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; err = intel_display_driver_probe_nogem(xe); @@ -194,7 +194,7 @@ int xe_display_init_noaccel(struct xe_device *xe) int xe_display_init(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; return intel_display_driver_probe(xe); @@ -202,7 +202,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_fini(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_hpd_poll_fini(xe); @@ -213,7 +213,7 @@ void xe_display_fini(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_register(xe); @@ -223,7 +223,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_unregister_dsm_handler(); @@ -233,7 +233,7 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_driver_remove(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove(xe); @@ -243,7 +243,7 @@ void xe_display_driver_remove(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (master_ctl & DISPLAY_IRQ) @@ -254,7 +254,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { struct intel_display *display = &xe->display; - if (!xe->info.enable_display) + if (!xe->info.probe_display) 
return; if (gu_misc_iir & GU_MISC_GSE) @@ -263,7 +263,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) void xe_display_irq_reset(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; gen11_display_irq_reset(xe); @@ -271,7 +271,7 @@ void xe_display_irq_reset(struct xe_device *xe) void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (gt->info.id == XE_GT0) @@ -308,11 +308,23 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) } } +/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */ +void xe_display_pm_runtime_suspend(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + if (xe->d3cold.allowed) + xe_display_pm_suspend(xe, true); + + intel_hpd_poll_enable(xe); +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; /* @@ -320,11 +332,14 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) * properly. */ intel_power_domains_disable(xe); - if (has_display(xe)) + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (!runtime && has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - - if (!runtime) + intel_display_driver_disable_user_access(xe); intel_display_driver_suspend(xe); + } + + xe_display_flush_cleanup_work(xe); xe_display_flush_cleanup_work(xe); @@ -332,19 +347,20 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_hpd_cancel_work(xe); + if (!runtime && has_display(xe)) + intel_display_driver_suspend_access(xe); + intel_encoder_suspend_all(&xe->display); intel_opregion_suspend(display, s2idle ? 
PCI_D1 : PCI_D3cold); - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); - intel_dmc_suspend(xe); } void xe_display_pm_suspend_late(struct xe_device *xe) { bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_suspend(xe, s2idle); @@ -352,9 +368,20 @@ void xe_display_pm_suspend_late(struct xe_device *xe) intel_display_power_suspend_late(xe); } +void xe_display_pm_runtime_resume(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + intel_hpd_poll_disable(xe); + + if (xe->d3cold.allowed) + xe_display_pm_resume(xe, true); +} + void xe_display_pm_resume_early(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_power_resume_early(xe); @@ -366,7 +393,7 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_dmc_resume(xe); @@ -377,14 +404,17 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); + if (!runtime && has_display(xe)) + intel_display_driver_resume_access(xe); + /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); - if (!runtime) + if (!runtime && has_display(xe)) { intel_display_driver_resume(xe); - - intel_hpd_poll_disable(xe); - if (has_display(xe)) drm_kms_helper_poll_enable(&xe->drm); + intel_display_driver_enable_user_access(xe); + intel_hpd_poll_disable(xe); + } intel_opregion_resume(display); @@ -404,7 +434,7 @@ int xe_display_probe(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) goto no_display; intel_display_device_probe(xe); @@ -417,7 +447,7 @@ int xe_display_probe(struct xe_device *xe) return 0; no_display: - xe->info.enable_display = false; + xe->info.probe_display = false; unset_display_features(xe); return 0; } diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 000fb5799df5..53d727fd792b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -38,6 +38,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime); void xe_display_pm_suspend_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); void xe_display_pm_resume(struct xe_device *xe, bool runtime); +void xe_display_pm_runtime_suspend(struct xe_device *xe); +void xe_display_pm_runtime_resume(struct xe_device *xe); #else @@ -67,6 +69,8 @@ static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} #endif /* CONFIG_DRM_XE_DISPLAY */ #endif /* _XE_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d7db44e79eaf..d650c5ac41a4 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -204,21 +204,28 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node.size && 
view->type == I915_GTT_VIEW_NORMAL) { + if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { vma->node = bo->ggtt_node; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); + goto out_unlock; + } for (x = 0; x < size; x += XE_PAGE_SIZE) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); } } else { u32 i, ggtt_ofs; @@ -227,12 +234,19 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, /* display seems to use tiles instead of bytes here, so convert it back.. */ u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); + goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); goto out_unlock; + } - ggtt_ofs = vma->node.start; + ggtt_ofs = vma->node->base.start; for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) write_ggtt_rotated(bo, ggtt, &ggtt_ofs, @@ -320,14 +334,11 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { - struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || - vma->bo->ggtt_node.start != vma->node.start) - xe_ggtt_remove_node(ggtt, &vma->node, false); + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || + vma->bo->ggtt_node->base.start != vma->node->base.start) + xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); @@ -377,8 +388,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) } /* - * For Xe introduce dummy intel_dpt_create which just return NULL and - * intel_dpt_destroy which does nothing. 
+ * For Xe introduce dummy intel_dpt_create which just return NULL, + * intel_dpt_destroy which does nothing, and fake intel_dpt_ofsset returning 0; */ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) { @@ -389,3 +400,8 @@ void intel_dpt_destroy(struct i915_address_space *vm) { return; } + +u64 intel_dpt_offset(struct i915_vma *dpt_vma) +{ + return 0; +} diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index c38db2a74614..81b71903675e 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -104,6 +104,7 @@ #define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) #define GHWSP_CSB_REPORT_DIS REG_BIT(15) #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) +#define CS_PRIORITY_MEM_READ REG_BIT(7) #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 8cd4a9589410..0d1a4a9f4e11 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,7 +80,10 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) -#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define STATELESS_COMPRESSION_CTRL XE_REG_MCR(0x4148) +#define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) + +#define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) #define CG_DIS_CNTLBUS REG_BIT(6) #define CCS_AUX_INV XE_REG(0x4208) @@ -193,6 +196,7 @@ #define GSCPSMI_BASE XE_REG(0x880c) #define CCCHKNREG1 XE_REG_MCR(0x8828) +#define L3CMPCTRL REG_BIT(23) #define ENCOMPPERFFIX REG_BIT(18) /* Fuse readout registers for GT */ @@ -367,6 +371,9 @@ #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define L3SQCREG2 XE_REG_MCR(0xb104) +#define COMPMEMRD256BOVRFETCHEN REG_BIT(20) + #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 1768483da1b7..8dac069483e8 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -36,7 +36,8 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. 
*/ if (clear) { - fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); @@ -124,7 +125,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Testing system memory\n"); bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -205,7 +206,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc xe_vm_lock(vm, false); bo = xe_bo_create_user(xe, NULL, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); xe_vm_unlock(vm); if (IS_ERR(bo)) { @@ -215,7 +215,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc external = xe_bo_create_user(xe, NULL, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c24c8509227e..13db6c0530b3 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -126,7 +126,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) kunit_info(test, "running %s\n", __func__); bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, params->mem_mask); + params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4344a1724029..1a192a2a941b 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -105,7 +105,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - fence = xe_migrate_clear(m, remote, remote->ttm.resource); + fence = xe_migrate_clear(m, remote, remote->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? 
"Clearing remote big bo" : "Clearing remote small bo", test)) { retval = xe_map_rd(xe, &remote->vmap, 0, u64); @@ -279,7 +280,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); expected = 0; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -300,7 +302,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); expected = 0; - fence = xe_migrate_clear(m, big, big->ttm.resource); + fence = xe_migrate_clear(m, big, big->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; @@ -603,7 +606,8 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Clear vram buffer object\n"); expected = 0x0000000000000000; - fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clear vram_bo", test)) return; dma_fence_put(fence); @@ -637,7 +641,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til long ret; sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, - DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, + DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(sys_bo)) { @@ -660,8 +664,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(sys_bo); - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -683,8 +688,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(ccs_bo); - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 577ee7d14381..67404863087e 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,58 +12,6 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> -struct kunit_test_data { - int ndevs; - xe_device_fn xe_fn; -}; - -static int dev_to_xe_device_fn(struct device *dev, void *__data) - -{ - struct drm_device *drm = dev_get_drvdata(dev); - struct kunit_test_data *data = __data; - int ret = 0; - int idx; - - data->ndevs++; - - if (drm_dev_enter(drm, &idx)) - ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); - drm_dev_exit(idx); - - return ret; -} - -/** - * xe_call_for_each_device - Iterate over all devices this driver binds to - * @xe_fn: Function to call for each device. 
- * - * This function iterated over all devices this driver binds to, and calls - * @xe_fn: for each one of them. If the called function returns anything else - * than 0, iteration is stopped and the return value is returned by this - * function. Across each function call, drm_dev_enter() / drm_dev_exit() is - * called for the corresponding drm device. - * - * Return: Number of devices iterated or - * the error code of a call to @xe_fn returning an error code. - */ -int xe_call_for_each_device(xe_device_fn xe_fn) -{ - int ret; - struct kunit_test_data data = { - .xe_fn = xe_fn, - .ndevs = 0, - }; - - ret = driver_for_each_device(&xe_pci_driver.driver, NULL, - &data, dev_to_xe_device_fn); - - if (!data.ndevs) - kunit_skip(current->kunit_test, "test runs only on hardware\n"); - - return ret ?: data.ndevs; -} - /** * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs * @xe_fn: Function to call for each device. diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 3e2558bc3c90..ede46800aff1 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -19,7 +19,6 @@ typedef int (*xe_device_fn)(struct xe_device *); typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); typedef void (*xe_media_fn)(const struct xe_media_desc *); -int xe_call_for_each_device(xe_device_fn xe_fn); void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 45652d7e6fa6..e3d68710a91a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -793,8 +793,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } } else { - if (move_lacks_source) - fence = xe_migrate_clear(migrate, bo, new_mem); + if (move_lacks_source) { + u32 flags = 0; + + if (mem_type_is_vram(new_mem->mem_type)) + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; + else if (handle_system_ccs) + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; + + fence = xe_migrate_clear(migrate, bo, new_mem, flags); + } else fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem, handle_system_ccs); @@ -1090,7 +1098,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node.size) + if (bo->ggtt_node && bo->ggtt_node->base.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS @@ -1491,11 +1499,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags) { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, type, + cpu_caching, ttm_bo_type_device, flags | XE_BO_FLAG_USER); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -2019,7 +2026,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, } bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - ttm_bo_type_device, bo_flags); + bo_flags); if (vm) xe_vm_unlock(vm); @@ -2325,7 +2332,6 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_NEEDS_CPU_ACCESS); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 
1c9dc8adaaa3..dbfb3209615d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -87,7 +87,6 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, @@ -195,9 +194,12 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_WARN_ON(bo->ggtt_node.size > bo->size); - XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); - return bo->ggtt_node.start; + if (XE_WARN_ON(!bo->ggtt_node)) + return 0; + + XE_WARN_ON(bo->ggtt_node->base.size > bo->size); + XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); + return bo->ggtt_node->base.start; } int xe_bo_vmap(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ebc8abf7930a..2ed558ac2264 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -8,12 +8,13 @@ #include <linux/iosys-map.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_placement.h> +#include "xe_ggtt_types.h" + struct xe_device; struct xe_vm; @@ -39,7 +40,7 @@ struct xe_bo { /** @placement: current placement for this BO */ struct ttm_placement placement; /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct drm_mm_node ggtt_node; + struct xe_ggtt_node *ggtt_node; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. 
*/ diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h index 715b8e2e0bd9..17f4c2f1b5e4 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.h +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -8,6 +8,10 @@ struct xe_device; +#ifdef CONFIG_DEBUG_FS void xe_debugfs_register(struct xe_device *xe); +#else +static inline void xe_debugfs_register(struct xe_device *xe) { } +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d8d8ca2c19d3..bdb76e834e4c 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static void xe_devcoredump_deferred_snap_work(struct work_struct *work) -{ - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - - /* keep going if fw fails as we still want to save the memory and SW data */ - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); -} - -static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) +static ssize_t __xe_devcoredump_read(char *buffer, size_t count, + struct xe_devcoredump *coredump) { - struct xe_devcoredump *coredump = data; struct xe_device *xe; struct xe_devcoredump_snapshot *ss; struct drm_printer p; @@ -89,18 +76,11 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct timespec64 ts; int i; - if (!coredump) - return -ENODEV; - xe = coredump_to_xe(coredump); ss = &coredump->snapshot; - /* Ensure delayed work is captured before continuing */ - flush_work(&ss->work); - iter.data = buffer; - iter.offset = 0; - iter.start = offset; + iter.start = 0; iter.remain = count; p = drm_coredump_printer(&iter); @@ -134,10 +114,83 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return count - iter.remain; } +static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) +{ + int i; + + xe_guc_ct_snapshot_free(ss->ct); + ss->ct = NULL; + + xe_guc_exec_queue_snapshot_free(ss->ge); + ss->ge = NULL; + + xe_sched_job_snapshot_free(ss->job); + ss->job = NULL; + + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (ss->hwe[i]) { + xe_hw_engine_snapshot_free(ss->hwe[i]); + ss->hwe[i] = NULL; + } + + xe_vm_snapshot_free(ss->vm); + ss->vm = NULL; +} + +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + + /* Calculate devcoredump size */ + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); + + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + return; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); + xe_devcoredump_snapshot_free(ss); +} + +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, + size_t count, 
void *data, size_t datalen) +{ + struct xe_devcoredump *coredump = data; + struct xe_devcoredump_snapshot *ss; + ssize_t byte_copied; + + if (!coredump) + return -ENODEV; + + ss = &coredump->snapshot; + + /* Ensure delayed work is captured before continuing */ + flush_work(&ss->work); + + if (!ss->read.buffer) + return -ENODEV; + + if (offset >= ss->read.size) + return 0; + + byte_copied = count < ss->read.size - offset ? count : + ss->read.size - offset; + memcpy(buffer, ss->read.buffer + offset, byte_copied); + + return byte_copied; +} + static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; - int i; /* Our device is gone. Nothing to do... */ if (!data || !coredump_to_xe(coredump)) @@ -145,13 +198,8 @@ static void xe_devcoredump_free(void *data) cancel_work_sync(&coredump->snapshot.work); - xe_guc_ct_snapshot_free(coredump->snapshot.ct); - xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); - xe_sched_job_snapshot_free(coredump->snapshot.job); - for (i = 0; i < XE_NUM_HW_ENGINES; i++) - if (coredump->snapshot.hwe[i]) - xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); - xe_vm_snapshot_free(coredump->snapshot.vm); + xe_devcoredump_snapshot_free(&coredump->snapshot); + kvfree(coredump->snapshot.read.buffer); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); @@ -260,4 +308,5 @@ int xe_devcoredump_init(struct xe_device *xe) { return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); } + #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 923cdf72a816..440d05d77a5a 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -46,6 +46,14 @@ struct xe_devcoredump_snapshot { struct xe_sched_job_snapshot *job; /** @vm: Snapshot of VM state */ struct xe_vm_snapshot *vm; + + /** @read: devcoredump in human readable format */ + struct { + /** @read.size: size of devcoredump in human readable format */ + ssize_t size; + /** @read.buffer: buffer of devcoredump in human readable format */ + char *buffer; + } read; }; /** diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1aba6f9eaa19..b6db7e082d88 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -37,6 +37,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" #include "xe_memirq.h" @@ -165,6 +166,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) * vm->lock taken during xe_exec_queue_kill(). 
*/ xa_for_each(&xef->exec_queue.xa, idx, q) { + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); xe_exec_queue_kill(q); xe_exec_queue_put(q); } @@ -543,7 +546,7 @@ static void update_device_info(struct xe_device *xe) { /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { - xe->info.enable_display = 0; + xe->info.probe_display = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index db6cc8d0d6b8..f052c06a2d2f 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -15,6 +15,11 @@ static inline struct xe_device *to_xe_device(const struct drm_device *dev) return container_of(dev, struct xe_device, drm); } +static inline struct xe_device *kdev_to_xe_device(struct device *kdev) +{ + return dev_get_drvdata(kdev); +} + static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) { return pci_get_drvdata(pdev); @@ -134,16 +139,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) void xe_device_assert_mem_access(struct xe_device *xe); -static inline bool xe_device_in_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_fault_mode != 0; -} - -static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_non_fault_mode != 0; -} - static inline bool xe_device_has_flat_ccs(struct xe_device *xe) { return xe->info.has_flat_ccs; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5b7292a9a66d..856db4020048 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -204,7 +204,7 @@ struct xe_tile { struct xe_memirq memirq; /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct drm_mm_node ggtt_balloon[2]; + struct xe_ggtt_node *ggtt_balloon[2]; } vf; } sriov; @@ -282,8 +282,15 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; - /** @info.enable_display: display enabled */ - u8 enable_display:1; + /** + * @info.probe_display: Probe display hardware. If set to + * false, the driver will behave as if there is no display + * hardware present and will not try to read/write to it in any + * way. The display hardware, if it exists, will not be + * exposed to userspace and will be left untouched in whatever + * state the firmware or bootloader left it in. 
+ */ + u8 probe_display:1; /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ @@ -361,10 +368,6 @@ struct xe_device { struct xarray asid_to_vm; /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; - /** @usm.num_vm_in_fault_mode: number of VM in fault mode */ - u32 num_vm_in_fault_mode; - /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */ - u32 num_vm_in_non_fault_mode; /** @usm.lock: protects UM state */ struct mutex lock; } usm; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index f36980aa26e6..484acfbe0e61 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -14,6 +14,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_hw_engine_group.h" #include "xe_macros.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" @@ -124,6 +125,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; + struct xe_hw_engine_group *group; + enum xe_hw_engine_group_execution_mode mode, previous_mode; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || @@ -182,6 +185,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } + group = q->hwe->hw_engine_group; + mode = xe_hw_engine_group_find_exec_mode(q); + + if (mode == EXEC_MODE_DMA_FENCE) { + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); + if (err) + goto err_syncs; + } + retry: if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); @@ -199,7 +211,7 @@ retry: downgrade_write(&vm->lock); write_locked = false; if (err) - goto err_unlock_list; + goto err_hw_exec_mode; } if (!args->num_batch_buffer) { @@ -312,6 +324,9 @@ retry: spin_unlock(&xe->ttm.lru_lock); } + if (mode == EXEC_MODE_LR) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + err_repin: if (!xe_vm_in_lr_mode(vm)) up_read(&vm->userptr.notifier_lock); @@ -324,6 +339,9 @@ err_unlock_list: up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; +err_hw_exec_mode: + if (mode == EXEC_MODE_DMA_FENCE) + xe_hw_engine_group_put(group); err_syncs: while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 017d939659b5..e53937fafd14 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); + INIT_LIST_HEAD(&q->hw_engine_group_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -166,7 +168,8 @@ err_post_alloc: struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags) + enum xe_engine_class class, + u32 flags, u64 extensions) { struct xe_hw_engine *hwe, *hwe0 = NULL; enum xe_hw_engine_id id; @@ -186,7 +189,54 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct 
xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); +} + +/** + * xe_exec_queue_create_bind() - Create bind exec queue. + * @xe: Xe device. + * @tile: tile which bind exec queue belongs to. + * @flags: exec queue creation flags + * @extensions: exec queue creation extensions + * + * Normalize bind exec queue creation. Bind exec queue is tied to migration VM + * for access to physical memory required for page table programming. On a + * faulting devices the reserved copy engine instance must be used to avoid + * deadlocking (user binds cannot get stuck behind faults as kernel binds which + * resolve faults depend on user binds). On non-faulting devices any copy engine + * can be used. + * + * Returns exec queue on success, ERR_PTR on failure + */ +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions) +{ + struct xe_gt *gt = tile->primary_gt; + struct xe_exec_queue *q; + struct xe_vm *migrate_vm; + + migrate_vm = xe_migrate_get_vm(tile->migrate); + if (xe->info.has_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + + if (!hwe) + return ERR_PTR(-EINVAL); + + q = xe_exec_queue_create(xe, migrate_vm, + BIT(hwe->logical_instance), 1, hwe, + flags, extensions); + } else { + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, flags, + extensions); + } + xe_vm_put(migrate_vm); + + return q; } void xe_exec_queue_destroy(struct kref *ref) @@ -418,63 +468,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -497,7 +490,7 @@ static u32 calc_validate_logical_mask(struct 
xe_device *xe, struct xe_gt *gt, n = j * width + i; - hwe = find_hw_engine(xe, eci[n]); + hwe = xe_hw_engine_lookup(xe, eci[n]); if (XE_IOCTL_DBG(xe, !hwe)) return 0; @@ -536,8 +529,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; - struct xe_vm *vm, *migrate_vm; + struct xe_vm *vm; struct xe_gt *gt; + struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; u32 id; @@ -562,37 +556,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - for_each_gt(gt, xe, id) { - struct xe_exec_queue *new; - u32 flags; - - if (xe_gt_is_media_type(gt)) - continue; - - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); + if (XE_IOCTL_DBG(xe, args->width != 1) || + XE_IOCTL_DBG(xe, args->num_placements != 1) || + XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return -EINVAL; - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); + for_each_tile(tile, xe, id) { + struct xe_exec_queue *new; + u32 flags = EXEC_QUEUE_FLAG_VM; - xe_pm_runtime_put(xe); /* now held by engine */ + if (id) + flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - xe_vm_put(migrate_vm); + new = xe_exec_queue_create_bind(xe, tile, flags, + args->extensions); if (IS_ERR(new)) { err = PTR_ERR(new); if (q) @@ -613,7 +590,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; @@ -648,6 +625,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) goto put_exec_queue; } + + if (q->vm && q->hwe->hw_engine_group) { + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); + if (err) + goto put_exec_queue; + } } mutex_lock(&xef->exec_queue.lock); @@ -798,6 +781,15 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } +/** + * xe_exec_queue_kill - permanently stop all execution from an exec queue + * @q: The exec queue + * + * This function permanently stops all activity on an exec queue. If the queue + * is actively executing on the HW, it will be kicked off the engine; any + * pending jobs are discarded and all future submissions are rejected. + * This function is safe to call multiple times. 
+ */ void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; @@ -830,6 +822,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); @@ -841,10 +836,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); - else + } else { xe_vm_assert_held(vm); + lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); + } } /** @@ -856,10 +853,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) { xe_exec_queue_last_fence_lockdep_assert(q, vm); - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } + xe_exec_queue_last_fence_put_unlocked(q); } /** @@ -902,6 +896,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, } /** + * xe_exec_queue_last_fence_get_for_resume() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get last fence, takes a ref. Only safe to be called in the context of + * resuming the hw engine group's long-running exec queue, when the group + * semaphore is held. + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + struct dma_fence *fence; + + lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put_unlocked(q); + + fence = q->last_fence ? 
q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** * xe_exec_queue_last_fence_set() - Set last fence * @q: The exec queue * @vm: The VM the engine does a bind or exec for diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index ded77b0f3b90..90c7f73eab88 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -20,7 +20,11 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v u64 extensions); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags); + enum xe_engine_class class, + u32 flags, u64 extensions); +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions); void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); @@ -73,6 +77,8 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, struct xe_vm *vm); +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *e, + struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index fc2a1a20b7e4..7deb480e26af 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -140,6 +140,8 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; + /** @hw_engine_group_link: link into exec queues in the same hw engine group */ + struct list_head hw_engine_group_link; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0cdbc1296e88..f3fca5565d32 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -30,6 +30,39 @@ #include "xe_wa.h" #include "xe_wopcm.h" +/** + * DOC: Global Graphics Translation Table (GGTT) + * + * Xe GGTT implements the support for a Global Virtual Address space that is used + * for resources that are accessible to privileged (i.e. kernel-mode) processes, + * and not tied to a specific user-level process. For example, the Graphics + * micro-Controller (GuC) and Display Engine (if present) utilize this Global + * address space. + * + * The Global GTT (GGTT) translates from the Global virtual address to a physical + * address that can be accessed by HW. The GGTT is a flat, single-level table. + * + * Xe implements a simplified version of the GGTT specifically managing only a + * certain range of it that goes from the Write Once Protected Content Memory (WOPCM) + * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to + * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space + * is limited on both ends of the GGTT, because the GuC shim HW redirects + * accesses to those addresses to other HW areas instead of going through the + * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size, + * while on the top side the limit is fixed at GUC_GGTT_TOP. 
To keep things + * simple, instead of checking each object to see if they are accessed by GuC or + * not, we just exclude those areas from the allocator. Additionally, to simplify + * the driver load, we use the maximum WOPCM size in this logic instead of the + * programmed one, so we don't need to wait until the actual size to be + * programmed is determined (which requires FW fetch) before initializing the + * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs + * depending on the platform) but we can live with this. Another benefit of this + * is the GuC bootrom can't access anything below the WOPCM max size so anything + * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT + * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max + * give us the correct placement for free. + */ + static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index) { @@ -128,11 +161,12 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; + destroy_workqueue(ggtt->wq); mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); } -static void ggtt_fini(struct drm_device *drm, void *arg) +static void ggtt_fini(void *arg) { struct xe_ggtt *ggtt = arg; @@ -164,12 +198,16 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; -/* - * Early GGTT initialization, which allows to create new mappings usable by the - * GuC. - * Mappings are not usable by the HW engines, as it doesn't have scratch / +/** + * xe_ggtt_init_early - Early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * It allows to create new mappings usable by the GuC. + * Mappings are not usable by the HW engines, as it doesn't have scratch nor * initial clear done to it yet. That will happen in the regular, non-early - * GGTT init. + * GGTT initialization. + * + * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_init_early(struct xe_ggtt *ggtt) { @@ -194,29 +232,6 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ggtt->flags |= XE_GGTT_FLAGS_64K; - /* - * 8B per entry, each points to a 4KB page. - * - * The GuC address space is limited on both ends of the GGTT, because - * the GuC shim HW redirects accesses to those addresses to other HW - * areas instead of going through the GGTT. On the bottom end, the GuC - * can't access offsets below the WOPCM size, while on the top side the - * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of - * checking each object to see if they are accessed by GuC or not, we - * just exclude those areas from the allocator. Additionally, to - * simplify the driver load, we use the maximum WOPCM size in this logic - * instead of the programmed one, so we don't need to wait until the - * actual size to be programmed is determined (which requires FW fetch) - * before initializing the GGTT. These simplifications might waste space - * in the GGTT (about 20-25 MBs depending on the platform) but we can - * live with this. - * - * Another benifit of this is the GuC bootrom can't access anything - * below the WOPCM max size so anything the bootom needs to access (e.g. - * a RSA key) needs to be placed in the GGTT above the WOPCM max size. - * Starting the GGTT allocations above the WOPCM max give us the correct - * placement for free. 
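An illustrative aside on the range described in the DOC comment above: the allocator only ever manages [max WOPCM size, min(GGTT size, GUC_GGTT_TOP)). A standalone sketch with made-up sizes (GUC_GGTT_TOP matches the driver's constant; everything else here is hypothetical):

/*
 * Illustrative sketch only: mirrors the range restriction described in the
 * DOC comment above. The WOPCM and GGTT sizes below are made up.
 */
#include <stdio.h>
#include <stdint.h>

#define GUC_GGTT_TOP 0xFEE00000ULL

int main(void)
{
	uint64_t ggtt_size = 1ULL << 32;	/* hypothetical 4 GiB GGTT */
	uint64_t wopcm_max = 16ULL << 20;	/* hypothetical maximum WOPCM size */
	uint64_t start = wopcm_max;		/* GuC cannot reach below the WOPCM */
	uint64_t end = ggtt_size < GUC_GGTT_TOP ? ggtt_size : GUC_GGTT_TOP;

	/* Anything outside [start, end) is simply never handed to drm_mm */
	printf("managed GGTT range: [%#llx, %#llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}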
- */ if (ggtt->size > GUC_GGTT_TOP) ggtt->size = GUC_GGTT_TOP; @@ -228,6 +243,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); @@ -262,6 +279,77 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +static void ggtt_node_remove(struct xe_ggtt_node *node) +{ + struct xe_ggtt *ggtt = node->ggtt; + struct xe_device *xe = tile_to_xe(ggtt->tile); + bool bound; + int idx; + + bound = drm_dev_enter(&xe->drm, &idx); + + mutex_lock(&ggtt->lock); + if (bound) + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; + mutex_unlock(&ggtt->lock); + + if (!bound) + goto free_node; + + if (node->invalidate_on_remove) + xe_ggtt_invalidate(ggtt); + + drm_dev_exit(idx); + +free_node: + xe_ggtt_node_fini(node); +} + +static void ggtt_node_remove_work_func(struct work_struct *work) +{ + struct xe_ggtt_node *node = container_of(work, typeof(*node), + delayed_removal_work); + struct xe_device *xe = tile_to_xe(node->ggtt->tile); + + xe_pm_runtime_get(xe); + ggtt_node_remove(node); + xe_pm_runtime_put(xe); +} + +/** + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT + * @node: the &xe_ggtt_node to be removed + * @invalidate: if node needs invalidation upon removal + */ +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) +{ + struct xe_ggtt *ggtt; + struct xe_device *xe; + + if (!node || !node->ggtt) + return; + + ggtt = node->ggtt; + xe = tile_to_xe(ggtt->tile); + + node->invalidate_on_remove = invalidate; + + if (xe_pm_runtime_get_if_active(xe)) { + ggtt_node_remove(node); + xe_pm_runtime_put(xe); + } else { + queue_work(ggtt->wq, &node->delayed_removal_work); + } +} + +/** + * xe_ggtt_init - Regular non-early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * Return: 0 on success or a negative error code on failure. 
+ */ int xe_ggtt_init(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -289,7 +377,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) xe_ggtt_initial_clear(ggtt); - return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); + return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt); err: ggtt->scratch = NULL; return err; @@ -314,26 +402,6 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); } -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) -{ - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; - u64 addr, scratch_pte; - - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); - - printk("%sGlobal GTT:", prefix); - for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { - unsigned int i = addr / XE_PAGE_SIZE; - - xe_tile_assert(ggtt->tile, addr <= U32_MAX); - if (ggtt->gsm[i] == scratch_pte) - continue; - - printk("%s ggtt[0x%08x] = 0x%016llx", - prefix, (u32)addr, ggtt->gsm[i]); - } -} - static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, const struct drm_mm_node *node, const char *description) { @@ -347,88 +415,180 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, } /** - * xe_ggtt_balloon - prevent allocation of specified GGTT addresses - * @ggtt: the &xe_ggtt where we want to make reservation + * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses + * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region - * @node: the &drm_mm_node to hold reserved GGTT node * - * Use xe_ggtt_deballoon() to release a reserved GGTT node. + * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) { + struct xe_ggtt *ggtt = node->ggtt; int err; xe_tile_assert(ggtt->tile, start < end); xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); - node->color = 0; - node->start = start; - node->size = end - start; + node->base.color = 0; + node->base.start = start; + node->base.size = end - start; mutex_lock(&ggtt->lock); - err = drm_mm_reserve_node(&ggtt->mm, node); + err = drm_mm_reserve_node(&ggtt->mm, &node->base); mutex_unlock(&ggtt->lock); if (xe_gt_WARN(ggtt->tile->primary_gt, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->start, node->start + node->size, ERR_PTR(err))) + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; - xe_ggtt_dump_node(ggtt, node, "balloon"); + xe_ggtt_dump_node(ggtt, &node->base, "balloon"); return 0; } /** - * xe_ggtt_deballoon - release a reserved GGTT region - * @ggtt: the &xe_ggtt where reserved node belongs - * @node: the &drm_mm_node with reserved GGTT region + * xe_ggtt_node_remove_balloon - release a reserved GGTT region + * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_balloon() for details. + * See xe_ggtt_node_insert_balloon() for details. 
*/ -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) { - if (!drm_mm_node_allocated(node)) + if (!node || !node->ggtt) return; - xe_ggtt_dump_node(ggtt, node, "deballoon"); + if (!drm_mm_node_allocated(&node->base)) + goto free_node; - mutex_lock(&ggtt->lock); - drm_mm_remove_node(node); - mutex_unlock(&ggtt->lock); + xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); + + mutex_lock(&node->ggtt->lock); + drm_mm_remove_node(&node->base); + mutex_unlock(&node->ggtt->lock); + +free_node: + xe_ggtt_node_fini(node); } -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align, u32 mm_flags) +/** + * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT + * @node: the &xe_ggtt_node to be inserted + * @size: size of the node + * @align: alignment constraint of the node + * @mm_flags: flags to control the node behavior + * + * It cannot be called without first having called xe_ggtt_init() once. + * To be used in cases where ggtt->lock is already taken. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags) { - return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, mm_flags); } -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align) +/** + * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT + * @node: the &xe_ggtt_node to be inserted + * @size: size of the node + * @align: alignment constraint of the node + * + * It cannot be called without first having called xe_ggtt_init() once. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) { int ret; - mutex_lock(&ggtt->lock); - ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, - align, DRM_MM_INSERT_HIGH); - mutex_unlock(&ggtt->lock); + if (!node || !node->ggtt) + return -ENOENT; + + mutex_lock(&node->ggtt->lock); + ret = xe_ggtt_node_insert_locked(node, size, align, + DRM_MM_INSERT_HIGH); + mutex_unlock(&node->ggtt->lock); return ret; } +/** + * xe_ggtt_node_init - Initialize %xe_ggtt_node struct + * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. + * + * This function will allocate the struct %xe_ggtt_node and return its pointer. + * This struct will then be freed after the node removal upon xe_ggtt_node_remove() + * or xe_ggtt_node_remove_balloon(). + * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated + * in GGTT. Only xe_ggtt_node_insert(), xe_ggtt_node_insert_locked() or + * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * + * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 
+ **/ +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) +{ + struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS); + + if (!node) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); + node->ggtt = ggtt; + + return node; +} + +/** + * xe_ggtt_node_fini - Forcibly finalize %xe_ggtt_node struct + * @node: the &xe_ggtt_node to be freed + * + * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked() + * or xe_ggtt_node_insert_balloon(), and this @node is not going to be reused, then + * this function needs to be called to free the %xe_ggtt_node struct. + **/ +void xe_ggtt_node_fini(struct xe_ggtt_node *node) +{ + kfree(node); +} + +/** + * xe_ggtt_node_allocated - Check if node is allocated in GGTT + * @node: the &xe_ggtt_node to be inspected + * + * Return: True if allocated, False otherwise. + */ +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) +{ + if (!node || !node->ggtt) + return false; + + return drm_mm_node_allocated(&node->base); +} + +/** + * xe_ggtt_map_bo - Map the BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + */ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start = bo->ggtt_node.start; + u64 start; u64 offset, pte; + if (XE_WARN_ON(!bo->ggtt_node)) + return; + + start = bo->ggtt_node->base.start; + for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); @@ -444,9 +604,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node.size)) { + if (XE_WARN_ON(bo->ggtt_node)) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); return 0; } @@ -455,69 +615,108 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + + bo->ggtt_node = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node)) { + err = PTR_ERR(bo->ggtt_node); + goto out; + } + mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (!err) + if (err) + xe_ggtt_node_fini(bo->ggtt_node); + else xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) xe_ggtt_invalidate(ggtt); + +out: xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return err; } +/** + * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * @start: address where it will be inserted + * @end: end of the range where it will be inserted + * + * Return: 0 on success or a negative error code on failure. 
+ */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); } +/** + * xe_ggtt_insert_bo - Insert BO into GGTT + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - bool invalidate) +/** + * xe_ggtt_remove_bo - Remove a BO from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @bo: the &xe_bo to be removed + */ +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - struct xe_device *xe = tile_to_xe(ggtt->tile); - bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); - if (bound) - xe_pm_runtime_get_noresume(xe); - - mutex_lock(&ggtt->lock); - if (bound) - xe_ggtt_clear(ggtt, node->start, node->size); - drm_mm_remove_node(node); - node->size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) + if (XE_WARN_ON(!bo->ggtt_node)) return; - if (invalidate) - xe_ggtt_invalidate(ggtt); + /* This BO is not currently in the GGTT */ + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); - xe_pm_runtime_put(xe); - drm_dev_exit(idx); + xe_ggtt_node_remove(bo->ggtt_node, + bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } -void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +/** + * xe_ggtt_largest_hole - Largest GGTT hole + * @ggtt: the &xe_ggtt that will be inspected + * @alignment: minimum alignment + * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare + * + * Return: size of the largest continuous GGTT region + */ +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare) { - if (XE_WARN_ON(!bo->ggtt_node.size)) - return; + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 max_hole = 0; - /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + mutex_lock(&ggtt->lock); - xe_ggtt_remove_node(ggtt, &bo->ggtt_node, - bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + if (spare) + *spare -= min3(*spare, hole_size, max_hole); + max_hole = max(max_hole, hole_size); + } + + mutex_unlock(&ggtt->lock); + + return max_hole; } #ifdef CONFIG_PCI_IOV @@ -548,22 +747,28 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node /** * xe_ggtt_assign - assign a GGTT region to the VF - * @ggtt: the &xe_ggtt where the node belongs - * @node: the &drm_mm_node to update + * @node: the &xe_ggtt_node to update * @vfid: the VF identifier * * This function is used by the PF driver to assign a GGTT region to the VF. * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some * platforms VFs can't modify that either. 
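For reference, the hole trimming in xe_ggtt_largest_hole() above reduces to aligning each hole start up and each hole end down before comparing sizes; a standalone sketch with hypothetical numbers:

/*
 * Sketch of the per-hole trimming done in xe_ggtt_largest_hole(): align the
 * start up, the end down, and skip holes that vanish. Values are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((uint64_t)(a) - 1))
#define ALIGN_DOWN(x, a)  ((x) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t alignment = 0x10000;			/* e.g. 64 KiB granularity */
	uint64_t hole_start = 0x12345, hole_end = 0x4F000;

	hole_start = ALIGN_UP(hole_start, alignment);	/* -> 0x20000 */
	hole_end = ALIGN_DOWN(hole_end, alignment);	/* -> 0x40000 */

	if (hole_start < hole_end)
		printf("usable hole: %#llx-%#llx (%llu KiB)\n",
		       (unsigned long long)hole_start, (unsigned long long)hole_end,
		       (unsigned long long)((hole_end - hole_start) >> 10));
	return 0;
}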
*/ -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) { - mutex_lock(&ggtt->lock); - xe_ggtt_assign_locked(ggtt, node, vfid); - mutex_unlock(&ggtt->lock); + mutex_lock(&node->ggtt->lock); + xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); + mutex_unlock(&node->ggtt->lock); } #endif +/** + * xe_ggtt_dump - Dump GGTT for debug + * @ggtt: the &xe_ggtt to be dumped + * @p: the &drm_mm_printer helper handle to be used to dump the information + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) { int err; @@ -576,3 +781,43 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) mutex_unlock(&ggtt->lock); return err; } + +/** + * xe_ggtt_print_holes - Print holes + * @ggtt: the &xe_ggtt to be inspected + * @alignment: min alignment + * @p: the &drm_printer + * + * Print GGTT ranges that are available and return total size available. + * + * Return: Total available size. + */ +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p) +{ + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 total = 0; + char buf[10]; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + total += hole_size; + + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", + hole_start, hole_end - 1, buf); + } + + mutex_unlock(&ggtt->lock); + + return total; +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 6a96fd54bf60..27e7d67de004 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -12,28 +12,30 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); - -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); - -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align); -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, - struct drm_mm_node *node, - u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - bool invalidate); + +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); +void xe_ggtt_node_fini(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); + +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end); 
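Taken together, the reworked declarations above imply a caller flow along the lines of the sketch below; the helper name, error handling and SZ_4K alignment are illustrative choices, not code from this series:

/*
 * Hypothetical caller of the new xe_ggtt_node API, following the documented
 * lifecycle: init -> insert -> (use) -> remove. Assumes the usual driver
 * context (linux/err.h, linux/sizes.h); not part of the patch.
 */
static int example_reserve_ggtt_range(struct xe_ggtt *ggtt, u32 size)
{
	struct xe_ggtt_node *node;
	int err;

	node = xe_ggtt_node_init(ggtt);		/* allocates only the tracking struct */
	if (IS_ERR(node))
		return PTR_ERR(node);

	err = xe_ggtt_node_insert(node, size, SZ_4K);	/* actually claims GGTT space */
	if (err) {
		xe_ggtt_node_fini(node);	/* node never made it into the GGTT */
		return err;
	}

	/* ... use node->base.start as the GGTT offset ... */

	xe_ggtt_node_remove(node, true);	/* releases the range and frees the node */
	return 0;
}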
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); #ifdef CONFIG_PCI_IOV -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); #endif #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 2245d88d8f39..cb02b7994a9a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -13,30 +13,70 @@ struct xe_bo; struct xe_gt; +/** + * struct xe_ggtt - Main GGTT struct + * + * In general, each tile can contain its own Global Graphics Translation Table + * (GGTT) instance. + */ struct xe_ggtt { + /** @tile: Back pointer to tile where this GGTT belongs */ struct xe_tile *tile; - + /** @size: Total size of this GGTT */ u64 size; #define XE_GGTT_FLAGS_64K BIT(0) + /** + * @flags: Flags for this GGTT + * Acceptable flags: + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + */ unsigned int flags; - + /** @scratch: Internal object allocation used as a scratch page */ struct xe_bo *scratch; - + /** @lock: Mutex lock to protect GGTT data */ struct mutex lock; - + /** + * @gsm: The iomem pointer to the actual location of the translation + * table located in the GSM for easy PTE manipulation + */ u64 __iomem *gsm; - + /** @pt_ops: Page Table operations per platform */ const struct xe_ggtt_pt_ops *pt_ops; - + /** @mm: The memory manager used to manage individual GGTT allocations */ struct drm_mm mm; - /** @access_count: counts GGTT writes */ unsigned int access_count; + /** @wq: Dedicated unordered work queue to process node removals */ + struct workqueue_struct *wq; +}; + +/** + * struct xe_ggtt_node - A node in GGTT. + * + * This struct needs to be initialized (only once) with xe_ggtt_node_init() before any node + * insertion, reservation, or 'ballooning'. + * It will then be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_remove_balloon(). + */ +struct xe_ggtt_node { + /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ + struct xe_ggtt *ggtt; + /** @base: A drm_mm_node */ + struct drm_mm_node base; + /** @delayed_removal_work: The work struct for the delayed removal */ + struct work_struct delayed_removal_work; + /** @invalidate_on_remove: If it needs invalidation upon removal */ + bool invalidate_on_remove; }; +/** + * struct xe_ggtt_pt_ops - GGTT Page table operations + * These can vary from platform to platform. 
+ */ struct xe_ggtt_pt_ops { + /** @pte_encode_bo: Encode PTE address for a given BO */ u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index e4ad1d6ce1d5..c518d1d16d82 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -15,11 +15,11 @@ static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) xe_sched_process_msg_queue(sched); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); } static struct xe_sched_msg * @@ -27,12 +27,12 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) - list_del(&msg->link); - spin_unlock(&sched->base.job_list_lock); + list_del_init(&msg->link); + xe_sched_msg_unlock(sched); return msg; } @@ -93,9 +93,16 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { - spin_lock(&sched->base.job_list_lock); - list_add_tail(&msg->link, &sched->msgs); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); + xe_sched_add_msg_locked(sched, msg); + xe_sched_msg_unlock(sched); +} +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + lockdep_assert_held(&sched->base.job_list_lock); + + list_add_tail(&msg->link, &sched->msgs); xe_sched_process_msg_queue(sched); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 10c6bb9c9386..cee9c6809fc0 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -24,6 +24,18 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); + +static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) +{ + spin_lock(&sched->base.job_list_lock); +} + +static inline void xe_sched_msg_unlock(struct xe_gpu_scheduler *sched) +{ + spin_unlock(&sched->base.job_list_lock); +} static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) { diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 2a612652bb13..8a137cb83318 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -450,11 +450,6 @@ static void free_resources(void *arg) xe_exec_queue_put(gsc->q); gsc->q = NULL; } - - if (gsc->private) { - xe_bo_unpin_map_no_vm(gsc->private); - gsc->private = NULL; - } } int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) @@ -474,10 +469,9 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) if (!hwe) return -ENODEV; - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c 
index aa812a2bc3ed..2d6ea8c01445 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -62,11 +62,6 @@ gsc_to_gt(struct xe_gsc *gsc) return container_of(gsc, struct xe_gt, uc.gsc); } -static inline struct xe_device *kdev_to_xe(struct device *kdev) -{ - return dev_get_drvdata(kdev); -} - bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -345,7 +340,7 @@ void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir) static int xe_gsc_proxy_component_bind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -360,7 +355,7 @@ static int xe_gsc_proxy_component_bind(struct device *xe_kdev, static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -376,27 +371,6 @@ static const struct component_ops xe_gsc_proxy_component_ops = { .unbind = xe_gsc_proxy_component_unbind, }; -static void proxy_channel_free(struct drm_device *drm, void *arg) -{ - struct xe_gsc *gsc = arg; - - if (!gsc->proxy.bo) - return; - - if (gsc->proxy.to_csme) { - kfree(gsc->proxy.to_csme); - gsc->proxy.to_csme = NULL; - gsc->proxy.from_csme = NULL; - } - - if (gsc->proxy.bo) { - iosys_map_clear(&gsc->proxy.to_gsc); - iosys_map_clear(&gsc->proxy.from_gsc); - xe_bo_unpin_map_no_vm(gsc->proxy.bo); - gsc->proxy.bo = NULL; - } -} - static int proxy_channel_alloc(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -405,18 +379,15 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) struct xe_bo *bo; void *csme; - csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); + csme = drmm_kzalloc(&xe->drm, GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); if (!csme) return -ENOMEM; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - kfree(csme); + bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) return PTR_ERR(bo); - } gsc->proxy.bo = bo; gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); @@ -424,7 +395,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) gsc->proxy.to_csme = csme; gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; - return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 282b5dc39908..08a004d698d4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -112,9 +112,9 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) { xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); @@ -136,9 +136,9 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) if (WARN_ON(err)) return; - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg &= ~CG_DIS_CNTLBUS; - 
xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } @@ -559,7 +559,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); - xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -571,6 +570,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_topology_init(gt); xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 5e7fd937917a..8f95d3a5949b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -17,7 +17,9 @@ #include "xe_gt_mcr.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_vf_debugfs.h" +#include "xe_gt_stats.h" #include "xe_gt_topology.h" +#include "xe_guc_hwconfig.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -269,6 +271,15 @@ static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int hwconfig(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_guc_hwconfig_dump(>->uc.guc, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, @@ -286,6 +297,8 @@ static const struct drm_info_list debugfs_list[] = { {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, + {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, + {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 6d948a469126..7d7bd0be6233 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -8,8 +8,10 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" +#include "xe_guc_hwconfig.h" #include "xe_mmio.h" #include "xe_sriov.h" @@ -297,6 +299,36 @@ static void init_steering_mslice(struct xe_gt *gt) static unsigned int dss_per_group(struct xe_gt *gt) { + struct xe_guc *guc = >->uc.guc; + u32 max_slices = 0, max_subslices = 0; + int ret; + + /* + * Try to query the GuC's hwconfig table for the maximum number of + * slices and subslices. These don't reflect the platform's actual + * slice/DSS counts, just the physical layout by which we should + * determine the steering targets. On older platforms with older GuC + * firmware releases it's possible that these attributes may not be + * included in the table, so we can always fall back to the old + * hardcoded layouts. 
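For concreteness, a standalone sketch of the division performed below, using made-up attribute values rather than a real hwconfig table:

/*
 * Hypothetical hwconfig values to illustrate the steering-group math below:
 * with 8 slices and 64 subslices reported, each steering group spans 8 DSS.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int max_slices = 8;		/* hypothetical MAX_SLICES attribute */
	unsigned int max_subslices = 64;	/* hypothetical MAX_SUBSLICES attribute */

	printf("dss_per_group = %u\n", DIV_ROUND_UP(max_subslices, max_slices)); /* 8 */
	return 0;
}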
+ */ +#define HWCONFIG_ATTR_MAX_SLICES 1 +#define HWCONFIG_ATTR_MAX_SUBSLICES 70 + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SLICES, + &max_slices); + if (ret < 0 || max_slices == 0) + goto fallback; + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SUBSLICES, + &max_subslices); + if (ret < 0 || max_subslices == 0) + goto fallback; + + return DIV_ROUND_UP(max_subslices, max_slices); + +fallback: + xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) return 8; else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) @@ -314,16 +346,16 @@ static unsigned int dss_per_group(struct xe_gt *gt) */ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { - int dss_per_grp = dss_per_group(gt); - xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); - *group = dss / dss_per_grp; - *instance = dss % dss_per_grp; + *group = dss / gt->steering_dss_per_grp; + *instance = dss % gt->steering_dss_per_grp; } static void init_steering_dss(struct xe_gt *gt) { + gt->steering_dss_per_grp = dss_per_group(gt); + xe_gt_mcr_get_dss_steering(gt, min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)), diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index b2a7fa55bd18..0be4687bfc20 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -287,7 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) PFD_VIRTUAL_ADDR_LO_SHIFT; pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - PF_QUEUE_NUM_DW; + pf_queue->num_dw; ret = true; } spin_unlock_irq(&pf_queue->lock); @@ -299,7 +299,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue) { lockdep_assert_held(&pf_queue->lock); - return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= + return CIRC_SPACE(pf_queue->head, pf_queue->tail, + pf_queue->num_dw) <= PF_MSG_LEN_DW; } @@ -312,22 +313,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) u32 asid; bool full; - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW); - if (unlikely(len != PF_MSG_LEN_DW)) return -EPROTO; asid = FIELD_GET(PFD_ASID, msg[1]); pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); + /* + * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 + */ + xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); + spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); if (!full) { memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; + pf_queue->head = (pf_queue->head + len) % + pf_queue->num_dw; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); @@ -386,26 +388,57 @@ static void pagefault_fini(void *arg) { struct xe_gt *gt = arg; struct xe_device *xe = gt_to_xe(gt); + int i; if (!xe->info.has_usm) return; destroy_workqueue(gt->usm.acc_wq); destroy_workqueue(gt->usm.pf_wq); + + for (i = 0; i < NUM_PF_QUEUE; ++i) + kfree(gt->usm.pf_queue[i].data); +} + +static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) +{ + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, 
XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + /* user can issue separate page faults per EU and per CS */ + pf_queue->num_dw = + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + + pf_queue->gt = gt; + pf_queue->data = kcalloc(pf_queue->num_dw, sizeof(u32), GFP_KERNEL); + if (!pf_queue->data) + return -ENOMEM; + + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, pf_queue_work_func); + + return 0; } int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - int i; + int i, ret = 0; if (!xe->info.has_usm) return 0; for (i = 0; i < NUM_PF_QUEUE; ++i) { - gt->usm.pf_queue[i].gt = gt; - spin_lock_init(>->usm.pf_queue[i].lock); - INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); + ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); + if (ret) + return ret; } for (i = 0; i < NUM_ACC_QUEUE; ++i) { gt->usm.acc_queue[i].gt = gt; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 227527785afd..41ed07b153b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -232,14 +232,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (drm_mm_node_allocated(&config->ggtt_region)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region.start); - cfg[n++] = upper_32_bits(config->ggtt_region.start); + cfg[n++] = lower_32_bits(config->ggtt_region->base.start); + cfg[n++] = upper_32_bits(config->ggtt_region->base.start); cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region.size); - cfg[n++] = upper_32_bits(config->ggtt_region.size); + cfg[n++] = lower_32_bits(config->ggtt_region->base.size); + cfg[n++] = upper_32_bits(config->ggtt_region->base.size); } return n; @@ -369,29 +369,28 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 return err ?: err2; } -static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) +static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { - struct xe_ggtt *ggtt = tile->mem.ggtt; - - if (drm_mm_node_allocated(node)) { + if (xe_ggtt_node_allocated(node)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). 
*/ - xe_ggtt_remove_node(ggtt, node, false); + xe_ggtt_node_remove(node, false); } } static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) { - pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); + pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); + config->ggtt_region = NULL; } static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -403,40 +402,48 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (drm_mm_node_allocated(node)) { + if (xe_ggtt_node_allocated(node)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; pf_release_ggtt(tile, node); } - xe_gt_assert(gt, !drm_mm_node_allocated(node)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); if (!size) return 0; - err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return PTR_ERR(node); + + err = xe_ggtt_node_insert(node, size, alignment); if (unlikely(err)) - return err; + goto err; - xe_ggtt_assign(ggtt, node, vfid); + xe_ggtt_assign(node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", - vfid, node->start, node->start + node->size - 1); + vfid, node->base.start, node->base.start + node->base.size - 1); - err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); + err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); if (unlikely(err)) - return err; + goto err; + config->ggtt_region = node; return 0; +err: + xe_ggtt_node_fini(node); + return err; } static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - return drm_mm_node_allocated(node) ? node->size : 0; + return xe_ggtt_node_allocated(node) ? 
node->base.size : 0; } /** @@ -587,30 +594,11 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_max_ggtt(struct xe_gt *gt) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); u64 spare = pf_get_spare_ggtt(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 max_hole = 0; + u64 max_hole; - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", - hole_start, hole_size / SZ_1K); - spare -= min3(spare, hole_size, max_hole); - max_hole = max(max_hole, hole_size); - } - - mutex_unlock(&ggtt->lock); + max_hole = xe_ggtt_largest_hole(ggtt, alignment, &spare); xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", max_hole / SZ_1K, spare / SZ_1K); @@ -2025,13 +2013,15 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = >->sriov.pf.vfs[n].config; - if (!drm_mm_node_allocated(&config->ggtt_region)) + if (!xe_ggtt_node_allocated(config->ggtt_region)) continue; - string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + string_get_size(config->ggtt_region->base.size, 1, STRING_UNITS_2, + buf, sizeof(buf)); drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", - n, config->ggtt_region.start, - config->ggtt_region.start + config->ggtt_region.size - 1, buf); + n, config->ggtt_region->base.start, + config->ggtt_region->base.start + config->ggtt_region->base.size - 1, + buf); } return 0; @@ -2119,12 +2109,8 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 spare, avail, total = 0; + u64 spare, avail, total; char buf[10]; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); @@ -2132,24 +2118,8 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); spare = pf_get_spare_ggtt(gt); + total = xe_ggtt_print_holes(ggtt, alignment, p); - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - total += hole_size; - - string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", - hole_start, hole_end - 1, buf); - } - - mutex_unlock(&ggtt->lock); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 7bc66656fcc7..2d3b73d78f14 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ 
b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -6,8 +6,7 @@ #ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ #define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ -#include <drm/drm_mm.h> - +#include "xe_ggtt_types.h" #include "xe_guc_klv_thresholds_set_types.h" struct xe_bo; @@ -19,7 +18,7 @@ struct xe_bo; */ struct xe_gt_sriov_config { /** @ggtt_region: GGTT region assigned to the VF. */ - struct drm_mm_node ggtt_region; + struct xe_ggtt_node *ggtt_region; /** @lmem_obj: LMEM allocation for use by the VF. */ struct xe_bo *lmem_obj; /** @num_ctxs: number of GuC contexts IDs. */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 47222bd9988d..4ebc82e607af 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -495,6 +495,25 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) return gt->sriov.vf.self_config.lmem_size; } +static struct xe_ggtt_node * +vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) +{ + struct xe_ggtt_node *node; + int err; + + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return node; + + err = xe_ggtt_node_insert_balloon(node, start, end); + if (err) { + xe_ggtt_node_fini(node); + return ERR_PTR(err); + } + + return node; +} + static int vf_balloon_ggtt(struct xe_gt *gt) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; @@ -502,7 +521,6 @@ static int vf_balloon_ggtt(struct xe_gt *gt) struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_device *xe = gt_to_xe(gt); u64 start, end; - int err; xe_gt_assert(gt, IS_SRIOV_VF(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); @@ -528,35 +546,31 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = xe_wopcm_size(xe); end = config->ggtt_base; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[0]); - if (err) - goto failed; + tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); } start = config->ggtt_base + config->ggtt_size; end = GUC_GGTT_TOP; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[1]); - if (err) - goto deballoon; + tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } } return 0; - -deballoon: - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); -failed: - return err; } static void deballoon_ggtt(struct drm_device *drm, void *arg) { struct xe_tile *tile = arg; - struct xe_ggtt *ggtt = tile->mem.ggtt; xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); } /** diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c new file mode 100644 index 000000000000..c7364a5aef8f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <linux/atomic.h> + +#include <drm/drm_print.h> + +#include "xe_gt.h" +#include "xe_gt_stats.h" + +/** + * xe_gt_stats_incr - Increments the specified stats counter + * @gt: graphics tile + * @id: xe_gt_stats_id type id that needs to be incremented + * 
@incr: value to be incremented with + * + * Increments the specified stats counter. + */ +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) +{ + if (id >= __XE_GT_STATS_NUM_IDS) + return; + + atomic_add(incr, >->stats.counters[id]); +} + +static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "tlb_inval_count", +}; + +/** + * xe_gt_stats_print_info - Print the GT stats + * @gt: graphics tile + * @p: drm_printer where it will be printed out. + * + * This prints out all the available GT stats. + */ +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) +{ + enum xe_gt_stats_id id; + + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) + drm_printf(p, "%s: %d\n", stat_description[id], + atomic_read(>->stats.counters[id])); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h new file mode 100644 index 000000000000..91d944f6c4e4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_STATS_H_ +#define _XE_GT_STATS_H_ + +struct xe_gt; +struct drm_printer; + +enum xe_gt_stats_id { + XE_GT_STATS_ID_TLB_INVAL, + /* must be the last entry */ + __XE_GT_STATS_NUM_IDS, +}; + +#ifdef CONFIG_DEBUG_FS +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); +#else +static inline void +xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, + int incr) +{ +} + +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 87cb76a8718c..cca9cf536f76 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_gt_stats.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -213,6 +214,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, gt->tlb_invalidation.seqno = 1; } mutex_unlock(&guc->ct.lock); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); return ret; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 631928258d71..31946d7fe701 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -10,6 +10,7 @@ #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" #include "xe_gt_sriov_vf_types.h" +#include "xe_gt_stats.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_oa.h" @@ -133,6 +134,14 @@ struct xe_gt { u8 has_indirect_ring_state:1; } info; +#if IS_ENABLED(CONFIG_DEBUG_FS) + /** @stats: GT stats */ + struct { + /** @stats.counters: counters for various GT stats */ + atomic_t counters[__XE_GT_STATS_NUM_IDS]; + } stats; +#endif + /** * @mmio: mmio info for GT. All GTs within a tile share the same * register space, but have their own copy of GSI registers at a @@ -238,9 +247,14 @@ struct xe_gt { struct pf_queue { /** @usm.pf_queue.gt: back pointer to GT */ struct xe_gt *gt; -#define PF_QUEUE_NUM_DW 128 /** @usm.pf_queue.data: data in the page fault queue */ - u32 data[PF_QUEUE_NUM_DW]; + u32 *data; + /** + * @usm.pf_queue.num_dw: number of DWORDS in the page + * fault queue. Dynamically calculated based on the number + * of compute resources available. 
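A rough worked example of that sizing follows; the DSS/EU counts and the per-message and engine-count constants are illustrative stand-ins, not real platform data:

/*
 * Hypothetical walk-through of the pf_queue sizing introduced above: room for
 * one fault per EU plus one per engine, times the per-message dword count.
 */
#include <stdio.h>

int main(void)
{
	unsigned int num_dss = 32;		/* hypothetical fused-in DSS count */
	unsigned int eus_per_dss = 16;		/* hypothetical EUs per DSS */
	unsigned int num_eus = eus_per_dss * num_dss;
	unsigned int pf_msg_len_dw = 4;		/* dwords per fault message (assumed) */
	unsigned int num_hw_engines = 40;	/* assumed engine-count upper bound */

	unsigned int num_dw = (num_eus + num_hw_engines) * pf_msg_len_dw;

	printf("pf_queue->num_dw = %u (%zu bytes)\n",
	       num_dw, num_dw * sizeof(unsigned int));
	return 0;
}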
+ */ + u32 num_dw; /** * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, * moved by worker which processes faults (consumer). @@ -368,6 +382,12 @@ struct xe_gt { } steering[NUM_STEERING_TYPES]; /** + * @steering_dss_per_grp: number of DSS per steering group (gslice, + * cslice, etc.). + */ + unsigned int steering_dss_per_grp; + + /** * @mcr_lock: protects the MCR_SELECTOR register for the duration * of a steered operation */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index de0fe9e65746..52df28032a6f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -350,6 +350,8 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); if (ret) goto out; @@ -358,8 +360,6 @@ int xe_guc_init(struct xe_guc *guc) xe_guc_comm_init_early(guc); - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); - return 0; out: diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index e0bbf98f849d..c3e6b51f7a09 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -11,6 +11,16 @@ #include "xe_hw_engine_types.h" #include "xe_macros.h" +/* + * GuC version number components are defined to be only 8-bit size, + * so converting to a 32bit 8.8.8 integer allows simple (and safe) + * numerical comparisons. + */ +#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) +#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) +#define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) + struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 1c60b685dbc6..d1902a8581ca 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -24,6 +24,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_uc_fw.h" #include "xe_wa.h" /* Slack of a few additional entries per engine */ @@ -367,6 +368,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) 0xC40, &offset, &remain); + if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index beeeb120d1fc..f24dd5223926 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -105,12 +105,20 @@ ct_to_xe(struct xe_guc_ct *ct) * enough space to avoid backpressure on the driver. We increase the size * of the receive buffer (relative to the send) to ensure a G2H response * CTB has a landing spot. + * + * In addition to submissions, the G2H buffer needs to be able to hold + * enough space for recoverable page fault notifications. The number of + * page faults is interrupt driven and can be as much as the number of + * compute resources available. However, most of the actual work for these + * is in a separate page fault worker thread. Therefore we only need to + * make sure the queue has enough space to handle all of the submissions + * and responses and an extra buffer for incoming page faults. 
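The 8.8.8 packing added to xe_guc.h above makes firmware version checks plain integer comparisons; a standalone sketch with made-up version numbers:

/*
 * Sketch of the 8.8.8 version packing from xe_guc.h above: each component fits
 * in 8 bits, so packed values compare correctly as plain integers.
 */
#include <stdio.h>

#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))

int main(void)
{
	unsigned int found = MAKE_GUC_VER(70, 29, 2);	/* hypothetical firmware on disk */
	unsigned int needed = MAKE_GUC_VER(70, 19, 2);	/* hypothetical minimum requirement */

	printf("found=%#x needed=%#x -> %s\n", found, needed,
	       found >= needed ? "new enough" : "too old");
	return 0;
}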
*/ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) -#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) +#define CTB_G2H_BUFFER_SIZE (SZ_128K) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) /** * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index d9b570a154a2..af2c817d552c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -6,6 +6,7 @@ #include "xe_guc_hwconfig.h" #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "abi/guc_actions_abi.h" #include "xe_bo.h" @@ -103,3 +104,99 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, guc->hwconfig.size); } + +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) +{ + size_t size = xe_guc_hwconfig_size(guc); + u32 *hwconfig; + u64 num_dw; + u32 extra_bytes; + int i = 0; + + if (size == 0) { + drm_printf(p, "No hwconfig available\n"); + return; + } + + num_dw = div_u64_rem(size, sizeof(u32), &extra_bytes); + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) { + drm_printf(p, "Error: could not allocate hwconfig memory\n"); + return; + } + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 attribute = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (i + len_dw > num_dw) { + drm_printf(p, "Error: Attribute %u is %u dwords, but only %llu remain\n", + attribute, len_dw, num_dw - i); + len_dw = num_dw - i; + } + + /* + * If it's a single dword (as most hwconfig attributes are), + * then it's probably a number that makes sense to display + * in decimal form. In the rare cases where it's more than + * one dword, just print it in hex form and let the user + * figure out how to interpret it. + */ + if (len_dw == 1) + drm_printf(p, "[%2u] = %u\n", attribute, hwconfig[i]); + else + drm_printf(p, "[%2u] = { %*ph }\n", attribute, + (int)(len_dw * sizeof(u32)), &hwconfig[i]); + i += len_dw; + } + + if (i < num_dw || extra_bytes) + drm_printf(p, "Error: %llu extra bytes at end of hwconfig\n", + (num_dw - i) * sizeof(u32) + extra_bytes); + + kfree(hwconfig); +} + +/* + * Lookup a specific 32-bit attribute value in the GuC's hwconfig table. + */ +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val) +{ + size_t size = xe_guc_hwconfig_size(guc); + u64 num_dw = div_u64(size, sizeof(u32)); + u32 *hwconfig; + bool found = false; + int i = 0; + + if (num_dw == 0) + return -EINVAL; + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) + return -ENOMEM; + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 key = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (key != attribute) { + i += len_dw; + continue; + } + + *val = hwconfig[i]; + found = true; + break; + } + + kfree(hwconfig); + + return found ? 
0 : -ENOENT; +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h index b5794d641900..ab4e5038236e 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -8,10 +8,13 @@ #include <linux/types.h> +struct drm_printer; struct xe_guc; int xe_guc_hwconfig_init(struct xe_guc *guc); u32 xe_guc_hwconfig_size(struct xe_guc *guc); void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 32e93a8127d4..def503abeed5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1042,7 +1042,7 @@ static void xe_guc_pc_fini_hw(void *arg) return; XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); - XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); + xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 2adf551500cb..fbbe6a487bbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1374,9 +1374,11 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) struct xe_exec_queue *q = msg->private_data; if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - enable_scheduling(q); + if (!exec_queue_enabled(q)) { + q->guc->resume_time = RESUME_PENDING; + enable_scheduling(q); + } } else { clear_exec_queue_suspended(q); } @@ -1386,6 +1388,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define SET_SCHED_PROPS 2 #define SUSPEND 3 #define RESUME 4 +#define OPCODE_MASK 0xf +#define MSG_LOCKED BIT(8) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { @@ -1430,7 +1434,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; - int err; + int err, i; xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); @@ -1442,6 +1446,9 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) + INIT_LIST_HEAD(&ge->static_msgs[i].link); + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, @@ -1504,11 +1511,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; + msg->opcode = opcode & OPCODE_MASK; msg->private_data = q; trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); + if (opcode & MSG_LOCKED) + xe_sched_add_msg_locked(&q->guc->sched, msg); + else + xe_sched_add_msg(&q->guc->sched, msg); +} + +static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return false; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); + + return true; } #define STATIC_MSG_CLEANUP 0 @@ -1582,13 +1604,16 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, static int guc_exec_queue_suspend(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q)) return -EINVAL; - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); + xe_sched_msg_lock(sched); + if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) + q->guc->suspend_pending = true; + xe_sched_msg_unlock(sched); return 0; } @@ -1603,11 +1628,11 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) * suspend_pending upon kill but to be paranoid but races in which * suspend_pending is set after kill also check kill here. */ - ret = wait_event_timeout(q->guc->suspend_wait, - !READ_ONCE(q->guc->suspend_pending) || - exec_queue_killed(q) || - guc_read_stopped(guc), - HZ * 5); + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, + !READ_ONCE(q->guc->suspend_pending) || + exec_queue_killed(q) || + guc_read_stopped(guc), + HZ * 5); if (!ret) { xe_gt_warn(guc_to_gt(guc), @@ -1617,18 +1642,21 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) return -ETIME; } - return 0; + return ret < 0 ? 
ret : 0; } static void guc_exec_queue_resume(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); xe_assert(xe, !q->guc->suspend_pending); - guc_exec_queue_add_msg(q, msg, RESUME); + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg(q, msg, RESUME); + xe_sched_msg_unlock(sched); } static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index bec4366e5513..f5459f97af23 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -43,14 +43,6 @@ huc_to_guc(struct xe_huc *huc) return &container_of(huc, struct xe_uc, huc)->guc; } -static void free_gsc_pkt(struct drm_device *drm, void *arg) -{ - struct xe_huc *huc = arg; - - xe_bo_unpin_map_no_vm(huc->gsc_pkt); - huc->gsc_pkt = NULL; -} - #define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K static int huc_alloc_gsc_pkt(struct xe_huc *huc) { @@ -59,17 +51,16 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_bo *bo; /* we use a single object for both input and output */ - bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, - PXP43_HUC_AUTH_INOUT_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt), + PXP43_HUC_AUTH_INOUT_SIZE * 2, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); huc->gsc_pkt = bo; - return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); + return 0; } int xe_huc_init(struct xe_huc *huc) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 07ed9fd28f19..18980238a2ea 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -5,7 +5,10 @@ #include "xe_hw_engine.h" +#include <linux/nospec.h> + #include <drm/drm_managed.h> +#include <drm/xe_drm.h> #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -20,6 +23,7 @@ #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -263,7 +267,7 @@ static const struct engine_info engine_infos[] = { }, }; -static void hw_engine_fini(struct drm_device *drm, void *arg) +static void hw_engine_fini(void *arg) { struct xe_hw_engine *hwe = arg; @@ -274,8 +278,18 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) hwe->gt = NULL; } -static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, - u32 val) +/** + * xe_hw_engine_mmio_write32() - Write engine register + * @hwe: engine + * @reg: register to write into + * @val: desired 32-bit value to write + * + * This function will write val into an engine specific register. + * Forcewake must be held by the caller. + * + */ +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, + struct xe_reg reg, u32 val) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -285,7 +299,17 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, xe_mmio_write32(hwe->gt, reg, val); } -static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) +/** + * xe_hw_engine_mmio_read32() - Read engine register + * @hwe: engine + * @reg: register to read from + * + * This function will read from an engine specific register. + * Forcewake must be held by the caller. 
+ * + * Return: value of the 32-bit register. + */ +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -304,14 +328,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_mmio_write32(hwe->gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); - hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); - hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), - xe_bo_ggtt_addr(hwe->hwsp)); - hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - hw_engine_mmio_write32(hwe, RING_MI_MODE(0), - _MASKED_BIT_DISABLE(STOP_RING)); - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); + xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), + xe_bo_ggtt_addr(hwe->hwsp)); + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), + _MASKED_BIT_DISABLE(STOP_RING)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, @@ -425,6 +449,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) 0xA, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Enable Priority Mem Read */ + { XE_RTP_NAME("Priority_Mem_Read"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} }; @@ -555,7 +585,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; - return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); err_kernel_lrc: xe_lrc_put(hwe->kernel_lrc); @@ -761,6 +791,9 @@ int xe_hw_engines_init(struct xe_gt *gt) } hw_engine_setup_logical_mapping(gt); + err = xe_hw_engine_setup_groups(gt); + if (err) + return err; return 0; } @@ -791,7 +824,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, unsigned int dss; u16 group, instance; - snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) return; @@ -887,53 +920,53 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return snapshot; snapshot->reg.ring_execlist_status = - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); snapshot->reg.ring_execlist_status |= val << 32; snapshot->reg.ring_execlist_sq_contents = - hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); snapshot->reg.ring_execlist_sq_contents |= val << 32; - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); snapshot->reg.ring_acthd |= val << 32; 
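With the MMIO helpers now exported, external callers are expected to follow the same pattern the snapshot code uses: hold the engine's forcewake domain, read the LO/HI register pair, and stitch the 64-bit value together. A condensed sketch of that pattern, assuming the usual xe_force_wake_get()/xe_force_wake_put() pair used elsewhere in the driver, with error handling trimmed:

	static u64 example_read_acthd(struct xe_hw_engine *hwe)
	{
		struct xe_force_wake *fw = gt_to_fw(hwe->gt);
		u64 lo, hi;

		/* the helpers assert forcewake, so take the engine's domain first */
		if (xe_force_wake_get(fw, hwe->domain))
			return 0;	/* error handling trimmed for brevity */

		lo = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
		hi = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));

		xe_force_wake_put(fw, hwe->domain);

		return lo | (hi << 32);
	}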
- snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); snapshot->reg.ring_bbaddr |= val << 32; snapshot->reg.ring_dma_fadd = - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); - val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); snapshot->reg.ring_dma_fadd |= val << 32; - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); - snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); + snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { - val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); snapshot->reg.ring_start |= val << 32; } if (xe_gt_has_indirect_ring_state(hwe->gt)) { snapshot->reg.indirect_ring_state = - hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); } snapshot->reg.ring_head = - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; - snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); + xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); snapshot->reg.ring_mi_mode = - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); - snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); - snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); - snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); - snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); - snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) @@ -1135,3 +1168,41 @@ enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) { return engine_infos[hwe->engine_id].domain; } + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +/** + * xe_hw_engine_lookup() - Lookup hardware engine for class:instance + * 
@xe: xe device + * @eci: engine class and instance + * + * This function will find a hardware engine for given engine + * class and instance. + * + * Return: If found xe_hw_engine pointer, NULL otherwise. + */ +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + unsigned int idx; + + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.gt_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 900c8c991430..022819a4a8eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -9,6 +9,8 @@ #include "xe_hw_engine_types.h" struct drm_printer; +struct drm_xe_engine_class_instance; +struct xe_device; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -62,6 +64,11 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); + +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci); + static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) { return hwe->name; @@ -71,4 +78,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class); u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe); +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val); +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg); + #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c new file mode 100644 index 000000000000..82750520a90a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_gt.h" +#include "xe_hw_engine_group.h" +#include "xe_vm.h" + +static void +hw_engine_group_free(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine_group *group = arg; + + destroy_workqueue(group->resume_wq); + kfree(group); +} + +static void +hw_engine_group_resume_lr_jobs_func(struct work_struct *w) +{ + struct xe_exec_queue *q; + struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work); + int err; + enum xe_hw_engine_group_execution_mode previous_mode; + + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); + if (err) + return; + + if (previous_mode == EXEC_MODE_LR) + goto put; + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + q->ops->resume(q); + } + +put: + xe_hw_engine_group_put(group); +} + +static struct xe_hw_engine_group * +hw_engine_group_alloc(struct xe_device *xe) +{ + struct xe_hw_engine_group *group; + int err; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); + if (!group->resume_wq) + return 
ERR_PTR(-ENOMEM); + + init_rwsem(&group->mode_sem); + INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); + INIT_LIST_HEAD(&group->exec_queue_list); + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); + if (err) + return ERR_PTR(err); + + return group; +} + +/** + * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt + * @gt: The gt for which groups are setup + * + * Return: 0 on success, negative error code on error. + */ +int xe_hw_engine_setup_groups(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; + struct xe_device *xe = gt_to_xe(gt); + int err; + + group_rcs_ccs = hw_engine_group_alloc(xe); + if (IS_ERR(group_rcs_ccs)) { + err = PTR_ERR(group_rcs_ccs); + goto err_group_rcs_ccs; + } + + group_bcs = hw_engine_group_alloc(xe); + if (IS_ERR(group_bcs)) { + err = PTR_ERR(group_bcs); + goto err_group_bcs; + } + + group_vcs_vecs = hw_engine_group_alloc(xe); + if (IS_ERR(group_vcs_vecs)) { + err = PTR_ERR(group_vcs_vecs); + goto err_group_vcs_vecs; + } + + for_each_hw_engine(hwe, gt, id) { + switch (hwe->class) { + case XE_ENGINE_CLASS_COPY: + hwe->hw_engine_group = group_bcs; + break; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + hwe->hw_engine_group = group_rcs_ccs; + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + hwe->hw_engine_group = group_vcs_vecs; + break; + case XE_ENGINE_CLASS_OTHER: + break; + default: + drm_warn(&xe->drm, "NOT POSSIBLE"); + } + } + + return 0; + +err_group_vcs_vecs: + kfree(group_vcs_vecs); +err_group_bcs: + kfree(group_bcs); +err_group_rcs_ccs: + kfree(group_rcs_ccs); + + return err; +} + +/** + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group + * @group: The hw engine group + * @q: The exec_queue + * + * Return: 0 on success, + * -EINTR if the lock could not be acquired + */ +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + int err; + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); + xe_assert(xe, q->vm); + + if (xe_vm_in_preempt_fence_mode(q->vm)) + return 0; + + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) { + q->ops->suspend(q); + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + + xe_hw_engine_group_resume_faulting_lr_jobs(group); + } + + list_add(&q->hw_engine_group_link, &group->exec_queue_list); + up_write(&group->mode_sem); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group + * @group: The hw engine group + * @q: The exec_queue + */ +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, q->vm); + + down_write(&group->mode_sem); + + if (!list_empty(&q->hw_engine_group_link)) + list_del(&q->hw_engine_group_link); + + up_write(&group->mode_sem); +} + +/** + * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's + * faulting LR jobs + * @group: The hw engine group + */ +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + queue_work(group->resume_wq, 
&group->resume_work); +} + +/** + * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group + * @group: The hw engine group + * + * Return: 0 on success, negative error code on error. + */ +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + int err; + struct xe_exec_queue *q; + bool need_resume = false; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + need_resume = true; + q->ops->suspend(q); + } + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + } + + if (need_resume) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete + * @group: The hw engine group + * + * This function is not meant to be called directly from a user IOCTL as dma_fence_wait() + * is not interruptible. + * + * Return: 0 on success, + * -ETIME if waiting for one job failed + */ +static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group) +{ + long timeout; + struct xe_exec_queue *q; + struct dma_fence *fence; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (xe_vm_in_lr_mode(q->vm)) + continue; + + fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); + timeout = dma_fence_wait(fence, false); + dma_fence_put(fence); + + if (timeout < 0) + return -ETIME; + } + + return 0; +} + +static int switch_mode(struct xe_hw_engine_group *group) +{ + int err = 0; + enum xe_hw_engine_group_execution_mode new_mode; + + lockdep_assert_held_write(&group->mode_sem); + + switch (group->cur_mode) { + case EXEC_MODE_LR: + new_mode = EXEC_MODE_DMA_FENCE; + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); + break; + case EXEC_MODE_DMA_FENCE: + new_mode = EXEC_MODE_LR; + err = xe_hw_engine_group_wait_for_dma_fence_jobs(group); + break; + } + + if (err) + return err; + + group->cur_mode = new_mode; + + return 0; +} + +/** + * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode + * @group: The hw engine group + * @new_mode: The new execution mode + * @previous_mode: Pointer to the previous mode provided for use by caller + * + * Return: 0 if successful, -EINTR if locking failed. 
+ */ +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode) +__acquires(&group->mode_sem) +{ + int err = down_read_interruptible(&group->mode_sem); + + if (err) + return err; + + *previous_mode = group->cur_mode; + + if (new_mode != group->cur_mode) { + up_read(&group->mode_sem); + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (new_mode != group->cur_mode) { + err = switch_mode(group); + if (err) { + up_write(&group->mode_sem); + return err; + } + } + downgrade_write(&group->mode_sem); + } + + return err; +} + +/** + * xe_hw_engine_group_put() - Put the group + * @group: The hw engine group + */ +void xe_hw_engine_group_put(struct xe_hw_engine_group *group) +__releases(&group->mode_sem) +{ + up_read(&group->mode_sem); +} + +/** + * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue + * @q: The exec_queue + */ +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q) +{ + if (xe_vm_in_fault_mode(q->vm)) + return EXEC_MODE_LR; + else + return EXEC_MODE_DMA_FENCE; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h new file mode 100644 index 000000000000..797ee81acbf2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_H_ +#define _XE_HW_ENGINE_GROUP_H_ + +#include "xe_hw_engine_group_types.h" + +struct drm_device; +struct xe_exec_queue; +struct xe_gt; + +int xe_hw_engine_setup_groups(struct xe_gt *gt); + +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); + +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode); +void xe_hw_engine_group_put(struct xe_hw_engine_group *group); + +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q); +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h new file mode 100644 index 000000000000..92b6e0712c03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_ +#define _XE_HW_ENGINE_GROUP_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/** + * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw + * engine group + * + * @EXEC_MODE_LR: execution in long-running mode + * @EXEC_MODE_DMA_FENCE: execution in dma fence mode + */ +enum xe_hw_engine_group_execution_mode { + EXEC_MODE_LR, + EXEC_MODE_DMA_FENCE, +}; + +/** + * struct xe_hw_engine_group - Hardware engine group + * + * hw engines belong to the same group if they share hardware resources in a way + * that prevents them from making progress when one is stuck on a page fault. 
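To make the locking discipline above concrete: a submission path is expected to bracket its work with xe_hw_engine_group_get_mode() and xe_hw_engine_group_put(), deriving the wanted mode from the queue's VM, much like the resume worker does earlier in this file. A rough sketch of that caller pattern (the function name and the trimmed body are illustrative, not taken from this commit):

	static int example_submit(struct xe_exec_queue *q)
	{
		struct xe_hw_engine_group *group = q->hwe->hw_engine_group;
		enum xe_hw_engine_group_execution_mode mode, previous_mode;
		int err;

		mode = xe_hw_engine_group_find_exec_mode(q);

		/* blocks, or switches the group, until the mode is compatible */
		err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
		if (err)
			return err;

		/* ... queue the job while the group is held in read mode ... */

		xe_hw_engine_group_put(group);
		return 0;
	}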
+ */ +struct xe_hw_engine_group { + /** + * @exec_queue_list: list of exec queues attached to this + * xe_hw_engine_group + */ + struct list_head exec_queue_list; + /** @resume_work: worker to resume faulting LR exec queues */ + struct work_struct resume_work; + /** @resume_wq: workqueue to resume faulting LR exec queues */ + struct workqueue_struct *resume_wq; + /** + * @mode_sem: used to protect this group's hardware resources and ensure + * mutual exclusion between execution only in faulting LR mode and + * execution only in DMA_FENCE mode + */ + struct rw_semaphore mode_sem; + /** @cur_mode: current execution mode of this hw engine group */ + enum xe_hw_engine_group_execution_mode cur_mode; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 70e6434f150d..39f24012d0f4 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -150,6 +150,8 @@ struct xe_hw_engine { struct xe_hw_engine_class_intf *eclass; /** @oa_unit: oa unit for this hw engine */ struct xe_oa_unit *oa_unit; + /** @hw_engine_group: the group of hw engines this one belongs to */ + struct xe_hw_engine_group *hw_engine_group; }; /** diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 58121821f081..aec7db39c061 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,8 @@ #include "xe_lrc.h" +#include <generated/xe_wa_oob.h> + #include <linux/ascii85.h> #include "instructions/xe_mi_commands.h" @@ -24,6 +26,7 @@ #include "xe_memirq.h" #include "xe_sriov.h" #include "xe_vm.h" +#include "xe_wa.h" #define LRC_VALID BIT_ULL(0) #define LRC_PRIVILEGE BIT_ULL(8) @@ -1581,19 +1584,31 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b int state_table_size = 0; /* - * At the moment we only need to emit non-register state for the RCS - * engine. + * Wa_14019789679 + * + * If the driver doesn't explicitly emit the SVG instructions while + * setting up the default LRC, the context switch will write 0's + * (noops) into the LRC memory rather than the expected instruction + * headers. Application contexts start out as a copy of the default + * LRC, and if they also do not emit specific settings for some SVG + * state, then on context restore they'll unintentionally inherit + * whatever state setting the previous context had programmed into the + * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will + * prevent the hardware from resetting that state back to any specific + * value). + * + * The official workaround only requires emitting 3DSTATE_MESH_CONTROL + * since that's a specific state setting that can easily cause GPU + * hangs if unintentionally inherited. However to be safe we'll + * continue to emit all of the SVG state since it's best not to leak + * any of the state between contexts, even if that leakage is harmless. */ - if (q->hwe->class != XE_ENGINE_CLASS_RENDER) - return; - - switch (GRAPHICS_VERx100(xe)) { - case 1255: - case 1270 ... 
2004: + if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - break; - default: + } + + if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); return; @@ -1634,7 +1649,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - if (lrc->bo && lrc->bo->vm) + if (lrc->bo->vm) xe_vm_get(lrc->bo->vm); snapshot->context_desc = xe_lrc_ggtt_addr(lrc); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6f24aaf58252..cbf54be224c9 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -442,7 +442,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT, 0); } if (IS_ERR(m->q)) { xe_vm_close_and_put(vm); @@ -1037,9 +1037,11 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. + * @clear_flags: flags to specify which data to clear: CCS, BO, or both. * - * Clear the contents of @dst to zero. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destinations. + * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set. + * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA. + * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear bo as well as CCS metadata. * TODO: Eliminate the @bo argument. * * Return: Pointer to a dma_fence representing the last clear batch, or @@ -1048,18 +1050,27 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst) + struct ttm_resource *dst, + u32 clear_flags) { bool clear_vram = mem_type_is_vram(dst->mem_type); + bool clear_bo_data = XE_MIGRATE_CLEAR_FLAG_BO_DATA & clear_flags; + bool clear_ccs = XE_MIGRATE_CLEAR_FLAG_CCS_DATA & clear_flags; struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; + bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; u64 size = bo->size; struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; + if (WARN_ON(!clear_bo_data && !clear_ccs)) + return NULL; + + if (!clear_bo_data && clear_ccs && !IS_DGFX(xe)) + clear_only_system_ccs = true; + if (!clear_vram) xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); else @@ -1085,7 +1096,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, pte_flags, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, + clear_bo_data ? 
emit_clear_cmd_len(gt) : 0, 0, avail_pts); if (xe_migrate_needs_ccs_emit(xe)) @@ -1107,13 +1118,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - if (!clear_system_ccs) + if (clear_bo_data) emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); if (xe_migrate_needs_ccs_emit(xe)) { @@ -1172,7 +1183,7 @@ err_sync: return ERR_PTR(err); } - if (clear_system_ccs) + if (clear_ccs) bo->ccs_cleared = true; return fence; diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 453e0ecf5034..0109866e398a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -6,7 +6,7 @@ #ifndef _XE_MIGRATE_ #define _XE_MIGRATE_ -#include <drm/drm_mm.h> +#include <linux/types.h> struct dma_fence; struct iosys_map; @@ -102,9 +102,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +#define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) +#define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) +#define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ + XE_MIGRATE_CLEAR_FLAG_CCS_DATA) struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst); + struct ttm_resource *dst, + u32 clear_flags); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f5bdb540e823..3fd462fda625 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -29,9 +29,8 @@ static void tiles_fini(void *arg) struct xe_tile *tile; int id; - for_each_tile(tile, xe, id) - if (tile != xe_device_get_root_tile(xe)) - tile->mmio.regs = NULL; + for_each_remote_tile(tile, xe, id) + tile->mmio.regs = NULL; } /* diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 499540add465..bfc3deebdaa2 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -8,14 +8,17 @@ #include <linux/init.h> #include <linux/module.h> +#include <drm/drm_module.h> + #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" +#include "xe_pm.h" #include "xe_observation.h" #include "xe_sched_job.h" struct xe_modparam xe_modparam = { - .enable_display = true, + .probe_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, @@ -25,8 +28,8 @@ struct xe_modparam xe_modparam = { module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); -module_param_named(enable_display, xe_modparam.enable_display, bool, 0444); -MODULE_PARM_DESC(enable_display, "Enable display"); +module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); @@ -61,13 +64,28 @@ module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); +static int 
xe_check_nomodeset(void) +{ + if (drm_firmware_drivers_only()) + return -ENODEV; + + return 0; +} + struct init_funcs { int (*init)(void); void (*exit)(void); }; +static void xe_dummy_exit(void) +{ +} + static const struct init_funcs init_funcs[] = { { + .init = xe_check_nomodeset, + }, + { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, }, @@ -83,17 +101,41 @@ static const struct init_funcs init_funcs[] = { .init = xe_observation_sysctl_register, .exit = xe_observation_sysctl_unregister, }, + { + .init = xe_pm_module_init, + .exit = xe_dummy_exit, + }, }; +static int __init xe_call_init_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return 0; + if (!init_funcs[i].init) + return 0; + + return init_funcs[i].init(); +} + +static void xe_call_exit_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return; + if (!init_funcs[i].exit) + return; + + init_funcs[i].exit(); +} + static int __init xe_init(void) { int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = init_funcs[i].init(); + err = xe_call_init_func(i); if (err) { while (i--) - init_funcs[i].exit(); + xe_call_exit_func(i); return err; } } @@ -106,7 +148,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - init_funcs[i].exit(); + xe_call_exit_func(i); } module_init(xe_init); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 61a0d28a28c8..161a5e6f717f 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -11,7 +11,7 @@ /* Module modprobe variables */ struct xe_modparam { bool force_execlist; - bool enable_display; + bool probe_display; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 3ef92eb8fbb1..4d4541e0b24c 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1244,8 +1244,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, VM_MAYWRITE | VM_MAYEXEC); - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == - (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); for (i = 0; i < bo->ttm.ttm->num_pages; i++) { ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), PAGE_SIZE, vma->vm_page_prot); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3c4a3c91377a..7eb00a87b786 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -338,14 +338,12 @@ static const struct xe_device_desc mtl_desc = { static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), .has_display = true, - .require_force_probe = true, }; static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .has_display = true, - .require_force_probe = true, .has_heci_cscfi = 1, }; @@ -616,9 +614,9 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; - xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && - xe_modparam.enable_display && - desc->has_display; + xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + xe_modparam.probe_display && + desc->has_display; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); if (err) @@ -747,7 +745,7 @@ static void xe_pci_remove(struct pci_dev *pdev) { struct xe_device *xe; - xe = 
pci_get_drvdata(pdev); + xe = pdev_to_xe_device(pdev); if (!xe) /* driver load aborted, nothing to cleanup */ return; @@ -829,7 +827,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.media_name, xe->info.media_verx100 / 100, xe->info.media_verx100 % 100, - str_yes_no(xe->info.enable_display), + str_yes_no(xe->info.probe_display), xe->info.dma_mask_size, xe->info.tile_count, xe->info.has_heci_gscfi, xe->info.has_heci_cscfi); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index fcfb49af8c89..2e2accd76fb2 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -70,11 +70,34 @@ */ #ifdef CONFIG_LOCKDEP -static struct lockdep_map xe_pm_runtime_lockdep_map = { - .name = "xe_pm_runtime_lockdep_map" +static struct lockdep_map xe_pm_runtime_d3cold_map = { + .name = "xe_rpm_d3cold_map" +}; + +static struct lockdep_map xe_pm_runtime_nod3cold_map = { + .name = "xe_rpm_nod3cold_map" }; #endif +static bool __maybe_unused xe_rpm_reclaim_safe(const struct xe_device *xe) +{ + return !xe->d3cold.capable && !xe->info.has_sriov; +} + +static void xe_rpm_lockmap_acquire(const struct xe_device *xe) +{ + lock_map_acquire(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + +static void xe_rpm_lockmap_release(const struct xe_device *xe) +{ + lock_map_release(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + /** * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle * @xe: xe device instance @@ -354,7 +377,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) * annotation here and in xe_pm_runtime_get() lockdep will see * the potential lock inversion and give us a nice splat. */ - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify @@ -366,6 +389,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_bo_runtime_pm_release_mmap_offset(bo); mutex_unlock(&xe->mem_access.vram_userfault.lock); + xe_display_pm_runtime_suspend(xe); + if (xe->d3cold.allowed) { xe_display_pm_suspend(xe, true); @@ -387,7 +412,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) out: if (err) xe_display_pm_resume(xe, true); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -408,7 +433,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); @@ -431,14 +456,16 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_runtime_resume(xe); + if (xe->d3cold.allowed) { - xe_display_pm_resume(xe, true); err = xe_bo_restore_user(xe); if (err) goto out; } + out: - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -452,15 +479,37 @@ out: * stuff that can happen inside the runtime_resume callback by acquiring * a dummy lock (it doesn't protect anything and gets compiled out on * non-debug builds). Lockdep then only needs to see the - * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can - * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. 
+ * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. * For example if the (callers_locks) are ever grabbed in the * runtime_resume callback, lockdep should give us a nice splat. */ -static void pm_runtime_lockdep_prime(void) +static void xe_rpm_might_enter_cb(const struct xe_device *xe) { - lock_map_acquire(&xe_pm_runtime_lockdep_map); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); + xe_rpm_lockmap_release(xe); +} + +/* + * Prime the lockdep maps for known locking orders that need to + * be supported but that may not always occur on all systems. + */ +static void xe_pm_runtime_lockdep_prime(void) +{ + struct dma_resv lockdep_resv; + + dma_resv_init(&lockdep_resv); + lock_map_acquire(&xe_pm_runtime_d3cold_map); + /* D3Cold takes the dma_resv locks to evict bos */ + dma_resv_lock(&lockdep_resv, NULL); + dma_resv_unlock(&lockdep_resv); + lock_map_release(&xe_pm_runtime_d3cold_map); + + /* Shrinkers might like to wake up the device under reclaim. */ + fs_reclaim_acquire(GFP_KERNEL); + lock_map_acquire(&xe_pm_runtime_nod3cold_map); + lock_map_release(&xe_pm_runtime_nod3cold_map); + fs_reclaim_release(GFP_KERNEL); } /** @@ -475,7 +524,7 @@ void xe_pm_runtime_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); pm_runtime_resume(xe->drm.dev); } @@ -507,7 +556,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe) if (WARN_ON(xe_pm_read_callback_task(xe) == current)) return -ELOOP; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_get_sync(xe->drm.dev); } @@ -575,7 +624,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe) return true; } - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_resume_and_get(xe->drm.dev) >= 0; } @@ -667,3 +716,14 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) drm_dbg(&xe->drm, "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } + +/** + * xe_pm_module_init() - Perform xe_pm specific module initialization. + * + * Return: 0 on success. Currently doesn't fail. 
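The priming above boils down to a small lockdep idiom: acquire the dummy map, take the lock (or reclaim context) whose ordering should be recorded, then release both, so lockdep learns the dependency without the real path ever having to run. A generic sketch of that idiom with made-up names:

	/* 'example_map' and 'example_lock' are illustrative only. */
	static struct lockdep_map example_map = {
		.name = "example_map"
	};
	static DEFINE_MUTEX(example_lock);

	static void example_prime_ordering(void)
	{
		lock_map_acquire(&example_map);
		mutex_lock(&example_lock);	/* records example_map -> example_lock */
		mutex_unlock(&example_lock);
		lock_map_release(&example_map);
	}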
+ */ +int __init xe_pm_module_init(void) +{ + xe_pm_runtime_lockdep_prime(); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 104a21ae6dfd..9aef673b1c8a 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -32,5 +32,6 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); +int xe_pm_module_init(void); #endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 97a6a0b0b8ba..579ed31b46db 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1149,10 +1149,12 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { + if (job) + err = xe_sched_job_last_fence_add_dep(job, vm); + else + err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + } for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 655af89b31a9..dca374b6521c 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -26,7 +26,6 @@ #include <linux/scatterlist.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_range_manager.h> #include <drm/ttm/ttm_resource.h> diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index f3060979e63f..fe2cb2a96f78 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -25,10 +25,9 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) drm_suballoc_manager_fini(&sa_manager->base); - if (bo->vmap.is_iomem) + if (sa_manager->is_iomem) kvfree(sa_manager->cpu_ptr); - xe_bo_unpin_map_no_vm(bo); sa_manager->bo = NULL; } @@ -47,16 +46,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 sa_manager->bo = NULL; - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); return (struct xe_sa_manager *)bo; } sa_manager->bo = bo; + sa_manager->is_iomem = bo->vmap.is_iomem; drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); @@ -64,7 +64,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); if (!sa_manager->cpu_ptr) { - xe_bo_unpin_map_no_vm(sa_manager->bo); sa_manager->bo = NULL; return ERR_PTR(-ENOMEM); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2ef896aeca1d..2b070ff1292e 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -14,6 +14,7 @@ struct xe_sa_manager { struct xe_bo *bo; u64 gpu_addr; void *cpu_ptr; + bool is_iomem; }; #endif diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 
9628f9deb3c0..55d47450b2c6 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -89,8 +89,7 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job) if (ptrs->lrc_fence) xe_lrc_free_seqno_fence(ptrs->lrc_fence); - if (ptrs->chain_fence) - dma_fence_chain_free(ptrs->chain_fence); + dma_fence_chain_free(ptrs->chain_fence); } } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 3aa6270e5dd7..436faff09bac 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -55,7 +55,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, struct xe_user_fence *ufence; u64 __user *ptr = u64_to_user_ptr(addr); - if (!access_ok(ptr, sizeof(ptr))) + if (!access_ok(ptr, sizeof(*ptr))) return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); @@ -206,16 +206,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) { - int err; - - if (sync->fence) { - err = drm_sched_job_add_dependency(&job->drm, - dma_fence_get(sync->fence)); - if (err) { - dma_fence_put(sync->fence); - return err; - } - } + if (sync->fence) + return drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); return 0; } @@ -256,10 +249,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) { if (sync->syncobj) drm_syncobj_put(sync->syncobj); - if (sync->fence) - dma_fence_put(sync->fence); - if (sync->chain_fence) - dma_fence_chain_free(sync->chain_fence); + dma_fence_put(sync->fence); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index f46fd2df84de..f7113cf6109d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -5,7 +5,6 @@ */ #include <drm/drm_managed.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 77d4eec0118d..faa1bf42e50e 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -39,12 +39,23 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { }, { XE_RTP_NAME("Tuning: Compression Overfetch"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), - XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), + XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), + SET(CCCHKNREG1, L3CMPCTRL)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, + { XE_RTP_NAME("Tuning: Stateless compression control"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5b70d23724c4..4bb2a4a80ddc 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -15,6 +15,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_guc.h" 
#include "xe_map.h" #include "xe_mmio.h" #include "xe_module.h" @@ -105,15 +106,16 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) + fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ + fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ @@ -309,10 +311,10 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - /* We don't support GuC releases older than 70.19 */ - if (release->major < 70 || (release->major == 70 && release->minor < 19)) { - xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n", - release->major, release->minor); + /* We don't support GuC releases older than 70.29.2 */ + if (MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2)) { + xe_gt_err(gt, "Unsupported GuC v%u.%u.%u! v70.29.2 or newer is required\n", + release->major, release->minor, release->patch); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dab2a3b2e17f..4cc13eddb6b3 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -275,6 +275,8 @@ out_up_write: * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM * @vm: The VM. * @q: The exec_queue + * + * Note that this function might be called multiple times on the same queue. 
  */
 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 {
@@ -282,8 +284,10 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 		return;
 
 	down_write(&vm->lock);
-	list_del(&q->lr.link);
-	--vm->preempt.num_exec_queues;
+	if (!list_empty(&q->lr.link)) {
+		list_del_init(&q->lr.link);
+		--vm->preempt.num_exec_queues;
+	}
 	if (q->lr.pfence) {
 		dma_fence_enable_sw_signaling(q->lr.pfence);
 		dma_fence_put(q->lr.pfence);
@@ -1191,7 +1195,7 @@ static const struct drm_gpuvm_ops gpuvm_ops = {
 	.vm_free = xe_vm_free,
 };
 
-static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
+static u64 pde_encode_pat_index(u16 pat_index)
 {
 	u64 pte = 0;
 
@@ -1204,8 +1208,7 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
 	return pte;
 }
 
-static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
-				u32 pt_level)
+static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
 {
 	u64 pte = 0;
 
@@ -1246,12 +1249,11 @@ static u64 pte_encode_ps(u32 pt_level)
 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
 			      const u16 pat_index)
 {
-	struct xe_device *xe = xe_bo_device(bo);
 	u64 pde;
 
 	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pde |= pde_encode_pat_index(xe, pat_index);
+	pde |= pde_encode_pat_index(pat_index);
 
 	return pde;
 }
@@ -1259,12 +1261,11 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 			      u16 pat_index, u32 pt_level)
 {
-	struct xe_device *xe = xe_bo_device(bo);
 	u64 pte;
 
 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_pat_index(pat_index, pt_level);
 	pte |= pte_encode_ps(pt_level);
 
 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
@@ -1276,14 +1277,12 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
 			       u16 pat_index, u32 pt_level)
 {
-	struct xe_device *xe = xe_vma_vm(vma)->xe;
-
 	pte |= XE_PAGE_PRESENT;
 
 	if (likely(!xe_vma_read_only(vma)))
 		pte |= XE_PAGE_RW;
 
-	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_pat_index(pat_index, pt_level);
 	pte |= pte_encode_ps(pt_level);
 
 	if (unlikely(xe_vma_is_null(vma)))
@@ -1303,7 +1302,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
 
 	pte = addr;
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_pat_index(pat_index, pt_level);
 	pte |= pte_encode_ps(pt_level);
 
 	if (devmem)
@@ -1483,19 +1482,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	/* Kernel migration VM shouldn't have a circular loop.. */
 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
 		for_each_tile(tile, xe, id) {
-			struct xe_gt *gt = tile->primary_gt;
-			struct xe_vm *migrate_vm;
 			struct xe_exec_queue *q;
 			u32 create_flags = EXEC_QUEUE_FLAG_VM;
 
 			if (!vm->pt_root[id])
 				continue;
 
-			migrate_vm = xe_migrate_get_vm(tile->migrate);
-			q = xe_exec_queue_create_class(xe, gt, migrate_vm,
-						       XE_ENGINE_CLASS_COPY,
-						       create_flags);
-			xe_vm_put(migrate_vm);
+			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
 			if (IS_ERR(q)) {
 				err = PTR_ERR(q);
 				goto err_close;
@@ -1508,13 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	if (number_tiles > 1)
 		vm->composite_fence_ctx = dma_fence_context_alloc(1);
 
-	mutex_lock(&xe->usm.lock);
-	if (flags & XE_VM_FLAG_FAULT_MODE)
-		xe->usm.num_vm_in_fault_mode++;
-	else if (!(flags & XE_VM_FLAG_MIGRATION))
-		xe->usm.num_vm_in_non_fault_mode++;
-	mutex_unlock(&xe->usm.lock);
-
 	trace_xe_vm_create(vm);
 
 	return vm;
@@ -1628,11 +1614,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	up_write(&vm->lock);
 
 	mutex_lock(&xe->usm.lock);
-	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
-		xe->usm.num_vm_in_fault_mode--;
-	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
-		xe->usm.num_vm_in_non_fault_mode--;
-
 	if (vm->usm.asid) {
 		void *lookup;
 
@@ -1770,14 +1751,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
-			 xe_device_in_non_fault_mode(xe)))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
-			 xe_device_in_fault_mode(xe)))
-		return -EINVAL;
-
 	if (XE_IOCTL_DBG(xe, args->extensions))
 		return -EINVAL;
 
@@ -3185,9 +3158,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 {
 	struct xe_device *xe = xe_vma_vm(vma)->xe;
 	struct xe_tile *tile;
-	struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE];
-	u32 tile_needs_invalidate = 0;
+	struct xe_gt_tlb_invalidation_fence
+		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
 	u8 id;
+	u32 fence_id = 0;
 	int ret = 0;
 
 	xe_assert(xe, !xe_vma_is_null(vma));
@@ -3215,27 +3189,37 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		if (xe_pt_zap_ptes(tile, vma)) {
 			xe_device_wmb(xe);
 			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
-							  &fence[id], true);
+							  &fence[fence_id],
+							  true);
 
-			/*
-			 * FIXME: We potentially need to invalidate multiple
-			 * GTs within the tile
-			 */
 			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
-							 &fence[id], vma);
+							 &fence[fence_id], vma);
 			if (ret < 0) {
-				xe_gt_tlb_invalidation_fence_fini(&fence[id]);
+				xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
 				goto wait;
 			}
+			++fence_id;
+
+			if (!tile->media_gt)
+				continue;
 
-			tile_needs_invalidate |= BIT(id);
+			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+							  &fence[fence_id],
+							  true);
+
+			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
+							 &fence[fence_id], vma);
+			if (ret < 0) {
+				xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+				goto wait;
+			}
+			++fence_id;
 		}
 	}
 
 wait:
-	for_each_tile(tile, xe, id)
-		if (tile_needs_invalidate & BIT(id))
-			xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+	for (id = 0; id < fence_id; ++id)
+		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
 
 	vma->tile_invalidated = vma->tile_mask;
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 5b9e5a1857ea..28b7f95b6c2f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -557,16 +557,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 	},
 
-	/* Xe2_LPM */
-
XE_RTP_NAME("16021639441"), - XE_RTP_RULES(MEDIA_VERSION(2000)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - /* Xe2_HPM */ { XE_RTP_NAME("16021639441"), diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 540d38603f32..920ca5060146 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -27,7 +27,13 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) +14022293748 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) +22019794406 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) 22019338487_display PLATFORM(LUNARLAKE) 16023588340 GRAPHICS_VERSION(2001) +14019789679 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION_RANGE(1270, 2004) |