author     Dave Airlie <airlied@redhat.com>   2024-08-30 05:41:05 +0200
committer  Dave Airlie <airlied@redhat.com>   2024-08-30 05:41:06 +0200
commit     8bdb468dd7a5d17f8556afdd4c8d046939ff965f (patch)
tree       b3c579119359e8161bf0956431d5ab33d215eecd /drivers/gpu
parent     Merge tag 'drm-misc-next-2024-08-29' of https://gitlab.freedesktop.org/drm/mi... (diff)
parent     drm/xe/bmg: Drop force_probe requirement (diff)
Merge tag 'drm-xe-next-2024-08-28' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes:
- Fix OA format masks which were breaking build with gcc-5

Cross-subsystem Changes:

Driver Changes:
- Use dma_fence_chain_free in chain fence unused as a sync (Matthew Brost)
- Refactor hw engine lookup and mmio access to be used in more places (Dominik, Matt Auld, Mika Kuoppala)
- Enable priority mem read for Xe2 and later (Pallavi Mishra)
- Fix PL1 disable flow in xe_hwmon_power_max_write (Karthik)
- Fix refcount and speed up devcoredump (Matthew Brost)
- Add performance tuning changes to Xe2 (Akshata, Shekhar)
- Fix OA sysfs entry (Ashutosh)
- Add first GuC firmware support for BMG (Julia)
- Bump minimum GuC firmware for platforms under force_probe to match LNL and BMG (Julia)
- Fix access check on user fence creation (Nirmoy)
- Add/document workarounds for Xe2 (Julia, Daniele, John, Tejas)
- Document workaround and use proper WA infra (Matt Roper)
- Fix VF configuration on media GT (Michal Wajdeczko)
- Fix VM dma-resv lock (Matthew Brost)
- Allow suspend/resume exec queue backend op to be called multiple times (Matthew Brost)
- Add GT stats to debugfs (Nirmoy)
- Add hwconfig to debugfs (Matt Roper)
- Compile out all debugfs code with CONFIG_DEBUG_FS=n (Lucas)
- Remove dead kunit code (Jani Nikula)
- Refactor drvdata storing to help display (Jani Nikula)
- Cleanup unused xe parameter in pte handling (Himal)
- Rename s/enable_display/probe_display/ for clarity (Lucas)
- Fix missing MCR annotation in a couple of registers (Tejas)
- Fix DGFX display suspend/resume (Maarten)
- Prepare exec_queue_kill for PXP handling (Daniele)
- Fix devm/drmm issues (Daniele, Matthew Brost)
- Fix tile and ggtt fini sequences (Matthew Brost)
- Fix crashes when probing without firmware in place (Daniele, Matthew Brost)
- Use xe_managed for kernel BOs (Daniele, Matthew Brost)
- Future-proof dss_per_group calculation by using hwconfig (Matt Roper)
- Use reserved copy engine for user binds on faulting devices (Matthew Brost)
- Allow mixing dma-fence jobs and long-running faulting jobs (Francois)
- Cleanup redundant arg when creating user BO (Nirmoy)
- Prevent UAF around preempt fence (Auld)
- Fix display suspend/resume (Maarten)
- Use vma_pages() helper (Thorsten)
- Calculate pagefault queue size (Stuart, Matthew Auld)
- Fix missing pagefault wq destroy (Stuart)
- Fix lifetime handling of HW fence ctx (Matthew Brost)
- Fix destroy order for jobs (Matthew Brost)
- Fix TLB invalidation for media GT (Matthew Brost)
- Document GGTT (Rodrigo Vivi)
- Refactor GGTT layering and fix runtime outer protection (Rodrigo Vivi)
- Handle HPD polling on display pm runtime suspend/resume (Imre, Vinod)
- Drop unrequired NULL checks (Apoorva, Himal)
- Use separate rpm lockdep map for non-d3cold-capable devices (Thomas Hellström)
- Support "nomodeset" kernel command-line option (Thomas Zimmermann)
- Drop force_probe requirement for LNL and BMG (Lucas, Balasubramani)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/wd42jsh4i3q5zlrmi2cljejohdsrqc6hvtxf76lbxsp3ibrgmz@y54fa7wwxgsd
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/drm_print.c | 13
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dpt.c | 4
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dpt.h | 3
-rw-r--r--  drivers/gpu/drm/i915/display/skl_universal_plane.c | 3
-rw-r--r--  drivers/gpu/drm/xe/Makefile | 18
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 1
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h | 7
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.c | 94
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.h | 4
-rw-r--r--  drivers/gpu/drm/xe/display/xe_fb_pin.c | 50
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h | 9
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_bo.c | 8
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_dma_buf.c | 2
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_migrate.c | 24
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_pci.c | 52
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_pci_test.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_types.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.h | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 111
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump_types.h | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.c | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c | 213
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.h | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 487
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.h | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt_types.h | 54
-rw-r--r--  drivers/gpu/drm/xe/xe_gpu_scheduler.c | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_gpu_scheduler.h | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc.c | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.c | 45
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_debugfs.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.c | 40
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_pagefault.c | 57
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 108
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 44
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_stats.c | 49
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_stats.h | 29
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_types.h | 24
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_hwconfig.c | 97
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_hwconfig.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 58
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.c | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.c | 149
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_group.c | 372
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_group.h | 29
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_group_types.h | 51
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c | 37
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c | 29
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.h | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_module.c | 54
-rw-r--r--  drivers/gpu/drm/xe/xe_module.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_oa.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 90
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_res_cursor.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_sa.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_sa_types.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.c | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_tuning.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.c | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c | 90
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_wa_oob.rules | 6
85 files changed, 2203 insertions, 834 deletions
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index cf24dfdeb6b2..0081190201a7 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -100,8 +100,9 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str)
copy = iterator->remain;
/* Copy out the bit of the string that we need */
- memcpy(iterator->data,
- str + (iterator->start - iterator->offset), copy);
+ if (iterator->data)
+ memcpy(iterator->data,
+ str + (iterator->start - iterator->offset), copy);
iterator->offset = iterator->start + copy;
iterator->remain -= copy;
@@ -110,7 +111,8 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str)
len = min_t(ssize_t, strlen(str), iterator->remain);
- memcpy(iterator->data + pos, str, len);
+ if (iterator->data)
+ memcpy(iterator->data + pos, str, len);
iterator->offset += len;
iterator->remain -= len;
@@ -140,8 +142,9 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf)
if ((iterator->offset >= iterator->start) && (len < iterator->remain)) {
ssize_t pos = iterator->offset - iterator->start;
- snprintf(((char *) iterator->data) + pos,
- iterator->remain, "%pV", vaf);
+ if (iterator->data)
+ snprintf(((char *) iterator->data) + pos,
+ iterator->remain, "%pV", vaf);
iterator->offset += len;
iterator->remain -= len;
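
The three hunks above let the coredump printer run with a NULL destination buffer, so a caller can do a first pass purely to measure how much text a dump would produce before allocating storage (the xe devcoredump rework further down relies on exactly this). A minimal, hypothetical sketch of such a sizing pass, using only the drm_print_iterator fields from <drm/drm_print.h>:

#include <linux/limits.h>

#include <drm/drm_print.h>

/*
 * Illustrative helper (not part of this patch): count the bytes a real
 * coredump pass would emit, relying on the NULL-data checks added above.
 */
static ssize_t example_measure_coredump_text(void)
{
	struct drm_print_iterator iter = {
		.data   = NULL,		/* no destination buffer */
		.start  = 0,
		.remain = INT_MAX,	/* effectively unbounded */
	};
	struct drm_printer p = drm_coredump_printer(&iter);

	drm_printf(&p, "**** Example section ****\n");
	drm_printf(&p, "value: %d\n", 42);

	return INT_MAX - iter.remain;	/* bytes a real pass would have written */
}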
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 73a1918e2537..3a6d99044828 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -317,3 +317,7 @@ void intel_dpt_destroy(struct i915_address_space *vm)
i915_vm_put(&dpt->vm);
}
+u64 intel_dpt_offset(struct i915_vma *dpt_vma)
+{
+ return dpt_vma->node.start;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h
index ff18a525bfbe..1f88b0ee17e7 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.h
+++ b/drivers/gpu/drm/i915/display/intel_dpt.h
@@ -6,6 +6,8 @@
#ifndef __INTEL_DPT_H__
#define __INTEL_DPT_H__
+#include <linux/types.h>
+
struct drm_i915_private;
struct i915_address_space;
@@ -20,5 +22,6 @@ void intel_dpt_suspend(struct drm_i915_private *i915);
void intel_dpt_resume(struct drm_i915_private *i915);
struct i915_address_space *
intel_dpt_create(struct intel_framebuffer *fb);
+u64 intel_dpt_offset(struct i915_vma *dpt_vma);
#endif /* __INTEL_DPT_H__ */
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index ba5a628b4757..1cf1d5c8b9dc 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -14,6 +14,7 @@
#include "intel_de.h"
#include "intel_display_irq.h"
#include "intel_display_types.h"
+#include "intel_dpt.h"
#include "intel_fb.h"
#include "intel_fbc.h"
#include "intel_frontbuffer.h"
@@ -1162,7 +1163,7 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state,
* within the DPT is always 0.
*/
drm_WARN_ON(&i915->drm, plane_state->dpt_vma &&
- plane_state->dpt_vma->node.start);
+ intel_dpt_offset(plane_state->dpt_vma));
drm_WARN_ON(&i915->drm, offset & 0x1fffff);
return offset >> 9;
} else {
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1ff9602a52f6..b9670ae09a9e 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -28,7 +28,6 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
xe-y += xe_bb.o \
xe_bo.o \
xe_bo_evict.o \
- xe_debugfs.o \
xe_devcoredump.o \
xe_device.o \
xe_device_sysfs.o \
@@ -46,7 +45,6 @@ xe-y += xe_bb.o \
xe_gt.o \
xe_gt_ccs_mode.o \
xe_gt_clock.o \
- xe_gt_debugfs.o \
xe_gt_freq.o \
xe_gt_idle.o \
xe_gt_mcr.o \
@@ -59,7 +57,6 @@ xe-y += xe_bb.o \
xe_guc_ads.o \
xe_guc_ct.o \
xe_guc_db_mgr.o \
- xe_guc_debugfs.o \
xe_guc_hwconfig.o \
xe_guc_id_mgr.o \
xe_guc_klv_helpers.o \
@@ -69,9 +66,9 @@ xe-y += xe_bb.o \
xe_heci_gsc.o \
xe_hw_engine.o \
xe_hw_engine_class_sysfs.o \
+ xe_hw_engine_group.o \
xe_hw_fence.o \
xe_huc.o \
- xe_huc_debugfs.o \
xe_irq.o \
xe_lrc.o \
xe_migrate.o \
@@ -107,7 +104,6 @@ xe-y += xe_bb.o \
xe_ttm_vram_mgr.o \
xe_tuning.o \
xe_uc.o \
- xe_uc_debugfs.o \
xe_uc_fw.o \
xe_vm.o \
xe_vram.o \
@@ -124,7 +120,6 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o
# graphics virtualization (SR-IOV) support
xe-y += \
xe_gt_sriov_vf.o \
- xe_gt_sriov_vf_debugfs.o \
xe_guc_relay.o \
xe_memirq.o \
xe_sriov.o
@@ -133,7 +128,6 @@ xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
xe_gt_sriov_pf_config.o \
xe_gt_sriov_pf_control.o \
- xe_gt_sriov_pf_debugfs.o \
xe_gt_sriov_pf_monitor.o \
xe_gt_sriov_pf_policy.o \
xe_gt_sriov_pf_service.o \
@@ -281,6 +275,16 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y)
endif
ifeq ($(CONFIG_DEBUG_FS),y)
+ xe-y += xe_debugfs.o \
+ xe_gt_debugfs.o \
+ xe_gt_sriov_vf_debugfs.o \
+ xe_gt_stats.o \
+ xe_guc_debugfs.o \
+ xe_huc_debugfs.o \
+ xe_uc_debugfs.o
+
+ xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o
+
xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_display_debugfs.o \
i915-display/intel_display_debugfs_params.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 8f9f60b28306..6b30743a2f6c 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -351,6 +351,7 @@ enum xe_guc_klv_ids {
GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005,
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007,
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
+ GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009,
};
#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
index a20d2638ea7a..bdae8392e125 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
@@ -7,7 +7,8 @@
#define I915_VMA_H
#include <uapi/drm/i915_drm.h>
-#include <drm/drm_mm.h>
+
+#include "xe_ggtt_types.h"
/* We don't want these from i915_drm.h in case of Xe */
#undef I915_TILING_X
@@ -19,7 +20,7 @@ struct xe_bo;
struct i915_vma {
struct xe_bo *bo, *dpt;
- struct drm_mm_node node;
+ struct xe_ggtt_node *node;
};
#define i915_ggtt_clear_scanout(bo) do { } while (0)
@@ -28,7 +29,7 @@ struct i915_vma {
static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
{
- return vma->node.start;
+ return vma->node->base.start;
}
#endif
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 1188ab83cfae..78a884ddd499 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -46,7 +46,7 @@ static bool has_display(struct xe_device *xe)
*/
bool xe_display_driver_probe_defer(struct pci_dev *pdev)
{
- if (!xe_modparam.enable_display)
+ if (!xe_modparam.probe_display)
return 0;
return intel_display_driver_probe_defer(pdev);
@@ -62,7 +62,7 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev)
*/
void xe_display_driver_set_hooks(struct drm_driver *driver)
{
- if (!xe_modparam.enable_display)
+ if (!xe_modparam.probe_display)
return;
driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
@@ -104,7 +104,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
{
struct xe_device *xe = to_xe_device(dev);
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_power_domains_cleanup(xe);
@@ -112,7 +112,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
int xe_display_init_nommio(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return 0;
/* Fake uncore lock */
@@ -129,7 +129,7 @@ static void xe_display_fini_noirq(void *arg)
struct xe_device *xe = arg;
struct intel_display *display = &xe->display;
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_display_driver_remove_noirq(xe);
@@ -141,7 +141,7 @@ int xe_display_init_noirq(struct xe_device *xe)
struct intel_display *display = &xe->display;
int err;
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return 0;
intel_display_driver_early_probe(xe);
@@ -172,7 +172,7 @@ static void xe_display_fini_noaccel(void *arg)
{
struct xe_device *xe = arg;
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_display_driver_remove_nogem(xe);
@@ -182,7 +182,7 @@ int xe_display_init_noaccel(struct xe_device *xe)
{
int err;
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return 0;
err = intel_display_driver_probe_nogem(xe);
@@ -194,7 +194,7 @@ int xe_display_init_noaccel(struct xe_device *xe)
int xe_display_init(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return 0;
return intel_display_driver_probe(xe);
@@ -202,7 +202,7 @@ int xe_display_init(struct xe_device *xe)
void xe_display_fini(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_hpd_poll_fini(xe);
@@ -213,7 +213,7 @@ void xe_display_fini(struct xe_device *xe)
void xe_display_register(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_display_driver_register(xe);
@@ -223,7 +223,7 @@ void xe_display_register(struct xe_device *xe)
void xe_display_unregister(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_unregister_dsm_handler();
@@ -233,7 +233,7 @@ void xe_display_unregister(struct xe_device *xe)
void xe_display_driver_remove(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_display_driver_remove(xe);
@@ -243,7 +243,7 @@ void xe_display_driver_remove(struct xe_device *xe)
void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
if (master_ctl & DISPLAY_IRQ)
@@ -254,7 +254,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
{
struct intel_display *display = &xe->display;
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
if (gu_misc_iir & GU_MISC_GSE)
@@ -263,7 +263,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
void xe_display_irq_reset(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
gen11_display_irq_reset(xe);
@@ -271,7 +271,7 @@ void xe_display_irq_reset(struct xe_device *xe)
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
if (gt->info.id == XE_GT0)
@@ -308,11 +308,23 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
}
}
+/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed)
+ xe_display_pm_suspend(xe, true);
+
+ intel_hpd_poll_enable(xe);
+}
+
void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
bool s2idle = suspend_to_idle();
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
/*
@@ -320,11 +332,14 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
* properly.
*/
intel_power_domains_disable(xe);
- if (has_display(xe))
+ intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+ if (!runtime && has_display(xe)) {
drm_kms_helper_poll_disable(&xe->drm);
-
- if (!runtime)
+ intel_display_driver_disable_user_access(xe);
intel_display_driver_suspend(xe);
+ }
+
+ xe_display_flush_cleanup_work(xe);
xe_display_flush_cleanup_work(xe);
@@ -332,19 +347,20 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
intel_hpd_cancel_work(xe);
+ if (!runtime && has_display(xe))
+ intel_display_driver_suspend_access(xe);
+
intel_encoder_suspend_all(&xe->display);
intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
- intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
-
intel_dmc_suspend(xe);
}
void xe_display_pm_suspend_late(struct xe_device *xe)
{
bool s2idle = suspend_to_idle();
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_power_domains_suspend(xe, s2idle);
@@ -352,9 +368,20 @@ void xe_display_pm_suspend_late(struct xe_device *xe)
intel_display_power_suspend_late(xe);
}
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ intel_hpd_poll_disable(xe);
+
+ if (xe->d3cold.allowed)
+ xe_display_pm_resume(xe, true);
+}
+
void xe_display_pm_resume_early(struct xe_device *xe)
{
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_display_power_resume_early(xe);
@@ -366,7 +393,7 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
return;
intel_dmc_resume(xe);
@@ -377,14 +404,17 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_display_driver_init_hw(xe);
intel_hpd_init(xe);
+ if (!runtime && has_display(xe))
+ intel_display_driver_resume_access(xe);
+
/* MST sideband requires HPD interrupts enabled */
intel_dp_mst_resume(xe);
- if (!runtime)
+ if (!runtime && has_display(xe)) {
intel_display_driver_resume(xe);
-
- intel_hpd_poll_disable(xe);
- if (has_display(xe))
drm_kms_helper_poll_enable(&xe->drm);
+ intel_display_driver_enable_user_access(xe);
+ intel_hpd_poll_disable(xe);
+ }
intel_opregion_resume(display);
@@ -404,7 +434,7 @@ int xe_display_probe(struct xe_device *xe)
{
int err;
- if (!xe->info.enable_display)
+ if (!xe->info.probe_display)
goto no_display;
intel_display_device_probe(xe);
@@ -417,7 +447,7 @@ int xe_display_probe(struct xe_device *xe)
return 0;
no_display:
- xe->info.enable_display = false;
+ xe->info.probe_display = false;
unset_display_features(xe);
return 0;
}
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 000fb5799df5..53d727fd792b 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -38,6 +38,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
void xe_display_pm_suspend_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
void xe_display_pm_resume(struct xe_device *xe, bool runtime);
+void xe_display_pm_runtime_suspend(struct xe_device *xe);
+void xe_display_pm_runtime_resume(struct xe_device *xe);
#else
@@ -67,6 +69,8 @@ static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
#endif /* CONFIG_DRM_XE_DISPLAY */
#endif /* _XE_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index d7db44e79eaf..d650c5ac41a4 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -204,21 +204,28 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
align = max_t(u32, align, SZ_64K);
- if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
+ if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) {
vma->node = bo->ggtt_node;
} else if (view->type == I915_GTT_VIEW_NORMAL) {
u32 x, size = bo->ttm.base.size;
- ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
- align, 0);
- if (ret)
+ vma->node = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(vma->node)) {
+ ret = PTR_ERR(vma->node);
goto out_unlock;
+ }
+
+ ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0);
+ if (ret) {
+ xe_ggtt_node_fini(vma->node);
+ goto out_unlock;
+ }
for (x = 0; x < size; x += XE_PAGE_SIZE) {
u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
xe->pat.idx[XE_CACHE_NONE]);
- ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte);
+ ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte);
}
} else {
u32 i, ggtt_ofs;
@@ -227,12 +234,19 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
/* display seems to use tiles instead of bytes here, so convert it back.. */
u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE;
- ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
- align, 0);
- if (ret)
+ vma->node = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(vma->node)) {
+ ret = PTR_ERR(vma->node);
+ goto out_unlock;
+ }
+
+ ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0);
+ if (ret) {
+ xe_ggtt_node_fini(vma->node);
goto out_unlock;
+ }
- ggtt_ofs = vma->node.start;
+ ggtt_ofs = vma->node->base.start;
for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
write_ggtt_rotated(bo, ggtt, &ggtt_ofs,
@@ -320,14 +334,11 @@ err:
static void __xe_unpin_fb_vma(struct i915_vma *vma)
{
- struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev);
- struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
-
if (vma->dpt)
xe_bo_unpin_map_no_vm(vma->dpt);
- else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
- vma->bo->ggtt_node.start != vma->node.start)
- xe_ggtt_remove_node(ggtt, &vma->node, false);
+ else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) ||
+ vma->bo->ggtt_node->base.start != vma->node->base.start)
+ xe_ggtt_node_remove(vma->node, false);
ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
ttm_bo_unpin(&vma->bo->ttm);
@@ -377,8 +388,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
}
/*
- * For Xe introduce dummy intel_dpt_create which just return NULL and
- * intel_dpt_destroy which does nothing.
+ * For Xe introduce dummy intel_dpt_create which just return NULL,
+ * intel_dpt_destroy which does nothing, and fake intel_dpt_ofsset returning 0;
*/
struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
{
@@ -389,3 +400,8 @@ void intel_dpt_destroy(struct i915_address_space *vm)
{
return;
}
+
+u64 intel_dpt_offset(struct i915_vma *dpt_vma)
+{
+ return 0;
+}
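
The pinning and unpinning changes above switch the display code to the new xe_ggtt_node lifecycle that replaces the embedded drm_mm_node: allocate a node wrapper, insert it into the GGTT, read the offset from node->base.start, and either fini it (if insertion failed) or remove it once the mapping goes away. A condensed, hypothetical sketch of that pattern, with the real code's locking and PTE programming omitted:

/*
 * Illustrative only: the xe_ggtt_node lifecycle as used by the hunks above.
 */
static int example_reserve_ggtt_range(struct xe_ggtt *ggtt, u32 size, u32 align)
{
	struct xe_ggtt_node *node;
	int ret;

	node = xe_ggtt_node_init(ggtt);
	if (IS_ERR(node))
		return PTR_ERR(node);

	ret = xe_ggtt_node_insert_locked(node, size, align, 0);
	if (ret) {
		/* Never inserted: just free the wrapper. */
		xe_ggtt_node_fini(node);
		return ret;
	}

	/* The reserved GGTT offset now lives in node->base.start. */

	/*
	 * Releases the range; as in __xe_unpin_fb_vma() above, no separate
	 * fini is issued afterwards (assumed to be handled by remove).
	 */
	xe_ggtt_node_remove(node, false);

	return 0;
}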
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index c38db2a74614..81b71903675e 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -104,6 +104,7 @@
#define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED)
#define GHWSP_CSB_REPORT_DIS REG_BIT(15)
#define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14)
+#define CS_PRIORITY_MEM_READ REG_BIT(7)
#define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
#define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 8cd4a9589410..0d1a4a9f4e11 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -80,7 +80,10 @@
#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
-#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
+#define STATELESS_COMPRESSION_CTRL XE_REG_MCR(0x4148)
+#define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0)
+
+#define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194)
#define CG_DIS_CNTLBUS REG_BIT(6)
#define CCS_AUX_INV XE_REG(0x4208)
@@ -193,6 +196,7 @@
#define GSCPSMI_BASE XE_REG(0x880c)
#define CCCHKNREG1 XE_REG_MCR(0x8828)
+#define L3CMPCTRL REG_BIT(23)
#define ENCOMPPERFFIX REG_BIT(18)
/* Fuse readout registers for GT */
@@ -367,6 +371,9 @@
#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4)
#define XEHP_LNESPARE REG_BIT(19)
+#define L3SQCREG2 XE_REG_MCR(0xb104)
+#define COMPMEMRD256BOVRFETCHEN REG_BIT(20)
+
#define L3SQCREG3 XE_REG_MCR(0xb108)
#define COMPPWOVERFETCHEN REG_BIT(28)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 1768483da1b7..8dac069483e8 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -36,7 +36,8 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
/* Optionally clear bo *and* CCS data in VRAM. */
if (clear) {
- fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
+ fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource,
+ XE_MIGRATE_CLEAR_FLAG_FULL);
if (IS_ERR(fence)) {
KUNIT_FAIL(test, "Failed to submit bo clear.\n");
return PTR_ERR(fence);
@@ -124,7 +125,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
kunit_info(test, "Testing system memory\n");
bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device, bo_flags);
+ bo_flags);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "Failed to create bo.\n");
return;
@@ -205,7 +206,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
xe_vm_lock(vm, false);
bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device,
bo_flags);
xe_vm_unlock(vm);
if (IS_ERR(bo)) {
@@ -215,7 +215,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device, bo_flags);
+ bo_flags);
if (IS_ERR(external)) {
KUNIT_FAIL(test, "external bo create err=%pe\n", external);
goto cleanup_bo;
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index c24c8509227e..13db6c0530b3 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -126,7 +126,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
kunit_info(test, "running %s\n", __func__);
bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device, params->mem_mask);
+ params->mem_mask);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(bo));
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 4344a1724029..1a192a2a941b 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -105,7 +105,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
}
xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
- fence = xe_migrate_clear(m, remote, remote->ttm.resource);
+ fence = xe_migrate_clear(m, remote, remote->ttm.resource,
+ XE_MIGRATE_CLEAR_FLAG_FULL);
if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" :
"Clearing remote small bo", test)) {
retval = xe_map_rd(xe, &remote->vmap, 0, u64);
@@ -279,7 +280,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
kunit_info(test, "Clearing small buffer object\n");
xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
expected = 0;
- fence = xe_migrate_clear(m, tiny, tiny->ttm.resource);
+ fence = xe_migrate_clear(m, tiny, tiny->ttm.resource,
+ XE_MIGRATE_CLEAR_FLAG_FULL);
if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
goto out;
@@ -300,7 +302,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
kunit_info(test, "Clearing big buffer object\n");
xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
expected = 0;
- fence = xe_migrate_clear(m, big, big->ttm.resource);
+ fence = xe_migrate_clear(m, big, big->ttm.resource,
+ XE_MIGRATE_CLEAR_FLAG_FULL);
if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
goto out;
@@ -603,7 +606,8 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile,
kunit_info(test, "Clear vram buffer object\n");
expected = 0x0000000000000000;
- fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
+ fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource,
+ XE_MIGRATE_CLEAR_FLAG_FULL);
if (sanity_fence_failed(xe, fence, "Clear vram_bo", test))
return;
dma_fence_put(fence);
@@ -637,7 +641,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
long ret;
sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
- DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device,
+ DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(sys_bo)) {
@@ -660,8 +664,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(sys_bo);
- ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ DRM_XE_GEM_CPU_CACHING_WC,
+ bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(ccs_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -683,8 +688,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(ccs_bo);
- vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ DRM_XE_GEM_CPU_CACHING_WC,
+ bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(vram_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(vram_bo));
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 577ee7d14381..67404863087e 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -12,58 +12,6 @@
#include <kunit/test-bug.h>
#include <kunit/visibility.h>
-struct kunit_test_data {
- int ndevs;
- xe_device_fn xe_fn;
-};
-
-static int dev_to_xe_device_fn(struct device *dev, void *__data)
-
-{
- struct drm_device *drm = dev_get_drvdata(dev);
- struct kunit_test_data *data = __data;
- int ret = 0;
- int idx;
-
- data->ndevs++;
-
- if (drm_dev_enter(drm, &idx))
- ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev)));
- drm_dev_exit(idx);
-
- return ret;
-}
-
-/**
- * xe_call_for_each_device - Iterate over all devices this driver binds to
- * @xe_fn: Function to call for each device.
- *
- * This function iterated over all devices this driver binds to, and calls
- * @xe_fn: for each one of them. If the called function returns anything else
- * than 0, iteration is stopped and the return value is returned by this
- * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
- * called for the corresponding drm device.
- *
- * Return: Number of devices iterated or
- * the error code of a call to @xe_fn returning an error code.
- */
-int xe_call_for_each_device(xe_device_fn xe_fn)
-{
- int ret;
- struct kunit_test_data data = {
- .xe_fn = xe_fn,
- .ndevs = 0,
- };
-
- ret = driver_for_each_device(&xe_pci_driver.driver, NULL,
- &data, dev_to_xe_device_fn);
-
- if (!data.ndevs)
- kunit_skip(current->kunit_test, "test runs only on hardware\n");
-
- return ret ?: data.ndevs;
-}
-
/**
* xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs
* @xe_fn: Function to call for each device.
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index 3e2558bc3c90..ede46800aff1 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -19,7 +19,6 @@ typedef int (*xe_device_fn)(struct xe_device *);
typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *);
typedef void (*xe_media_fn)(const struct xe_media_desc *);
-int xe_call_for_each_device(xe_device_fn xe_fn);
void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
void xe_call_for_each_media_ip(xe_media_fn xe_fn);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 45652d7e6fa6..e3d68710a91a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -793,8 +793,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
}
}
} else {
- if (move_lacks_source)
- fence = xe_migrate_clear(migrate, bo, new_mem);
+ if (move_lacks_source) {
+ u32 flags = 0;
+
+ if (mem_type_is_vram(new_mem->mem_type))
+ flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
+ else if (handle_system_ccs)
+ flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
+
+ fence = xe_migrate_clear(migrate, bo, new_mem, flags);
+ }
else
fence = xe_migrate_copy(migrate, bo, bo, old_mem,
new_mem, handle_system_ccs);
@@ -1090,7 +1098,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
- if (bo->ggtt_node.size)
+ if (bo->ggtt_node && bo->ggtt_node->base.size)
xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
#ifdef CONFIG_PROC_FS
@@ -1491,11 +1499,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
u16 cpu_caching,
- enum ttm_bo_type type,
u32 flags)
{
struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
- cpu_caching, type,
+ cpu_caching, ttm_bo_type_device,
flags | XE_BO_FLAG_USER);
if (!IS_ERR(bo))
xe_bo_unlock_vm_held(bo);
@@ -2019,7 +2026,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
}
bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
- ttm_bo_type_device, bo_flags);
+ bo_flags);
if (vm)
xe_vm_unlock(vm);
@@ -2325,7 +2332,6 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
bo = xe_bo_create_user(xe, NULL, NULL, args->size,
DRM_XE_GEM_CPU_CACHING_WC,
- ttm_bo_type_device,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_SCANOUT |
XE_BO_FLAG_NEEDS_CPU_ACCESS);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 1c9dc8adaaa3..dbfb3209615d 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -87,7 +87,6 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
u16 cpu_caching,
- enum ttm_bo_type type,
u32 flags);
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
@@ -195,9 +194,12 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
static inline u32
xe_bo_ggtt_addr(struct xe_bo *bo)
{
- XE_WARN_ON(bo->ggtt_node.size > bo->size);
- XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
- return bo->ggtt_node.start;
+ if (XE_WARN_ON(!bo->ggtt_node))
+ return 0;
+
+ XE_WARN_ON(bo->ggtt_node->base.size > bo->size);
+ XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32));
+ return bo->ggtt_node->base.start;
}
int xe_bo_vmap(struct xe_bo *bo);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index ebc8abf7930a..2ed558ac2264 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -8,12 +8,13 @@
#include <linux/iosys-map.h>
-#include <drm/drm_mm.h>
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_placement.h>
+#include "xe_ggtt_types.h"
+
struct xe_device;
struct xe_vm;
@@ -39,7 +40,7 @@ struct xe_bo {
/** @placement: current placement for this BO */
struct ttm_placement placement;
/** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
- struct drm_mm_node ggtt_node;
+ struct xe_ggtt_node *ggtt_node;
/** @vmap: iosys map of this buffer */
struct iosys_map vmap;
/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h
index 715b8e2e0bd9..17f4c2f1b5e4 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.h
+++ b/drivers/gpu/drm/xe/xe_debugfs.h
@@ -8,6 +8,10 @@
struct xe_device;
+#ifdef CONFIG_DEBUG_FS
void xe_debugfs_register(struct xe_device *xe);
+#else
+static inline void xe_debugfs_register(struct xe_device *xe) { }
+#endif
#endif
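
This stub header pairs with the Makefile hunk earlier in the series: the debugfs objects are only compiled when CONFIG_DEBUG_FS=y, so every entry point reachable from common code needs a static inline no-op fallback. The same shape would presumably apply to the other *_debugfs.h headers moved under that Makefile block; a generic sketch with a hypothetical function name:

/* Generic form of the compile-out pattern shown above (illustrative only). */
struct xe_gt;

#ifdef CONFIG_DEBUG_FS
void xe_example_debugfs_register(struct xe_gt *gt);
#else
static inline void xe_example_debugfs_register(struct xe_gt *gt)
{
}
#endif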
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index d8d8ca2c19d3..bdb76e834e4c 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
return &q->gt->uc.guc;
}
-static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
-{
- struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
-
- /* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
- xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
- xe_vm_snapshot_capture_delayed(ss->vm);
- xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
- xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
-}
-
-static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
- size_t count, void *data, size_t datalen)
+static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
+ struct xe_devcoredump *coredump)
{
- struct xe_devcoredump *coredump = data;
struct xe_device *xe;
struct xe_devcoredump_snapshot *ss;
struct drm_printer p;
@@ -89,18 +76,11 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
struct timespec64 ts;
int i;
- if (!coredump)
- return -ENODEV;
-
xe = coredump_to_xe(coredump);
ss = &coredump->snapshot;
- /* Ensure delayed work is captured before continuing */
- flush_work(&ss->work);
-
iter.data = buffer;
- iter.offset = 0;
- iter.start = offset;
+ iter.start = 0;
iter.remain = count;
p = drm_coredump_printer(&iter);
@@ -134,10 +114,83 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
return count - iter.remain;
}
+static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
+{
+ int i;
+
+ xe_guc_ct_snapshot_free(ss->ct);
+ ss->ct = NULL;
+
+ xe_guc_exec_queue_snapshot_free(ss->ge);
+ ss->ge = NULL;
+
+ xe_sched_job_snapshot_free(ss->job);
+ ss->job = NULL;
+
+ for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+ if (ss->hwe[i]) {
+ xe_hw_engine_snapshot_free(ss->hwe[i]);
+ ss->hwe[i] = NULL;
+ }
+
+ xe_vm_snapshot_free(ss->vm);
+ ss->vm = NULL;
+}
+
+static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
+{
+ struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
+ struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
+
+ /* keep going if fw fails as we still want to save the memory and SW data */
+ if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+ xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+ xe_vm_snapshot_capture_delayed(ss->vm);
+ xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
+ xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+
+ /* Calculate devcoredump size */
+ ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
+
+ ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
+ if (!ss->read.buffer)
+ return;
+
+ __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
+ xe_devcoredump_snapshot_free(ss);
+}
+
+static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
+ size_t count, void *data, size_t datalen)
+{
+ struct xe_devcoredump *coredump = data;
+ struct xe_devcoredump_snapshot *ss;
+ ssize_t byte_copied;
+
+ if (!coredump)
+ return -ENODEV;
+
+ ss = &coredump->snapshot;
+
+ /* Ensure delayed work is captured before continuing */
+ flush_work(&ss->work);
+
+ if (!ss->read.buffer)
+ return -ENODEV;
+
+ if (offset >= ss->read.size)
+ return 0;
+
+ byte_copied = count < ss->read.size - offset ? count :
+ ss->read.size - offset;
+ memcpy(buffer, ss->read.buffer + offset, byte_copied);
+
+ return byte_copied;
+}
+
static void xe_devcoredump_free(void *data)
{
struct xe_devcoredump *coredump = data;
- int i;
/* Our device is gone. Nothing to do... */
if (!data || !coredump_to_xe(coredump))
@@ -145,13 +198,8 @@ static void xe_devcoredump_free(void *data)
cancel_work_sync(&coredump->snapshot.work);
- xe_guc_ct_snapshot_free(coredump->snapshot.ct);
- xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
- xe_sched_job_snapshot_free(coredump->snapshot.job);
- for (i = 0; i < XE_NUM_HW_ENGINES; i++)
- if (coredump->snapshot.hwe[i])
- xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
- xe_vm_snapshot_free(coredump->snapshot.vm);
+ xe_devcoredump_snapshot_free(&coredump->snapshot);
+ kvfree(coredump->snapshot.read.buffer);
/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
@@ -260,4 +308,5 @@ int xe_devcoredump_init(struct xe_device *xe)
{
return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 923cdf72a816..440d05d77a5a 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -46,6 +46,14 @@ struct xe_devcoredump_snapshot {
struct xe_sched_job_snapshot *job;
/** @vm: Snapshot of VM state */
struct xe_vm_snapshot *vm;
+
+ /** @read: devcoredump in human readable format */
+ struct {
+ /** @read.size: size of devcoredump in human readable format */
+ ssize_t size;
+ /** @read.buffer: buffer of devcoredump in human readable format */
+ char *buffer;
+ } read;
};
/**
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 1aba6f9eaa19..b6db7e082d88 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -37,6 +37,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
+#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
#include "xe_memirq.h"
@@ -165,6 +166,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
* vm->lock taken during xe_exec_queue_kill().
*/
xa_for_each(&xef->exec_queue.xa, idx, q) {
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
xe_exec_queue_kill(q);
xe_exec_queue_put(q);
}
@@ -543,7 +546,7 @@ static void update_device_info(struct xe_device *xe)
{
/* disable features that are not available/applicable to VFs */
if (IS_SRIOV_VF(xe)) {
- xe->info.enable_display = 0;
+ xe->info.probe_display = 0;
xe->info.has_heci_gscfi = 0;
xe->info.skip_guc_pc = 1;
xe->info.skip_pcode = 1;
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index db6cc8d0d6b8..f052c06a2d2f 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -15,6 +15,11 @@ static inline struct xe_device *to_xe_device(const struct drm_device *dev)
return container_of(dev, struct xe_device, drm);
}
+static inline struct xe_device *kdev_to_xe_device(struct device *kdev)
+{
+ return dev_get_drvdata(kdev);
+}
+
static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
{
return pci_get_drvdata(pdev);
@@ -134,16 +139,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
void xe_device_assert_mem_access(struct xe_device *xe);
-static inline bool xe_device_in_fault_mode(struct xe_device *xe)
-{
- return xe->usm.num_vm_in_fault_mode != 0;
-}
-
-static inline bool xe_device_in_non_fault_mode(struct xe_device *xe)
-{
- return xe->usm.num_vm_in_non_fault_mode != 0;
-}
-
static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
{
return xe->info.has_flat_ccs;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 5b7292a9a66d..856db4020048 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -204,7 +204,7 @@ struct xe_tile {
struct xe_memirq memirq;
/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
- struct drm_mm_node ggtt_balloon[2];
+ struct xe_ggtt_node *ggtt_balloon[2];
} vf;
} sriov;
@@ -282,8 +282,15 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
- /** @info.enable_display: display enabled */
- u8 enable_display:1;
+ /**
+ * @info.probe_display: Probe display hardware. If set to
+ * false, the driver will behave as if there is no display
+ * hardware present and will not try to read/write to it in any
+ * way. The display hardware, if it exists, will not be
+ * exposed to userspace and will be left untouched in whatever
+ * state the firmware or bootloader left it in.
+ */
+ u8 probe_display:1;
/** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
u8 skip_mtcfg:1;
/** @info.skip_pcode: skip access to PCODE uC */
@@ -361,10 +368,6 @@ struct xe_device {
struct xarray asid_to_vm;
/** @usm.next_asid: next ASID, used to cyclical alloc asids */
u32 next_asid;
- /** @usm.num_vm_in_fault_mode: number of VM in fault mode */
- u32 num_vm_in_fault_mode;
- /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */
- u32 num_vm_in_non_fault_mode;
/** @usm.lock: protects UM state */
struct mutex lock;
} usm;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index f36980aa26e6..484acfbe0e61 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -14,6 +14,7 @@
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
+#include "xe_hw_engine_group.h"
#include "xe_macros.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
@@ -124,6 +125,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
bool write_locked, skip_retry = false;
ktime_t end = 0;
int err = 0;
+ struct xe_hw_engine_group *group;
+ enum xe_hw_engine_group_execution_mode mode, previous_mode;
if (XE_IOCTL_DBG(xe, args->extensions) ||
XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
@@ -182,6 +185,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
}
+ group = q->hwe->hw_engine_group;
+ mode = xe_hw_engine_group_find_exec_mode(q);
+
+ if (mode == EXEC_MODE_DMA_FENCE) {
+ err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
+ if (err)
+ goto err_syncs;
+ }
+
retry:
if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) {
err = down_write_killable(&vm->lock);
@@ -199,7 +211,7 @@ retry:
downgrade_write(&vm->lock);
write_locked = false;
if (err)
- goto err_unlock_list;
+ goto err_hw_exec_mode;
}
if (!args->num_batch_buffer) {
@@ -312,6 +324,9 @@ retry:
spin_unlock(&xe->ttm.lru_lock);
}
+ if (mode == EXEC_MODE_LR)
+ xe_hw_engine_group_resume_faulting_lr_jobs(group);
+
err_repin:
if (!xe_vm_in_lr_mode(vm))
up_read(&vm->userptr.notifier_lock);
@@ -324,6 +339,9 @@ err_unlock_list:
up_read(&vm->lock);
if (err == -EAGAIN && !skip_retry)
goto retry;
+err_hw_exec_mode:
+ if (mode == EXEC_MODE_DMA_FENCE)
+ xe_hw_engine_group_put(group);
err_syncs:
while (num_syncs--)
xe_sync_entry_cleanup(&syncs[num_syncs]);
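
The exec ioctl changes above tie submission to the new hw engine group execution modes: a dma-fence submission takes a reference on the group mode before queuing and drops it on the shared exit path, while a long-running (LR) submission resumes the group's faulting LR jobs once its own job is queued. A compressed, hypothetical outline of that handshake (the ioctl's locking, job construction, and error unwinding are omitted):

/* Condensed from xe_exec_ioctl() above; not a drop-in implementation. */
static int example_submit(struct xe_exec_queue *q)
{
	struct xe_hw_engine_group *group = q->hwe->hw_engine_group;
	enum xe_hw_engine_group_execution_mode mode, previous_mode;
	int err;

	mode = xe_hw_engine_group_find_exec_mode(q);

	if (mode == EXEC_MODE_DMA_FENCE) {
		/* Presumably switches/holds the group in dma-fence mode. */
		err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
		if (err)
			return err;
	}

	/* ... validate, build and queue the job as the ioctl does ... */

	if (mode == EXEC_MODE_LR)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	if (mode == EXEC_MODE_DMA_FENCE)
		xe_hw_engine_group_put(group);

	return 0;
}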
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 017d939659b5..e53937fafd14 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -14,6 +14,7 @@
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine_class_sysfs.h"
+#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
@@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
q->ops = gt->exec_queue_ops;
INIT_LIST_HEAD(&q->lr.link);
INIT_LIST_HEAD(&q->multi_gt_link);
+ INIT_LIST_HEAD(&q->hw_engine_group_link);
q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
q->sched_props.preempt_timeout_us =
@@ -166,7 +168,8 @@ err_post_alloc:
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
struct xe_vm *vm,
- enum xe_engine_class class, u32 flags)
+ enum xe_engine_class class,
+ u32 flags, u64 extensions)
{
struct xe_hw_engine *hwe, *hwe0 = NULL;
enum xe_hw_engine_id id;
@@ -186,7 +189,54 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
if (!logical_mask)
return ERR_PTR(-ENODEV);
- return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0);
+ return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
+}
+
+/**
+ * xe_exec_queue_create_bind() - Create bind exec queue.
+ * @xe: Xe device.
+ * @tile: tile which bind exec queue belongs to.
+ * @flags: exec queue creation flags
+ * @extensions: exec queue creation extensions
+ *
+ * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
+ * for access to physical memory required for page table programming. On a
+ * faulting devices the reserved copy engine instance must be used to avoid
+ * deadlocking (user binds cannot get stuck behind faults as kernel binds which
+ * resolve faults depend on user binds). On non-faulting devices any copy engine
+ * can be used.
+ *
+ * Returns exec queue on success, ERR_PTR on failure
+ */
+struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
+ struct xe_tile *tile,
+ u32 flags, u64 extensions)
+{
+ struct xe_gt *gt = tile->primary_gt;
+ struct xe_exec_queue *q;
+ struct xe_vm *migrate_vm;
+
+ migrate_vm = xe_migrate_get_vm(tile->migrate);
+ if (xe->info.has_usm) {
+ struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
+ XE_ENGINE_CLASS_COPY,
+ gt->usm.reserved_bcs_instance,
+ false);
+
+ if (!hwe)
+ return ERR_PTR(-EINVAL);
+
+ q = xe_exec_queue_create(xe, migrate_vm,
+ BIT(hwe->logical_instance), 1, hwe,
+ flags, extensions);
+ } else {
+ q = xe_exec_queue_create_class(xe, gt, migrate_vm,
+ XE_ENGINE_CLASS_COPY, flags,
+ extensions);
+ }
+ xe_vm_put(migrate_vm);
+
+ return q;
}
void xe_exec_queue_destroy(struct kref *ref)
@@ -418,63 +468,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
return 0;
}
-static const enum xe_engine_class user_to_xe_engine_class[] = {
- [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
- [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
- [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
- [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
- [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
-};
-
-static struct xe_hw_engine *
-find_hw_engine(struct xe_device *xe,
- struct drm_xe_engine_class_instance eci)
-{
- u32 idx;
-
- if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
- return NULL;
-
- if (eci.gt_id >= xe->info.gt_count)
- return NULL;
-
- idx = array_index_nospec(eci.engine_class,
- ARRAY_SIZE(user_to_xe_engine_class));
-
- return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
- user_to_xe_engine_class[idx],
- eci.engine_instance, true);
-}
-
-static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
- struct drm_xe_engine_class_instance *eci,
- u16 width, u16 num_placements)
-{
- struct xe_hw_engine *hwe;
- enum xe_hw_engine_id id;
- u32 logical_mask = 0;
-
- if (XE_IOCTL_DBG(xe, width != 1))
- return 0;
- if (XE_IOCTL_DBG(xe, num_placements != 1))
- return 0;
- if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
- return 0;
-
- eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
-
- for_each_hw_engine(hwe, gt, id) {
- if (xe_hw_engine_is_reserved(hwe))
- continue;
-
- if (hwe->class ==
- user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
- logical_mask |= BIT(hwe->logical_instance);
- }
-
- return logical_mask;
-}
-
static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
struct drm_xe_engine_class_instance *eci,
u16 width, u16 num_placements)
@@ -497,7 +490,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
n = j * width + i;
- hwe = find_hw_engine(xe, eci[n]);
+ hwe = xe_hw_engine_lookup(xe, eci[n]);
if (XE_IOCTL_DBG(xe, !hwe))
return 0;
@@ -536,8 +529,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
struct drm_xe_engine_class_instance __user *user_eci =
u64_to_user_ptr(args->instances);
struct xe_hw_engine *hwe;
- struct xe_vm *vm, *migrate_vm;
+ struct xe_vm *vm;
struct xe_gt *gt;
+ struct xe_tile *tile;
struct xe_exec_queue *q = NULL;
u32 logical_mask;
u32 id;
@@ -562,37 +556,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
- for_each_gt(gt, xe, id) {
- struct xe_exec_queue *new;
- u32 flags;
-
- if (xe_gt_is_media_type(gt))
- continue;
-
- eci[0].gt_id = gt->info.id;
- logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
- args->width,
- args->num_placements);
- if (XE_IOCTL_DBG(xe, !logical_mask))
- return -EINVAL;
-
- hwe = find_hw_engine(xe, eci[0]);
- if (XE_IOCTL_DBG(xe, !hwe))
- return -EINVAL;
-
- /* The migration vm doesn't hold rpm ref */
- xe_pm_runtime_get_noresume(xe);
-
- flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0);
+ if (XE_IOCTL_DBG(xe, args->width != 1) ||
+ XE_IOCTL_DBG(xe, args->num_placements != 1) ||
+ XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
+ return -EINVAL;
- migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
- new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
- args->width, hwe, flags,
- args->extensions);
+ for_each_tile(tile, xe, id) {
+ struct xe_exec_queue *new;
+ u32 flags = EXEC_QUEUE_FLAG_VM;
- xe_pm_runtime_put(xe); /* now held by engine */
+ if (id)
+ flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
- xe_vm_put(migrate_vm);
+ new = xe_exec_queue_create_bind(xe, tile, flags,
+ args->extensions);
if (IS_ERR(new)) {
err = PTR_ERR(new);
if (q)
@@ -613,7 +590,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, !logical_mask))
return -EINVAL;
- hwe = find_hw_engine(xe, eci[0]);
+ hwe = xe_hw_engine_lookup(xe, eci[0]);
if (XE_IOCTL_DBG(xe, !hwe))
return -EINVAL;
@@ -648,6 +625,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, err))
goto put_exec_queue;
}
+
+ if (q->vm && q->hwe->hw_engine_group) {
+ err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
+ if (err)
+ goto put_exec_queue;
+ }
}
mutex_lock(&xef->exec_queue.lock);
@@ -798,6 +781,15 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
}
+/**
+ * xe_exec_queue_kill - permanently stop all execution from an exec queue
+ * @q: The exec queue
+ *
+ * This function permanently stops all activity on an exec queue. If the queue
+ * is actively executing on the HW, it will be kicked off the engine; any
+ * pending jobs are discarded and all future submissions are rejected.
+ * This function is safe to call multiple times.
+ */
void xe_exec_queue_kill(struct xe_exec_queue *q)
{
struct xe_exec_queue *eq = q, *next;
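A minimal, hypothetical teardown sketch built on the semantics documented above; the caller and its context are illustrative and not part of this patch:

static void teardown_queue_sketch(struct xe_exec_queue *q)
{
	/* nothing new can run after this; safe even if already killed */
	xe_exec_queue_kill(q);

	/* drop the caller's reference; final free happens via the kref */
	xe_exec_queue_put(q);
}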
@@ -830,6 +822,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, !q))
return -ENOENT;
+ if (q->vm && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
+
xe_exec_queue_kill(q);
trace_xe_exec_queue_close(q);
@@ -841,10 +836,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
struct xe_vm *vm)
{
- if (q->flags & EXEC_QUEUE_FLAG_VM)
+ if (q->flags & EXEC_QUEUE_FLAG_VM) {
lockdep_assert_held(&vm->lock);
- else
+ } else {
xe_vm_assert_held(vm);
+ lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
+ }
}
/**
@@ -856,10 +853,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
xe_exec_queue_last_fence_lockdep_assert(q, vm);
- if (q->last_fence) {
- dma_fence_put(q->last_fence);
- q->last_fence = NULL;
- }
+ xe_exec_queue_last_fence_put_unlocked(q);
}
/**
@@ -902,6 +896,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
}
/**
+ * xe_exec_queue_last_fence_get_for_resume() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, takes a ref. Only safe to call in the context of
+ * resuming the hw engine group's long-running exec queue, when the group
+ * semaphore is held.
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
+ struct xe_vm *vm)
+{
+ struct dma_fence *fence;
+
+ lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);
+
+ if (q->last_fence &&
+ test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+ xe_exec_queue_last_fence_put_unlocked(q);
+
+ fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
+ dma_fence_get(fence);
+ return fence;
+}
+
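As a hedged illustration of the intended calling context, a resume path might walk the group's exec queues while holding the semaphore for write; the list member name used here is an assumption for illustration only:

static void resume_collect_fences_sketch(struct xe_hw_engine_group *group)
{
	struct xe_exec_queue *q;

	lockdep_assert_held_write(&group->mode_sem);

	/* 'exec_queue_list' is an assumed member name, not from this patch */
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		struct dma_fence *fence =
			xe_exec_queue_last_fence_get_for_resume(q, q->vm);

		/* ... wait on or chain the fence as needed ... */
		dma_fence_put(fence);
	}
}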
+/**
* xe_exec_queue_last_fence_set() - Set last fence
* @q: The exec queue
* @vm: The VM the engine does a bind or exec for
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index ded77b0f3b90..90c7f73eab88 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -20,7 +20,11 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
u64 extensions);
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
struct xe_vm *vm,
- enum xe_engine_class class, u32 flags);
+ enum xe_engine_class class,
+ u32 flags, u64 extensions);
+struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
+ struct xe_tile *tile,
+ u32 flags, u64 extensions);
void xe_exec_queue_fini(struct xe_exec_queue *q);
void xe_exec_queue_destroy(struct kref *ref);
@@ -73,6 +77,8 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e);
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
struct xe_vm *vm);
+struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *e,
+ struct xe_vm *vm);
void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
struct dma_fence *fence);
int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index fc2a1a20b7e4..7deb480e26af 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -140,6 +140,8 @@ struct xe_exec_queue {
* Protected by @vm's resv. Unused if @vm == NULL.
*/
u64 tlb_flush_seqno;
+ /** @hw_engine_group_link: link into exec queues in the same hw engine group */
+ struct list_head hw_engine_group_link;
/** @lrc: logical ring context for this exec queue */
struct xe_lrc *lrc[];
};
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 0cdbc1296e88..f3fca5565d32 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -30,6 +30,39 @@
#include "xe_wa.h"
#include "xe_wopcm.h"
+/**
+ * DOC: Global Graphics Translation Table (GGTT)
+ *
+ * Xe GGTT implements support for a Global Virtual Address space that is used
+ * for resources that are accessible to privileged (i.e. kernel-mode) processes,
+ * and not tied to a specific user-level process. For example, the Graphics
+ * micro-Controller (GuC) and Display Engine (if present) utilize this Global
+ * address space.
+ *
+ * The Global GTT (GGTT) translates from the Global virtual address to a physical
+ * address that can be accessed by HW. The GGTT is a flat, single-level table.
+ *
+ * Xe implements a simplified version of the GGTT, specifically managing only the
+ * range that goes from the top of the Write Once Protected Content Memory (WOPCM)
+ * to a predefined GUC_GGTT_TOP. This approach avoids complications related to
+ * the GuC hardware limitations. The GuC address space
+ * is limited on both ends of the GGTT, because the GuC shim HW redirects
+ * accesses to those addresses to other HW areas instead of going through the
+ * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size,
+ * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things
+ * simple, instead of checking each object to see if they are accessed by GuC or
+ * not, we just exclude those areas from the allocator. Additionally, to simplify
+ * the driver load, we use the maximum WOPCM size in this logic instead of the
+ * programmed one, so we don't need to wait until the actual size to be
+ * programmed is determined (which requires FW fetch) before initializing the
+ * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs
+ * depending on the platform) but we can live with this. Another benefit of this
+ * is that the GuC bootrom can't access anything below the WOPCM max size, so
+ * anything the bootrom needs to access (e.g. an RSA key) needs to be placed in
+ * the GGTT above the WOPCM max size. Starting the GGTT allocations above the
+ * WOPCM max size gives us the correct placement for free.
+ */
+
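To make the managed range concrete, here is an illustrative-only helper (the function name is made up) computing the window handed to the allocator from the symbols used later in this file:

static u64 ggtt_managed_range_sketch(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	u64 start = xe_wopcm_size(xe);			/* bottom limit: WOPCM max size */
	u64 end = min_t(u64, ggtt->size, GUC_GGTT_TOP);	/* top limit: GuC redirection */

	/* this is the only window drm_mm is allowed to allocate from */
	return end - start;
}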
static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
u16 pat_index)
{
@@ -128,11 +161,12 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
struct xe_ggtt *ggtt = arg;
+ destroy_workqueue(ggtt->wq);
mutex_destroy(&ggtt->lock);
drm_mm_takedown(&ggtt->mm);
}
-static void ggtt_fini(struct drm_device *drm, void *arg)
+static void ggtt_fini(void *arg)
{
struct xe_ggtt *ggtt = arg;
@@ -164,12 +198,16 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
};
-/*
- * Early GGTT initialization, which allows to create new mappings usable by the
- * GuC.
- * Mappings are not usable by the HW engines, as it doesn't have scratch /
+/**
+ * xe_ggtt_init_early - Early GGTT initialization
+ * @ggtt: the &xe_ggtt to be initialized
+ *
+ * It allows the creation of new mappings usable by the GuC.
+ * Mappings are not usable by the HW engines, as the GGTT doesn't have scratch nor
* initial clear done to it yet. That will happen in the regular, non-early
- * GGTT init.
+ * GGTT initialization.
+ *
+ * Return: 0 on success or a negative error code on failure.
*/
int xe_ggtt_init_early(struct xe_ggtt *ggtt)
{
@@ -194,29 +232,6 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
ggtt->flags |= XE_GGTT_FLAGS_64K;
- /*
- * 8B per entry, each points to a 4KB page.
- *
- * The GuC address space is limited on both ends of the GGTT, because
- * the GuC shim HW redirects accesses to those addresses to other HW
- * areas instead of going through the GGTT. On the bottom end, the GuC
- * can't access offsets below the WOPCM size, while on the top side the
- * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of
- * checking each object to see if they are accessed by GuC or not, we
- * just exclude those areas from the allocator. Additionally, to
- * simplify the driver load, we use the maximum WOPCM size in this logic
- * instead of the programmed one, so we don't need to wait until the
- * actual size to be programmed is determined (which requires FW fetch)
- * before initializing the GGTT. These simplifications might waste space
- * in the GGTT (about 20-25 MBs depending on the platform) but we can
- * live with this.
- *
- * Another benifit of this is the GuC bootrom can't access anything
- * below the WOPCM max size so anything the bootom needs to access (e.g.
- * a RSA key) needs to be placed in the GGTT above the WOPCM max size.
- * Starting the GGTT allocations above the WOPCM max give us the correct
- * placement for free.
- */
if (ggtt->size > GUC_GGTT_TOP)
ggtt->size = GUC_GGTT_TOP;
@@ -228,6 +243,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
else
ggtt->pt_ops = &xelp_pt_ops;
+ ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);
+
drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
ggtt->size - xe_wopcm_size(xe));
mutex_init(&ggtt->lock);
@@ -262,6 +279,77 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
mutex_unlock(&ggtt->lock);
}
+static void ggtt_node_remove(struct xe_ggtt_node *node)
+{
+ struct xe_ggtt *ggtt = node->ggtt;
+ struct xe_device *xe = tile_to_xe(ggtt->tile);
+ bool bound;
+ int idx;
+
+ bound = drm_dev_enter(&xe->drm, &idx);
+
+ mutex_lock(&ggtt->lock);
+ if (bound)
+ xe_ggtt_clear(ggtt, node->base.start, node->base.size);
+ drm_mm_remove_node(&node->base);
+ node->base.size = 0;
+ mutex_unlock(&ggtt->lock);
+
+ if (!bound)
+ goto free_node;
+
+ if (node->invalidate_on_remove)
+ xe_ggtt_invalidate(ggtt);
+
+ drm_dev_exit(idx);
+
+free_node:
+ xe_ggtt_node_fini(node);
+}
+
+static void ggtt_node_remove_work_func(struct work_struct *work)
+{
+ struct xe_ggtt_node *node = container_of(work, typeof(*node),
+ delayed_removal_work);
+ struct xe_device *xe = tile_to_xe(node->ggtt->tile);
+
+ xe_pm_runtime_get(xe);
+ ggtt_node_remove(node);
+ xe_pm_runtime_put(xe);
+}
+
+/**
+ * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT
+ * @node: the &xe_ggtt_node to be removed
+ * @invalidate: if node needs invalidation upon removal
+ */
+void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate)
+{
+ struct xe_ggtt *ggtt;
+ struct xe_device *xe;
+
+ if (!node || !node->ggtt)
+ return;
+
+ ggtt = node->ggtt;
+ xe = tile_to_xe(ggtt->tile);
+
+ node->invalidate_on_remove = invalidate;
+
+ if (xe_pm_runtime_get_if_active(xe)) {
+ ggtt_node_remove(node);
+ xe_pm_runtime_put(xe);
+ } else {
+ queue_work(ggtt->wq, &node->delayed_removal_work);
+ }
+}
+
+/**
+ * xe_ggtt_init - Regular non-early GGTT initialization
+ * @ggtt: the &xe_ggtt to be initialized
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
int xe_ggtt_init(struct xe_ggtt *ggtt)
{
struct xe_device *xe = tile_to_xe(ggtt->tile);
@@ -289,7 +377,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
xe_ggtt_initial_clear(ggtt);
- return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt);
+ return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt);
err:
ggtt->scratch = NULL;
return err;
@@ -314,26 +402,6 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
}
-void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
-{
- u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
- u64 addr, scratch_pte;
-
- scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index);
-
- printk("%sGlobal GTT:", prefix);
- for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
- unsigned int i = addr / XE_PAGE_SIZE;
-
- xe_tile_assert(ggtt->tile, addr <= U32_MAX);
- if (ggtt->gsm[i] == scratch_pte)
- continue;
-
- printk("%s ggtt[0x%08x] = 0x%016llx",
- prefix, (u32)addr, ggtt->gsm[i]);
- }
-}
-
static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
const struct drm_mm_node *node, const char *description)
{
@@ -347,88 +415,180 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
}
/**
- * xe_ggtt_balloon - prevent allocation of specified GGTT addresses
- * @ggtt: the &xe_ggtt where we want to make reservation
+ * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses
+ * @node: the &xe_ggtt_node to hold the reserved GGTT region
 * @start: the starting GGTT address of the reserved region
 * @end: the end GGTT address of the reserved region
- * @node: the &drm_mm_node to hold reserved GGTT node
*
- * Use xe_ggtt_deballoon() to release a reserved GGTT node.
+ * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node)
+int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end)
{
+ struct xe_ggtt *ggtt = node->ggtt;
int err;
xe_tile_assert(ggtt->tile, start < end);
xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE));
xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE));
- xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node));
+ xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base));
- node->color = 0;
- node->start = start;
- node->size = end - start;
+ node->base.color = 0;
+ node->base.start = start;
+ node->base.size = end - start;
mutex_lock(&ggtt->lock);
- err = drm_mm_reserve_node(&ggtt->mm, node);
+ err = drm_mm_reserve_node(&ggtt->mm, &node->base);
mutex_unlock(&ggtt->lock);
if (xe_gt_WARN(ggtt->tile->primary_gt, err,
"Failed to balloon GGTT %#llx-%#llx (%pe)\n",
- node->start, node->start + node->size, ERR_PTR(err)))
+ node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
return err;
- xe_ggtt_dump_node(ggtt, node, "balloon");
+ xe_ggtt_dump_node(ggtt, &node->base, "balloon");
return 0;
}
/**
- * xe_ggtt_deballoon - release a reserved GGTT region
- * @ggtt: the &xe_ggtt where reserved node belongs
- * @node: the &drm_mm_node with reserved GGTT region
+ * xe_ggtt_node_remove_balloon - release a reserved GGTT region
+ * @node: the &xe_ggtt_node with reserved GGTT region
*
- * See xe_ggtt_balloon() for details.
+ * See xe_ggtt_node_insert_balloon() for details.
*/
-void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node)
{
- if (!drm_mm_node_allocated(node))
+ if (!node || !node->ggtt)
return;
- xe_ggtt_dump_node(ggtt, node, "deballoon");
+ if (!drm_mm_node_allocated(&node->base))
+ goto free_node;
- mutex_lock(&ggtt->lock);
- drm_mm_remove_node(node);
- mutex_unlock(&ggtt->lock);
+ xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon");
+
+ mutex_lock(&node->ggtt->lock);
+ drm_mm_remove_node(&node->base);
+ mutex_unlock(&node->ggtt->lock);
+
+free_node:
+ xe_ggtt_node_fini(node);
}
-int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
- u32 size, u32 align, u32 mm_flags)
+/**
+ * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT
+ * @node: the &xe_ggtt_node to be inserted
+ * @size: size of the node
+ * @align: alignment constraint of the node
+ * @mm_flags: flags to control the node behavior
+ *
+ * It can only be called after xe_ggtt_init() has run.
+ * To be used in cases where ggtt->lock is already taken.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node,
+ u32 size, u32 align, u32 mm_flags)
{
- return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
+ return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0,
mm_flags);
}
-int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
- u32 size, u32 align)
+/**
+ * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT
+ * @node: the &xe_ggtt_node to be inserted
+ * @size: size of the node
+ * @align: alignment constraint of the node
+ *
+ * It can only be called after xe_ggtt_init() has run.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align)
{
int ret;
- mutex_lock(&ggtt->lock);
- ret = xe_ggtt_insert_special_node_locked(ggtt, node, size,
- align, DRM_MM_INSERT_HIGH);
- mutex_unlock(&ggtt->lock);
+ if (!node || !node->ggtt)
+ return -ENOENT;
+
+ mutex_lock(&node->ggtt->lock);
+ ret = xe_ggtt_node_insert_locked(node, size, align,
+ DRM_MM_INSERT_HIGH);
+ mutex_unlock(&node->ggtt->lock);
return ret;
}
+/**
+ * xe_ggtt_node_init - Initialize %xe_ggtt_node struct
+ * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved.
+ *
+ * This function allocates the struct %xe_ggtt_node and returns its pointer.
+ * This struct will then be freed after the node removal upon xe_ggtt_node_remove()
+ * or xe_ggtt_node_remove_balloon().
+ * Having the %xe_ggtt_node struct allocated doesn't mean that the node is already
+ * allocated in the GGTT. Only xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), or
+ * xe_ggtt_node_insert_balloon() will insert or reserve the node in the GGTT.
+ *
+ * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise.
+ */
+struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt)
+{
+ struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS);
+
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func);
+ node->ggtt = ggtt;
+
+ return node;
+}
+
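A minimal lifecycle sketch, not taken from this file, tying the helpers above together; errors on the insert path free the node with xe_ggtt_node_fini() as the documentation requires, and a successful reservation is later released with xe_ggtt_node_remove():

static struct xe_ggtt_node *reserve_ggtt_space_sketch(struct xe_ggtt *ggtt,
						      u32 size, u32 align)
{
	struct xe_ggtt_node *node = xe_ggtt_node_init(ggtt);
	int err;

	if (IS_ERR(node))
		return node;

	err = xe_ggtt_node_insert(node, size, align);
	if (err) {
		xe_ggtt_node_fini(node);
		return ERR_PTR(err);
	}

	/* node->base.start is now the reserved GGTT offset */
	return node;
}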
+/**
+ * xe_ggtt_node_fini - Forcibly finalize %xe_ggtt_node struct
+ * @node: the &xe_ggtt_node to be freed
+ *
+ * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(),
+ * or xe_ggtt_node_insert_balloon(), and this @node is not going to be reused,
+ * this function needs to be called to free the %xe_ggtt_node struct.
+ */
+void xe_ggtt_node_fini(struct xe_ggtt_node *node)
+{
+ kfree(node);
+}
+
+/**
+ * xe_ggtt_node_allocated - Check if node is allocated in GGTT
+ * @node: the &xe_ggtt_node to be inspected
+ *
+ * Return: True if allocated, False otherwise.
+ */
+bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
+{
+ if (!node || !node->ggtt)
+ return false;
+
+ return drm_mm_node_allocated(&node->base);
+}
+
+/**
+ * xe_ggtt_map_bo - Map the BO into GGTT
+ * @ggtt: the &xe_ggtt where node will be mapped
+ * @bo: the &xe_bo to be mapped
+ */
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
- u64 start = bo->ggtt_node.start;
+ u64 start;
u64 offset, pte;
+ if (XE_WARN_ON(!bo->ggtt_node))
+ return;
+
+ start = bo->ggtt_node->base.start;
+
for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte);
@@ -444,9 +604,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
alignment = SZ_64K;
- if (XE_WARN_ON(bo->ggtt_node.size)) {
+ if (XE_WARN_ON(bo->ggtt_node)) {
/* Someone's already inserted this BO in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
return 0;
}
@@ -455,69 +615,108 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
return err;
xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
+
+ bo->ggtt_node = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(bo->ggtt_node)) {
+ err = PTR_ERR(bo->ggtt_node);
+ goto out;
+ }
+
mutex_lock(&ggtt->lock);
- err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
+ err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size,
alignment, 0, start, end, 0);
- if (!err)
+ if (err)
+ xe_ggtt_node_fini(bo->ggtt_node);
+ else
xe_ggtt_map_bo(ggtt, bo);
mutex_unlock(&ggtt->lock);
if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE)
xe_ggtt_invalidate(ggtt);
+
+out:
xe_pm_runtime_put(tile_to_xe(ggtt->tile));
return err;
}
+/**
+ * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space
+ * @ggtt: the &xe_ggtt where bo will be inserted
+ * @bo: the &xe_bo to be inserted
+ * @start: address where it will be inserted
+ * @end: end of the range where it will be inserted
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
u64 start, u64 end)
{
return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
}
+/**
+ * xe_ggtt_insert_bo - Insert BO into GGTT
+ * @ggtt: the &xe_ggtt where bo will be inserted
+ * @bo: the &xe_bo to be inserted
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
}
-void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
- bool invalidate)
+/**
+ * xe_ggtt_remove_bo - Remove a BO from the GGTT
+ * @ggtt: the &xe_ggtt where node will be removed
+ * @bo: the &xe_bo to be removed
+ */
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
- struct xe_device *xe = tile_to_xe(ggtt->tile);
- bool bound;
- int idx;
-
- bound = drm_dev_enter(&xe->drm, &idx);
- if (bound)
- xe_pm_runtime_get_noresume(xe);
-
- mutex_lock(&ggtt->lock);
- if (bound)
- xe_ggtt_clear(ggtt, node->start, node->size);
- drm_mm_remove_node(node);
- node->size = 0;
- mutex_unlock(&ggtt->lock);
-
- if (!bound)
+ if (XE_WARN_ON(!bo->ggtt_node))
return;
- if (invalidate)
- xe_ggtt_invalidate(ggtt);
+ /* This BO is not currently in the GGTT */
+ xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
- xe_pm_runtime_put(xe);
- drm_dev_exit(idx);
+ xe_ggtt_node_remove(bo->ggtt_node,
+ bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
}
-void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+/**
+ * xe_ggtt_largest_hole - Largest GGTT hole
+ * @ggtt: the &xe_ggtt that will be inspected
+ * @alignment: minimum alignment
+ * @spare: if not NULL, in: desired memory size to be spared, out: adjusted possible spare
+ *
+ * Return: size of the largest contiguous GGTT region
+ */
+u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare)
{
- if (XE_WARN_ON(!bo->ggtt_node.size))
- return;
+ const struct drm_mm *mm = &ggtt->mm;
+ const struct drm_mm_node *entry;
+ u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile));
+ u64 hole_start, hole_end, hole_size;
+ u64 max_hole = 0;
- /* This BO is not currently in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
+ mutex_lock(&ggtt->lock);
- xe_ggtt_remove_node(ggtt, &bo->ggtt_node,
- bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
+ drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
+ hole_start = max(hole_start, hole_min_start);
+ hole_start = ALIGN(hole_start, alignment);
+ hole_end = ALIGN_DOWN(hole_end, alignment);
+ if (hole_start >= hole_end)
+ continue;
+ hole_size = hole_end - hole_start;
+ if (spare)
+ *spare -= min3(*spare, hole_size, max_hole);
+ max_hole = max(max_hole, hole_size);
+ }
+
+ mutex_unlock(&ggtt->lock);
+
+ return max_hole;
}
#ifdef CONFIG_PCI_IOV
@@ -548,22 +747,28 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node
/**
* xe_ggtt_assign - assign a GGTT region to the VF
- * @ggtt: the &xe_ggtt where the node belongs
- * @node: the &drm_mm_node to update
+ * @node: the &xe_ggtt_node to update
* @vfid: the VF identifier
*
* This function is used by the PF driver to assign a GGTT region to the VF.
* In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some
* platforms VFs can't modify that either.
*/
-void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid)
+void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid)
{
- mutex_lock(&ggtt->lock);
- xe_ggtt_assign_locked(ggtt, node, vfid);
- mutex_unlock(&ggtt->lock);
+ mutex_lock(&node->ggtt->lock);
+ xe_ggtt_assign_locked(node->ggtt, &node->base, vfid);
+ mutex_unlock(&node->ggtt->lock);
}
#endif
+/**
+ * xe_ggtt_dump - Dump GGTT for debug
+ * @ggtt: the &xe_ggtt to be dumped
+ * @p: the &drm_printer helper handle to be used to dump the information
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
{
int err;
@@ -576,3 +781,43 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
mutex_unlock(&ggtt->lock);
return err;
}
+
+/**
+ * xe_ggtt_print_holes - Print holes
+ * @ggtt: the &xe_ggtt to be inspected
+ * @alignment: min alignment
+ * @p: the &drm_printer
+ *
+ * Print GGTT ranges that are available and return total size available.
+ *
+ * Return: Total available size.
+ */
+u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p)
+{
+ const struct drm_mm *mm = &ggtt->mm;
+ const struct drm_mm_node *entry;
+ u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile));
+ u64 hole_start, hole_end, hole_size;
+ u64 total = 0;
+ char buf[10];
+
+ mutex_lock(&ggtt->lock);
+
+ drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
+ hole_start = max(hole_start, hole_min_start);
+ hole_start = ALIGN(hole_start, alignment);
+ hole_end = ALIGN_DOWN(hole_end, alignment);
+ if (hole_start >= hole_end)
+ continue;
+ hole_size = hole_end - hole_start;
+ total += hole_size;
+
+ string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n",
+ hole_start, hole_end - 1, buf);
+ }
+
+ mutex_unlock(&ggtt->lock);
+
+ return total;
+}
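A hedged sketch of how a debug path could combine the two new inspection helpers; the alignment and spare values below are arbitrary examples, not taken from this patch:

static void dump_ggtt_availability_sketch(struct xe_ggtt *ggtt,
					  struct drm_printer *p)
{
	u64 spare = SZ_4M;	/* hypothetical size we want to keep aside */
	u64 largest = xe_ggtt_largest_hole(ggtt, SZ_4K, &spare);
	u64 total = xe_ggtt_print_holes(ggtt, SZ_4K, p);

	drm_printf(p, "largest hole: %llu total free: %llu unspared: %llu\n",
		   largest, total, spare);
}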
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 6a96fd54bf60..27e7d67de004 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -12,28 +12,30 @@ struct drm_printer;
int xe_ggtt_init_early(struct xe_ggtt *ggtt);
int xe_ggtt_init(struct xe_ggtt *ggtt);
-void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
-
-int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node);
-void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node);
-
-int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
- u32 size, u32 align);
-int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
- struct drm_mm_node *node,
- u32 size, u32 align, u32 mm_flags);
-void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
- bool invalidate);
+
+struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt);
+void xe_ggtt_node_fini(struct xe_ggtt_node *node);
+int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node,
+ u64 start, u64 end);
+void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node);
+
+int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align);
+int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node,
+ u32 size, u32 align, u32 mm_flags);
+void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate);
+bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node);
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
u64 start, u64 end);
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare);
int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
+u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p);
#ifdef CONFIG_PCI_IOV
-void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid);
+void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid);
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index 2245d88d8f39..cb02b7994a9a 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -13,30 +13,70 @@
struct xe_bo;
struct xe_gt;
+/**
+ * struct xe_ggtt - Main GGTT struct
+ *
+ * In general, each tile contains its own Global Graphics Translation Table
+ * (GGTT) instance.
+ */
struct xe_ggtt {
+ /** @tile: Back pointer to tile where this GGTT belongs */
struct xe_tile *tile;
-
+ /** @size: Total size of this GGTT */
u64 size;
#define XE_GGTT_FLAGS_64K BIT(0)
+ /**
+ * @flags: Flags for this GGTT
+ * Acceptable flags:
+ * - %XE_GGTT_FLAGS_64K - set if the PTE size is 64K; otherwise the regular 4K is used.
+ */
unsigned int flags;
-
+ /** @scratch: Internal object allocation used as a scratch page */
struct xe_bo *scratch;
-
+ /** @lock: Mutex lock to protect GGTT data */
struct mutex lock;
-
+ /**
+ * @gsm: The iomem pointer to the actual location of the translation
+ * table located in the GSM for easy PTE manipulation
+ */
u64 __iomem *gsm;
-
+ /** @pt_ops: Page Table operations per platform */
const struct xe_ggtt_pt_ops *pt_ops;
-
+ /** @mm: The memory manager used to manage individual GGTT allocations */
struct drm_mm mm;
-
/** @access_count: counts GGTT writes */
unsigned int access_count;
+ /** @wq: Dedicated unordered work queue to process node removals */
+ struct workqueue_struct *wq;
+};
+
+/**
+ * struct xe_ggtt_node - A node in GGTT.
+ *
+ * This struct needs to be initialized (only once) with xe_ggtt_node_init() before any node
+ * insertion, reservation, or 'ballooning'.
+ * It is then finalized by either xe_ggtt_node_remove() or xe_ggtt_node_remove_balloon().
+ */
+struct xe_ggtt_node {
+ * @ggtt: Back pointer to the xe_ggtt where this region will be inserted
+ struct xe_ggtt *ggtt;
+ /** @base: A drm_mm_node */
+ struct drm_mm_node base;
+ /** @delayed_removal_work: The work struct for the delayed removal */
+ struct work_struct delayed_removal_work;
+ /** @invalidate_on_remove: If it needs invalidation upon removal */
+ bool invalidate_on_remove;
};
+/**
+ * struct xe_ggtt_pt_ops - GGTT Page table operations
+ *
+ * These operations can vary from platform to platform.
+ */
struct xe_ggtt_pt_ops {
+ /** @pte_encode_bo: Encode PTE address for a given BO */
u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
+ /** @ggtt_set_pte: Directly write into GGTT's PTE */
void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte);
};
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
index e4ad1d6ce1d5..c518d1d16d82 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -15,11 +15,11 @@ static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched)
{
struct xe_sched_msg *msg;
- spin_lock(&sched->base.job_list_lock);
+ xe_sched_msg_lock(sched);
msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link);
if (msg)
xe_sched_process_msg_queue(sched);
- spin_unlock(&sched->base.job_list_lock);
+ xe_sched_msg_unlock(sched);
}
static struct xe_sched_msg *
@@ -27,12 +27,12 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched)
{
struct xe_sched_msg *msg;
- spin_lock(&sched->base.job_list_lock);
+ xe_sched_msg_lock(sched);
msg = list_first_entry_or_null(&sched->msgs,
struct xe_sched_msg, link);
if (msg)
- list_del(&msg->link);
- spin_unlock(&sched->base.job_list_lock);
+ list_del_init(&msg->link);
+ xe_sched_msg_unlock(sched);
return msg;
}
@@ -93,9 +93,16 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg)
{
- spin_lock(&sched->base.job_list_lock);
- list_add_tail(&msg->link, &sched->msgs);
- spin_unlock(&sched->base.job_list_lock);
+ xe_sched_msg_lock(sched);
+ xe_sched_add_msg_locked(sched, msg);
+ xe_sched_msg_unlock(sched);
+}
+void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched,
+ struct xe_sched_msg *msg)
+{
+ lockdep_assert_held(&sched->base.job_list_lock);
+
+ list_add_tail(&msg->link, &sched->msgs);
xe_sched_process_msg_queue(sched);
}
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index 10c6bb9c9386..cee9c6809fc0 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -24,6 +24,18 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg);
+void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched,
+ struct xe_sched_msg *msg);
+
+static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched)
+{
+ spin_lock(&sched->base.job_list_lock);
+}
+
+static inline void xe_sched_msg_unlock(struct xe_gpu_scheduler *sched)
+{
+ spin_unlock(&sched->base.job_list_lock);
+}
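A hedged usage sketch of the exported lock helpers: they let a caller check or update its own state under the same lock before queuing a message. The condition below is purely illustrative:

static void maybe_queue_msg_sketch(struct xe_gpu_scheduler *sched,
				   struct xe_sched_msg *msg,
				   bool already_queued)
{
	xe_sched_msg_lock(sched);
	if (!already_queued)		/* caller-specific check, assumed */
		xe_sched_add_msg_locked(sched, msg);
	xe_sched_msg_unlock(sched);
}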
static inline void xe_sched_stop(struct xe_gpu_scheduler *sched)
{
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 2a612652bb13..8a137cb83318 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -450,11 +450,6 @@ static void free_resources(void *arg)
xe_exec_queue_put(gsc->q);
gsc->q = NULL;
}
-
- if (gsc->private) {
- xe_bo_unpin_map_no_vm(gsc->private);
- gsc->private = NULL;
- }
}
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
@@ -474,10 +469,9 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
if (!hwe)
return -ENODEV;
- bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
- ttm_bo_type_kernel,
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT);
+ bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M,
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT);
if (IS_ERR(bo))
return PTR_ERR(bo);
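The switch to xe_managed_bo_create_pin_map() is why free_resources() above no longer unpins gsc->private: the BO's unpin/unmap is assumed here to be tied to driver-managed cleanup. A minimal sketch of the pattern under that assumption:

static struct xe_bo *alloc_gsc_backing_sketch(struct xe_device *xe,
					      struct xe_tile *tile)
{
	/* lifetime is driver-managed; no explicit unpin on teardown */
	return xe_managed_bo_create_pin_map(xe, tile, SZ_4M,
					    XE_BO_FLAG_STOLEN |
					    XE_BO_FLAG_GGTT);
}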
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index aa812a2bc3ed..2d6ea8c01445 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -62,11 +62,6 @@ gsc_to_gt(struct xe_gsc *gsc)
return container_of(gsc, struct xe_gt, uc.gsc);
}
-static inline struct xe_device *kdev_to_xe(struct device *kdev)
-{
- return dev_get_drvdata(kdev);
-}
-
bool xe_gsc_proxy_init_done(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
@@ -345,7 +340,7 @@ void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir)
static int xe_gsc_proxy_component_bind(struct device *xe_kdev,
struct device *mei_kdev, void *data)
{
- struct xe_device *xe = kdev_to_xe(xe_kdev);
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
struct xe_gt *gt = xe->tiles[0].media_gt;
struct xe_gsc *gsc = &gt->uc.gsc;
@@ -360,7 +355,7 @@ static int xe_gsc_proxy_component_bind(struct device *xe_kdev,
static void xe_gsc_proxy_component_unbind(struct device *xe_kdev,
struct device *mei_kdev, void *data)
{
- struct xe_device *xe = kdev_to_xe(xe_kdev);
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
struct xe_gt *gt = xe->tiles[0].media_gt;
struct xe_gsc *gsc = &gt->uc.gsc;
@@ -376,27 +371,6 @@ static const struct component_ops xe_gsc_proxy_component_ops = {
.unbind = xe_gsc_proxy_component_unbind,
};
-static void proxy_channel_free(struct drm_device *drm, void *arg)
-{
- struct xe_gsc *gsc = arg;
-
- if (!gsc->proxy.bo)
- return;
-
- if (gsc->proxy.to_csme) {
- kfree(gsc->proxy.to_csme);
- gsc->proxy.to_csme = NULL;
- gsc->proxy.from_csme = NULL;
- }
-
- if (gsc->proxy.bo) {
- iosys_map_clear(&gsc->proxy.to_gsc);
- iosys_map_clear(&gsc->proxy.from_gsc);
- xe_bo_unpin_map_no_vm(gsc->proxy.bo);
- gsc->proxy.bo = NULL;
- }
-}
-
static int proxy_channel_alloc(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
@@ -405,18 +379,15 @@ static int proxy_channel_alloc(struct xe_gsc *gsc)
struct xe_bo *bo;
void *csme;
- csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL);
+ csme = drmm_kzalloc(&xe->drm, GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL);
if (!csme)
return -ENOMEM;
- bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
- if (IS_ERR(bo)) {
- kfree(csme);
+ bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT);
+ if (IS_ERR(bo))
return PTR_ERR(bo);
- }
gsc->proxy.bo = bo;
gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0);
@@ -424,7 +395,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc)
gsc->proxy.to_csme = csme;
gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE;
- return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc);
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 282b5dc39908..08a004d698d4 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -112,9 +112,9 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
if (!xe_gt_is_media_type(gt)) {
xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
- reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg |= CG_DIS_CNTLBUS;
- xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+ xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
@@ -136,9 +136,9 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
if (WARN_ON(err))
return;
- reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
- xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+ xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
@@ -559,7 +559,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
- xe_gt_enable_host_l2_vram(gt);
err = xe_uc_init(&gt->uc);
if (err)
@@ -571,6 +570,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_topology_init(gt);
xe_gt_mcr_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
out_fw:
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 5e7fd937917a..8f95d3a5949b 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -17,7 +17,9 @@
#include "xe_gt_mcr.h"
#include "xe_gt_sriov_pf_debugfs.h"
#include "xe_gt_sriov_vf_debugfs.h"
+#include "xe_gt_stats.h"
#include "xe_gt_topology.h"
+#include "xe_guc_hwconfig.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
@@ -269,6 +271,15 @@ static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
+static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
+{
+ xe_pm_runtime_get(gt_to_xe(gt));
+ xe_guc_hwconfig_dump(&gt->uc.guc, p);
+ xe_pm_runtime_put(gt_to_xe(gt));
+
+ return 0;
+}
+
static const struct drm_info_list debugfs_list[] = {
{"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
{"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset},
@@ -286,6 +297,8 @@ static const struct drm_info_list debugfs_list[] = {
{"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
{"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc},
{"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc},
+ {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info},
+ {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig},
};
void xe_gt_debugfs_register(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 6d948a469126..7d7bd0be6233 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -8,8 +8,10 @@
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_gt_types.h"
+#include "xe_guc_hwconfig.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
@@ -297,6 +299,36 @@ static void init_steering_mslice(struct xe_gt *gt)
static unsigned int dss_per_group(struct xe_gt *gt)
{
+ struct xe_guc *guc = &gt->uc.guc;
+ u32 max_slices = 0, max_subslices = 0;
+ int ret;
+
+ /*
+ * Try to query the GuC's hwconfig table for the maximum number of
+ * slices and subslices. These don't reflect the platform's actual
+ * slice/DSS counts, just the physical layout by which we should
+ * determine the steering targets. On older platforms with older GuC
+ * firmware releases these attributes may not be included in the table,
+ * so in that case we fall back to the old hardcoded layouts.
+ */
+#define HWCONFIG_ATTR_MAX_SLICES 1
+#define HWCONFIG_ATTR_MAX_SUBSLICES 70
+
+ ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SLICES,
+ &max_slices);
+ if (ret < 0 || max_slices == 0)
+ goto fallback;
+
+ ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SUBSLICES,
+ &max_subslices);
+ if (ret < 0 || max_subslices == 0)
+ goto fallback;
+
+ return DIV_ROUND_UP(max_subslices, max_slices);
+
+fallback:
+ xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n");
if (gt_to_xe(gt)->info.platform == XE_PVC)
return 8;
else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
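As a worked example with made-up hwconfig values: if the table reported max_slices = 8 and max_subslices = 64, dss_per_group() would return DIV_ROUND_UP(64, 8) = 8, matching the hardcoded PVC fallback above; with max_slices = 4 and the same 64 subslices the result would instead be 16.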
@@ -314,16 +346,16 @@ static unsigned int dss_per_group(struct xe_gt *gt)
*/
void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
{
- int dss_per_grp = dss_per_group(gt);
-
xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS);
- *group = dss / dss_per_grp;
- *instance = dss % dss_per_grp;
+ *group = dss / gt->steering_dss_per_grp;
+ *instance = dss % gt->steering_dss_per_grp;
}
static void init_steering_dss(struct xe_gt *gt)
{
+ gt->steering_dss_per_grp = dss_per_group(gt);
+
xe_gt_mcr_get_dss_steering(gt,
min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)),
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index b2a7fa55bd18..0be4687bfc20 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -287,7 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
PFD_VIRTUAL_ADDR_LO_SHIFT;
pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
- PF_QUEUE_NUM_DW;
+ pf_queue->num_dw;
ret = true;
}
spin_unlock_irq(&pf_queue->lock);
@@ -299,7 +299,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue)
{
lockdep_assert_held(&pf_queue->lock);
- return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <=
+ return CIRC_SPACE(pf_queue->head, pf_queue->tail,
+ pf_queue->num_dw) <=
PF_MSG_LEN_DW;
}
@@ -312,22 +313,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
u32 asid;
bool full;
- /*
- * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
- */
- BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW);
-
if (unlikely(len != PF_MSG_LEN_DW))
return -EPROTO;
asid = FIELD_GET(PFD_ASID, msg[1]);
pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
+ /*
+ * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
+ */
+ xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW));
+
spin_lock_irqsave(&pf_queue->lock, flags);
full = pf_queue_full(pf_queue);
if (!full) {
memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
- pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW;
+ pf_queue->head = (pf_queue->head + len) %
+ pf_queue->num_dw;
queue_work(gt->usm.pf_wq, &pf_queue->worker);
} else {
drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
@@ -386,26 +388,57 @@ static void pagefault_fini(void *arg)
{
struct xe_gt *gt = arg;
struct xe_device *xe = gt_to_xe(gt);
+ int i;
if (!xe->info.has_usm)
return;
destroy_workqueue(gt->usm.acc_wq);
destroy_workqueue(gt->usm.pf_wq);
+
+ for (i = 0; i < NUM_PF_QUEUE; ++i)
+ kfree(gt->usm.pf_queue[i].data);
+}
+
+static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
+{
+ xe_dss_mask_t all_dss;
+ int num_dss, num_eus;
+
+ bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+ XE_MAX_DSS_FUSE_BITS);
+
+ num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
+ num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
+ XE_MAX_EU_FUSE_BITS) * num_dss;
+
+ /* user can issue separate page faults per EU and per CS */
+ pf_queue->num_dw =
+ (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
+
+ pf_queue->gt = gt;
+ pf_queue->data = kcalloc(pf_queue->num_dw, sizeof(u32), GFP_KERNEL);
+ if (!pf_queue->data)
+ return -ENOMEM;
+
+ spin_lock_init(&pf_queue->lock);
+ INIT_WORK(&pf_queue->worker, pf_queue_work_func);
+
+ return 0;
}
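A worked sketch of the sizing formula with made-up topology numbers; the macros are the real ones used above, while the DSS and EU counts are hypothetical:

static u32 example_pf_queue_num_dw(void)
{
	int num_dss = 64;		/* hypothetical enabled DSS count */
	int num_eus = 8 * num_dss;	/* hypothetical: 8 EUs per DSS */

	/* one outstanding fault per EU plus one per hardware engine */
	return (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
}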
int xe_gt_pagefault_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- int i;
+ int i, ret = 0;
if (!xe->info.has_usm)
return 0;
for (i = 0; i < NUM_PF_QUEUE; ++i) {
- gt->usm.pf_queue[i].gt = gt;
- spin_lock_init(&gt->usm.pf_queue[i].lock);
- INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
+ ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]);
+ if (ret)
+ return ret;
}
for (i = 0; i < NUM_ACC_QUEUE; ++i) {
gt->usm.acc_queue[i].gt = gt;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 227527785afd..41ed07b153b5 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -232,14 +232,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config)
{
u32 n = 0;
- if (drm_mm_node_allocated(&config->ggtt_region)) {
+ if (xe_ggtt_node_allocated(config->ggtt_region)) {
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
- cfg[n++] = lower_32_bits(config->ggtt_region.start);
- cfg[n++] = upper_32_bits(config->ggtt_region.start);
+ cfg[n++] = lower_32_bits(config->ggtt_region->base.start);
+ cfg[n++] = upper_32_bits(config->ggtt_region->base.start);
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
- cfg[n++] = lower_32_bits(config->ggtt_region.size);
- cfg[n++] = upper_32_bits(config->ggtt_region.size);
+ cfg[n++] = lower_32_bits(config->ggtt_region->base.size);
+ cfg[n++] = upper_32_bits(config->ggtt_region->base.size);
}
return n;
@@ -369,29 +369,28 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6
return err ?: err2;
}
-static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node)
+static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node)
{
- struct xe_ggtt *ggtt = tile->mem.ggtt;
-
- if (drm_mm_node_allocated(node)) {
+ if (xe_ggtt_node_allocated(node)) {
/*
* explicit GGTT PTE assignment to the PF using xe_ggtt_assign()
* is redundant, as PTE will be implicitly re-assigned to PF by
 * the xe_ggtt_clear() called by the xe_ggtt_node_remove() below.
*/
- xe_ggtt_remove_node(ggtt, node, false);
+ xe_ggtt_node_remove(node, false);
}
}
static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config)
{
- pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region);
+ pf_release_ggtt(gt_to_tile(gt), config->ggtt_region);
+ config->ggtt_region = NULL;
}
static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
- struct drm_mm_node *node = &config->ggtt_region;
+ struct xe_ggtt_node *node = config->ggtt_region;
struct xe_tile *tile = gt_to_tile(gt);
struct xe_ggtt *ggtt = tile->mem.ggtt;
u64 alignment = pf_get_ggtt_alignment(gt);
@@ -403,40 +402,48 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
size = round_up(size, alignment);
- if (drm_mm_node_allocated(node)) {
+ if (xe_ggtt_node_allocated(node)) {
err = pf_distribute_config_ggtt(tile, vfid, 0, 0);
if (unlikely(err))
return err;
pf_release_ggtt(tile, node);
}
- xe_gt_assert(gt, !drm_mm_node_allocated(node));
+ xe_gt_assert(gt, !xe_ggtt_node_allocated(node));
if (!size)
return 0;
- err = xe_ggtt_insert_special_node(ggtt, node, size, alignment);
+ node = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+
+ err = xe_ggtt_node_insert(node, size, alignment);
if (unlikely(err))
- return err;
+ goto err;
- xe_ggtt_assign(ggtt, node, vfid);
+ xe_ggtt_assign(node, vfid);
xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n",
- vfid, node->start, node->start + node->size - 1);
+ vfid, node->base.start, node->base.start + node->base.size - 1);
- err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size);
+ err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size);
if (unlikely(err))
- return err;
+ goto err;
+ config->ggtt_region = node;
return 0;
+err:
+ xe_ggtt_node_fini(node);
+ return err;
}
static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
- struct drm_mm_node *node = &config->ggtt_region;
+ struct xe_ggtt_node *node = config->ggtt_region;
xe_gt_assert(gt, !xe_gt_is_media_type(gt));
- return drm_mm_node_allocated(node) ? node->size : 0;
+ return xe_ggtt_node_allocated(node) ? node->base.size : 0;
}
/**
@@ -587,30 +594,11 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid,
static u64 pf_get_max_ggtt(struct xe_gt *gt)
{
struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt;
- const struct drm_mm *mm = &ggtt->mm;
- const struct drm_mm_node *entry;
u64 alignment = pf_get_ggtt_alignment(gt);
u64 spare = pf_get_spare_ggtt(gt);
- u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt));
- u64 hole_start, hole_end, hole_size;
- u64 max_hole = 0;
+ u64 max_hole;
- mutex_lock(&ggtt->lock);
-
- drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
- hole_start = max(hole_start, hole_min_start);
- hole_start = ALIGN(hole_start, alignment);
- hole_end = ALIGN_DOWN(hole_end, alignment);
- if (hole_start >= hole_end)
- continue;
- hole_size = hole_end - hole_start;
- xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n",
- hole_start, hole_size / SZ_1K);
- spare -= min3(spare, hole_size, max_hole);
- max_hole = max(max_hole, hole_size);
- }
-
- mutex_unlock(&ggtt->lock);
+ max_hole = xe_ggtt_largest_hole(ggtt, alignment, &spare);
xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n",
max_hole / SZ_1K, spare / SZ_1K);
@@ -2025,13 +2013,15 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p)
for (n = 1; n <= total_vfs; n++) {
config = &gt->sriov.pf.vfs[n].config;
- if (!drm_mm_node_allocated(&config->ggtt_region))
+ if (!xe_ggtt_node_allocated(config->ggtt_region))
continue;
- string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ string_get_size(config->ggtt_region->base.size, 1, STRING_UNITS_2,
+ buf, sizeof(buf));
drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n",
- n, config->ggtt_region.start,
- config->ggtt_region.start + config->ggtt_region.size - 1, buf);
+ n, config->ggtt_region->base.start,
+ config->ggtt_region->base.start + config->ggtt_region->base.size - 1,
+ buf);
}
return 0;
@@ -2119,12 +2109,8 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p)
int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt;
- const struct drm_mm *mm = &ggtt->mm;
- const struct drm_mm_node *entry;
u64 alignment = pf_get_ggtt_alignment(gt);
- u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt));
- u64 hole_start, hole_end, hole_size;
- u64 spare, avail, total = 0;
+ u64 spare, avail, total;
char buf[10];
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
@@ -2132,24 +2118,8 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin
mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
spare = pf_get_spare_ggtt(gt);
+ total = xe_ggtt_print_holes(ggtt, alignment, p);
- mutex_lock(&ggtt->lock);
-
- drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
- hole_start = max(hole_start, hole_min_start);
- hole_start = ALIGN(hole_start, alignment);
- hole_end = ALIGN_DOWN(hole_end, alignment);
- if (hole_start >= hole_end)
- continue;
- hole_size = hole_end - hole_start;
- total += hole_size;
-
- string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf));
- drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n",
- hole_start, hole_end - 1, buf);
- }
-
- mutex_unlock(&ggtt->lock);
mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf));
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
index 7bc66656fcc7..2d3b73d78f14 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
@@ -6,8 +6,7 @@
#ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
#define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
-#include <drm/drm_mm.h>
-
+#include "xe_ggtt_types.h"
#include "xe_guc_klv_thresholds_set_types.h"
struct xe_bo;
@@ -19,7 +18,7 @@ struct xe_bo;
*/
struct xe_gt_sriov_config {
/** @ggtt_region: GGTT region assigned to the VF. */
- struct drm_mm_node ggtt_region;
+ struct xe_ggtt_node *ggtt_region;
/** @lmem_obj: LMEM allocation for use by the VF. */
struct xe_bo *lmem_obj;
/** @num_ctxs: number of GuC contexts IDs. */
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 47222bd9988d..4ebc82e607af 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -495,6 +495,25 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt)
return gt->sriov.vf.self_config.lmem_size;
}
+static struct xe_ggtt_node *
+vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end)
+{
+ struct xe_ggtt_node *node;
+ int err;
+
+ node = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(node))
+ return node;
+
+ err = xe_ggtt_node_insert_balloon(node, start, end);
+ if (err) {
+ xe_ggtt_node_fini(node);
+ return ERR_PTR(err);
+ }
+
+ return node;
+}
+
static int vf_balloon_ggtt(struct xe_gt *gt)
{
struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
@@ -502,7 +521,6 @@ static int vf_balloon_ggtt(struct xe_gt *gt)
struct xe_ggtt *ggtt = tile->mem.ggtt;
struct xe_device *xe = gt_to_xe(gt);
u64 start, end;
- int err;
xe_gt_assert(gt, IS_SRIOV_VF(xe));
xe_gt_assert(gt, !xe_gt_is_media_type(gt));
@@ -528,35 +546,31 @@ static int vf_balloon_ggtt(struct xe_gt *gt)
start = xe_wopcm_size(xe);
end = config->ggtt_base;
if (end != start) {
- err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[0]);
- if (err)
- goto failed;
+ tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end);
+ if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
+ return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);
}
start = config->ggtt_base + config->ggtt_size;
end = GUC_GGTT_TOP;
if (end != start) {
- err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[1]);
- if (err)
- goto deballoon;
+ tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end);
+ if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
+ xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]);
+ return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
+ }
}
return 0;
-
-deballoon:
- xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]);
-failed:
- return err;
}
static void deballoon_ggtt(struct drm_device *drm, void *arg)
{
struct xe_tile *tile = arg;
- struct xe_ggtt *ggtt = tile->mem.ggtt;
xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
- xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[1]);
- xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]);
+ xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]);
+ xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
new file mode 100644
index 000000000000..c7364a5aef8f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/atomic.h>
+
+#include <drm/drm_print.h>
+
+#include "xe_gt.h"
+#include "xe_gt_stats.h"
+
+/**
+ * xe_gt_stats_incr - Increments the specified stats counter
+ * @gt: graphics tile
+ * @id: xe_gt_stats_id type id of the counter to increment
+ * @incr: value to increment the counter by
+ *
+ * Increments the specified stats counter.
+ */
+void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
+{
+ if (id >= __XE_GT_STATS_NUM_IDS)
+ return;
+
+ atomic_add(incr, &gt->stats.counters[id]);
+}
+
+static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
+ "tlb_inval_count",
+};
+
+/**
+ * xe_gt_stats_print_info - Print the GT stats
+ * @gt: graphics tile
+ * @p: drm_printer to print the stats into.
+ *
+ * This prints out all the available GT stats.
+ */
+int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
+{
+ enum xe_gt_stats_id id;
+
+ for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id)
+ drm_printf(p, "%s: %d\n", stat_description[id],
+ atomic_read(&gt->stats.counters[id]));
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
new file mode 100644
index 000000000000..91d944f6c4e4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_STATS_H_
+#define _XE_GT_STATS_H_
+
+struct xe_gt;
+struct drm_printer;
+
+enum xe_gt_stats_id {
+ XE_GT_STATS_ID_TLB_INVAL,
+ /* must be the last entry */
+ __XE_GT_STATS_NUM_IDS,
+};
+
+#ifdef CONFIG_DEBUG_FS
+int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
+void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
+#else
+static inline void
+xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id,
+ int incr)
+{
+}
+
+#endif
+#endif
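xe_gt_stats keeps one atomic counter per xe_gt_stats_id and only exposes the print helper when debugfs is built in; the hot path is a single bounds-checked atomic add, and the debugfs path walks the counters alongside a parallel table of description strings. A stand-alone sketch of the same pattern using C11 atomics (the enum, names and main() below are illustrative, not driver code):

#include <stdatomic.h>
#include <stdio.h>

enum stat_id {
	STAT_TLB_INVAL,
	__NUM_STATS,	/* must be the last entry */
};

static const char *const stat_name[__NUM_STATS] = {
	"tlb_inval_count",
};

static atomic_int counters[__NUM_STATS];

/* Mirrors xe_gt_stats_incr(): bounds check, then an atomic add. */
static void stats_incr(enum stat_id id, int incr)
{
	if (id >= __NUM_STATS)
		return;
	atomic_fetch_add(&counters[id], incr);
}

/* Mirrors xe_gt_stats_print_info(): dump name/value pairs. */
static void stats_print(void)
{
	for (int id = 0; id < __NUM_STATS; id++)
		printf("%s: %d\n", stat_name[id], atomic_load(&counters[id]));
}

int main(void)
{
	stats_incr(STAT_TLB_INVAL, 1);
	stats_incr(STAT_TLB_INVAL, 1);
	stats_print();	/* prints "tlb_inval_count: 2" */
	return 0;
}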
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 87cb76a8718c..cca9cf536f76 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -12,6 +12,7 @@
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
+#include "xe_gt_stats.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_sriov.h"
@@ -213,6 +214,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
gt->tlb_invalidation.seqno = 1;
}
mutex_unlock(&guc->ct.lock);
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 631928258d71..31946d7fe701 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -10,6 +10,7 @@
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
+#include "xe_gt_stats.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_oa.h"
@@ -133,6 +134,14 @@ struct xe_gt {
u8 has_indirect_ring_state:1;
} info;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ /** @stats: GT stats */
+ struct {
+ /** @stats.counters: counters for various GT stats */
+ atomic_t counters[__XE_GT_STATS_NUM_IDS];
+ } stats;
+#endif
+
/**
* @mmio: mmio info for GT. All GTs within a tile share the same
* register space, but have their own copy of GSI registers at a
@@ -238,9 +247,14 @@ struct xe_gt {
struct pf_queue {
/** @usm.pf_queue.gt: back pointer to GT */
struct xe_gt *gt;
-#define PF_QUEUE_NUM_DW 128
/** @usm.pf_queue.data: data in the page fault queue */
- u32 data[PF_QUEUE_NUM_DW];
+ u32 *data;
+ /**
+ * @usm.pf_queue.num_dw: number of DWORDS in the page
+ * fault queue. Dynamically calculated based on the number
+ * of compute resources available.
+ */
+ u32 num_dw;
/**
* @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
* moved by worker which processes faults (consumer).
@@ -368,6 +382,12 @@ struct xe_gt {
} steering[NUM_STEERING_TYPES];
/**
+ * @steering_dss_per_grp: number of DSS per steering group (gslice,
+ * cslice, etc.).
+ */
+ unsigned int steering_dss_per_grp;
+
+ /**
* @mcr_lock: protects the MCR_SELECTOR register for the duration
* of a steered operation
*/
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index de0fe9e65746..52df28032a6f 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -350,6 +350,8 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
+ xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+
ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc);
if (ret)
goto out;
@@ -358,8 +360,6 @@ int xe_guc_init(struct xe_guc *guc)
xe_guc_comm_init_early(guc);
- xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
-
return 0;
out:
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index e0bbf98f849d..c3e6b51f7a09 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -11,6 +11,16 @@
#include "xe_hw_engine_types.h"
#include "xe_macros.h"
+/*
+ * GuC version number components are defined to be only 8-bit size,
+ * so converting to a 32bit 8.8.8 integer allows simple (and safe)
+ * numerical comparisons.
+ */
+#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
+#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch)
+#define GUC_SUBMIT_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY])
+#define GUC_FIRMWARE_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE])
+
struct drm_printer;
void xe_guc_comm_init_early(struct xe_guc *guc);
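MAKE_GUC_VER() packs major/minor/patch, each limited to 8 bits, into one 32-bit 8.8.8 value, so checking whether a found firmware version is new enough becomes a single integer comparison. A short stand-alone program showing the packing, with illustrative version numbers:

#include <stdio.h>

#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))

int main(void)
{
	/* Illustrative version numbers, not a statement about real firmware. */
	unsigned int found = MAKE_GUC_VER(70, 29, 2);	/* 0x461d02 */
	unsigned int wanted = MAKE_GUC_VER(70, 19, 2);	/* 0x461302 */

	/* 70.29.2 >= 70.19.2, so a single compare answers the question. */
	printf("found=0x%06x wanted=0x%06x new_enough=%d\n",
	       found, wanted, found >= wanted);
	return 0;
}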
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 1c60b685dbc6..d1902a8581ca 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -24,6 +24,7 @@
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_platform_types.h"
+#include "xe_uc_fw.h"
#include "xe_wa.h"
/* Slack of a few additional entries per engine */
@@ -367,6 +368,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
0xC40,
&offset, &remain);
+ if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406))
+ guc_waklv_enable_simple(ads,
+ GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
+ &offset, &remain);
+
size = guc_ads_waklv_size(ads) - remain;
if (!size)
return;
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index beeeb120d1fc..f24dd5223926 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -105,12 +105,20 @@ ct_to_xe(struct xe_guc_ct *ct)
* enough space to avoid backpressure on the driver. We increase the size
* of the receive buffer (relative to the send) to ensure a G2H response
* CTB has a landing spot.
+ *
+ * In addition to submissions, the G2H buffer needs to be able to hold
+ * enough space for recoverable page fault notifications. The number of
+ * page faults is interrupt driven and can be as much as the number of
+ * compute resources available. However, most of the actual work for these
+ * is in a separate page fault worker thread. Therefore we only need to
+ * make sure the queue has enough space to handle all of the submissions
+ * and responses and an extra buffer for incoming page faults.
*/
#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
#define CTB_H2G_BUFFER_SIZE (SZ_4K)
-#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE)
-#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4)
+#define CTB_G2H_BUFFER_SIZE (SZ_128K)
+#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2)
/**
* xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
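The resize replaces the old 4 * 4K G2H buffer (with a quarter reserved as credit room) by a fixed 128K buffer with half reserved, leaving headroom for interrupt-driven page-fault notifications on top of submission traffic. The before/after arithmetic, spelled out with the usual SZ_* values as plain constants:

#include <stdio.h>

#define SZ_4K	0x1000u
#define SZ_128K	0x20000u

int main(void)
{
	unsigned int old_g2h = 4 * SZ_4K;	/* previous CTB_G2H_BUFFER_SIZE: 16 KiB */
	unsigned int old_room = old_g2h / 4;	/* previous G2H_ROOM_BUFFER_SIZE: 4 KiB */
	unsigned int new_g2h = SZ_128K;		/* new CTB_G2H_BUFFER_SIZE: 128 KiB */
	unsigned int new_room = new_g2h / 2;	/* new G2H_ROOM_BUFFER_SIZE: 64 KiB */

	printf("old: g2h=%u room=%u\n", old_g2h, old_room);
	printf("new: g2h=%u room=%u\n", new_g2h, new_room);
	return 0;
}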
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index d9b570a154a2..af2c817d552c 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -6,6 +6,7 @@
#include "xe_guc_hwconfig.h"
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include "abi/guc_actions_abi.h"
#include "xe_bo.h"
@@ -103,3 +104,99 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0,
guc->hwconfig.size);
}
+
+void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p)
+{
+ size_t size = xe_guc_hwconfig_size(guc);
+ u32 *hwconfig;
+ u64 num_dw;
+ u32 extra_bytes;
+ int i = 0;
+
+ if (size == 0) {
+ drm_printf(p, "No hwconfig available\n");
+ return;
+ }
+
+ num_dw = div_u64_rem(size, sizeof(u32), &extra_bytes);
+
+ hwconfig = kzalloc(size, GFP_KERNEL);
+ if (!hwconfig) {
+ drm_printf(p, "Error: could not allocate hwconfig memory\n");
+ return;
+ }
+
+ xe_guc_hwconfig_copy(guc, hwconfig);
+
+ /* An entry requires at least three dwords for key, length, value */
+ while (i + 3 <= num_dw) {
+ u32 attribute = hwconfig[i++];
+ u32 len_dw = hwconfig[i++];
+
+ if (i + len_dw > num_dw) {
+ drm_printf(p, "Error: Attribute %u is %u dwords, but only %llu remain\n",
+ attribute, len_dw, num_dw - i);
+ len_dw = num_dw - i;
+ }
+
+ /*
+ * If it's a single dword (as most hwconfig attributes are),
+ * then it's probably a number that makes sense to display
+ * in decimal form. In the rare cases where it's more than
+ * one dword, just print it in hex form and let the user
+ * figure out how to interpret it.
+ */
+ if (len_dw == 1)
+ drm_printf(p, "[%2u] = %u\n", attribute, hwconfig[i]);
+ else
+ drm_printf(p, "[%2u] = { %*ph }\n", attribute,
+ (int)(len_dw * sizeof(u32)), &hwconfig[i]);
+ i += len_dw;
+ }
+
+ if (i < num_dw || extra_bytes)
+ drm_printf(p, "Error: %llu extra bytes at end of hwconfig\n",
+ (num_dw - i) * sizeof(u32) + extra_bytes);
+
+ kfree(hwconfig);
+}
+
+/*
+ * Lookup a specific 32-bit attribute value in the GuC's hwconfig table.
+ */
+int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val)
+{
+ size_t size = xe_guc_hwconfig_size(guc);
+ u64 num_dw = div_u64(size, sizeof(u32));
+ u32 *hwconfig;
+ bool found = false;
+ int i = 0;
+
+ if (num_dw == 0)
+ return -EINVAL;
+
+ hwconfig = kzalloc(size, GFP_KERNEL);
+ if (!hwconfig)
+ return -ENOMEM;
+
+ xe_guc_hwconfig_copy(guc, hwconfig);
+
+ /* An entry requires at least three dwords for key, length, value */
+ while (i + 3 <= num_dw) {
+ u32 key = hwconfig[i++];
+ u32 len_dw = hwconfig[i++];
+
+ if (key != attribute) {
+ i += len_dw;
+ continue;
+ }
+
+ *val = hwconfig[i];
+ found = true;
+ break;
+ }
+
+ kfree(hwconfig);
+
+ return found ? 0 : -ENOENT;
+}
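Both new helpers walk the same self-describing layout: the hwconfig blob is a stream of dwords in which every entry is a key, a length in dwords, and then that many value dwords. A stand-alone parser over a fabricated table (the attribute numbers and values below are made up for illustration):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	/* key, len_dw, value(s), ... — fabricated example data */
	static const unsigned int hwconfig[] = {
		1, 1, 8,			/* attribute 1 = 8 */
		4, 1, 128,			/* attribute 4 = 128 */
		7, 2, 0xdead, 0xbeef,		/* attribute 7 = two dwords */
	};
	size_t num_dw = sizeof(hwconfig) / sizeof(hwconfig[0]);
	size_t i = 0;

	/* An entry needs at least key + length + one value dword. */
	while (i + 3 <= num_dw) {
		unsigned int key = hwconfig[i++];
		unsigned int len_dw = hwconfig[i++];

		if (len_dw == 1)
			printf("[%2u] = %u\n", key, hwconfig[i]);
		else
			printf("[%2u] = %u dwords starting with 0x%x\n",
			       key, len_dw, hwconfig[i]);
		i += len_dw;
	}
	return 0;
}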
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
index b5794d641900..ab4e5038236e 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
@@ -8,10 +8,13 @@
#include <linux/types.h>
+struct drm_printer;
struct xe_guc;
int xe_guc_hwconfig_init(struct xe_guc *guc);
u32 xe_guc_hwconfig_size(struct xe_guc *guc);
void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst);
+void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p);
+int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 32e93a8127d4..def503abeed5 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -1042,7 +1042,7 @@ static void xe_guc_pc_fini_hw(void *arg)
return;
XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
- XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
+ xe_guc_pc_gucrc_disable(pc);
XE_WARN_ON(xe_guc_pc_stop(pc));
/* Bind requested freq to mert_freq_cap before unload */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 2adf551500cb..fbbe6a487bbb 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1374,9 +1374,11 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
struct xe_exec_queue *q = msg->private_data;
if (guc_exec_queue_allowed_to_change_state(q)) {
- q->guc->resume_time = RESUME_PENDING;
clear_exec_queue_suspended(q);
- enable_scheduling(q);
+ if (!exec_queue_enabled(q)) {
+ q->guc->resume_time = RESUME_PENDING;
+ enable_scheduling(q);
+ }
} else {
clear_exec_queue_suspended(q);
}
@@ -1386,6 +1388,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
#define SET_SCHED_PROPS 2
#define SUSPEND 3
#define RESUME 4
+#define OPCODE_MASK 0xf
+#define MSG_LOCKED BIT(8)
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
@@ -1430,7 +1434,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
struct xe_device *xe = guc_to_xe(guc);
struct xe_guc_exec_queue *ge;
long timeout;
- int err;
+ int err, i;
xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
@@ -1442,6 +1446,9 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
ge->q = q;
init_waitqueue_head(&ge->suspend_wait);
+ for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
+ INIT_LIST_HEAD(&ge->static_msgs[i].link);
+
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
@@ -1504,11 +1511,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg
xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
INIT_LIST_HEAD(&msg->link);
- msg->opcode = opcode;
+ msg->opcode = opcode & OPCODE_MASK;
msg->private_data = q;
trace_xe_sched_msg_add(msg);
- xe_sched_add_msg(&q->guc->sched, msg);
+ if (opcode & MSG_LOCKED)
+ xe_sched_add_msg_locked(&q->guc->sched, msg);
+ else
+ xe_sched_add_msg(&q->guc->sched, msg);
+}
+
+static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
+ struct xe_sched_msg *msg,
+ u32 opcode)
+{
+ if (!list_empty(&msg->link))
+ return false;
+
+ guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
+
+ return true;
}
#define STATIC_MSG_CLEANUP 0
@@ -1582,13 +1604,16 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
- if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending)
+ if (exec_queue_killed_or_banned_or_wedged(q))
return -EINVAL;
- q->guc->suspend_pending = true;
- guc_exec_queue_add_msg(q, msg, SUSPEND);
+ xe_sched_msg_lock(sched);
+ if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
+ q->guc->suspend_pending = true;
+ xe_sched_msg_unlock(sched);
return 0;
}
@@ -1603,11 +1628,11 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after kill, also check kill here.
*/
- ret = wait_event_timeout(q->guc->suspend_wait,
- !READ_ONCE(q->guc->suspend_pending) ||
- exec_queue_killed(q) ||
- guc_read_stopped(guc),
- HZ * 5);
+ ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
+ !READ_ONCE(q->guc->suspend_pending) ||
+ exec_queue_killed(q) ||
+ guc_read_stopped(guc),
+ HZ * 5);
if (!ret) {
xe_gt_warn(guc_to_gt(guc),
@@ -1617,18 +1642,21 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
return -ETIME;
}
- return 0;
+ return ret < 0 ? ret : 0;
}
static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
xe_assert(xe, !q->guc->suspend_pending);
- guc_exec_queue_add_msg(q, msg, RESUME);
+ xe_sched_msg_lock(sched);
+ guc_exec_queue_try_add_msg(q, msg, RESUME);
+ xe_sched_msg_unlock(sched);
}
static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
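The suspend/resume rework encodes an "already holding the scheduler message lock" flag into the opcode argument: only the low nibble (OPCODE_MASK) is stored in the message, while MSG_LOCKED merely selects the locked add path, and guc_exec_queue_try_add_msg() additionally skips messages whose link is already on a list. A tiny sketch of the flag-in-argument part (the defines mirror the ones above; everything else is illustrative):

#include <stdio.h>

#define OPCODE_MASK	0xf
#define MSG_LOCKED	(1u << 8)

#define SUSPEND		3
#define RESUME		4

static void add_msg(unsigned int opcode)
{
	unsigned int stored = opcode & OPCODE_MASK;	/* what the message keeps */

	if (opcode & MSG_LOCKED)
		printf("add (caller holds lock): opcode=%u\n", stored);
	else
		printf("add (take lock here):    opcode=%u\n", stored);
}

int main(void)
{
	add_msg(SUSPEND);		/* unlocked path */
	add_msg(RESUME | MSG_LOCKED);	/* caller already holds the lock */
	return 0;
}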
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index bec4366e5513..f5459f97af23 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -43,14 +43,6 @@ huc_to_guc(struct xe_huc *huc)
return &container_of(huc, struct xe_uc, huc)->guc;
}
-static void free_gsc_pkt(struct drm_device *drm, void *arg)
-{
- struct xe_huc *huc = arg;
-
- xe_bo_unpin_map_no_vm(huc->gsc_pkt);
- huc->gsc_pkt = NULL;
-}
-
#define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K
static int huc_alloc_gsc_pkt(struct xe_huc *huc)
{
@@ -59,17 +51,16 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc)
struct xe_bo *bo;
/* we use a single object for both input and output */
- bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
- PXP43_HUC_AUTH_INOUT_SIZE * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
+ bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt),
+ PXP43_HUC_AUTH_INOUT_SIZE * 2,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT);
if (IS_ERR(bo))
return PTR_ERR(bo);
huc->gsc_pkt = bo;
- return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
+ return 0;
}
int xe_huc_init(struct xe_huc *huc)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 07ed9fd28f19..18980238a2ea 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -5,7 +5,10 @@
#include "xe_hw_engine.h"
+#include <linux/nospec.h>
+
#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
@@ -20,6 +23,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
+#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
@@ -263,7 +267,7 @@ static const struct engine_info engine_infos[] = {
},
};
-static void hw_engine_fini(struct drm_device *drm, void *arg)
+static void hw_engine_fini(void *arg)
{
struct xe_hw_engine *hwe = arg;
@@ -274,8 +278,18 @@ static void hw_engine_fini(struct drm_device *drm, void *arg)
hwe->gt = NULL;
}
-static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
- u32 val)
+/**
+ * xe_hw_engine_mmio_write32() - Write engine register
+ * @hwe: engine
+ * @reg: register to write into
+ * @val: desired 32-bit value to write
+ *
+ * This function will write val into an engine-specific register.
+ * Forcewake must be held by the caller.
+ *
+ */
+void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
+ struct xe_reg reg, u32 val)
{
xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
@@ -285,7 +299,17 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
xe_mmio_write32(hwe->gt, reg, val);
}
-static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
+/**
+ * xe_hw_engine_mmio_read32() - Read engine register
+ * @hwe: engine
+ * @reg: register to read from
+ *
+ * This function will read from an engine-specific register.
+ * Forcewake must be held by the caller.
+ *
+ * Return: value of the 32-bit register.
+ */
+u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
@@ -304,14 +328,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_mmio_write32(hwe->gt, RCU_MODE,
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
- hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
- hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
- xe_bo_ggtt_addr(hwe->hwsp));
- hw_engine_mmio_write32(hwe, RING_MODE(0),
- _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
- hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
- _MASKED_BIT_DISABLE(STOP_RING));
- hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+ xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
+ xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
+ xe_bo_ggtt_addr(hwe->hwsp));
+ xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
+ _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+ xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
+ _MASKED_BIT_DISABLE(STOP_RING));
+ xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}
static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
@@ -425,6 +449,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
0xA,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
+ /* Enable Priority Mem Read */
+ { XE_RTP_NAME("Priority_Mem_Read"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
+ XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+ },
{}
};
@@ -555,7 +585,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
gt->usm.reserved_bcs_instance = hwe->instance;
- return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
+ return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
err_kernel_lrc:
xe_lrc_put(hwe->kernel_lrc);
@@ -761,6 +791,9 @@ int xe_hw_engines_init(struct xe_gt *gt)
}
hw_engine_setup_logical_mapping(gt);
+ err = xe_hw_engine_setup_groups(gt);
+ if (err)
+ return err;
return 0;
}
@@ -791,7 +824,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
unsigned int dss;
u16 group, instance;
- snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
+ snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
return;
@@ -887,53 +920,53 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
return snapshot;
snapshot->reg.ring_execlist_status =
- hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
- val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
+ xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
+ val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
snapshot->reg.ring_execlist_status |= val << 32;
snapshot->reg.ring_execlist_sq_contents =
- hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
- val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
+ xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
+ val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
snapshot->reg.ring_execlist_sq_contents |= val << 32;
- snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
- val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
+ snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
+ val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
snapshot->reg.ring_acthd |= val << 32;
- snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
- val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
+ snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0));
+ val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
snapshot->reg.ring_bbaddr |= val << 32;
snapshot->reg.ring_dma_fadd =
- hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
- val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
+ xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
+ val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
snapshot->reg.ring_dma_fadd |= val << 32;
- snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
- snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
- snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
+ snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
+ snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
+ snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0));
if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
- val = hw_engine_mmio_read32(hwe, RING_START_UDW(0));
+ val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0));
snapshot->reg.ring_start |= val << 32;
}
if (xe_gt_has_indirect_ring_state(hwe->gt)) {
snapshot->reg.indirect_ring_state =
- hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
+ xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
}
snapshot->reg.ring_head =
- hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
+ xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
snapshot->reg.ring_tail =
- hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
- snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
+ xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
+ snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0));
snapshot->reg.ring_mi_mode =
- hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
- snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
- snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
- snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
- snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
- snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
- snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
+ xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+ snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
+ snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0));
+ snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0));
+ snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0));
+ snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0));
+ snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0));
xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
@@ -1135,3 +1168,41 @@ enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
return engine_infos[hwe->engine_id].domain;
}
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+ [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+ [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+ [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+ [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+/**
+ * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
+ * @xe: xe device
+ * @eci: engine class and instance
+ *
+ * This function will find a hardware engine for given engine
+ * class and instance.
+ *
+ * Return: Pointer to the xe_hw_engine if found, NULL otherwise.
+ */
+struct xe_hw_engine *
+xe_hw_engine_lookup(struct xe_device *xe,
+ struct drm_xe_engine_class_instance eci)
+{
+ unsigned int idx;
+
+ if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+ return NULL;
+
+ if (eci.gt_id >= xe->info.gt_count)
+ return NULL;
+
+ idx = array_index_nospec(eci.engine_class,
+ ARRAY_SIZE(user_to_xe_engine_class));
+
+ return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
+ user_to_xe_engine_class[idx],
+ eci.engine_instance, true);
+}
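xe_hw_engine_lookup() rejects out-of-range user input before indexing the class translation table and clamps the index with array_index_nospec() so it cannot be used speculatively out of bounds. The same validate-then-index shape in plain user-space C, with the nospec clamp reduced to a comment since it has no portable equivalent (table contents are illustrative):

#include <stdio.h>
#include <stddef.h>

static const char *const user_to_class[] = {
	"render",
	"copy",
	"video_decode",
	"video_enhance",
	"compute",
};

static const char *lookup(unsigned int user_class)
{
	if (user_class >= sizeof(user_to_class) / sizeof(user_to_class[0]))
		return NULL;	/* reject out-of-range input */

	/* the kernel additionally clamps with array_index_nospec() here */
	return user_to_class[user_class];
}

int main(void)
{
	printf("%s\n", lookup(1));			/* "copy" */
	printf("%s\n", lookup(42) ? "found" : "rejected");
	return 0;
}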
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index 900c8c991430..022819a4a8eb 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -9,6 +9,8 @@
#include "xe_hw_engine_types.h"
struct drm_printer;
+struct drm_xe_engine_class_instance;
+struct xe_device;
#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
@@ -62,6 +64,11 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
+
+struct xe_hw_engine *
+xe_hw_engine_lookup(struct xe_device *xe,
+ struct drm_xe_engine_class_instance eci);
+
static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
{
return hwe->name;
@@ -71,4 +78,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class);
u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe);
enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe);
+void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val);
+u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
new file mode 100644
index 000000000000..82750520a90a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_group.h"
+#include "xe_vm.h"
+
+static void
+hw_engine_group_free(struct drm_device *drm, void *arg)
+{
+ struct xe_hw_engine_group *group = arg;
+
+ destroy_workqueue(group->resume_wq);
+ kfree(group);
+}
+
+static void
+hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
+{
+ struct xe_exec_queue *q;
+ struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
+ int err;
+ enum xe_hw_engine_group_execution_mode previous_mode;
+
+ err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
+ if (err)
+ return;
+
+ if (previous_mode == EXEC_MODE_LR)
+ goto put;
+
+ list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+ if (!xe_vm_in_fault_mode(q->vm))
+ continue;
+
+ q->ops->resume(q);
+ }
+
+put:
+ xe_hw_engine_group_put(group);
+}
+
+static struct xe_hw_engine_group *
+hw_engine_group_alloc(struct xe_device *xe)
+{
+ struct xe_hw_engine_group *group;
+ int err;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
+ if (!group->resume_wq)
+ return ERR_PTR(-ENOMEM);
+
+ init_rwsem(&group->mode_sem);
+ INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
+ INIT_LIST_HEAD(&group->exec_queue_list);
+
+ err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
+ if (err)
+ return ERR_PTR(err);
+
+ return group;
+}
+
+/**
+ * xe_hw_engine_setup_groups() - Set up the hw engine groups for the gt
+ * @gt: The gt for which groups are set up
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_hw_engine_setup_groups(struct xe_gt *gt)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ group_rcs_ccs = hw_engine_group_alloc(xe);
+ if (IS_ERR(group_rcs_ccs)) {
+ err = PTR_ERR(group_rcs_ccs);
+ goto err_group_rcs_ccs;
+ }
+
+ group_bcs = hw_engine_group_alloc(xe);
+ if (IS_ERR(group_bcs)) {
+ err = PTR_ERR(group_bcs);
+ goto err_group_bcs;
+ }
+
+ group_vcs_vecs = hw_engine_group_alloc(xe);
+ if (IS_ERR(group_vcs_vecs)) {
+ err = PTR_ERR(group_vcs_vecs);
+ goto err_group_vcs_vecs;
+ }
+
+ for_each_hw_engine(hwe, gt, id) {
+ switch (hwe->class) {
+ case XE_ENGINE_CLASS_COPY:
+ hwe->hw_engine_group = group_bcs;
+ break;
+ case XE_ENGINE_CLASS_RENDER:
+ case XE_ENGINE_CLASS_COMPUTE:
+ hwe->hw_engine_group = group_rcs_ccs;
+ break;
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ hwe->hw_engine_group = group_vcs_vecs;
+ break;
+ case XE_ENGINE_CLASS_OTHER:
+ break;
+ default:
+ drm_warn(&xe->drm, "NOT POSSIBLE");
+ }
+ }
+
+ return 0;
+
+err_group_vcs_vecs:
+ kfree(group_vcs_vecs);
+err_group_bcs:
+ kfree(group_bcs);
+err_group_rcs_ccs:
+ kfree(group_rcs_ccs);
+
+ return err;
+}
+
+/**
+ * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
+ * @group: The hw engine group
+ * @q: The exec_queue
+ *
+ * Return: 0 on success,
+ * -EINTR if the lock could not be acquired
+ */
+int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
+{
+ int err;
+ struct xe_device *xe = gt_to_xe(q->gt);
+
+ xe_assert(xe, group);
+ xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
+ xe_assert(xe, q->vm);
+
+ if (xe_vm_in_preempt_fence_mode(q->vm))
+ return 0;
+
+ err = down_write_killable(&group->mode_sem);
+ if (err)
+ return err;
+
+ if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
+ q->ops->suspend(q);
+ err = q->ops->suspend_wait(q);
+ if (err)
+ goto err_suspend;
+
+ xe_hw_engine_group_resume_faulting_lr_jobs(group);
+ }
+
+ list_add(&q->hw_engine_group_link, &group->exec_queue_list);
+ up_write(&group->mode_sem);
+
+ return 0;
+
+err_suspend:
+ up_write(&group->mode_sem);
+ return err;
+}
+
+/**
+ * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
+ * @group: The hw engine group
+ * @q: The exec_queue
+ */
+void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
+{
+ struct xe_device *xe = gt_to_xe(q->gt);
+
+ xe_assert(xe, group);
+ xe_assert(xe, q->vm);
+
+ down_write(&group->mode_sem);
+
+ if (!list_empty(&q->hw_engine_group_link))
+ list_del(&q->hw_engine_group_link);
+
+ up_write(&group->mode_sem);
+}
+
+/**
+ * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
+ * faulting LR jobs
+ * @group: The hw engine group
+ */
+void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
+{
+ queue_work(group->resume_wq, &group->resume_work);
+}
+
+/**
+ * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
+ * @group: The hw engine group
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
+{
+ int err;
+ struct xe_exec_queue *q;
+ bool need_resume = false;
+
+ lockdep_assert_held_write(&group->mode_sem);
+
+ list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+ if (!xe_vm_in_fault_mode(q->vm))
+ continue;
+
+ need_resume = true;
+ q->ops->suspend(q);
+ }
+
+ list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+ if (!xe_vm_in_fault_mode(q->vm))
+ continue;
+
+ err = q->ops->suspend_wait(q);
+ if (err)
+ goto err_suspend;
+ }
+
+ if (need_resume)
+ xe_hw_engine_group_resume_faulting_lr_jobs(group);
+
+ return 0;
+
+err_suspend:
+ up_write(&group->mode_sem);
+ return err;
+}
+
+/**
+ * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
+ * @group: The hw engine group
+ *
+ * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
+ * is not interruptible.
+ *
+ * Return: 0 on success,
+ * -ETIME if waiting for one job failed
+ */
+static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
+{
+ long timeout;
+ struct xe_exec_queue *q;
+ struct dma_fence *fence;
+
+ lockdep_assert_held_write(&group->mode_sem);
+
+ list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+ if (xe_vm_in_lr_mode(q->vm))
+ continue;
+
+ fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
+ timeout = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ if (timeout < 0)
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+static int switch_mode(struct xe_hw_engine_group *group)
+{
+ int err = 0;
+ enum xe_hw_engine_group_execution_mode new_mode;
+
+ lockdep_assert_held_write(&group->mode_sem);
+
+ switch (group->cur_mode) {
+ case EXEC_MODE_LR:
+ new_mode = EXEC_MODE_DMA_FENCE;
+ err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
+ break;
+ case EXEC_MODE_DMA_FENCE:
+ new_mode = EXEC_MODE_LR;
+ err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
+ break;
+ }
+
+ if (err)
+ return err;
+
+ group->cur_mode = new_mode;
+
+ return 0;
+}
+
+/**
+ * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode
+ * @group: The hw engine group
+ * @new_mode: The new execution mode
+ * @previous_mode: Pointer to the previous mode provided for use by caller
+ *
+ * Return: 0 if successful, -EINTR if locking failed.
+ */
+int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
+ enum xe_hw_engine_group_execution_mode new_mode,
+ enum xe_hw_engine_group_execution_mode *previous_mode)
+__acquires(&group->mode_sem)
+{
+ int err = down_read_interruptible(&group->mode_sem);
+
+ if (err)
+ return err;
+
+ *previous_mode = group->cur_mode;
+
+ if (new_mode != group->cur_mode) {
+ up_read(&group->mode_sem);
+ err = down_write_killable(&group->mode_sem);
+ if (err)
+ return err;
+
+ if (new_mode != group->cur_mode) {
+ err = switch_mode(group);
+ if (err) {
+ up_write(&group->mode_sem);
+ return err;
+ }
+ }
+ downgrade_write(&group->mode_sem);
+ }
+
+ return err;
+}
+
+/**
+ * xe_hw_engine_group_put() - Put the group
+ * @group: The hw engine group
+ */
+void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
+__releases(&group->mode_sem)
+{
+ up_read(&group->mode_sem);
+}
+
+/**
+ * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
+ * @q: The exec_queue
+ */
+enum xe_hw_engine_group_execution_mode
+xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
+{
+ if (xe_vm_in_fault_mode(q->vm))
+ return EXEC_MODE_LR;
+ else
+ return EXEC_MODE_DMA_FENCE;
+}
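xe_hw_engine_group_get_mode() takes the group semaphore for read and, only when a mode switch is required, drops it, retakes it for write, re-checks, switches, and then downgrades back to read so the caller always exits holding a read lock. pthreads has no atomic downgrade, so the sketch below releases the write lock and re-acquires the read lock, noting where the kernel's downgrade_write() closes that window; all names here are illustrative stand-ins, not driver code:

#include <pthread.h>
#include <stdio.h>

enum mode { MODE_LR, MODE_DMA_FENCE };

static pthread_rwlock_t mode_sem = PTHREAD_RWLOCK_INITIALIZER;
static enum mode cur_mode = MODE_DMA_FENCE;

/* Returns with the read lock held, like xe_hw_engine_group_get_mode(). */
static void get_mode(enum mode new_mode, enum mode *previous)
{
	pthread_rwlock_rdlock(&mode_sem);
	*previous = cur_mode;

	if (new_mode != cur_mode) {
		/* upgrade: drop the read lock, take it for write, re-check */
		pthread_rwlock_unlock(&mode_sem);
		pthread_rwlock_wrlock(&mode_sem);
		if (new_mode != cur_mode)
			cur_mode = new_mode;	/* the switch_mode() step */
		/*
		 * The kernel uses downgrade_write() here; pthreads has no
		 * equivalent, so another writer could slip in between the
		 * unlock and the rdlock below.
		 */
		pthread_rwlock_unlock(&mode_sem);
		pthread_rwlock_rdlock(&mode_sem);
	}
}

static void put_mode(void)
{
	pthread_rwlock_unlock(&mode_sem);	/* xe_hw_engine_group_put() */
}

int main(void)
{
	enum mode prev;

	get_mode(MODE_LR, &prev);
	printf("switched from %d to %d\n", prev, cur_mode);
	put_mode();
	return 0;
}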
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h
new file mode 100644
index 000000000000..797ee81acbf2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_GROUP_H_
+#define _XE_HW_ENGINE_GROUP_H_
+
+#include "xe_hw_engine_group_types.h"
+
+struct drm_device;
+struct xe_exec_queue;
+struct xe_gt;
+
+int xe_hw_engine_setup_groups(struct xe_gt *gt);
+
+int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);
+void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);
+
+int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
+ enum xe_hw_engine_group_execution_mode new_mode,
+ enum xe_hw_engine_group_execution_mode *previous_mode);
+void xe_hw_engine_group_put(struct xe_hw_engine_group *group);
+
+enum xe_hw_engine_group_execution_mode
+xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q);
+void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
new file mode 100644
index 000000000000..92b6e0712c03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_
+#define _XE_HW_ENGINE_GROUP_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_lrc_types.h"
+#include "xe_reg_sr_types.h"
+
+/**
+ * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw
+ * engine group
+ *
+ * @EXEC_MODE_LR: execution in long-running mode
+ * @EXEC_MODE_DMA_FENCE: execution in dma fence mode
+ */
+enum xe_hw_engine_group_execution_mode {
+ EXEC_MODE_LR,
+ EXEC_MODE_DMA_FENCE,
+};
+
+/**
+ * struct xe_hw_engine_group - Hardware engine group
+ *
+ * hw engines belong to the same group if they share hardware resources in a way
+ * that prevents them from making progress when one is stuck on a page fault.
+ */
+struct xe_hw_engine_group {
+ /**
+ * @exec_queue_list: list of exec queues attached to this
+ * xe_hw_engine_group
+ */
+ struct list_head exec_queue_list;
+ /** @resume_work: worker to resume faulting LR exec queues */
+ struct work_struct resume_work;
+ /** @resume_wq: workqueue to resume faulting LR exec queues */
+ struct workqueue_struct *resume_wq;
+ /**
+ * @mode_sem: used to protect this group's hardware resources and ensure
+ * mutual exclusion between execution only in faulting LR mode and
+ * execution only in DMA_FENCE mode
+ */
+ struct rw_semaphore mode_sem;
+ /** @cur_mode: current execution mode of this hw engine group */
+ enum xe_hw_engine_group_execution_mode cur_mode;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 70e6434f150d..39f24012d0f4 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -150,6 +150,8 @@ struct xe_hw_engine {
struct xe_hw_engine_class_intf *eclass;
/** @oa_unit: oa unit for this hw engine */
struct xe_oa_unit *oa_unit;
+ /** @hw_engine_group: the group of hw engines this one belongs to */
+ struct xe_hw_engine_group *hw_engine_group;
};
/**
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 58121821f081..aec7db39c061 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -5,6 +5,8 @@
#include "xe_lrc.h"
+#include <generated/xe_wa_oob.h>
+
#include <linux/ascii85.h>
#include "instructions/xe_mi_commands.h"
@@ -24,6 +26,7 @@
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"
+#include "xe_wa.h"
#define LRC_VALID BIT_ULL(0)
#define LRC_PRIVILEGE BIT_ULL(8)
@@ -1581,19 +1584,31 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
int state_table_size = 0;
/*
- * At the moment we only need to emit non-register state for the RCS
- * engine.
+ * Wa_14019789679
+ *
+ * If the driver doesn't explicitly emit the SVG instructions while
+ * setting up the default LRC, the context switch will write 0's
+ * (noops) into the LRC memory rather than the expected instruction
+ * headers. Application contexts start out as a copy of the default
+ * LRC, and if they also do not emit specific settings for some SVG
+ * state, then on context restore they'll unintentionally inherit
+ * whatever state setting the previous context had programmed into the
+ * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
+ * prevent the hardware from resetting that state back to any specific
+ * value).
+ *
+ * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
+ * since that's a specific state setting that can easily cause GPU
+ * hangs if unintentionally inherited. However, to be safe, we'll
+ * continue to emit all of the SVG state since it's best not to leak
+ * any of the state between contexts, even if that leakage is harmless.
*/
- if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
- return;
-
- switch (GRAPHICS_VERx100(xe)) {
- case 1255:
- case 1270 ... 2004:
+ if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
state_table = xe_hpg_svg_state;
state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
- break;
- default:
+ }
+
+ if (!state_table) {
xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
return;
@@ -1634,7 +1649,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
if (!snapshot)
return NULL;
- if (lrc->bo && lrc->bo->vm)
+ if (lrc->bo->vm)
xe_vm_get(lrc->bo->vm);
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 6f24aaf58252..cbf54be224c9 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -442,7 +442,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
XE_ENGINE_CLASS_COPY,
EXEC_QUEUE_FLAG_KERNEL |
- EXEC_QUEUE_FLAG_PERMANENT);
+ EXEC_QUEUE_FLAG_PERMANENT, 0);
}
if (IS_ERR(m->q)) {
xe_vm_close_and_put(vm);
@@ -1037,9 +1037,11 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
* @m: The migration context.
* @bo: The buffer object @dst is currently bound to.
* @dst: The dst TTM resource to be cleared.
+ * @clear_flags: flags to specify which data to clear: CCS, BO, or both.
*
- * Clear the contents of @dst to zero. On flat CCS devices,
- * the CCS metadata is cleared to zero as well on VRAM destinations.
+ * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set.
+ * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA.
+ * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear bo as well as CCS metadata.
* TODO: Eliminate the @bo argument.
*
* Return: Pointer to a dma_fence representing the last clear batch, or
@@ -1048,18 +1050,27 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
*/
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_bo *bo,
- struct ttm_resource *dst)
+ struct ttm_resource *dst,
+ u32 clear_flags)
{
bool clear_vram = mem_type_is_vram(dst->mem_type);
+ bool clear_bo_data = XE_MIGRATE_CLEAR_FLAG_BO_DATA & clear_flags;
+ bool clear_ccs = XE_MIGRATE_CLEAR_FLAG_CCS_DATA & clear_flags;
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
- bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false;
+ bool clear_only_system_ccs = false;
struct dma_fence *fence = NULL;
u64 size = bo->size;
struct xe_res_cursor src_it;
struct ttm_resource *src = dst;
int err;
+ if (WARN_ON(!clear_bo_data && !clear_ccs))
+ return NULL;
+
+ if (!clear_bo_data && clear_ccs && !IS_DGFX(xe))
+ clear_only_system_ccs = true;
+
if (!clear_vram)
xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
else
@@ -1085,7 +1096,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
batch_size = 2 +
pte_update_size(m, pte_flags, src, &src_it,
&clear_L0, &clear_L0_ofs, &clear_L0_pt,
- clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
+ clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0,
avail_pts);
if (xe_migrate_needs_ccs_emit(xe))
@@ -1107,13 +1118,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
xe_res_next(&src_it, clear_L0);
else
- emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs,
+ emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
&src_it, clear_L0, dst);
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
- if (!clear_system_ccs)
+ if (clear_bo_data)
emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
if (xe_migrate_needs_ccs_emit(xe)) {
@@ -1172,7 +1183,7 @@ err_sync:
return ERR_PTR(err);
}
- if (clear_system_ccs)
+ if (clear_ccs)
bo->ccs_cleared = true;
return fence;
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 453e0ecf5034..0109866e398a 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -6,7 +6,7 @@
#ifndef _XE_MIGRATE_
#define _XE_MIGRATE_
-#include <drm/drm_mm.h>
+#include <linux/types.h>
struct dma_fence;
struct iosys_map;
@@ -102,9 +102,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
struct ttm_resource *dst,
bool copy_only_ccs);
+#define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0)
+#define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1)
+#define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \
+ XE_MIGRATE_CLEAR_FLAG_CCS_DATA)
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_bo *bo,
- struct ttm_resource *dst);
+ struct ttm_resource *dst,
+ u32 clear_flags);
struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
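Callers of the reworked xe_migrate_clear() now state explicitly what to clear by OR-ing flag bits, with XE_MIGRATE_CLEAR_FLAG_FULL reproducing the old implicit behaviour. A small sketch of how the flags decompose at the top of the function (flag values copied from the header above; the describe() helper and its inputs are hypothetical):

#include <stdio.h>
#include <stdbool.h>

#define XE_MIGRATE_CLEAR_FLAG_BO_DATA	(1u << 0)
#define XE_MIGRATE_CLEAR_FLAG_CCS_DATA	(1u << 1)
#define XE_MIGRATE_CLEAR_FLAG_FULL	(XE_MIGRATE_CLEAR_FLAG_BO_DATA | \
					 XE_MIGRATE_CLEAR_FLAG_CCS_DATA)

/* Mirrors the flag handling at the top of xe_migrate_clear(). */
static void describe(unsigned int clear_flags, bool is_dgfx)
{
	bool clear_bo_data = clear_flags & XE_MIGRATE_CLEAR_FLAG_BO_DATA;
	bool clear_ccs = clear_flags & XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
	bool clear_only_system_ccs = !clear_bo_data && clear_ccs && !is_dgfx;

	printf("flags=0x%x: bo_data=%d ccs=%d only_system_ccs=%d\n",
	       clear_flags, clear_bo_data, clear_ccs, clear_only_system_ccs);
}

int main(void)
{
	describe(XE_MIGRATE_CLEAR_FLAG_FULL, true);	/* typical full clear */
	describe(XE_MIGRATE_CLEAR_FLAG_CCS_DATA, false);/* CCS only, integrated */
	return 0;
}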
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f5bdb540e823..3fd462fda625 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -29,9 +29,8 @@ static void tiles_fini(void *arg)
struct xe_tile *tile;
int id;
- for_each_tile(tile, xe, id)
- if (tile != xe_device_get_root_tile(xe))
- tile->mmio.regs = NULL;
+ for_each_remote_tile(tile, xe, id)
+ tile->mmio.regs = NULL;
}
/*
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 499540add465..bfc3deebdaa2 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -8,14 +8,17 @@
#include <linux/init.h>
#include <linux/module.h>
+#include <drm/drm_module.h>
+
#include "xe_drv.h"
#include "xe_hw_fence.h"
#include "xe_pci.h"
+#include "xe_pm.h"
#include "xe_observation.h"
#include "xe_sched_job.h"
struct xe_modparam xe_modparam = {
- .enable_display = true,
+ .probe_display = true,
.guc_log_level = 5,
.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
.wedged_mode = 1,
@@ -25,8 +28,8 @@ struct xe_modparam xe_modparam = {
module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
-module_param_named(enable_display, xe_modparam.enable_display, bool, 0444);
-MODULE_PARM_DESC(enable_display, "Enable display");
+module_param_named(probe_display, xe_modparam.probe_display, bool, 0444);
+MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)");
module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600);
MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
@@ -61,13 +64,28 @@ module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600);
MODULE_PARM_DESC(wedged_mode,
"Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang");
+static int xe_check_nomodeset(void)
+{
+ if (drm_firmware_drivers_only())
+ return -ENODEV;
+
+ return 0;
+}
+
struct init_funcs {
int (*init)(void);
void (*exit)(void);
};
+static void xe_dummy_exit(void)
+{
+}
+
static const struct init_funcs init_funcs[] = {
{
+ .init = xe_check_nomodeset,
+ },
+ {
.init = xe_hw_fence_module_init,
.exit = xe_hw_fence_module_exit,
},
@@ -83,17 +101,41 @@ static const struct init_funcs init_funcs[] = {
.init = xe_observation_sysctl_register,
.exit = xe_observation_sysctl_unregister,
},
+ {
+ .init = xe_pm_module_init,
+ .exit = xe_dummy_exit,
+ },
};
+static int __init xe_call_init_func(unsigned int i)
+{
+ if (WARN_ON(i >= ARRAY_SIZE(init_funcs)))
+ return 0;
+ if (!init_funcs[i].init)
+ return 0;
+
+ return init_funcs[i].init();
+}
+
+static void xe_call_exit_func(unsigned int i)
+{
+ if (WARN_ON(i >= ARRAY_SIZE(init_funcs)))
+ return;
+ if (!init_funcs[i].exit)
+ return;
+
+ init_funcs[i].exit();
+}
+
static int __init xe_init(void)
{
int err, i;
for (i = 0; i < ARRAY_SIZE(init_funcs); i++) {
- err = init_funcs[i].init();
+ err = xe_call_init_func(i);
if (err) {
while (i--)
- init_funcs[i].exit();
+ xe_call_exit_func(i);
return err;
}
}
@@ -106,7 +148,7 @@ static void __exit xe_exit(void)
int i;
for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
- init_funcs[i].exit();
+ xe_call_exit_func(i);
}
module_init(xe_init);
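The module init rework keeps a single table of init/exit pairs, tolerates a missing callback on either side (the nomodeset check has no exit, xe_pm gets a dummy one), and on failure unwinds only the entries that already initialized, in reverse order. The same structure in a self-contained example with invented callbacks:

#include <stdio.h>

struct init_funcs {
	int (*init)(void);
	void (*exit)(void);
};

static int init_a(void) { printf("init a\n"); return 0; }
static void exit_a(void) { printf("exit a\n"); }
static int init_b_fails(void) { printf("init b -> fail\n"); return -1; }

static const struct init_funcs init_funcs[] = {
	{ .init = init_a, .exit = exit_a },
	{ .init = init_b_fails },	/* no exit callback needed */
};

static int call_init(int i)
{
	return init_funcs[i].init ? init_funcs[i].init() : 0;
}

static void call_exit(int i)
{
	if (init_funcs[i].exit)
		init_funcs[i].exit();
}

int main(void)
{
	int n = sizeof(init_funcs) / sizeof(init_funcs[0]);
	int err, i;

	for (i = 0; i < n; i++) {
		err = call_init(i);
		if (err) {
			while (i--)	/* unwind in reverse order */
				call_exit(i);
			return 1;
		}
	}
	return 0;
}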
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 61a0d28a28c8..161a5e6f717f 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -11,7 +11,7 @@
/* Module modprobe variables */
struct xe_modparam {
bool force_execlist;
- bool enable_display;
+ bool probe_display;
u32 force_vram_bar_size;
int guc_log_level;
char *guc_firmware_path;
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 3ef92eb8fbb1..4d4541e0b24c 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1244,8 +1244,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY,
VM_MAYWRITE | VM_MAYEXEC);
- xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages ==
- (vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
+ xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma));
for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
PAGE_SIZE, vma->vm_page_prot);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 3c4a3c91377a..7eb00a87b786 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -338,14 +338,12 @@ static const struct xe_device_desc mtl_desc = {
static const struct xe_device_desc lnl_desc = {
PLATFORM(LUNARLAKE),
.has_display = true,
- .require_force_probe = true,
};
static const struct xe_device_desc bmg_desc = {
DGFX_FEATURES,
PLATFORM(BATTLEMAGE),
.has_display = true,
- .require_force_probe = true,
.has_heci_cscfi = 1,
};
@@ -616,9 +614,9 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.skip_mtcfg = desc->skip_mtcfg;
xe->info.skip_pcode = desc->skip_pcode;
- xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
- xe_modparam.enable_display &&
- desc->has_display;
+ xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
+ xe_modparam.probe_display &&
+ desc->has_display;
err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0);
if (err)
@@ -747,7 +745,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
{
struct xe_device *xe;
- xe = pci_get_drvdata(pdev);
+ xe = pdev_to_xe_device(pdev);
if (!xe) /* driver load aborted, nothing to cleanup */
return;
@@ -829,7 +827,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
xe->info.media_name,
xe->info.media_verx100 / 100,
xe->info.media_verx100 % 100,
- str_yes_no(xe->info.enable_display),
+ str_yes_no(xe->info.probe_display),
xe->info.dma_mask_size, xe->info.tile_count,
xe->info.has_heci_gscfi, xe->info.has_heci_cscfi);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index fcfb49af8c89..2e2accd76fb2 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -70,11 +70,34 @@
*/
#ifdef CONFIG_LOCKDEP
-static struct lockdep_map xe_pm_runtime_lockdep_map = {
- .name = "xe_pm_runtime_lockdep_map"
+static struct lockdep_map xe_pm_runtime_d3cold_map = {
+ .name = "xe_rpm_d3cold_map"
+};
+
+static struct lockdep_map xe_pm_runtime_nod3cold_map = {
+ .name = "xe_rpm_nod3cold_map"
};
#endif
+static bool __maybe_unused xe_rpm_reclaim_safe(const struct xe_device *xe)
+{
+ return !xe->d3cold.capable && !xe->info.has_sriov;
+}
+
+static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
+{
+ lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
+ &xe_pm_runtime_nod3cold_map :
+ &xe_pm_runtime_d3cold_map);
+}
+
+static void xe_rpm_lockmap_release(const struct xe_device *xe)
+{
+ lock_map_release(xe_rpm_reclaim_safe(xe) ?
+ &xe_pm_runtime_nod3cold_map :
+ &xe_pm_runtime_d3cold_map);
+}
+
/**
* xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
* @xe: xe device instance
@@ -354,7 +377,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
* annotation here and in xe_pm_runtime_get() lockdep will see
* the potential lock inversion and give us a nice splat.
*/
- lock_map_acquire(&xe_pm_runtime_lockdep_map);
+ xe_rpm_lockmap_acquire(xe);
/*
* Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
@@ -366,6 +389,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_bo_runtime_pm_release_mmap_offset(bo);
mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ xe_display_pm_runtime_suspend(xe);
+
if (xe->d3cold.allowed) {
xe_display_pm_suspend(xe, true);
@@ -387,7 +412,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
out:
if (err)
xe_display_pm_resume(xe, true);
- lock_map_release(&xe_pm_runtime_lockdep_map);
+ xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
return err;
}
@@ -408,7 +433,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
- lock_map_acquire(&xe_pm_runtime_lockdep_map);
+ xe_rpm_lockmap_acquire(xe);
if (xe->d3cold.allowed) {
err = xe_pcode_ready(xe, true);
@@ -431,14 +456,16 @@ int xe_pm_runtime_resume(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
+ xe_display_pm_runtime_resume(xe);
+
if (xe->d3cold.allowed) {
- xe_display_pm_resume(xe, true);
err = xe_bo_restore_user(xe);
if (err)
goto out;
}
+
out:
- lock_map_release(&xe_pm_runtime_lockdep_map);
+ xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
return err;
}
@@ -452,15 +479,37 @@ out:
* stuff that can happen inside the runtime_resume callback by acquiring
* a dummy lock (it doesn't protect anything and gets compiled out on
* non-debug builds). Lockdep then only needs to see the
- * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can
- * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map.
+ * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
+ * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
* For example if the (callers_locks) are ever grabbed in the
* runtime_resume callback, lockdep should give us a nice splat.
*/
-static void pm_runtime_lockdep_prime(void)
+static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
- lock_map_acquire(&xe_pm_runtime_lockdep_map);
- lock_map_release(&xe_pm_runtime_lockdep_map);
+ xe_rpm_lockmap_acquire(xe);
+ xe_rpm_lockmap_release(xe);
+}
+
+/*
+ * Prime the lockdep maps for known locking orders that need to
+ * be supported but that may not always occur on all systems.
+ */
+static void xe_pm_runtime_lockdep_prime(void)
+{
+ struct dma_resv lockdep_resv;
+
+ dma_resv_init(&lockdep_resv);
+ lock_map_acquire(&xe_pm_runtime_d3cold_map);
+ /* D3Cold takes the dma_resv locks to evict bos */
+ dma_resv_lock(&lockdep_resv, NULL);
+ dma_resv_unlock(&lockdep_resv);
+ lock_map_release(&xe_pm_runtime_d3cold_map);
+
+ /* Shrinkers might like to wake up the device under reclaim. */
+ fs_reclaim_acquire(GFP_KERNEL);
+ lock_map_acquire(&xe_pm_runtime_nod3cold_map);
+ lock_map_release(&xe_pm_runtime_nod3cold_map);
+ fs_reclaim_release(GFP_KERNEL);
}
/**
@@ -475,7 +524,7 @@ void xe_pm_runtime_get(struct xe_device *xe)
if (xe_pm_read_callback_task(xe) == current)
return;
- pm_runtime_lockdep_prime();
+ xe_rpm_might_enter_cb(xe);
pm_runtime_resume(xe->drm.dev);
}
@@ -507,7 +556,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe)
if (WARN_ON(xe_pm_read_callback_task(xe) == current))
return -ELOOP;
- pm_runtime_lockdep_prime();
+ xe_rpm_might_enter_cb(xe);
return pm_runtime_get_sync(xe->drm.dev);
}
@@ -575,7 +624,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
return true;
}
- pm_runtime_lockdep_prime();
+ xe_rpm_might_enter_cb(xe);
return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}
@@ -667,3 +716,14 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
drm_dbg(&xe->drm,
"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
}
+
+/**
+ * xe_pm_module_init() - Perform xe_pm specific module initialization.
+ *
+ * Return: 0 on success. Currently doesn't fail.
+ */
+int __init xe_pm_module_init(void)
+{
+ xe_pm_runtime_lockdep_prime();
+ return 0;
+}
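
The acquire-side helper that the hunks above switch to is not part of this excerpt; judging from xe_rpm_lockmap_release(), it presumably mirrors the same map selection, using xe_rpm_reclaim_safe() to pick the no-d3cold map on reclaim-safe devices (a sketch, not the exact hunk):

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
        /* Reclaim-safe (non-d3cold-capable) devices get their own lockdep map,
         * so their dependencies are not conflated with the d3cold path. */
        lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
                         &xe_pm_runtime_nod3cold_map :
                         &xe_pm_runtime_d3cold_map);
}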
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 104a21ae6dfd..9aef673b1c8a 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -32,5 +32,6 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
+int xe_pm_module_init(void);
#endif
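
xe_pm_module_init() still needs a caller at module load; the series presumably hooks it into the driver's one-time init table in xe_module.c. A sketch, assuming the existing init_funcs[] arrangement there:

/* xe_module.c (sketch): run the PM lockdep priming once, when xe.ko loads. */
static const struct init_funcs init_funcs[] = {
        /* ... existing entries ... */
        {
                .init = xe_pm_module_init,
        },
};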
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 97a6a0b0b8ba..579ed31b46db 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1149,10 +1149,12 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
return err;
}
- if (job)
- err = xe_sched_job_last_fence_add_dep(job, vm);
- else
- err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm);
+ if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) {
+ if (job)
+ err = xe_sched_job_last_fence_add_dep(job, vm);
+ else
+ err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm);
+ }
for (i = 0; job && !err && i < vops->num_syncs; i++)
err = xe_sync_entry_add_deps(&vops->syncs[i], job);
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 655af89b31a9..dca374b6521c 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -26,7 +26,6 @@
#include <linux/scatterlist.h>
-#include <drm/drm_mm.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
#include <drm/ttm/ttm_resource.h>
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index f3060979e63f..fe2cb2a96f78 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -25,10 +25,9 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
drm_suballoc_manager_fini(&sa_manager->base);
- if (bo->vmap.is_iomem)
+ if (sa_manager->is_iomem)
kvfree(sa_manager->cpu_ptr);
- xe_bo_unpin_map_no_vm(bo);
sa_manager->bo = NULL;
}
@@ -47,16 +46,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
sa_manager->bo = NULL;
- bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_managed_bo_create_pin_map(xe, tile, size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(bo)) {
drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
PTR_ERR(bo));
return (struct xe_sa_manager *)bo;
}
sa_manager->bo = bo;
+ sa_manager->is_iomem = bo->vmap.is_iomem;
drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
@@ -64,7 +64,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
if (bo->vmap.is_iomem) {
sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
if (!sa_manager->cpu_ptr) {
- xe_bo_unpin_map_no_vm(sa_manager->bo);
sa_manager->bo = NULL;
return ERR_PTR(-ENOMEM);
}
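
The explicit xe_bo_unpin_map_no_vm() calls can be dropped because xe_managed_bo_create_pin_map() hands teardown to drmm. Roughly, and with a hypothetical release helper standing in for the real one in xe_bo.c, it amounts to:

/* Hypothetical drmm release action; the real helper lives in xe_bo.c. */
static void managed_bo_release(struct drm_device *drm, void *arg)
{
        xe_bo_unpin_map_no_vm(arg);
}

struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
                                           size_t size, u32 flags)
{
        struct xe_bo *bo;
        int ret;

        bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
        if (IS_ERR(bo))
                return bo;

        /* Teardown is now owned by drmm, which is why xe_sa_bo_manager_fini()
         * above no longer unpins/unmaps the BO itself. */
        ret = drmm_add_action_or_reset(&xe->drm, managed_bo_release, bo);
        if (ret)
                return ERR_PTR(ret);

        return bo;
}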
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
index 2ef896aeca1d..2b070ff1292e 100644
--- a/drivers/gpu/drm/xe/xe_sa_types.h
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -14,6 +14,7 @@ struct xe_sa_manager {
struct xe_bo *bo;
u64 gpu_addr;
void *cpu_ptr;
+ bool is_iomem;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 9628f9deb3c0..55d47450b2c6 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -89,8 +89,7 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job)
if (ptrs->lrc_fence)
xe_lrc_free_seqno_fence(ptrs->lrc_fence);
- if (ptrs->chain_fence)
- dma_fence_chain_free(ptrs->chain_fence);
+ dma_fence_chain_free(ptrs->chain_fence);
}
}
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 3aa6270e5dd7..436faff09bac 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -55,7 +55,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
struct xe_user_fence *ufence;
u64 __user *ptr = u64_to_user_ptr(addr);
- if (!access_ok(ptr, sizeof(ptr)))
+ if (!access_ok(ptr, sizeof(*ptr)))
return ERR_PTR(-EFAULT);
ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
@@ -206,16 +206,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
{
- int err;
-
- if (sync->fence) {
- err = drm_sched_job_add_dependency(&job->drm,
- dma_fence_get(sync->fence));
- if (err) {
- dma_fence_put(sync->fence);
- return err;
- }
- }
+ if (sync->fence)
+ return drm_sched_job_add_dependency(&job->drm,
+ dma_fence_get(sync->fence));
return 0;
}
@@ -256,10 +249,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
{
if (sync->syncobj)
drm_syncobj_put(sync->syncobj);
- if (sync->fence)
- dma_fence_put(sync->fence);
- if (sync->chain_fence)
- dma_fence_chain_free(sync->chain_fence);
+ dma_fence_put(sync->fence);
+ dma_fence_chain_free(sync->chain_fence);
if (sync->ufence)
user_fence_put(sync->ufence);
}
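
The dropped NULL checks here and in xe_sched_job_free_fences() rely on the callees tolerating NULL: dma_fence_put() is a no-op for a NULL fence, and dma_fence_chain_free() reduces to kfree_rcu(), which also skips NULL pointers. Paraphrasing the dma-fence headers (not part of this patch):

static inline void dma_fence_put(struct dma_fence *fence)
{
        if (fence)                      /* NULL-tolerant by design */
                kref_put(&fence->refcount, dma_fence_release);
}

static inline void dma_fence_chain_free(struct dma_fence_chain *chain)
{
        kfree_rcu(chain, base.rcu);     /* kfree_rcu() ignores NULL */
}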
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index f46fd2df84de..f7113cf6109d 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -5,7 +5,6 @@
*/
#include <drm/drm_managed.h>
-#include <drm/drm_mm.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 77d4eec0118d..faa1bf42e50e 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -39,12 +39,23 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
},
{ XE_RTP_NAME("Tuning: Compression Overfetch"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
- XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)),
+ XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
+ SET(CCCHKNREG1, L3CMPCTRL))
},
{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
},
+ { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(SET(L3SQCREG2,
+ COMPMEMRD256BOVRFETCHEN))
+ },
+ { XE_RTP_NAME("Tuning: Stateless compression control"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
+ REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
+ },
{}
};
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 5b70d23724c4..4bb2a4a80ddc 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -15,6 +15,7 @@
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_guc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_module.h"
@@ -105,15 +106,16 @@ struct fw_blobs_by_type {
};
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \
- fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \
- fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \
- fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \
- fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \
- fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \
- fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \
- fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \
- fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2))
+ fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \
+ fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \
+ fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \
+ fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \
+ fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \
+ fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \
+ fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \
+ fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \
+ fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \
+ fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2))
#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \
@@ -309,10 +311,10 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC);
- /* We don't support GuC releases older than 70.19 */
- if (release->major < 70 || (release->major == 70 && release->minor < 19)) {
- xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n",
- release->major, release->minor);
+ /* We don't support GuC releases older than 70.29.2 */
+ if (MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2)) {
+ xe_gt_err(gt, "Unsupported GuC v%u.%u.%u! v70.29.2 or newer is required\n",
+ release->major, release->minor, release->patch);
return -EINVAL;
}
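
MAKE_GUC_VER() and MAKE_GUC_VER_STRUCT() come from the newly included xe_guc.h and pack major/minor/patch into a single comparable integer, presumably along these lines:

#define MAKE_GUC_VER(maj, min, pat)     (((maj) << 16) | ((min) << 8) | (pat))
#define MAKE_GUC_VER_STRUCT(ver)        MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch)

/* With that, the check above is a plain three-component version compare:
 * MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2). */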
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index dab2a3b2e17f..4cc13eddb6b3 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -275,6 +275,8 @@ out_up_write:
* xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
* @vm: The VM.
* @q: The exec_queue
+ *
+ * Note that this function might be called multiple times on the same queue.
*/
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
@@ -282,8 +284,10 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
return;
down_write(&vm->lock);
- list_del(&q->lr.link);
- --vm->preempt.num_exec_queues;
+ if (!list_empty(&q->lr.link)) {
+ list_del_init(&q->lr.link);
+ --vm->preempt.num_exec_queues;
+ }
if (q->lr.pfence) {
dma_fence_enable_sw_signaling(q->lr.pfence);
dma_fence_put(q->lr.pfence);
@@ -1191,7 +1195,7 @@ static const struct drm_gpuvm_ops gpuvm_ops = {
.vm_free = xe_vm_free,
};
-static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
+static u64 pde_encode_pat_index(u16 pat_index)
{
u64 pte = 0;
@@ -1204,8 +1208,7 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
return pte;
}
-static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
- u32 pt_level)
+static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
u64 pte = 0;
@@ -1246,12 +1249,11 @@ static u64 pte_encode_ps(u32 pt_level)
static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
const u16 pat_index)
{
- struct xe_device *xe = xe_bo_device(bo);
u64 pde;
pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pde |= pde_encode_pat_index(xe, pat_index);
+ pde |= pde_encode_pat_index(pat_index);
return pde;
}
@@ -1259,12 +1261,11 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
u16 pat_index, u32 pt_level)
{
- struct xe_device *xe = xe_bo_device(bo);
u64 pte;
pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+ pte |= pte_encode_pat_index(pat_index, pt_level);
pte |= pte_encode_ps(pt_level);
if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
@@ -1276,14 +1277,12 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
u16 pat_index, u32 pt_level)
{
- struct xe_device *xe = xe_vma_vm(vma)->xe;
-
pte |= XE_PAGE_PRESENT;
if (likely(!xe_vma_read_only(vma)))
pte |= XE_PAGE_RW;
- pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+ pte |= pte_encode_pat_index(pat_index, pt_level);
pte |= pte_encode_ps(pt_level);
if (unlikely(xe_vma_is_null(vma)))
@@ -1303,7 +1302,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
pte = addr;
pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+ pte |= pte_encode_pat_index(pat_index, pt_level);
pte |= pte_encode_ps(pt_level);
if (devmem)
@@ -1483,19 +1482,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
/* Kernel migration VM shouldn't have a circular loop.. */
if (!(flags & XE_VM_FLAG_MIGRATION)) {
for_each_tile(tile, xe, id) {
- struct xe_gt *gt = tile->primary_gt;
- struct xe_vm *migrate_vm;
struct xe_exec_queue *q;
u32 create_flags = EXEC_QUEUE_FLAG_VM;
if (!vm->pt_root[id])
continue;
- migrate_vm = xe_migrate_get_vm(tile->migrate);
- q = xe_exec_queue_create_class(xe, gt, migrate_vm,
- XE_ENGINE_CLASS_COPY,
- create_flags);
- xe_vm_put(migrate_vm);
+ q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
if (IS_ERR(q)) {
err = PTR_ERR(q);
goto err_close;
@@ -1508,13 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
if (number_tiles > 1)
vm->composite_fence_ctx = dma_fence_context_alloc(1);
- mutex_lock(&xe->usm.lock);
- if (flags & XE_VM_FLAG_FAULT_MODE)
- xe->usm.num_vm_in_fault_mode++;
- else if (!(flags & XE_VM_FLAG_MIGRATION))
- xe->usm.num_vm_in_non_fault_mode++;
- mutex_unlock(&xe->usm.lock);
-
trace_xe_vm_create(vm);
return vm;
@@ -1628,11 +1614,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
up_write(&vm->lock);
mutex_lock(&xe->usm.lock);
- if (vm->flags & XE_VM_FLAG_FAULT_MODE)
- xe->usm.num_vm_in_fault_mode--;
- else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
- xe->usm.num_vm_in_non_fault_mode--;
-
if (vm->usm.asid) {
void *lookup;
@@ -1770,14 +1751,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
- xe_device_in_non_fault_mode(xe)))
- return -EINVAL;
-
- if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
- xe_device_in_fault_mode(xe)))
- return -EINVAL;
-
if (XE_IOCTL_DBG(xe, args->extensions))
return -EINVAL;
@@ -3185,9 +3158,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
{
struct xe_device *xe = xe_vma_vm(vma)->xe;
struct xe_tile *tile;
- struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE];
- u32 tile_needs_invalidate = 0;
+ struct xe_gt_tlb_invalidation_fence
+ fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
u8 id;
+ u32 fence_id = 0;
int ret = 0;
xe_assert(xe, !xe_vma_is_null(vma));
@@ -3215,27 +3189,37 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
if (xe_pt_zap_ptes(tile, vma)) {
xe_device_wmb(xe);
xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
- &fence[id], true);
+ &fence[fence_id],
+ true);
- /*
- * FIXME: We potentially need to invalidate multiple
- * GTs within the tile
- */
ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
- &fence[id], vma);
+ &fence[fence_id], vma);
if (ret < 0) {
- xe_gt_tlb_invalidation_fence_fini(&fence[id]);
+ xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
goto wait;
}
+ ++fence_id;
+
+ if (!tile->media_gt)
+ continue;
- tile_needs_invalidate |= BIT(id);
+ xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+ &fence[fence_id],
+ true);
+
+ ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
+ &fence[fence_id], vma);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+ goto wait;
+ }
+ ++fence_id;
}
}
wait:
- for_each_tile(tile, xe, id)
- if (tile_needs_invalidate & BIT(id))
- xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+ for (id = 0; id < fence_id; ++id)
+ xe_gt_tlb_invalidation_fence_wait(&fence[id]);
vma->tile_invalidated = vma->tile_mask;
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 5b9e5a1857ea..28b7f95b6c2f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -557,16 +557,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
- /* Xe2_LPM */
-
- { XE_RTP_NAME("16021639441"),
- XE_RTP_RULES(MEDIA_VERSION(2000)),
- XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
- GHWSP_CSB_REPORT_DIS |
- PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
- XE_RTP_ACTION_FLAG(ENGINE_BASE)))
- },
-
/* Xe2_HPM */
{ XE_RTP_NAME("16021639441"),
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 540d38603f32..920ca5060146 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -27,7 +27,13 @@
16022287689 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
13011645652 GRAPHICS_VERSION(2004)
+14022293748 GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2004)
+22019794406 GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
22019338487_display PLATFORM(LUNARLAKE)
16023588340 GRAPHICS_VERSION(2001)
+14019789679 GRAPHICS_VERSION(1255)
+ GRAPHICS_VERSION_RANGE(1270, 2004)
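
The new rules are compiled into bit definitions (generated/xe_wa_oob.h) at build time and consulted in C through the XE_WA() helper; a hypothetical consumer would look roughly like:

#include <generated/xe_wa_oob.h>

#include "xe_wa.h"

/* Hypothetical example: branch when OOB workaround 14022293748 was matched
 * for this GT during workaround processing at init time. */
static void maybe_apply_wa(struct xe_gt *gt)
{
        if (XE_WA(gt, 14022293748)) {
                /* workaround-specific handling */
        }
}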