summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-08-05 10:47:30 +0200
committerDave Airlie <airlied@redhat.com>2024-08-05 10:47:43 +0200
commita4172af3040cdc207f1b60efffcdd219156093c9 (patch)
treee1b6db607bbbcc487a1314f1ab1fd8024e78f32b /drivers
parentLinux 6.11-rc2 (diff)
parentdrm/xe/oa/uapi: Make bit masks unsigned (diff)
downloadlinux-a4172af3040cdc207f1b60efffcdd219156093c9.tar.xz
linux-a4172af3040cdc207f1b60efffcdd219156093c9.zip
Merge tag 'drm-xe-next-2024-07-30' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
drm-xe-next for 6.12 UAPI Changes: - Rename xe perf layer as xe observation layer, but was also made available via fixes to previous verison (Ashutosh) - Use write-back caching mode for system memory on DGFX, but was also mad available via fixes to previous version (Thomas) - Expose SIMD16 EU mask in topology query for userspace to know the type of EU, as available in PVC, Lunar Lake and Battlemage (Lucas) - Return ENOBUFS instead of ENOMEM in vm_bind if failure is tied to an array of binds (Matthew Brost) Driver Changes: - Log cleanup moving messages to debug priority (Michal Wajdeczko) - Add timeout to fences to adhere to dma_buf rules (Matthew Brost) - Rename old engine nomenclature to exec_queue (Matthew Brost) - Convert multiple bind ops to 1 job (Matthew Brost) - Add error injection for vm bind to help testing error path (Matthew Brost) - Fix error handling in page table to propagate correctly to userspace (Matthew Brost) - Re-organize and cleanup SR-IOV related registers (Michal Wajdeczko) - Make the device write barrier compatible with VF (Michal Wajdeczko) - New display workarounds for Battlemage (Matthew Auld) - New media workarounds for Lunar Lake and Battlemage (Ngai-Mint Kwan) - New graphics workarounds for Lunar Lake (Bommu Krishnaiah) - Tracepoint updates (Matthew Brost, Nirmoy Das) - Cleanup the header generation for OOB workarounds (Lucas De Marchi) - Fix leaking HDCP-related object (Nirmoy Das) - Serialize L2 flushes to avoid races (Tejas Upadhyay) - Log pid and comm on job timeout (José Roberto de Souza) - Simplify boilerplate code for live kunit (Michal Wajdeczko) - Improve kunit skips for live kunit (Michal Wajdeczko) - Fix xe_sync cleanup when handling xe_exec ioctl (Ashutosh Dixit) - Limit fair VF LMEM provisioning (Michal Wajdeczko) - New workaround to fence mmio writes in Lunar Lake (Tejas Upadhyay) - Warn on writes inaccessible register in VF (Michal Wajdeczko) - Fix register lookup in VF (Michal Wajdeczko) - Add GSC support for Battlemage (Alexander Usyskin) - Fix wedging only the GT in which timeout occurred (Matthew Brost) - Block device suspend when wedging (Matthew Brost) - Handle compression and migration changes for Battlemage (Akshata Jahagirdar) - Limit access of stolen memory for Lunar Lake (Uma Shankar) - Fail invalid addresses during user fence creation (Matthew Brost) - Refcount xe_file to safely and accurately store fdinfo stats (Umesh Nerlige Ramappa) - Cleanup and fix PM reference for TLB invalidation code (Matthew Brost) - Fix PM reference handling when communicating with GuC (Matthew Brost) - Add new BO flag for 2 MiB alignement and use in VF (Michal Wajdeczko) - Simplify MMIO setup for multi-tile platforms (Lucas De Marchi) - Add check for uninitialized access to OOB workarounds (Lucas De Marchi) - New GSC and HuC firmware blobs for Lunar Lake and Battlemage (Daniele Ceraolo Spurio) - Unify mmio wait logic (Gustavo Sousa) - Fix off-by-one when processing RTP rules (Lucas De Marchi) - Future-proof migrate logic with compressed PAT flag (Matt Roper) - Add WA kunit tests for Battlemage (Lucas De Marchi) - Test active tracking for workaorunds with kunit (Lucas De Marchi) - Add kunit tests for RTP with no actions (Lucas De Marchi) - Unify parse of OR rules in RTP (Lucas De Marchi) - Add performance tuning for Battlemage (Sai Teja Pottumuttu) - Make bit masks unsigned (Geert Uytterhoeven) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Lucas De Marchi <lucas.demarchi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/k7xuktfav4zmtxxjr77glu2hszypvzgmzghoumh757nqfnk7kn@ccfi4ts3ytbk
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_wa.h8
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c6
-rw-r--r--drivers/gpu/drm/xe/Makefile23
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c6
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_wa.c16
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c8
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c3
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c6
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h15
-rw-r--r--drivers/gpu/drm/xe/regs/xe_regs.h12
-rw-r--r--drivers/gpu/drm/xe/regs/xe_sriov_regs.h23
-rw-r--r--drivers/gpu/drm/xe/tests/Makefile6
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c45
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo_test.c21
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo_test.h14
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c26
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf_test.c20
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf_test.h13
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.c39
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.h2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c11
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c424
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate_test.c20
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate_test.h13
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c44
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs_test.c21
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs_test.h14
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c30
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.c4
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.h2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_rtp_test.c219
-rw-r--r--drivers/gpu/drm/xe/tests/xe_test.h10
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c1
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c11
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h5
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c10
-rw-r--r--drivers/gpu/drm/xe/xe_device.c111
-rw-r--r--drivers/gpu/drm/xe/xe_device.h9
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h30
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c5
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c33
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.h2
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h13
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c3
-rw-r--r--drivers/gpu/drm/xe/xe_gen_wa_oob.c16
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c54
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c205
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h12
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c27
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h25
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c11
-rw-r--r--drivers/gpu/drm/xe/xe_guc_id_mgr.c4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c56
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.c28
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.h10
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c2
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c4
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c528
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.h34
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c231
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.h1
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c11
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c7
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c8
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c12
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c1310
-rw-r--r--drivers/gpu/drm/xe/xe_pt.h14
-rw-r--r--drivers/gpu/drm/xe/xe_pt_types.h48
-rw-r--r--drivers/gpu/drm/xe/xe_query.c4
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c42
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.h4
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_helpers.h6
-rw-r--r--drivers/gpu/drm/xe/xe_sa.c7
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c20
-rw-r--r--drivers/gpu/drm/xe/xe_sync.h1
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h52
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.h10
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c8
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c3
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c699
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h2
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h55
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c15
-rw-r--r--drivers/gpu/drm/xe/xe_wa.h7
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules2
93 files changed, 3260 insertions, 1734 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.h b/drivers/gpu/drm/i915/display/intel_display_wa.h
index 63201d09852c..be644ab6ae00 100644
--- a/drivers/gpu/drm/i915/display/intel_display_wa.h
+++ b/drivers/gpu/drm/i915/display/intel_display_wa.h
@@ -6,8 +6,16 @@
#ifndef __INTEL_DISPLAY_WA_H__
#define __INTEL_DISPLAY_WA_H__
+#include <linux/types.h>
+
struct drm_i915_private;
void intel_display_wa_apply(struct drm_i915_private *i915);
+#ifdef I915
+static inline bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915) { return false; }
+#else
+bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915);
+#endif
+
#endif
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 67116c9f1464..8488f82143a4 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -56,6 +56,7 @@
#include "intel_display_device.h"
#include "intel_display_trace.h"
#include "intel_display_types.h"
+#include "intel_display_wa.h"
#include "intel_fbc.h"
#include "intel_fbc_regs.h"
#include "intel_frontbuffer.h"
@@ -1237,6 +1238,11 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
return 0;
}
+ if (intel_display_needs_wa_16023588340(i915)) {
+ plane_state->no_fbc_reason = "Wa_16023588340";
+ return 0;
+ }
+
/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
if (i915_vtd_active(i915) && (IS_SKYLAKE(i915) || IS_BROXTON(i915))) {
plane_state->no_fbc_reason = "VT-d enabled";
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 628c245c4822..1ff9602a52f6 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -12,32 +12,15 @@ subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
subdir-ccflags-y += -I$(obj) -I$(src)
# generated sources
-hostprogs := xe_gen_wa_oob
+hostprogs := xe_gen_wa_oob
generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h
-
quiet_cmd_wa_oob = GEN $(notdir $(generated_oob))
cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob)
-
$(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
$(src)/xe_wa_oob.rules
$(call cmd,wa_oob)
-uses_generated_oob := \
- $(obj)/xe_ggtt.o \
- $(obj)/xe_gsc.o \
- $(obj)/xe_gt.o \
- $(obj)/xe_guc.o \
- $(obj)/xe_guc_ads.o \
- $(obj)/xe_guc_pc.o \
- $(obj)/xe_migrate.o \
- $(obj)/xe_ring_ops.o \
- $(obj)/xe_vm.o \
- $(obj)/xe_wa.o \
- $(obj)/xe_ttm_stolen_mgr.o
-
-$(uses_generated_oob): $(generated_oob)
-
# Please keep these build lists sorted!
# core driver code
@@ -192,6 +175,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/xe_display.o \
display/xe_display_misc.o \
display/xe_display_rps.o \
+ display/xe_display_wa.o \
display/xe_dsb_buffer.o \
display/xe_fb_pin.o \
display/xe_hdcp_gsc.o \
@@ -320,3 +304,6 @@ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
$(obj)/%.hdrtest: $(src)/%.h FORCE
$(call if_changed_dep,hdrtest)
+
+uses_generated_oob := $(addprefix $(obj)/, $(xe-y))
+$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index 816ad13821a8..cd8948c08661 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -10,6 +10,9 @@
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_ttm_stolen_mgr.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
@@ -37,7 +40,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
size = PAGE_ALIGN(size);
obj = ERR_PTR(-ENODEV);
- if (!IS_DGFX(xe)) {
+ if (!IS_DGFX(xe) && !XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) {
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
@@ -48,6 +51,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
else
drm_info(&xe->drm, "Allocated fbdev into stolen failed: %li\n", PTR_ERR(obj));
}
+
if (IS_ERR(obj)) {
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c
new file mode 100644
index 000000000000..68e3d1959ad6
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_wa.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "intel_display_wa.h"
+
+#include "xe_device.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
+
+bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915)
+{
+ return XE_WA(xe_root_mmio_gt(i915), 16023588340);
+}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index 9e860c61f4b3..ccd0d87d438a 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -7,6 +7,8 @@
#include "intel_display_types.h"
#include "intel_dsb_buffer.h"
#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
#include "xe_gt.h"
u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
+ xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
+ xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 423f367c7065..d7db44e79eaf 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -10,6 +10,7 @@
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "xe_bo.h"
+#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_pm.h"
@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return vma;
err_unpin:
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index 5eccd6abb3ef..a50ab9eae40a 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -18,6 +18,9 @@
#include "intel_frontbuffer.h"
#include "intel_plane_initial.h"
#include "xe_bo.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
static bool
intel_reuse_initial_plane_obj(struct intel_crtc *this,
@@ -104,6 +107,9 @@ initial_plane_bo(struct xe_device *xe,
phys_base = base;
flags |= XE_BO_FLAG_STOLEN;
+ if (XE_WA(xe_root_mmio_gt(xe), 22019338487_display))
+ return NULL;
+
/*
* If the FB is too big, just don't use it since fbdev is not very
* important and we should probably use that space with FBC or other
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d44564bad009..3b87f95f9ecf 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -80,6 +80,9 @@
#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
+#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
+#define CG_DIS_CNTLBUS REG_BIT(6)
+
#define CCS_AUX_INV XE_REG(0x4208)
#define VD0_AUX_INV XE_REG(0x4218)
@@ -88,6 +91,8 @@
#define VE1_AUX_INV XE_REG(0x42b8)
#define AUX_INV REG_BIT(0)
+#define XE2_LMEM_CFG XE_REG(0x48b0)
+
#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4)
#define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910)
#define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8)
@@ -103,6 +108,7 @@
#define FF_MODE XE_REG_MCR(0x6210)
#define DIS_TE_AUTOSTRIP REG_BIT(31)
+#define VS_HIT_MAX_VALUE_MASK REG_GENMASK(25, 20)
#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16)
#define DIS_MESH_AUTOSTRIP REG_BIT(15)
@@ -372,6 +378,11 @@
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
+#define XE2_GLOBAL_INVAL XE_REG(0xb404)
+
+#define SCRATCH1LPFC XE_REG(0xb474)
+#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
+
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
#define XE2_TDF_CTRL XE_REG(0xb418)
@@ -395,6 +406,10 @@
#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12)
#define GLOBAL_INVALIDATION_MODE REG_BIT(2)
+#define LMEM_CFG XE_REG(0xcf58)
+#define LMEM_EN REG_BIT(31)
+#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */
+
#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 23e33ec84902..dfa869f0dddd 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -15,8 +15,6 @@
#define GU_MISC_IRQ_OFFSET 0x444f0
#define GU_MISC_GSE REG_BIT(27)
-#define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084)
-
#define GU_CNTL_PROTECTED XE_REG(0x10100C)
#define DRIVERINT_FLR_DIS REG_BIT(31)
@@ -24,11 +22,14 @@
#define LMEM_INIT REG_BIT(7)
#define DRIVERFLR REG_BIT(31)
+#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014)
+#define SGSI_SIDECLK_DIS REG_BIT(17)
+
#define GU_DEBUG XE_REG(0x101018)
#define DRIVERFLR_STATUS REG_BIT(31)
-#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014)
-#define SGSI_SIDECLK_DIS REG_BIT(17)
+#define VIRTUAL_CTRL_REG XE_REG(0x10108c)
+#define GUEST_GTT_UPDATE_EN REG_BIT(8)
#define XEHP_MTCFG_ADDR XE_REG(0x101800)
#define TILE_COUNT REG_GENMASK(15, 8)
@@ -66,6 +67,9 @@
#define DISPLAY_IRQ REG_BIT(16)
#define GT_DW_IRQ(x) REG_BIT(x)
+#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
+#define VF_CAP REG_BIT(0)
+
#define PVC_RP_STATE_CAP XE_REG(0x281014)
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
deleted file mode 100644
index 017b4ddd1ecf..000000000000
--- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _REGS_XE_SRIOV_REGS_H_
-#define _REGS_XE_SRIOV_REGS_H_
-
-#include "regs/xe_reg_defs.h"
-
-#define XE2_LMEM_CFG XE_REG(0x48b0)
-
-#define LMEM_CFG XE_REG(0xcf58)
-#define LMEM_EN REG_BIT(31)
-#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */
-
-#define VIRTUAL_CTRL_REG XE_REG(0x10108c)
-#define GUEST_GTT_UPDATE_EN REG_BIT(8)
-
-#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
-#define VF_CAP REG_BIT(0)
-
-#endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 6e58931fddd4..0e3408f4952c 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -2,11 +2,7 @@
# "live" kunit tests
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_live_test.o
-xe_live_test-y = xe_live_test_mod.o \
- xe_bo_test.o \
- xe_dma_buf_test.o \
- xe_migrate_test.o \
- xe_mocs_test.o
+xe_live_test-y = xe_live_test_mod.o
# Normal kunit tests
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9f3c02826464..1768483da1b7 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,7 +6,7 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
-#include "tests/xe_bo_test.h"
+#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
@@ -154,12 +154,18 @@ out_unlock:
static int ccs_test_run_device(struct xe_device *xe)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
struct xe_tile *tile;
int id;
if (!xe_device_has_flat_ccs(xe)) {
- kunit_info(test, "Skipping non-flat-ccs device.\n");
+ kunit_skip(test, "non-flat-ccs device\n");
+ return 0;
+ }
+
+ /* For xe2+ dgfx, we don't handle ccs metadata */
+ if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
+ kunit_skip(test, "xe2+ dgfx device\n");
return 0;
}
@@ -177,11 +183,12 @@ static int ccs_test_run_device(struct xe_device *xe)
return 0;
}
-void xe_ccs_migrate_kunit(struct kunit *test)
+static void xe_ccs_migrate_kunit(struct kunit *test)
{
- xe_call_for_each_device(ccs_test_run_device);
+ struct xe_device *xe = test->priv;
+
+ ccs_test_run_device(xe);
}
-EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
@@ -325,13 +332,12 @@ cleanup_bo:
static int evict_test_run_device(struct xe_device *xe)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
struct xe_tile *tile;
int id;
if (!IS_DGFX(xe)) {
- kunit_info(test, "Skipping non-discrete device %s.\n",
- dev_name(xe->drm.dev));
+ kunit_skip(test, "non-discrete device\n");
return 0;
}
@@ -345,8 +351,23 @@ static int evict_test_run_device(struct xe_device *xe)
return 0;
}
-void xe_bo_evict_kunit(struct kunit *test)
+static void xe_bo_evict_kunit(struct kunit *test)
{
- xe_call_for_each_device(evict_test_run_device);
+ struct xe_device *xe = test->priv;
+
+ evict_test_run_device(xe);
}
-EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
+
+static struct kunit_case xe_bo_tests[] = {
+ KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_bo_test_suite = {
+ .name = "xe_bo",
+ .test_cases = xe_bo_tests,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
deleted file mode 100644
index a324cde77db8..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#include "xe_bo_test.h"
-
-#include <kunit/test.h>
-
-static struct kunit_case xe_bo_tests[] = {
- KUNIT_CASE(xe_ccs_migrate_kunit),
- KUNIT_CASE(xe_bo_evict_kunit),
- {}
-};
-
-static struct kunit_suite xe_bo_test_suite = {
- .name = "xe_bo",
- .test_cases = xe_bo_tests,
-};
-
-kunit_test_suite(xe_bo_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h
deleted file mode 100644
index 0113ab45066a..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 AND MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_BO_TEST_H_
-#define _XE_BO_TEST_H_
-
-struct kunit;
-
-void xe_ccs_migrate_kunit(struct kunit *test);
-void xe_bo_evict_kunit(struct kunit *test);
-
-#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index e7f9b531c465..c24c8509227e 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -8,7 +8,7 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
-#include "tests/xe_dma_buf_test.h"
+#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "xe_pci.h"
@@ -107,7 +107,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
struct drm_gem_object *import;
struct dma_buf *dmabuf;
@@ -258,7 +258,7 @@ static const struct dma_buf_test_params test_params[] = {
static int dma_buf_run_device(struct xe_device *xe)
{
const struct dma_buf_test_params *params;
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
xe_pm_runtime_get(xe);
for (params = test_params; params->mem_mask; ++params) {
@@ -274,8 +274,22 @@ static int dma_buf_run_device(struct xe_device *xe)
return 0;
}
-void xe_dma_buf_kunit(struct kunit *test)
+static void xe_dma_buf_kunit(struct kunit *test)
{
- xe_call_for_each_device(dma_buf_run_device);
+ struct xe_device *xe = test->priv;
+
+ dma_buf_run_device(xe);
}
-EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit);
+
+static struct kunit_case xe_dma_buf_tests[] = {
+ KUNIT_CASE_PARAM(xe_dma_buf_kunit, xe_pci_live_device_gen_param),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_dma_buf_test_suite = {
+ .name = "xe_dma_buf",
+ .test_cases = xe_dma_buf_tests,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
deleted file mode 100644
index 99cdb718b6c6..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#include "xe_dma_buf_test.h"
-
-#include <kunit/test.h>
-
-static struct kunit_case xe_dma_buf_tests[] = {
- KUNIT_CASE(xe_dma_buf_kunit),
- {}
-};
-
-static struct kunit_suite xe_dma_buf_test_suite = {
- .name = "xe_dma_buf",
- .test_cases = xe_dma_buf_tests,
-};
-
-kunit_test_suite(xe_dma_buf_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
deleted file mode 100644
index e6b464ddd526..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 AND MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_DMA_BUF_TEST_H_
-#define _XE_DMA_BUF_TEST_H_
-
-struct kunit;
-
-void xe_dma_buf_kunit(struct kunit *test);
-
-#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
index fefe79b3b75a..bc5156966ce9 100644
--- a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
@@ -12,7 +12,9 @@
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
+#include "xe_device.h"
#include "xe_device_types.h"
+#include "xe_pm.h"
/**
* xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test.
@@ -88,3 +90,40 @@ int xe_kunit_helper_xe_device_test_init(struct kunit *test)
return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_test_init);
+
+KUNIT_DEFINE_ACTION_WRAPPER(put_xe_pm_runtime, xe_pm_runtime_put, struct xe_device *);
+
+/**
+ * xe_kunit_helper_xe_device_live_test_init - Prepare a &xe_device for
+ * use in a live KUnit test.
+ * @test: the &kunit where live &xe_device will be used
+ *
+ * This function expects pointer to the &xe_device in the &test.param_value,
+ * like it is prepared by the &xe_pci_live_device_gen_param and stores that
+ * pointer as &kunit.priv to allow the test code to access it.
+ *
+ * This function makes sure that device is not wedged and then resumes it
+ * to avoid waking up the device inside the test. It uses deferred cleanup
+ * action to release a runtime_pm reference.
+ *
+ * This function can be used as custom implementation of &kunit_suite.init.
+ *
+ * This function uses KUNIT_ASSERT to detect any failures.
+ *
+ * Return: Always 0.
+ */
+int xe_kunit_helper_xe_device_live_test_init(struct kunit *test)
+{
+ struct xe_device *xe = xe_device_const_cast(test->param_value);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+ kunit_info(test, "running on %s device\n", xe->info.platform_name);
+
+ KUNIT_ASSERT_FALSE(test, xe_device_wedged(xe));
+ xe_pm_runtime_get(xe);
+ KUNIT_ASSERT_EQ(test, 0, kunit_add_action_or_reset(test, put_xe_pm_runtime, xe));
+
+ test->priv = xe;
+ return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_live_test_init);
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
index 067a1babf049..83665f7b1254 100644
--- a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
@@ -14,4 +14,6 @@ struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
struct device *dev);
int xe_kunit_helper_xe_device_test_init(struct kunit *test);
+int xe_kunit_helper_xe_device_live_test_init(struct kunit *test);
+
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
index eb1ea99a5a8b..5f14737c8210 100644
--- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -3,6 +3,17 @@
* Copyright © 2023 Intel Corporation
*/
#include <linux/module.h>
+#include <kunit/test.h>
+
+extern struct kunit_suite xe_bo_test_suite;
+extern struct kunit_suite xe_dma_buf_test_suite;
+extern struct kunit_suite xe_migrate_test_suite;
+extern struct kunit_suite xe_mocs_test_suite;
+
+kunit_test_suite(xe_bo_test_suite);
+kunit_test_suite(xe_dma_buf_test_suite);
+kunit_test_suite(xe_migrate_test_suite);
+kunit_test_suite(xe_mocs_test_suite);
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 962f6438e219..4344a1724029 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -6,7 +6,7 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
-#include "tests/xe_migrate_test.h"
+#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "xe_pci.h"
@@ -334,7 +334,7 @@ vunmap:
static int migrate_test_run_device(struct xe_device *xe)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
struct xe_tile *tile;
int id;
@@ -354,8 +354,422 @@ static int migrate_test_run_device(struct xe_device *xe)
return 0;
}
-void xe_migrate_sanity_kunit(struct kunit *test)
+static void xe_migrate_sanity_kunit(struct kunit *test)
{
- xe_call_for_each_device(migrate_test_run_device);
+ struct xe_device *xe = test->priv;
+
+ migrate_test_run_device(xe);
+}
+
+static struct dma_fence *blt_copy(struct xe_tile *tile,
+ struct xe_bo *src_bo, struct xe_bo *dst_bo,
+ bool copy_only_ccs, const char *str, struct kunit *test)
+{
+ struct xe_gt *gt = tile->primary_gt;
+ struct xe_migrate *m = tile->migrate;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct dma_fence *fence = NULL;
+ u64 size = src_bo->size;
+ struct xe_res_cursor src_it, dst_it;
+ struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
+ u64 src_L0_ofs, dst_L0_ofs;
+ u32 src_L0_pt, dst_L0_pt;
+ u64 src_L0, dst_L0;
+ int err;
+ bool src_is_vram = mem_type_is_vram(src->mem_type);
+ bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+
+ if (!src_is_vram)
+ xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+ else
+ xe_res_first(src, 0, size, &src_it);
+
+ if (!dst_is_vram)
+ xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+ else
+ xe_res_first(dst, 0, size, &dst_it);
+
+ while (size) {
+ u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+ struct xe_sched_job *job;
+ struct xe_bb *bb;
+ u32 flush_flags = 0;
+ u32 update_idx;
+ u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+ u32 pte_flags;
+
+ src_L0 = xe_migrate_res_sizes(m, &src_it);
+ dst_L0 = xe_migrate_res_sizes(m, &dst_it);
+
+ src_L0 = min(src_L0, dst_L0);
+
+ pte_flags = src_is_vram ? (PTE_UPDATE_FLAG_IS_VRAM |
+ PTE_UPDATE_FLAG_IS_COMP_PTE) : 0;
+ batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0,
+ &src_L0_ofs, &src_L0_pt, 0, 0,
+ avail_pts);
+
+ pte_flags = dst_is_vram ? (PTE_UPDATE_FLAG_IS_VRAM |
+ PTE_UPDATE_FLAG_IS_COMP_PTE) : 0;
+ batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0,
+ &dst_L0_ofs, &dst_L0_pt, 0,
+ avail_pts, avail_pts);
+
+ /* Add copy commands size here */
+ batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+ ((xe_device_has_flat_ccs(xe) && copy_only_ccs) ? EMIT_COPY_CCS_DW : 0);
+
+ bb = xe_bb_new(gt, batch_size, xe->info.has_usm);
+ if (IS_ERR(bb)) {
+ err = PTR_ERR(bb);
+ goto err_sync;
+ }
+
+ if (src_is_vram)
+ xe_res_next(&src_it, src_L0);
+ else
+ emit_pte(m, bb, src_L0_pt, src_is_vram, false,
+ &src_it, src_L0, src);
+
+ if (dst_is_vram)
+ xe_res_next(&dst_it, src_L0);
+ else
+ emit_pte(m, bb, dst_L0_pt, dst_is_vram, false,
+ &dst_it, src_L0, dst);
+
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ update_idx = bb->len;
+ if (!copy_only_ccs)
+ emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+ if (copy_only_ccs)
+ flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+ src_is_vram, dst_L0_ofs,
+ dst_is_vram, src_L0, dst_L0_ofs,
+ copy_only_ccs);
+
+ job = xe_bb_create_migration_job(m->q, bb,
+ xe_migrate_batch_base(m, xe->info.has_usm),
+ update_idx);
+ if (IS_ERR(job)) {
+ err = PTR_ERR(job);
+ goto err;
+ }
+
+ xe_sched_job_add_migrate_flush(job, flush_flags);
+
+ mutex_lock(&m->job_mutex);
+ xe_sched_job_arm(job);
+ dma_fence_put(fence);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ dma_fence_put(m->fence);
+ m->fence = dma_fence_get(fence);
+
+ mutex_unlock(&m->job_mutex);
+
+ xe_bb_free(bb, fence);
+ size -= src_L0;
+ continue;
+
+err:
+ xe_bb_free(bb, NULL);
+
+err_sync:
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+ return ERR_PTR(err);
+ }
+
+ return fence;
+}
+
+static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo,
+ struct kunit *test)
+{
+ struct dma_fence *fence;
+ u64 expected, retval;
+ long timeout;
+ long ret;
+
+ expected = 0xd0d0d0d0d0d0d0d0;
+ xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+ fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+ if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+ retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+ if (retval == expected)
+ KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+ }
+ dma_fence_put(fence);
+
+ kunit_info(test, "Evict vram buffer object\n");
+ ret = xe_bo_evict(vram_bo, true);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to evict bo.\n");
+ return;
+ }
+
+ ret = xe_bo_vmap(vram_bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+ return;
+ }
+
+ retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+ check(retval, expected, "Clear evicted vram data first value", test);
+ retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+ check(retval, expected, "Clear evicted vram data last value", test);
+
+ fence = blt_copy(tile, vram_bo, ccs_bo,
+ true, "Blit surf copy from vram to sysmem", test);
+ if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+ retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
+ check(retval, 0, "Clear ccs data first value", test);
+
+ retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+ check(retval, 0, "Clear ccs data last value", test);
+ }
+ dma_fence_put(fence);
+
+ kunit_info(test, "Restore vram buffer object\n");
+ ret = xe_bo_validate(vram_bo, NULL, false);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+ return;
+ }
+
+ /* Sync all migration blits */
+ timeout = dma_resv_wait_timeout(vram_bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL,
+ true,
+ 5 * HZ);
+ if (timeout <= 0) {
+ KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
+ return;
+ }
+
+ ret = xe_bo_vmap(vram_bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+ return;
+ }
+
+ retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+ check(retval, expected, "Restored value must be equal to initial value", test);
+ retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+ check(retval, expected, "Restored value must be equal to initial value", test);
+
+ fence = blt_copy(tile, vram_bo, ccs_bo,
+ true, "Blit surf copy from vram to sysmem", test);
+ if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+ retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
+ check(retval, 0, "Clear ccs data first value", test);
+ retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+ check(retval, 0, "Clear ccs data last value", test);
+ }
+ dma_fence_put(fence);
+}
+
+static void test_clear(struct xe_device *xe, struct xe_tile *tile,
+ struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct kunit *test)
+{
+ struct dma_fence *fence;
+ u64 expected, retval;
+
+ expected = 0xd0d0d0d0d0d0d0d0;
+ xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+
+ fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
+ if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
+ retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
+ if (retval == expected)
+ KUNIT_FAIL(test, "Sanity check failed: VRAM must have compressed value\n");
+ }
+ dma_fence_put(fence);
+
+ fence = blt_copy(tile, vram_bo, sys_bo, false, "Blit copy from vram to sysmem", test);
+ if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
+ retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+ check(retval, expected, "Decompressed value must be equal to initial value", test);
+ retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ check(retval, expected, "Decompressed value must be equal to initial value", test);
+ }
+ dma_fence_put(fence);
+
+ kunit_info(test, "Clear vram buffer object\n");
+ expected = 0x0000000000000000;
+ fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource);
+ if (sanity_fence_failed(xe, fence, "Clear vram_bo", test))
+ return;
+ dma_fence_put(fence);
+
+ fence = blt_copy(tile, vram_bo, sys_bo,
+ false, "Blit copy from vram to sysmem", test);
+ if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
+ retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+ check(retval, expected, "Clear main buffer first value", test);
+ retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ check(retval, expected, "Clear main buffer last value", test);
+ }
+ dma_fence_put(fence);
+
+ fence = blt_copy(tile, vram_bo, sys_bo,
+ true, "Blit surf copy from vram to sysmem", test);
+ if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
+ retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
+ check(retval, expected, "Clear ccs data first value", test);
+ retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ check(retval, expected, "Clear ccs data last value", test);
+ }
+ dma_fence_put(fence);
+}
+
+static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
+ struct kunit *test)
+{
+ struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL;
+ unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ long ret;
+
+ sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+
+ if (IS_ERR(sys_bo)) {
+ KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+ PTR_ERR(sys_bo));
+ return;
+ }
+
+ xe_bo_lock(sys_bo, false);
+ ret = xe_bo_validate(sys_bo, NULL, false);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+ goto free_sysbo;
+ }
+
+ ret = xe_bo_vmap(sys_bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+ goto free_sysbo;
+ }
+ xe_bo_unlock(sys_bo);
+
+ ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+ ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+
+ if (IS_ERR(ccs_bo)) {
+ KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+ PTR_ERR(ccs_bo));
+ return;
+ }
+
+ xe_bo_lock(ccs_bo, false);
+ ret = xe_bo_validate(ccs_bo, NULL, false);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
+ goto free_ccsbo;
+ }
+
+ ret = xe_bo_vmap(ccs_bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to vmap system bo: %li\n", ret);
+ goto free_ccsbo;
+ }
+ xe_bo_unlock(ccs_bo);
+
+ vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC,
+ ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ if (IS_ERR(vram_bo)) {
+ KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+ PTR_ERR(vram_bo));
+ return;
+ }
+
+ xe_bo_lock(vram_bo, false);
+ ret = xe_bo_validate(vram_bo, NULL, false);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
+ goto free_vrambo;
+ }
+
+ ret = xe_bo_vmap(vram_bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to vmap vram bo: %li\n", ret);
+ goto free_vrambo;
+ }
+
+ test_clear(xe, tile, sys_bo, vram_bo, test);
+ test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test);
+ xe_bo_unlock(vram_bo);
+
+ xe_bo_lock(vram_bo, false);
+ xe_bo_vunmap(vram_bo);
+ xe_bo_unlock(vram_bo);
+
+ xe_bo_lock(ccs_bo, false);
+ xe_bo_vunmap(ccs_bo);
+ xe_bo_unlock(ccs_bo);
+
+ xe_bo_lock(sys_bo, false);
+ xe_bo_vunmap(sys_bo);
+ xe_bo_unlock(sys_bo);
+free_vrambo:
+ xe_bo_put(vram_bo);
+free_ccsbo:
+ xe_bo_put(ccs_bo);
+free_sysbo:
+ xe_bo_put(sys_bo);
+}
+
+static int validate_ccs_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = kunit_get_current_test();
+ struct xe_tile *tile;
+ int id;
+
+ if (!xe_device_has_flat_ccs(xe)) {
+ kunit_skip(test, "non-flat-ccs device\n");
+ return 0;
+ }
+
+ if (!(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))) {
+ kunit_skip(test, "non-xe2 discrete device\n");
+ return 0;
+ }
+
+ xe_pm_runtime_get(xe);
+
+ for_each_tile(tile, xe, id)
+ validate_ccs_test_run_tile(xe, tile, test);
+
+ xe_pm_runtime_put(xe);
+
+ return 0;
}
-EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
+
+static void xe_validate_ccs_kunit(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ validate_ccs_test_run_device(xe);
+}
+
+static struct kunit_case xe_migrate_tests[] = {
+ KUNIT_CASE_PARAM(xe_migrate_sanity_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_validate_ccs_kunit, xe_pci_live_device_gen_param),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_migrate_test_suite = {
+ .name = "xe_migrate",
+ .test_cases = xe_migrate_tests,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_migrate_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
deleted file mode 100644
index eb0d8963419c..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#include "xe_migrate_test.h"
-
-#include <kunit/test.h>
-
-static struct kunit_case xe_migrate_tests[] = {
- KUNIT_CASE(xe_migrate_sanity_kunit),
- {}
-};
-
-static struct kunit_suite xe_migrate_test_suite = {
- .name = "xe_migrate",
- .test_cases = xe_migrate_tests,
-};
-
-kunit_test_suite(xe_migrate_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
deleted file mode 100644
index 7c645c66824f..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 AND MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_MIGRATE_TEST_H_
-#define _XE_MIGRATE_TEST_H_
-
-struct kunit;
-
-void xe_migrate_sanity_kunit(struct kunit *test);
-
-#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 67c65e88c384..79be73b4a02b 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -6,7 +6,7 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
-#include "tests/xe_mocs_test.h"
+#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
@@ -23,7 +23,7 @@ struct live_mocs {
static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
{
unsigned int flags;
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
memset(arg, 0, sizeof(*arg));
@@ -41,7 +41,7 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
static void read_l3cc_table(struct xe_gt *gt,
const struct xe_mocs_info *info)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
u32 l3cc, l3cc_expected;
unsigned int i;
u32 reg_val;
@@ -78,7 +78,7 @@ static void read_l3cc_table(struct xe_gt *gt,
static void read_mocs_table(struct xe_gt *gt,
const struct xe_mocs_info *info)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
u32 mocs, mocs_expected;
unsigned int i;
u32 reg_val;
@@ -134,11 +134,15 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
return 0;
}
-void xe_live_mocs_kernel_kunit(struct kunit *test)
+static void xe_live_mocs_kernel_kunit(struct kunit *test)
{
- xe_call_for_each_device(mocs_kernel_test_run_device);
+ struct xe_device *xe = test->priv;
+
+ if (IS_SRIOV_VF(xe))
+ kunit_skip(test, "this test is N/A for VF");
+
+ mocs_kernel_test_run_device(xe);
}
-EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
static int mocs_reset_test_run_device(struct xe_device *xe)
{
@@ -148,7 +152,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
struct xe_gt *gt;
unsigned int flags;
int id;
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
xe_pm_runtime_get(xe);
@@ -175,8 +179,26 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
return 0;
}
-void xe_live_mocs_reset_kunit(struct kunit *test)
+static void xe_live_mocs_reset_kunit(struct kunit *test)
{
- xe_call_for_each_device(mocs_reset_test_run_device);
+ struct xe_device *xe = test->priv;
+
+ if (IS_SRIOV_VF(xe))
+ kunit_skip(test, "this test is N/A for VF");
+
+ mocs_reset_test_run_device(xe);
}
-EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit);
+
+static struct kunit_case xe_mocs_tests[] = {
+ KUNIT_CASE_PARAM(xe_live_mocs_kernel_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_live_mocs_reset_kunit, xe_pci_live_device_gen_param),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_mocs_test_suite = {
+ .name = "xe_mocs",
+ .test_cases = xe_mocs_tests,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_mocs_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
deleted file mode 100644
index 6315886b659e..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#include "xe_mocs_test.h"
-
-#include <kunit/test.h>
-
-static struct kunit_case xe_mocs_tests[] = {
- KUNIT_CASE(xe_live_mocs_kernel_kunit),
- KUNIT_CASE(xe_live_mocs_reset_kunit),
- {}
-};
-
-static struct kunit_suite xe_mocs_test_suite = {
- .name = "xe_mocs",
- .test_cases = xe_mocs_tests,
-};
-
-kunit_test_suite(xe_mocs_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
deleted file mode 100644
index e7699d495411..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 AND MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_MOCS_TEST_H_
-#define _XE_MOCS_TEST_H_
-
-struct kunit;
-
-void xe_live_mocs_kernel_kunit(struct kunit *test);
-void xe_live_mocs_reset_kunit(struct kunit *test);
-
-#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index f62809ca8b51..577ee7d14381 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -167,3 +167,33 @@ done:
return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
+
+/**
+ * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters
+ * @prev: the previously returned value, or NULL for the first iteration
+ * @desc: the buffer for a parameter name
+ *
+ * Iterates over the available Xe devices on the system. Uses the device name
+ * as the parameter name.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
+ *
+ * Return: pointer to the next &struct xe_device ready to be used as a parameter
+ * or NULL if there are no more Xe devices on the system.
+ */
+const void *xe_pci_live_device_gen_param(const void *prev, char *desc)
+{
+ const struct xe_device *xe = prev;
+ struct device *dev = xe ? xe->drm.dev : NULL;
+ struct device *next;
+
+ next = driver_find_next_device(&xe_pci_driver.driver, dev);
+ if (dev)
+ put_device(dev);
+ if (!next)
+ return NULL;
+
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", dev_name(next));
+ return pdev_to_xe_device(to_pci_dev(next));
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_live_device_gen_param);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
index a6705a536391..744a37583d2d 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -16,7 +16,7 @@
static void check_graphics_ip(const struct xe_graphics_desc *graphics)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
u64 mask = graphics->hw_engine_mask;
/* RCS, CCS, and BCS engines are allowed on the graphics IP */
@@ -30,7 +30,7 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics)
static void check_media_ip(const struct xe_media_desc *media)
{
- struct kunit *test = xe_cur_kunit();
+ struct kunit *test = kunit_get_current_test();
u64 mask = media->hw_engine_mask;
/* VCS, VECS and GSCCS engines are allowed on the media IP */
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index f40dcec83992..3e2558bc3c90 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -35,4 +35,6 @@ struct xe_pci_fake_data {
int xe_pci_fake_device_init(struct xe_device *xe);
+const void *xe_pci_live_device_gen_param(const void *prev, char *desc);
+
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index f217445c246a..36a3b5420fef 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -31,16 +31,23 @@
#undef XE_REG_MCR
#define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1)
-struct rtp_test_case {
+struct rtp_to_sr_test_case {
const char *name;
struct xe_reg expected_reg;
u32 expected_set_bits;
u32 expected_clr_bits;
- unsigned long expected_count;
+ unsigned long expected_count_sr_entries;
unsigned int expected_sr_errors;
+ unsigned long expected_active;
const struct xe_rtp_entry_sr *entries;
};
+struct rtp_test_case {
+ const char *name;
+ unsigned long expected_active;
+ const struct xe_rtp_entry *entries;
+};
+
static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
{
return true;
@@ -51,13 +58,14 @@ static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
return false;
}
-static const struct rtp_test_case cases[] = {
+static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
{
.name = "coalesce-same-reg",
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0) | REG_BIT(1),
.expected_clr_bits = REG_BIT(0) | REG_BIT(1),
- .expected_count = 1,
+ .expected_active = BIT(0) | BIT(1),
+ .expected_count_sr_entries = 1,
/* Different bits on the same register: create a single entry */
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -76,7 +84,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0),
.expected_clr_bits = REG_BIT(0),
- .expected_count = 1,
+ .expected_active = BIT(0),
+ .expected_count_sr_entries = 1,
/* Don't coalesce second entry since rules don't match */
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -95,7 +104,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2),
.expected_clr_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2),
- .expected_count = 1,
+ .expected_active = BIT(0) | BIT(1) | BIT(2),
+ .expected_count_sr_entries = 1,
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("first"),
XE_RTP_RULES(FUNC(match_yes), OR, FUNC(match_no)),
@@ -121,7 +131,7 @@ static const struct rtp_test_case cases[] = {
{
.name = "match-or-xfail",
.expected_reg = REGULAR_REG1,
- .expected_count = 0,
+ .expected_count_sr_entries = 0,
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("leading-or"),
XE_RTP_RULES(OR, FUNC(match_yes)),
@@ -148,7 +158,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0),
.expected_clr_bits = REG_BIT(0),
- .expected_count = 1,
+ .expected_active = BIT(0),
+ .expected_count_sr_entries = 1,
/* Don't coalesce second entry due to one of the rules */
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -167,7 +178,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0),
.expected_clr_bits = REG_BIT(0),
- .expected_count = 2,
+ .expected_active = BIT(0) | BIT(1),
+ .expected_count_sr_entries = 2,
/* Same bits on different registers are not coalesced */
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -186,7 +198,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0),
.expected_clr_bits = REG_BIT(1) | REG_BIT(0),
- .expected_count = 1,
+ .expected_active = BIT(0) | BIT(1),
+ .expected_count_sr_entries = 1,
/* Check clr vs set actions on different bits */
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -207,7 +220,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = TEMP_FIELD,
.expected_clr_bits = TEMP_MASK,
- .expected_count = 1,
+ .expected_active = BIT(0),
+ .expected_count_sr_entries = 1,
/* Check FIELD_SET works */
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -225,7 +239,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0),
.expected_clr_bits = REG_BIT(0),
- .expected_count = 1,
+ .expected_active = BIT(0) | BIT(1),
+ .expected_count_sr_entries = 1,
.expected_sr_errors = 1,
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -245,7 +260,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0),
.expected_clr_bits = REG_BIT(0),
- .expected_count = 1,
+ .expected_active = BIT(0) | BIT(1),
+ .expected_count_sr_entries = 1,
.expected_sr_errors = 1,
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -265,7 +281,8 @@ static const struct rtp_test_case cases[] = {
.expected_reg = REGULAR_REG1,
.expected_set_bits = REG_BIT(0),
.expected_clr_bits = REG_BIT(0),
- .expected_count = 1,
+ .expected_active = BIT(0) | BIT(1) | BIT(2),
+ .expected_count_sr_entries = 1,
.expected_sr_errors = 2,
.entries = (const struct xe_rtp_entry_sr[]) {
{ XE_RTP_NAME("basic-1"),
@@ -287,28 +304,35 @@ static const struct rtp_test_case cases[] = {
},
};
-static void xe_rtp_process_tests(struct kunit *test)
+static void xe_rtp_process_to_sr_tests(struct kunit *test)
{
- const struct rtp_test_case *param = test->param_value;
+ const struct rtp_to_sr_test_case *param = test->param_value;
struct xe_device *xe = test->priv;
struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt;
struct xe_reg_sr *reg_sr = &gt->reg_sr;
const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
- unsigned long idx, count = 0;
+ unsigned long idx, count_sr_entries = 0, count_rtp_entries = 0, active = 0;
+
+ xe_reg_sr_init(reg_sr, "xe_rtp_to_sr_tests", xe);
+
+ while (param->entries[count_rtp_entries].rules)
+ count_rtp_entries++;
- xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
+ xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries);
xe_rtp_process_to_sr(&ctx, param->entries, reg_sr);
xa_for_each(&reg_sr->xa, idx, sre) {
if (idx == param->expected_reg.addr)
sr_entry = sre;
- count++;
+ count_sr_entries++;
}
- KUNIT_EXPECT_EQ(test, count, param->expected_count);
- if (count) {
+ KUNIT_EXPECT_EQ(test, active, param->expected_active);
+
+ KUNIT_EXPECT_EQ(test, count_sr_entries, param->expected_count_sr_entries);
+ if (count_sr_entries) {
KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits);
KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits);
KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw);
@@ -319,12 +343,162 @@ static void xe_rtp_process_tests(struct kunit *test)
KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors);
}
+/*
+ * Entries below follow the logic used with xe_wa_oob.rules:
+ * 1) Entries with empty name are OR'ed: all entries marked active since the
+ * last entry with a name
+ * 2) There are no action associated with rules
+ */
+static const struct rtp_test_case rtp_cases[] = {
+ {
+ .name = "active1",
+ .expected_active = BIT(0),
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_yes)),
+ },
+ {}
+ },
+ },
+ {
+ .name = "active2",
+ .expected_active = BIT(0) | BIT(1),
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_yes)),
+ },
+ { XE_RTP_NAME("r2"),
+ XE_RTP_RULES(FUNC(match_yes)),
+ },
+ {}
+ },
+ },
+ {
+ .name = "active-inactive",
+ .expected_active = BIT(0),
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_yes)),
+ },
+ { XE_RTP_NAME("r2"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ {}
+ },
+ },
+ {
+ .name = "inactive-active",
+ .expected_active = BIT(1),
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ { XE_RTP_NAME("r2"),
+ XE_RTP_RULES(FUNC(match_yes)),
+ },
+ {}
+ },
+ },
+ {
+ .name = "inactive-1st_or_active-inactive",
+ .expected_active = BIT(1),
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ { XE_RTP_NAME("r2_or_conditions"),
+ XE_RTP_RULES(FUNC(match_yes), OR,
+ FUNC(match_no), OR,
+ FUNC(match_no)) },
+ { XE_RTP_NAME("r3"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ {}
+ },
+ },
+ {
+ .name = "inactive-2nd_or_active-inactive",
+ .expected_active = BIT(1),
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ { XE_RTP_NAME("r2_or_conditions"),
+ XE_RTP_RULES(FUNC(match_no), OR,
+ FUNC(match_yes), OR,
+ FUNC(match_no)) },
+ { XE_RTP_NAME("r3"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ {}
+ },
+ },
+ {
+ .name = "inactive-last_or_active-inactive",
+ .expected_active = BIT(1),
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ { XE_RTP_NAME("r2_or_conditions"),
+ XE_RTP_RULES(FUNC(match_no), OR,
+ FUNC(match_no), OR,
+ FUNC(match_yes)) },
+ { XE_RTP_NAME("r3"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ {}
+ },
+ },
+ {
+ .name = "inactive-no_or_active-inactive",
+ .expected_active = 0,
+ .entries = (const struct xe_rtp_entry[]) {
+ { XE_RTP_NAME("r1"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ { XE_RTP_NAME("r2_or_conditions"),
+ XE_RTP_RULES(FUNC(match_no), OR,
+ FUNC(match_no), OR,
+ FUNC(match_no)) },
+ { XE_RTP_NAME("r3"),
+ XE_RTP_RULES(FUNC(match_no)),
+ },
+ {}
+ },
+ },
+};
+
+static void xe_rtp_process_tests(struct kunit *test)
+{
+ const struct rtp_test_case *param = test->param_value;
+ struct xe_device *xe = test->priv;
+ struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt;
+ struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+ unsigned long count_rtp_entries = 0, active = 0;
+
+ while (param->entries[count_rtp_entries].rules)
+ count_rtp_entries++;
+
+ xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries);
+ xe_rtp_process(&ctx, param->entries);
+
+ KUNIT_EXPECT_EQ(test, active, param->expected_active);
+}
+
+static void rtp_to_sr_desc(const struct rtp_to_sr_test_case *t, char *desc)
+{
+ strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(rtp_to_sr, rtp_to_sr_cases, rtp_to_sr_desc);
+
static void rtp_desc(const struct rtp_test_case *t, char *desc)
{
strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
}
-KUNIT_ARRAY_PARAM(rtp, cases, rtp_desc);
+KUNIT_ARRAY_PARAM(rtp, rtp_cases, rtp_desc);
static int xe_rtp_test_init(struct kunit *test)
{
@@ -357,6 +531,7 @@ static void xe_rtp_test_exit(struct kunit *test)
}
static struct kunit_case xe_rtp_tests[] = {
+ KUNIT_CASE_PARAM(xe_rtp_process_to_sr_tests, rtp_to_sr_gen_params),
KUNIT_CASE_PARAM(xe_rtp_process_tests, rtp_gen_params),
{}
};
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
index 7a1ae213e750..9c23ad9dba8d 100644
--- a/drivers/gpu/drm/xe/tests/xe_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -9,8 +9,8 @@
#include <linux/types.h>
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
-#include <linux/sched.h>
#include <kunit/test.h>
+#include <kunit/test-bug.h>
/*
* Each test that provides a kunit private test structure, place a test id
@@ -31,8 +31,6 @@ struct xe_test_priv {
#define XE_TEST_DECLARE(x) x
#define XE_TEST_ONLY(x) unlikely(x)
-#define XE_TEST_EXPORT
-#define xe_cur_kunit() current->kunit_test
/**
* xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by
@@ -48,10 +46,10 @@ xe_cur_kunit_priv(enum xe_test_priv_id id)
{
struct xe_test_priv *priv;
- if (!xe_cur_kunit())
+ if (!kunit_get_current_test())
return NULL;
- priv = xe_cur_kunit()->priv;
+ priv = kunit_get_current_test()->priv;
return priv->id == id ? priv : NULL;
}
@@ -59,8 +57,6 @@ xe_cur_kunit_priv(enum xe_test_priv_id id)
#define XE_TEST_DECLARE(x)
#define XE_TEST_ONLY(x) 0
-#define XE_TEST_EXPORT static
-#define xe_cur_kunit() NULL
#define xe_cur_kunit_priv(_id) NULL
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 9d0c715142b9..c96d1fe34151 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -74,6 +74,7 @@ static const struct platform_test_case cases[] = {
GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
+ GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
};
static void platform_desc(const struct platform_test_case *t, char *desc)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 31192d983d9e..3295bc92d7aa 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1264,13 +1264,14 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
- (flags & XE_BO_NEEDS_64K))) {
- aligned_size = ALIGN(size, SZ_64K);
+ (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
+ size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
+
+ aligned_size = ALIGN(size, align);
if (type != ttm_bo_type_device)
- size = ALIGN(size, SZ_64K);
+ size = ALIGN(size, align);
flags |= XE_BO_FLAG_INTERNAL_64K;
- alignment = SZ_64K >> PAGE_SHIFT;
-
+ alignment = align >> PAGE_SHIFT;
} else {
aligned_size = ALIGN(size, SZ_4K);
flags &= ~XE_BO_FLAG_INTERNAL_64K;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 6de894c728f5..1c9dc8adaaa3 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -36,8 +36,9 @@
#define XE_BO_FLAG_PAGETABLE BIT(12)
#define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13)
#define XE_BO_FLAG_NEEDS_UC BIT(14)
-#define XE_BO_NEEDS_64K BIT(15)
-#define XE_BO_FLAG_GGTT_INVALIDATE BIT(16)
+#define XE_BO_FLAG_NEEDS_64K BIT(15)
+#define XE_BO_FLAG_NEEDS_2M BIT(16)
+#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17)
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
#define XE_BO_FLAG_INTERNAL_64K BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 10450f1fbbde..ebc8abf7930a 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -58,6 +58,8 @@ struct xe_bo {
#endif
/** @freed: List node for delayed put. */
struct llist_node freed;
+ /** @update_index: Update index if PT BO */
+ int update_index;
/** @created: Whether the bo has passed initial creation */
bool created;
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 62c2b10fbf1d..d8d8ca2c19d3 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -171,7 +171,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
u32 adj_logical_mask = q->logical_mask;
u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
- struct task_struct *task = NULL;
int i;
bool cookie;
@@ -179,14 +178,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
- if (q->vm && q->vm->xef) {
- task = get_pid_task(q->vm->xef->drm->pid, PIDTYPE_PID);
- if (task)
- process_name = task->comm;
- }
+ if (q->vm && q->vm->xef)
+ process_name = q->vm->xef->process_name;
strscpy(ss->process_name, process_name);
- if (task)
- put_task_struct(task);
ss->gt = q->gt;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 76109415eba6..1aba6f9eaa19 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -54,6 +54,9 @@
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
@@ -61,6 +64,7 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
struct xe_drm_client *client;
struct xe_file *xef;
int ret = -ENOMEM;
+ struct task_struct *task = NULL;
xef = kzalloc(sizeof(*xef), GFP_KERNEL);
if (!xef)
@@ -87,9 +91,63 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
spin_unlock(&xe->clients.lock);
file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
+ task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
+ if (task) {
+ xef->process_name = kstrdup(task->comm, GFP_KERNEL);
+ xef->pid = task->pid;
+ put_task_struct(task);
+ }
+
return 0;
}
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef->process_name);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
+
static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -98,6 +156,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
struct xe_exec_queue *q;
unsigned long idx;
+ xe_pm_runtime_get(xe);
+
/*
* No need for exec_queue.lock here as there is no contention for it
* when FD is closing as IOCTLs presumably can't be modifying the
@@ -108,21 +168,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
xe_exec_queue_kill(q);
xe_exec_queue_put(q);
}
- xa_destroy(&xef->exec_queue.xa);
- mutex_destroy(&xef->exec_queue.lock);
mutex_lock(&xef->vm.lock);
xa_for_each(&xef->vm.xa, idx, vm)
xe_vm_close_and_put(vm);
mutex_unlock(&xef->vm.lock);
- xa_destroy(&xef->vm.xa);
- mutex_destroy(&xef->vm.lock);
- spin_lock(&xe->clients.lock);
- xe->clients.count--;
- spin_unlock(&xe->clients.lock);
+ xe_file_put(xef);
- xe_drm_client_put(xef->client);
- kfree(xef);
+ xe_pm_runtime_put(xe);
}
static const struct drm_ioctl_desc xe_ioctls[] = {
@@ -744,13 +797,22 @@ void xe_device_shutdown(struct xe_device *xe)
{
}
+/**
+ * xe_device_wmb() - Device specific write memory barrier
+ * @xe: the &xe_device
+ *
+ * While wmb() is sufficient for a barrier if we use system memory, on discrete
+ * platforms with device memory we additionally need to issue a register write.
+ * Since it doesn't matter which register we write to, use the read-only VF_CAP
+ * register that is also marked as accessible by the VFs.
+ */
void xe_device_wmb(struct xe_device *xe)
{
struct xe_gt *gt = xe_root_mmio_gt(xe);
wmb();
if (IS_DGFX(xe))
- xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
+ xe_mmio_write32(gt, VF_CAP_REG, 0);
}
/**
@@ -779,6 +841,11 @@ void xe_device_td_flush(struct xe_device *xe)
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
@@ -802,6 +869,30 @@ void xe_device_td_flush(struct xe_device *xe)
}
}
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index bb07f5669dbb..db6cc8d0d6b8 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -20,6 +20,11 @@ static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
return pci_get_drvdata(pdev);
}
+static inline struct xe_device *xe_device_const_cast(const struct xe_device *xe)
+{
+ return (struct xe_device *)xe;
+}
+
static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
{
return container_of(ttm, struct xe_device, ttm);
@@ -162,6 +167,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
void xe_device_td_flush(struct xe_device *xe);
+void xe_device_l2_flush(struct xe_device *xe);
static inline bool xe_device_wedged(struct xe_device *xe)
{
@@ -170,4 +176,7 @@ static inline bool xe_device_wedged(struct xe_device *xe)
void xe_device_declare_wedged(struct xe_device *xe);
+struct xe_file *xe_file_get(struct xe_file *xef);
+void xe_file_put(struct xe_file *xef);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 3bca6d344744..5b7292a9a66d 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -23,6 +23,10 @@
#include "xe_sriov_types.h"
#include "xe_step_types.h"
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define TEST_VM_OPS_ERROR
+#endif
+
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
#include "soc/intel_pch.h"
#include "intel_display_core.h"
@@ -40,6 +44,7 @@ struct xe_pat_ops;
#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
#define IS_DGFX(xe) ((xe)->info.is_dgfx)
#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
+#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi)
#define XE_VRAM_FLAGS_NEED64K BIT(0)
@@ -285,6 +290,8 @@ struct xe_device {
u8 skip_pcode:1;
/** @info.has_heci_gscfi: device has heci gscfi */
u8 has_heci_gscfi:1;
+ /** @info.has_heci_cscfi: device has heci cscfi */
+ u8 has_heci_cscfi:1;
/** @info.skip_guc_pc: Skip GuC based PM feature init */
u8 skip_guc_pc:1;
/** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
@@ -477,6 +484,14 @@ struct xe_device {
int mode;
} wedged;
+#ifdef TEST_VM_OPS_ERROR
+ /**
+ * @vm_inject_error_position: inject errors at different places in VM
+ * bind IOCTL based on this value
+ */
+ u8 vm_inject_error_position;
+#endif
+
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
@@ -566,6 +581,21 @@ struct xe_file {
/** @client: drm client */
struct xe_drm_client *client;
+
+ /**
+ * @process_name: process name for file handle, used to safely output
+ * during error situations where xe file can outlive process
+ */
+ char *process_name;
+
+ /**
+ * @pid: pid for file handle, used to safely output uring error
+ * situations where xe file can outlive process
+ */
+ pid_t pid;
+
+ /** @refcount: ref count of this xe file */
+ struct kref refcount;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 6a26923fa10e..7ddd59908334 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
- xa_for_each(&xef->exec_queue.xa, i, q) {
+ xa_for_each(&xef->exec_queue.xa, i, q)
xe_exec_queue_update_run_ticks(q);
- xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks;
- q->old_run_ticks = q->run_ticks;
- }
mutex_unlock(&xef->exec_queue.lock);
/* Get the total GPU cycles */
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 0ba37835849b..69867a7b7c77 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
if (q->vm)
xe_vm_put(q->vm);
+
+ if (q->xef)
+ xe_file_put(q->xef);
+
kfree(q);
}
@@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
goto kill_exec_queue;
args->exec_queue_id = id;
+ q->xef = xe_file_get(xef);
return 0;
@@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
*/
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
+ struct xe_file *xef;
struct xe_lrc *lrc;
u32 old_ts, new_ts;
@@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
if (!q->vm || !q->vm->xef)
return;
+ xef = q->vm->xef;
+
/*
* Only sample the first LRC. For parallel submission, all of them are
* scheduled together and we compensate that below by multiplying by
@@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
*/
lrc = q->lrc[0];
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
- q->run_ticks += (new_ts - old_ts) * q->width;
+ xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
}
void xe_exec_queue_kill(struct xe_exec_queue *q)
@@ -906,3 +914,26 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
xe_exec_queue_last_fence_put(q, vm);
q->last_fence = dma_fence_get(fence);
}
+
+/**
+ * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Returns:
+ * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
+ */
+int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+ struct dma_fence *fence;
+ int err = 0;
+
+ fence = xe_exec_queue_last_fence_get(q, vm);
+ if (fence) {
+ err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
+ 0 : -ETIME;
+ dma_fence_put(fence);
+ }
+
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 289a3a51d2a2..ded77b0f3b90 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -75,6 +75,8 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
struct xe_vm *vm);
void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
struct dma_fence *fence);
+int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
+ struct xe_vm *vm);
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 201588ec33c3..1408b02eea53 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -38,6 +38,9 @@ enum xe_exec_queue_priority {
* a kernel object.
*/
struct xe_exec_queue {
+ /** @xef: Back pointer to xe file if this is user created exec queue */
+ struct xe_file *xef;
+
/** @gt: graphics tile this exec queue can submit to */
struct xe_gt *gt;
/**
@@ -139,10 +142,6 @@ struct xe_exec_queue {
* Protected by @vm's resv. Unused if @vm == NULL.
*/
u64 tlb_flush_seqno;
- /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
- u64 old_run_ticks;
- /** @run_ticks: hw engine class run time in ticks for this exec queue */
- u64 run_ticks;
/** @lrc: logical ring context for this exec queue */
struct xe_lrc *lrc[];
};
@@ -172,9 +171,11 @@ struct xe_exec_queue_ops {
int (*suspend)(struct xe_exec_queue *q);
/**
* @suspend_wait: Wait for an exec queue to suspend executing, should be
- * call after suspend.
+ * call after suspend. In dma-fencing path thus must return within a
+ * reasonable amount of time. -ETIME return shall indicate an error
+ * waiting for suspend resulting in associated VM getting killed.
*/
- void (*suspend_wait)(struct xe_exec_queue *q);
+ int (*suspend_wait)(struct xe_exec_queue *q);
/**
* @resume: Resume exec queue execution, exec queue must be in a suspended
* state and dma fence returned from most recent suspend call must be
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index db906117db6d..7502e3486eaf 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -422,10 +422,11 @@ static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
return 0;
}
-static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
+static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
/* NIY */
+ return 0;
}
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
index 106ee2b027f0..904cf47925aa 100644
--- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -97,19 +97,27 @@ static int parse(FILE *input, FILE *csource, FILE *cheader)
if (name) {
fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
- fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n",
+
+ /* Close previous entry before starting a new one */
+ if (idx)
+ fprintf(csource, ") },\n");
+
+ fprintf(csource, "{ XE_RTP_NAME(\"%s\"),\n XE_RTP_RULES(%s",
name, rules);
+ idx++;
} else {
- fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n",
- rules);
+ fprintf(csource, ", OR,\n\t%s", rules);
}
- idx++;
lineno++;
if (!is_continuation)
prev_name = name;
}
+ /* Close last entry */
+ if (idx)
+ fprintf(csource, ") },\n");
+
fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 31b2e64c70c6..58895ed22f6e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -9,6 +9,7 @@
#include <drm/drm_managed.h>
#include <drm/xe_drm.h>
+
#include <generated/xe_wa_oob.h>
#include "instructions/xe_gfxpipe_commands.h"
@@ -95,6 +96,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
gt->uc.guc.submission_state.enabled = false;
}
+static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
+{
+ u32 reg;
+ int err;
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
+ return;
+
+ if (!xe_gt_is_media_type(gt)) {
+ xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
+ reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg |= CG_DIS_CNTLBUS;
+ xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+ }
+
+ xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
+{
+ u32 reg;
+ int err;
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ if (xe_gt_is_media_type(gt))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
+ return;
+
+ reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg &= ~CG_DIS_CNTLBUS;
+ xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
/**
* xe_gt_remove() - Clean up the GT structures before driver removal
* @gt: the GT object
@@ -111,6 +157,8 @@ void xe_gt_remove(struct xe_gt *gt)
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+
+ xe_gt_disable_host_l2_vram(gt);
}
static void gt_reset_worker(struct work_struct *w);
@@ -339,6 +387,7 @@ int xe_gt_init_early(struct xe_gt *gt)
xe_force_wake_init_gt(gt, gt_to_fw(gt));
xe_pcode_init(gt);
+ spin_lock_init(&gt->global_invl_lock);
return 0;
}
@@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
err = xe_uc_init(&gt->uc);
if (err)
@@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
xe_pat_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
+
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
@@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_idle_disable_pg(gt);
+ xe_gt_disable_host_l2_vram(gt);
+
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_gt_dbg(gt, "suspended\n");
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 9dbba9ab7a9a..ef239440963c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -5,7 +5,7 @@
#include <drm/drm_managed.h>
-#include "regs/xe_sriov_regs.h"
+#include "regs/xe_regs.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 4699b7836001..52c7277d243d 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1401,6 +1401,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
ALIGN(size, PAGE_SIZE),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_NEEDS_2M |
XE_BO_FLAG_PINNED);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 41e46a00c01e..47222bd9988d 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key),
+ return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key),
vf_runtime_reg_cmp);
}
@@ -893,6 +893,32 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
}
/**
+ * xe_gt_sriov_vf_write32 - Handle a write to an inaccessible register.
+ * @gt: the &xe_gt
+ * @reg: the register to write
+ * @val: value to write
+ *
+ * This function is for VF use only.
+ * Currently it will trigger a WARN if running on debug build.
+ */
+void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
+{
+ u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ xe_gt_assert(gt, !reg.vf);
+
+ /*
+ * In the future, we may want to handle selected writes to inaccessible
+ * registers in some custom way, but for now let's just log a warning
+ * about such attempt, as likely we might be doing something wrong.
+ */
+ xe_gt_WARN(gt, IS_ENABLED(CONFIG_DRM_XE_DEBUG),
+ "VF is trying to write %#x to an inaccessible register %#x+%#x\n",
+ val, reg.addr, addr - reg.addr);
+}
+
+/**
* xe_gt_sriov_vf_print_config - Print VF self config.
* @gt: the &xe_gt
* @p: the &drm_printer
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
index 0de7f8cbcfa6..e541ce57bec2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -22,6 +22,7 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt);
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg);
+void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index d9359976ab8b..87cb76a8718c 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -13,10 +13,13 @@
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_mmio.h"
+#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_trace.h"
#include "regs/xe_guc_regs.h"
+#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
+
/*
* TLB inval depends on pending commands in the CT queue and then the real
* invalidation time. Double up the time to process full CT queue
@@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
return hw_tlb_timeout + 2 * delay;
}
+static void
+__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+ bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
+
+ trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
+ xe_gt_tlb_invalidation_fence_fini(fence);
+ dma_fence_signal(&fence->base);
+ if (!stack)
+ dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+ list_del(&fence->link);
+ __invalidation_fence_signal(xe, fence);
+}
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{
@@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
fence->seqno, gt->tlb_invalidation.seqno_recv);
- list_del(&fence->link);
fence->base.error = -ETIME;
- dma_fence_signal(&fence->base);
- dma_fence_put(&fence->base);
+ invalidation_fence_signal(xe, fence);
}
if (!list_empty(&gt->tlb_invalidation.pending_fences))
queue_delayed_work(system_wq,
@@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
return 0;
}
-static void
-__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
- trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
- dma_fence_signal(&fence->base);
- dma_fence_put(&fence->base);
-}
-
-static void
-invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
- list_del(&fence->link);
- __invalidation_fence_signal(xe, fence);
-}
-
/**
* xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
* @gt: graphics tile
@@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
{
struct xe_gt_tlb_invalidation_fence *fence, *next;
- struct xe_guc *guc = &gt->uc.guc;
int pending_seqno;
/*
@@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
else
pending_seqno = gt->tlb_invalidation.seqno - 1;
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
- wake_up_all(&guc->ct.wq);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link)
@@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
int seqno;
int ret;
+ xe_gt_assert(gt, fence);
+
/*
* XXX: The seqno algorithm relies on TLB invalidation being processed
* in order which they currently are, if that changes the algorithm will
@@ -173,14 +177,12 @@ static int send_tlb_invalidation(struct xe_guc *guc,
mutex_lock(&guc->ct.lock);
seqno = gt->tlb_invalidation.seqno;
- if (fence) {
- fence->seqno = seqno;
- trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
- }
+ fence->seqno = seqno;
+ trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
action[1] = seqno;
ret = xe_guc_ct_send_locked(&guc->ct, action, len,
G2H_LEN_DW_TLB_INVALIDATE, 1);
- if (!ret && fence) {
+ if (!ret) {
spin_lock_irq(&gt->tlb_invalidation.pending_lock);
/*
* We haven't actually published the TLB fence as per
@@ -201,7 +203,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
tlb_timeout_jiffies(gt));
}
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
- } else if (ret < 0 && fence) {
+ } else if (ret < 0) {
__invalidation_fence_signal(xe, fence);
}
if (!ret) {
@@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
TLB_INVALIDATION_SEQNO_MAX;
if (!gt->tlb_invalidation.seqno)
gt->tlb_invalidation.seqno = 1;
- ret = seqno;
}
mutex_unlock(&guc->ct.lock);
@@ -223,14 +224,16 @@ static int send_tlb_invalidation(struct xe_guc *guc,
/**
* xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
* @gt: graphics tile
+ * @fence: invalidation fence which will be signal on TLB invalidation
+ * completion
*
* Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
- * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ * caller can use the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: 0 on success, negative error code on error
*/
-static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence)
{
u32 action[] = {
XE_GUC_ACTION_TLB_INVALIDATION,
@@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
};
- return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+ return send_tlb_invalidation(&gt->uc.guc, fence, action,
ARRAY_SIZE(action));
}
@@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
gt->uc.guc.submission_state.enabled) {
- int seqno;
-
- seqno = xe_gt_tlb_invalidation_guc(gt);
- if (seqno <= 0)
- return seqno;
+ struct xe_gt_tlb_invalidation_fence fence;
+ int ret;
+
+ xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
+ ret = xe_gt_tlb_invalidation_guc(gt, &fence);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence);
+ return ret;
+ }
- xe_gt_tlb_invalidation_wait(gt, seqno);
+ xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
if (IS_SRIOV_VF(xe))
return 0;
@@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
*
* @gt: graphics tile
* @fence: invalidation fence which will be signal on TLB invalidation
- * completion, can be NULL
+ * completion
* @start: start address
* @end: end address
* @asid: address space id
*
* Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
*/
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
@@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
u32 action[MAX_TLB_INVALIDATION_LEN];
int len = 0;
+ xe_gt_assert(gt, fence);
+
/* Execlists not supported */
if (gt_to_xe(gt)->info.force_execlist) {
- if (fence)
- __invalidation_fence_signal(xe, fence);
-
+ __invalidation_fence_signal(xe, fence);
return 0;
}
@@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
* @vma: VMA to invalidate
*
* Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
*/
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
@@ -401,43 +404,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
}
/**
- * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
- * @gt: graphics tile
- * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
- *
- * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
- *
- * Return: 0 on success, -ETIME on TLB invalidation timeout
- */
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
-{
- struct xe_guc *guc = &gt->uc.guc;
- int ret;
-
- /* Execlists not supported */
- if (gt_to_xe(gt)->info.force_execlist)
- return 0;
-
- /*
- * XXX: See above, this algorithm only works if seqno are always in
- * order
- */
- ret = wait_event_timeout(guc->ct.wq,
- tlb_invalidation_seqno_past(gt, seqno),
- tlb_timeout_jiffies(gt));
- if (!ret) {
- struct drm_printer p = xe_gt_err_printer(gt);
-
- xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
- seqno, gt->tlb_invalidation.seqno_recv);
- xe_guc_ct_print(&guc->ct, &p, true);
- return -ETIME;
- }
-
- return 0;
-}
-
-/**
* xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
* @guc: guc
* @msg: message indicating TLB invalidation done
@@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
- /*
- * wake_up_all() and wait_event_timeout() already have the correct
- * barriers.
- */
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
- wake_up_all(&guc->ct.wq);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link) {
@@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
+
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+ return "xe";
+}
+
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+ return "invalidation_fence";
+}
+
+static const struct dma_fence_ops invalidation_fence_ops = {
+ .get_driver_name = invalidation_fence_get_driver_name,
+ .get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+/**
+ * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
+ * @gt: GT
+ * @fence: TLB invalidation fence to initialize
+ * @stack: fence is stack variable
+ *
+ * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
+ * must be called if fence is not signaled.
+ */
+void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ bool stack)
+{
+ xe_pm_runtime_get_noresume(gt_to_xe(gt));
+
+ spin_lock_irq(&gt->tlb_invalidation.lock);
+ dma_fence_init(&fence->base, &invalidation_fence_ops,
+ &gt->tlb_invalidation.lock,
+ dma_fence_context_alloc(1), 1);
+ spin_unlock_irq(&gt->tlb_invalidation.lock);
+ INIT_LIST_HEAD(&fence->link);
+ if (stack)
+ set_bit(FENCE_STACK_BIT, &fence->base.flags);
+ else
+ dma_fence_get(&fence->base);
+ fence->gt = gt;
+}
+
+/**
+ * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
+ * @fence: TLB invalidation fence to finalize
+ *
+ * Drop PM ref which fence took durinig init.
+ */
+void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
+{
+ xe_pm_runtime_put(gt_to_xe(fence->gt));
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index bf3bebd9f985..a84065fa324c 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
u64 start, u64 end, u32 asid);
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ bool stack);
+void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
+
+static inline void
+xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
+{
+ dma_fence_wait(&fence->base, false);
+}
+
#endif /* _XE_GT_TLB_INVALIDATION_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
index 934c828efe31..de6e825e0851 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -8,6 +8,8 @@
#include <linux/dma-fence.h>
+struct xe_gt;
+
/**
* struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
*
@@ -17,6 +19,8 @@
struct xe_gt_tlb_invalidation_fence {
/** @base: dma fence base */
struct dma_fence base;
+ /** @gt: GT which fence belong to */
+ struct xe_gt *gt;
/** @link: link into list of pending tlb fences */
struct list_head link;
/** @seqno: seqno of TLB invalidation to signal fence one */
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 25ff03ab8448..0662f71c6ede 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -6,6 +6,7 @@
#include "xe_gt_topology.h"
#include <linux/bitmap.h>
+#include <linux/compiler.h>
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
@@ -31,7 +32,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
}
static void
-load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
{
struct xe_device *xe = gt_to_xe(gt);
u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
@@ -47,11 +48,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
if (GRAPHICS_VERx100(xe) < 1250)
reg_val = ~reg_val & XELP_EU_MASK;
- /* On PVC, one bit = one EU */
- if (GRAPHICS_VERx100(xe) == 1260) {
+ if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
+ /* SIMD16 EUs, one bit == one EU */
+ *eu_type = XE_GT_EU_TYPE_SIMD16;
val = reg_val;
} else {
- /* All other platforms, one bit = 2 EU */
+ /* SIMD8 EUs, one bit == 2 EU */
+ *eu_type = XE_GT_EU_TYPE_SIMD8;
for (i = 0; i < fls(reg_val); i++)
if (reg_val & BIT(i))
val |= 0x3 << 2 * i;
@@ -213,7 +216,7 @@ xe_gt_topology_init(struct xe_gt *gt)
XEHP_GT_COMPUTE_DSS_ENABLE,
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
XE2_GT_COMPUTE_DSS_2);
- load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+ load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
@@ -221,6 +224,18 @@ xe_gt_topology_init(struct xe_gt *gt)
xe_gt_topology_dump(gt, &p);
}
+static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
+{
+ switch (eu_type) {
+ case XE_GT_EU_TYPE_SIMD16:
+ return "simd16";
+ case XE_GT_EU_TYPE_SIMD8:
+ return "simd8";
+ }
+
+ return NULL;
+}
+
void
xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
@@ -231,6 +246,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
gt->fuse_topo.eu_mask_per_dss);
+ drm_printf(p, "EU type: %s\n",
+ eu_type_to_str(gt->fuse_topo.eu_type));
drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
gt->fuse_topo.l3_bank_mask);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 6b5e0b45efb0..631928258d71 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -27,6 +27,11 @@ enum xe_gt_type {
XE_GT_TYPE_MEDIA,
};
+enum xe_gt_eu_type {
+ XE_GT_EU_TYPE_SIMD8,
+ XE_GT_EU_TYPE_SIMD16,
+};
+
#define XE_MAX_DSS_FUSE_REGS 3
#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
#define XE_MAX_EU_FUSE_REGS 1
@@ -343,6 +348,12 @@ struct xe_gt {
/** @fuse_topo.l3_bank_mask: L3 bank mask */
xe_l3_bank_mask_t l3_bank_mask;
+
+ /**
+ * @fuse_topo.eu_type: type/width of EU stored in
+ * fuse_topo.eu_mask_per_dss
+ */
+ enum xe_gt_eu_type eu_type;
} fuse_topo;
/** @steering: register steering for individual HW units */
@@ -362,6 +373,12 @@ struct xe_gt {
*/
spinlock_t mcr_lock;
+ /**
+ * @global_invl_lock: protects the register for the duration
+ * of a global invalidation of l2 cache
+ */
+ spinlock_t global_invl_lock;
+
/** @wa_active: keep track of active workarounds */
struct {
/** @wa_active.gt: bitmap with active GT workarounds */
@@ -370,8 +387,14 @@ struct xe_gt {
unsigned long *engine;
/** @wa_active.lrc: bitmap with active LRC workarounds */
unsigned long *lrc;
- /** @wa_active.oob: bitmap with active OOB workaroudns */
+ /** @wa_active.oob: bitmap with active OOB workarounds */
unsigned long *oob;
+ /**
+ * @wa_active.oob_initialized: mark oob as initialized to help
+ * detecting misuse of XE_WA() - it can only be called on
+ * initialization after OOB WAs have being processed
+ */
+ bool oob_initialized;
} wa_active;
/** @user_engines: engines present in GT and available to userspace */
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 7d2e937da1d8..beeeb120d1fc 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
state == XE_GUC_CT_STATE_STOPPED);
+ if (ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
ct->g2h_outstanding = 0;
ct->state = state;
@@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
{
xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
+ xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
+ (g2h_len && num_g2h));
if (g2h_len) {
lockdep_assert_held(&ct->fast_lock);
+ if (!ct->g2h_outstanding)
+ xe_pm_runtime_get_noresume(ct_to_xe(ct));
+
ct->ctbs.g2h.info.space -= g2h_len;
ct->g2h_outstanding += num_g2h;
}
@@ -509,9 +516,11 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
lockdep_assert_held(&ct->fast_lock);
xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
+ xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding);
ct->ctbs.g2h.info.space += g2h_len;
- --ct->g2h_outstanding;
+ if (!--ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
}
static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
index cd0549d0ef89..e845425d670b 100644
--- a/drivers/gpu/drm/xe/xe_guc_id_mgr.c
+++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
@@ -97,8 +97,8 @@ int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit)
if (ret)
return ret;
- xe_gt_info(idm_to_gt(idm), "using %u GUC ID%s\n",
- idm->total, str_plural(idm->total));
+ xe_gt_dbg(idm_to_gt(idm), "using %u GuC ID%s\n",
+ idm->total, str_plural(idm->total));
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 8d7e7f4bbff7..460808507947 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1071,7 +1071,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
struct xe_exec_queue *q = job->q;
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_guc *guc = exec_queue_to_guc(q);
+ const char *process_name = "no process";
int err = -ETIME;
+ pid_t pid = -1;
int i = 0;
bool wedged, skip_timeout_check;
@@ -1168,9 +1170,14 @@ trigger_reset:
goto sched_enable;
}
- xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
+ if (q->vm && q->vm->xef) {
+ process_name = q->vm->xef->process_name;
+ pid = q->vm->xef->pid;
+ }
+ xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
- q->guc->id, q->flags);
+ q->guc->id, q->flags, process_name, pid);
+
trace_xe_sched_job_timedout(job);
if (!exec_queue_killed(q))
@@ -1312,6 +1319,15 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms
kfree(msg);
}
+static void __suspend_fence_signal(struct xe_exec_queue *q)
+{
+ if (!q->guc->suspend_pending)
+ return;
+
+ WRITE_ONCE(q->guc->suspend_pending, false);
+ wake_up(&q->guc->suspend_wait);
+}
+
static void suspend_fence_signal(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
@@ -1321,9 +1337,7 @@ static void suspend_fence_signal(struct xe_exec_queue *q)
guc_read_stopped(guc));
xe_assert(xe, q->guc->suspend_pending);
- q->guc->suspend_pending = false;
- smp_wmb();
- wake_up(&q->guc->suspend_wait);
+ __suspend_fence_signal(q);
}
static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
@@ -1375,6 +1389,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
+ struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
+
trace_xe_sched_msg_recv(msg);
switch (msg->opcode) {
@@ -1393,6 +1409,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
default:
XE_WARN_ON("Unknown message type");
}
+
+ xe_pm_runtime_put(xe);
}
static const struct drm_sched_backend_ops drm_sched_ops = {
@@ -1476,12 +1494,15 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
trace_xe_exec_queue_kill(q);
set_exec_queue_killed(q);
+ __suspend_fence_signal(q);
xe_guc_exec_queue_trigger_cleanup(q);
}
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
u32 opcode)
{
+ xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
+
INIT_LIST_HEAD(&msg->link);
msg->opcode = opcode;
msg->private_data = q;
@@ -1572,12 +1593,31 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q)
return 0;
}
-static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
+static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
+ int ret;
+
+ /*
+ * Likely don't need to check exec_queue_killed() as we clear
+ * suspend_pending upon kill but to be paranoid but races in which
+ * suspend_pending is set after kill also check kill here.
+ */
+ ret = wait_event_timeout(q->guc->suspend_wait,
+ !READ_ONCE(q->guc->suspend_pending) ||
+ exec_queue_killed(q) ||
+ guc_read_stopped(guc),
+ HZ * 5);
- wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
- guc_read_stopped(guc));
+ if (!ret) {
+ xe_gt_warn(guc_to_gt(guc),
+ "Suspend fence, guc_id=%d, failed to respond",
+ q->guc->id);
+ /* XXX: Trigger GT reset? */
+ return -ETIME;
+ }
+
+ return 0;
}
static void guc_exec_queue_resume(struct xe_exec_queue *q)
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 1c9d38b6f5f1..65b2e147c4b9 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -92,7 +92,7 @@ void xe_heci_gsc_fini(struct xe_device *xe)
{
struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
- if (!HAS_HECI_GSCFI(xe))
+ if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
return;
if (heci_gsc->adev) {
@@ -177,12 +177,14 @@ void xe_heci_gsc_init(struct xe_device *xe)
const struct heci_gsc_def *def;
int ret;
- if (!HAS_HECI_GSCFI(xe))
+ if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
return;
heci_gsc->irq = -1;
- if (xe->info.platform == XE_PVC) {
+ if (xe->info.platform == XE_BATTLEMAGE) {
+ def = &heci_gsc_def_dg2;
+ } else if (xe->info.platform == XE_PVC) {
def = &heci_gsc_def_pvc;
} else if (xe->info.platform == XE_DG2) {
def = &heci_gsc_def_dg2;
@@ -232,3 +234,23 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
if (ret)
drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
}
+
+void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir)
+{
+ int ret;
+
+ if ((iir & CSC_IRQ_INTF(1)) == 0)
+ return;
+
+ if (!HAS_HECI_CSCFI(xe)) {
+ drm_warn_once(&xe->drm, "CSC irq: not supported");
+ return;
+ }
+
+ if (xe->heci_gsc.irq < 0)
+ return;
+
+ ret = generic_handle_irq(xe->heci_gsc.irq);
+ if (ret)
+ drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
+}
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h
index 9db454478fae..48b3b1838045 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.h
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.h
@@ -11,10 +11,15 @@ struct xe_device;
struct mei_aux_device;
/*
- * The HECI1 bit corresponds to bit15 and HECI2 to bit14.
+ * GSC HECI1 bit corresponds to bit15 and HECI2 to bit14.
* The reason for this is to allow growth for more interfaces in the future.
*/
-#define GSC_IRQ_INTF(_x) BIT(15 - (_x))
+#define GSC_IRQ_INTF(_x) BIT(15 - (_x))
+
+/*
+ * CSC HECI1 bit corresponds to bit9 and HECI2 to bit10.
+ */
+#define CSC_IRQ_INTF(_x) BIT(9 + (_x))
/**
* struct xe_heci_gsc - graphics security controller for xe, HECI interface
@@ -31,5 +36,6 @@ struct xe_heci_gsc {
void xe_heci_gsc_init(struct xe_device *xe);
void xe_heci_gsc_fini(struct xe_device *xe);
void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir);
+void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir);
#endif /* __XE_HECI_GSC_DEV_H__ */
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 85733f993d09..5f2c368c35ad 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -459,6 +459,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
* the primary tile.
*/
if (id == 0) {
+ if (HAS_HECI_CSCFI(xe))
+ xe_heci_csc_irq_handler(xe, master_ctl);
xe_display_irq_handler(xe, master_ctl);
gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
}
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 418661a88918..8999ac511555 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -7,7 +7,7 @@
#include <drm/drm_managed.h>
-#include "regs/xe_sriov_regs.h"
+#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
@@ -71,7 +71,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
lmtt->ops->lmtt_pte_num(level)),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
- XE_BO_NEEDS_64K | XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index c9f5673353ee..6f24aaf58252 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -73,6 +73,7 @@ struct xe_migrate {
#define NUM_PT_SLOTS 32
#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
#define MAX_NUM_PTE 512
+#define IDENTITY_OFFSET 256ULL
/*
* Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest
@@ -84,15 +85,14 @@ struct xe_migrate {
#define MAX_PTE_PER_SDI 0x1FE
/**
- * xe_tile_migrate_engine() - Get this tile's migrate engine.
+ * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
* @tile: The tile.
*
- * Returns the default migrate engine of this tile.
- * TODO: Perhaps this function is slightly misplaced, and even unneeded?
+ * Returns the default migrate exec queue of this tile.
*
- * Return: The default migrate engine
+ * Return: The default migrate exec queue
*/
-struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile)
+struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile)
{
return tile->migrate->q;
}
@@ -121,14 +121,64 @@ static u64 xe_migrate_vm_addr(u64 slot, u32 level)
return (slot + 1ULL) << xe_pt_shift(level + 1);
}
-static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
+static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte)
{
/*
* Remove the DPA to get a correct offset into identity table for the
* migrate offset
*/
+ u64 identity_offset = IDENTITY_OFFSET;
+
+ if (GRAPHICS_VER(xe) >= 20 && is_comp_pte)
+ identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G);
+
addr -= xe->mem.vram.dpa_base;
- return addr + (256ULL << xe_pt_shift(2));
+ return addr + (identity_offset << xe_pt_shift(2));
+}
+
+static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo,
+ u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs)
+{
+ u64 pos, ofs, flags;
+ u64 entry;
+ /* XXX: Unclear if this should be usable_size? */
+ u64 vram_limit = xe->mem.vram.actual_physical_size +
+ xe->mem.vram.dpa_base;
+ u32 level = 2;
+
+ ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8;
+ flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
+ true, 0);
+
+ xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M));
+
+ /*
+ * Use 1GB pages when possible, last chunk always use 2M
+ * pages as mixing reserved memory (stolen, WOCPM) with a single
+ * mapping is not allowed on certain platforms.
+ */
+ for (pos = xe->mem.vram.dpa_base; pos < vram_limit;
+ pos += SZ_1G, ofs += 8) {
+ if (pos + SZ_1G >= vram_limit) {
+ entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs,
+ pat_index);
+ xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
+
+ flags = vm->pt_ops->pte_encode_addr(xe, 0,
+ pat_index,
+ level - 1,
+ true, 0);
+
+ for (ofs = pt_2m_ofs; pos < vram_limit;
+ pos += SZ_2M, ofs += 8)
+ xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+ break; /* Ensure pos == vram_limit assert correct */
+ }
+
+ xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+ }
+
+ xe_assert(xe, pos == vram_limit);
}
static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
@@ -137,11 +187,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
struct xe_device *xe = tile_to_xe(tile);
u16 pat_index = xe->pat.idx[XE_CACHE_WB];
u8 id = tile->id;
- u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level,
- num_setup = num_level + 1;
+ u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+#define VRAM_IDENTITY_MAP_COUNT 2
+ u32 num_setup = num_level + VRAM_IDENTITY_MAP_COUNT;
+#undef VRAM_IDENTITY_MAP_COUNT
u32 map_ofs, level, i;
struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
- u64 entry, pt30_ofs;
+ u64 entry, pt29_ofs;
/* Can't bump NUM_PT_SLOTS too high */
BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
@@ -161,9 +213,9 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (IS_ERR(bo))
return PTR_ERR(bo);
- /* PT31 reserved for 2M identity map */
- pt30_ofs = bo->size - 2 * XE_PAGE_SIZE;
- entry = vm->pt_ops->pde_encode_bo(bo, pt30_ofs, pat_index);
+ /* PT30 & PT31 reserved for 2M identity map */
+ pt29_ofs = bo->size - 3 * XE_PAGE_SIZE;
+ entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index);
xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE;
@@ -215,12 +267,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
} else {
u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
- m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+ m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false);
if (xe->info.has_usm) {
batch = tile->primary_gt->usm.bb_pool->bo;
batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
- m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+ m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false);
}
}
@@ -254,55 +306,36 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
/* Identity map the entire vram at 256GiB offset */
if (IS_DGFX(xe)) {
- u64 pos, ofs, flags;
- /* XXX: Unclear if this should be usable_size? */
- u64 vram_limit = xe->mem.vram.actual_physical_size +
- xe->mem.vram.dpa_base;
+ u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE;
- level = 2;
- ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
- flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
- true, 0);
-
- xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M));
+ xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET,
+ pat_index, pt30_ofs);
+ xe_assert(xe, xe->mem.vram.actual_physical_size <=
+ (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G);
/*
- * Use 1GB pages when possible, last chunk always use 2M
- * pages as mixing reserved memory (stolen, WOCPM) with a single
- * mapping is not allowed on certain platforms.
+ * Identity map the entire vram for compressed pat_index for xe2+
+ * if flat ccs is enabled.
*/
- for (pos = xe->mem.vram.dpa_base; pos < vram_limit;
- pos += SZ_1G, ofs += 8) {
- if (pos + SZ_1G >= vram_limit) {
- u64 pt31_ofs = bo->size - XE_PAGE_SIZE;
-
- entry = vm->pt_ops->pde_encode_bo(bo, pt31_ofs,
- pat_index);
- xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
-
- flags = vm->pt_ops->pte_encode_addr(xe, 0,
- pat_index,
- level - 1,
- true, 0);
-
- for (ofs = pt31_ofs; pos < vram_limit;
- pos += SZ_2M, ofs += 8)
- xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
- break; /* Ensure pos == vram_limit assert correct */
- }
-
- xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+ if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) {
+ u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION];
+ u64 vram_offset = IDENTITY_OFFSET +
+ DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G);
+ u64 pt31_ofs = bo->size - XE_PAGE_SIZE;
+
+ xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE -
+ IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G);
+ xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset,
+ comp_pat_index, pt31_ofs);
}
-
- xe_assert(xe, pos == vram_limit);
}
/*
* Example layout created above, with root level = 3:
* [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
* [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
- * [PT9...PT27]: Userspace PT's for VM_BIND, 4 KiB PTE's
- * [PT28 = PDE 0] [PT29 = PDE 1] [PT30 = PDE 2] [PT31 = 2M vram identity map]
+ * [PT9...PT26]: Userspace PT's for VM_BIND, 4 KiB PTE's
+ * [PT27 = PDE 0] [PT28 = PDE 1] [PT29 = PDE 2] [PT30 & PT31 = 2M vram identity map]
*
* This makes the lowest part of the VM point to the pagetables.
* Hence the lowest 2M in the vm should point to itself, with a few writes
@@ -348,6 +381,11 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
return logical_mask;
}
+static bool xe_migrate_needs_ccs_emit(struct xe_device *xe)
+{
+ return xe_device_has_flat_ccs(xe) && !(GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe));
+}
+
/**
* xe_migrate_init() - Initialize a migrate context
* @tile: Back-pointer to the tile we're initializing for.
@@ -421,7 +459,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
return ERR_PTR(err);
if (IS_DGFX(xe)) {
- if (xe_device_has_flat_ccs(xe))
+ if (xe_migrate_needs_ccs_emit(xe))
/* min chunk size corresponds to 4K of CCS Metadata */
m->min_chunk_size = SZ_4K * SZ_64K /
xe_device_ccs_bytes(xe, SZ_64K);
@@ -475,20 +513,26 @@ static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
return cur->size >= size;
}
+#define PTE_UPDATE_FLAG_IS_VRAM BIT(0)
+#define PTE_UPDATE_FLAG_IS_COMP_PTE BIT(1)
+
static u32 pte_update_size(struct xe_migrate *m,
- bool is_vram,
+ u32 flags,
struct ttm_resource *res,
struct xe_res_cursor *cur,
u64 *L0, u64 *L0_ofs, u32 *L0_pt,
u32 cmd_size, u32 pt_ofs, u32 avail_pts)
{
u32 cmds = 0;
+ bool is_vram = PTE_UPDATE_FLAG_IS_VRAM & flags;
+ bool is_comp_pte = PTE_UPDATE_FLAG_IS_COMP_PTE & flags;
*L0_pt = pt_ofs;
if (is_vram && xe_migrate_allow_identity(*L0, cur)) {
/* Offset into identity map. */
*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
- cur->start + vram_region_gpu_offset(res));
+ cur->start + vram_region_gpu_offset(res),
+ is_comp_pte);
cmds += cmd_size;
} else {
/* Clip L0 to available size */
@@ -661,7 +705,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
struct xe_gt *gt = m->tile->primary_gt;
u32 flush_flags = 0;
- if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
+ if (!copy_ccs && dst_is_indirect) {
/*
* If the src is already in vram, then it should already
* have been cleared by us, or has been populated by the
@@ -737,6 +781,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
bool copy_ccs = xe_device_has_flat_ccs(xe) &&
xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
+ bool use_comp_pat = xe_device_has_flat_ccs(xe) &&
+ GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram;
/* Copying CCS between two different BOs is not supported yet. */
if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
@@ -763,10 +809,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
struct xe_sched_job *job;
struct xe_bb *bb;
- u32 flush_flags;
+ u32 flush_flags = 0;
u32 update_idx;
u64 ccs_ofs, ccs_size;
u32 ccs_pt;
+ u32 pte_flags;
bool usm = xe->info.has_usm;
u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
@@ -779,17 +826,20 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
src_L0 = min(src_L0, dst_L0);
- batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
+ pte_flags = src_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
+ pte_flags |= use_comp_pat ? PTE_UPDATE_FLAG_IS_COMP_PTE : 0;
+ batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0,
&src_L0_ofs, &src_L0_pt, 0, 0,
avail_pts);
- batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0,
+ pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
+ batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0,
&dst_L0_ofs, &dst_L0_pt, 0,
avail_pts, avail_pts);
if (copy_system_ccs) {
ccs_size = xe_device_ccs_bytes(xe, src_L0);
- batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size,
+ batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size,
&ccs_ofs, &ccs_pt, 0,
2 * avail_pts,
avail_pts);
@@ -798,7 +848,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
/* Add copy commands size here */
batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
- ((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
+ ((xe_migrate_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0));
bb = xe_bb_new(gt, batch_size, usm);
if (IS_ERR(bb)) {
@@ -827,11 +877,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (!copy_only_ccs)
emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
- flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
- IS_DGFX(xe) ? src_is_vram : src_is_pltt,
- dst_L0_ofs,
- IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
- src_L0, ccs_ofs, copy_ccs);
+ if (xe_migrate_needs_ccs_emit(xe))
+ flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+ IS_DGFX(xe) ? src_is_vram : src_is_pltt,
+ dst_L0_ofs,
+ IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
+ src_L0, ccs_ofs, copy_ccs);
job = xe_bb_create_migration_job(m->q, bb,
xe_migrate_batch_base(m, usm),
@@ -1022,6 +1073,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_sched_job *job;
struct xe_bb *bb;
u32 batch_size, update_idx;
+ u32 pte_flags;
bool usm = xe->info.has_usm;
u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
@@ -1029,13 +1081,14 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
clear_L0 = xe_migrate_res_sizes(m, &src_it);
/* Calculate final sizes and batch size.. */
+ pte_flags = clear_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
batch_size = 2 +
- pte_update_size(m, clear_vram, src, &src_it,
+ pte_update_size(m, pte_flags, src, &src_it,
&clear_L0, &clear_L0_ofs, &clear_L0_pt,
clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
avail_pts);
- if (xe_device_has_flat_ccs(xe))
+ if (xe_migrate_needs_ccs_emit(xe))
batch_size += EMIT_COPY_CCS_DW;
/* Clear commands */
@@ -1063,7 +1116,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
if (!clear_system_ccs)
emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
- if (xe_device_has_flat_ccs(xe)) {
+ if (xe_migrate_needs_ccs_emit(xe)) {
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
m->cleared_mem_ofs, false, clear_L0);
flush_flags = MI_FLUSH_DW_CCS;
@@ -1126,6 +1179,7 @@ err_sync:
}
static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
+ const struct xe_vm_pgtable_update_op *pt_op,
const struct xe_vm_pgtable_update *update,
struct xe_migrate_pt_update *pt_update)
{
@@ -1146,7 +1200,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
if (!ppgtt_ofs)
ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
xe_bo_addr(update->pt_bo, 0,
- XE_PAGE_SIZE));
+ XE_PAGE_SIZE), false);
do {
u64 addr = ppgtt_ofs + ofs * 8;
@@ -1160,8 +1214,12 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = lower_32_bits(addr);
bb->cs[bb->len++] = upper_32_bits(addr);
- ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk,
- update);
+ if (pt_op->bind)
+ ops->populate(pt_update, tile, NULL, bb->cs + bb->len,
+ ofs, chunk, update);
+ else
+ ops->clear(pt_update, tile, NULL, bb->cs + bb->len,
+ ofs, chunk, update);
bb->len += chunk * 2;
ofs += chunk;
@@ -1186,114 +1244,58 @@ struct migrate_test_params {
static struct dma_fence *
xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
- struct xe_vm *vm, struct xe_bo *bo,
- const struct xe_vm_pgtable_update *updates,
- u32 num_updates, bool wait_vm,
struct xe_migrate_pt_update *pt_update)
{
XE_TEST_DECLARE(struct migrate_test_params *test =
to_migrate_test_params
(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));)
const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
- struct dma_fence *fence;
+ struct xe_vm *vm = pt_update->vops->vm;
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &pt_update->vops->pt_update_ops[pt_update->tile_id];
int err;
- u32 i;
+ u32 i, j;
if (XE_TEST_ONLY(test && test->force_gpu))
return ERR_PTR(-ETIME);
- if (bo && !dma_resv_test_signaled(bo->ttm.base.resv,
- DMA_RESV_USAGE_KERNEL))
- return ERR_PTR(-ETIME);
-
- if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP))
- return ERR_PTR(-ETIME);
-
if (ops->pre_commit) {
pt_update->job = NULL;
err = ops->pre_commit(pt_update);
if (err)
return ERR_PTR(err);
}
- for (i = 0; i < num_updates; i++) {
- const struct xe_vm_pgtable_update *update = &updates[i];
-
- ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL,
- update->ofs, update->qwords, update);
- }
-
- if (vm) {
- trace_xe_vm_cpu_bind(vm);
- xe_device_wmb(vm->xe);
- }
-
- fence = dma_fence_get_stub();
-
- return fence;
-}
-
-static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs)
-{
- struct dma_fence *fence;
- int i;
- for (i = 0; i < num_syncs; i++) {
- fence = syncs[i].fence;
-
- if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &fence->flags))
- return false;
- }
- if (q) {
- fence = xe_exec_queue_last_fence_get(q, vm);
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
- dma_fence_put(fence);
- return false;
+ for (i = 0; i < pt_update_ops->num_ops; ++i) {
+ const struct xe_vm_pgtable_update_op *pt_op =
+ &pt_update_ops->ops[i];
+
+ for (j = 0; j < pt_op->num_entries; j++) {
+ const struct xe_vm_pgtable_update *update =
+ &pt_op->entries[j];
+
+ if (pt_op->bind)
+ ops->populate(pt_update, m->tile,
+ &update->pt_bo->vmap, NULL,
+ update->ofs, update->qwords,
+ update);
+ else
+ ops->clear(pt_update, m->tile,
+ &update->pt_bo->vmap, NULL,
+ update->ofs, update->qwords, update);
}
- dma_fence_put(fence);
}
- return true;
+ trace_xe_vm_cpu_bind(vm);
+ xe_device_wmb(vm->xe);
+
+ return dma_fence_get_stub();
}
-/**
- * xe_migrate_update_pgtables() - Pipelined page-table update
- * @m: The migrate context.
- * @vm: The vm we'll be updating.
- * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr.
- * @q: The exec queue to be used for the update or NULL if the default
- * migration engine is to be used.
- * @updates: An array of update descriptors.
- * @num_updates: Number of descriptors in @updates.
- * @syncs: Array of xe_sync_entry to await before updating. Note that waits
- * will block the engine timeline.
- * @num_syncs: Number of entries in @syncs.
- * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains
- * pointers to callback functions and, if subclassed, private arguments to
- * those.
- *
- * Perform a pipelined page-table update. The update descriptors are typically
- * built under the same lock critical section as a call to this function. If
- * using the default engine for the updates, they will be performed in the
- * order they grab the job_mutex. If different engines are used, external
- * synchronization is needed for overlapping updates to maintain page-table
- * consistency. Note that the meaing of "overlapping" is that the updates
- * touch the same page-table, which might be a higher-level page-directory.
- * If no pipelining is needed, then updates may be performed by the cpu.
- *
- * Return: A dma_fence that, when signaled, indicates the update completion.
- */
-struct dma_fence *
-xe_migrate_update_pgtables(struct xe_migrate *m,
- struct xe_vm *vm,
- struct xe_bo *bo,
- struct xe_exec_queue *q,
- const struct xe_vm_pgtable_update *updates,
- u32 num_updates,
- struct xe_sync_entry *syncs, u32 num_syncs,
- struct xe_migrate_pt_update *pt_update)
+static struct dma_fence *
+__xe_migrate_update_pgtables(struct xe_migrate *m,
+ struct xe_migrate_pt_update *pt_update,
+ struct xe_vm_pgtable_update_ops *pt_update_ops)
{
const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
struct xe_tile *tile = m->tile;
@@ -1302,59 +1304,53 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
struct xe_sched_job *job;
struct dma_fence *fence;
struct drm_suballoc *sa_bo = NULL;
- struct xe_vma *vma = pt_update->vma;
struct xe_bb *bb;
- u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
+ u32 i, j, batch_size = 0, ppgtt_ofs, update_idx, page_ofs = 0;
+ u32 num_updates = 0, current_update = 0;
u64 addr;
int err = 0;
- bool usm = !q && xe->info.has_usm;
- bool first_munmap_rebind = vma &&
- vma->gpuva.flags & XE_VMA_FIRST_REBIND;
- struct xe_exec_queue *q_override = !q ? m->q : q;
- u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+ bool is_migrate = pt_update_ops->q == m->q;
+ bool usm = is_migrate && xe->info.has_usm;
+
+ for (i = 0; i < pt_update_ops->num_ops; ++i) {
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];
+ struct xe_vm_pgtable_update *updates = pt_op->entries;
- /* Use the CPU if no in syncs and engine is idle */
- if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
- fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
- num_updates,
- first_munmap_rebind,
- pt_update);
- if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
- return fence;
+ num_updates += pt_op->num_entries;
+ for (j = 0; j < pt_op->num_entries; ++j) {
+ u32 num_cmds = DIV_ROUND_UP(updates[j].qwords,
+ MAX_PTE_PER_SDI);
+
+ /* align noop + MI_STORE_DATA_IMM cmd prefix */
+ batch_size += 4 * num_cmds + updates[j].qwords * 2;
+ }
}
/* fixed + PTE entries */
if (IS_DGFX(xe))
- batch_size = 2;
+ batch_size += 2;
else
- batch_size = 6 + num_updates * 2;
-
- for (i = 0; i < num_updates; i++) {
- u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI);
-
- /* align noop + MI_STORE_DATA_IMM cmd prefix */
- batch_size += 4 * num_cmds + updates[i].qwords * 2;
- }
-
- /*
- * XXX: Create temp bo to copy from, if batch_size becomes too big?
- *
- * Worst case: Sum(2 * (each lower level page size) + (top level page size))
- * Should be reasonably bound..
- */
- xe_tile_assert(tile, batch_size < SZ_128K);
+ batch_size += 6 * (num_updates / MAX_PTE_PER_SDI + 1) +
+ num_updates * 2;
- bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm);
+ bb = xe_bb_new(gt, batch_size, usm);
if (IS_ERR(bb))
return ERR_CAST(bb);
/* For sysmem PTE's, need to map them in our hole.. */
if (!IS_DGFX(xe)) {
+ u32 ptes, ofs;
+
ppgtt_ofs = NUM_KERNEL_PDE - 1;
- if (q) {
- xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT);
+ if (!is_migrate) {
+ u32 num_units = DIV_ROUND_UP(num_updates,
+ NUM_VMUSA_WRITES_PER_UNIT);
- sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
+ if (num_units > m->vm_update_sa.size) {
+ err = -ENOBUFS;
+ goto err_bb;
+ }
+ sa_bo = drm_suballoc_new(&m->vm_update_sa, num_units,
GFP_KERNEL, true, 0);
if (IS_ERR(sa_bo)) {
err = PTR_ERR(sa_bo);
@@ -1370,18 +1366,49 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
}
/* Map our PT's to gtt */
- bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates);
- bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
- bb->cs[bb->len++] = 0; /* upper_32_bits */
-
- for (i = 0; i < num_updates; i++) {
- struct xe_bo *pt_bo = updates[i].pt_bo;
+ i = 0;
+ j = 0;
+ ptes = num_updates;
+ ofs = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
+ while (ptes) {
+ u32 chunk = min(MAX_PTE_PER_SDI, ptes);
+ u32 idx = 0;
+
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM |
+ MI_SDI_NUM_QW(chunk);
+ bb->cs[bb->len++] = ofs;
+ bb->cs[bb->len++] = 0; /* upper_32_bits */
+
+ for (; i < pt_update_ops->num_ops; ++i) {
+ struct xe_vm_pgtable_update_op *pt_op =
+ &pt_update_ops->ops[i];
+ struct xe_vm_pgtable_update *updates = pt_op->entries;
+
+ for (; j < pt_op->num_entries; ++j, ++current_update, ++idx) {
+ struct xe_vm *vm = pt_update->vops->vm;
+ struct xe_bo *pt_bo = updates[j].pt_bo;
+
+ if (idx == chunk)
+ goto next_cmd;
+
+ xe_tile_assert(tile, pt_bo->size == SZ_4K);
+
+ /* Map a PT at most once */
+ if (pt_bo->update_index < 0)
+ pt_bo->update_index = current_update;
+
+ addr = vm->pt_ops->pte_encode_bo(pt_bo, 0,
+ XE_CACHE_WB, 0);
+ bb->cs[bb->len++] = lower_32_bits(addr);
+ bb->cs[bb->len++] = upper_32_bits(addr);
+ }
- xe_tile_assert(tile, pt_bo->size == SZ_4K);
+ j = 0;
+ }
- addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0);
- bb->cs[bb->len++] = lower_32_bits(addr);
- bb->cs[bb->len++] = upper_32_bits(addr);
+next_cmd:
+ ptes -= chunk;
+ ofs += chunk * sizeof(u64);
}
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
@@ -1389,19 +1416,36 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
(page_ofs / sizeof(u64)) * XE_PAGE_SIZE;
- for (i = 0; i < num_updates; i++)
- write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE,
- &updates[i], pt_update);
+ for (i = 0; i < pt_update_ops->num_ops; ++i) {
+ struct xe_vm_pgtable_update_op *pt_op =
+ &pt_update_ops->ops[i];
+ struct xe_vm_pgtable_update *updates = pt_op->entries;
+
+ for (j = 0; j < pt_op->num_entries; ++j) {
+ struct xe_bo *pt_bo = updates[j].pt_bo;
+
+ write_pgtable(tile, bb, addr +
+ pt_bo->update_index * XE_PAGE_SIZE,
+ pt_op, &updates[j], pt_update);
+ }
+ }
} else {
/* phys pages, no preamble required */
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
- for (i = 0; i < num_updates; i++)
- write_pgtable(tile, bb, 0, &updates[i], pt_update);
+ for (i = 0; i < pt_update_ops->num_ops; ++i) {
+ struct xe_vm_pgtable_update_op *pt_op =
+ &pt_update_ops->ops[i];
+ struct xe_vm_pgtable_update *updates = pt_op->entries;
+
+ for (j = 0; j < pt_op->num_entries; ++j)
+ write_pgtable(tile, bb, 0, pt_op, &updates[j],
+ pt_update);
+ }
}
- job = xe_bb_create_migration_job(q ?: m->q, bb,
+ job = xe_bb_create_migration_job(pt_update_ops->q, bb,
xe_migrate_batch_base(m, usm),
update_idx);
if (IS_ERR(job)) {
@@ -1409,46 +1453,20 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
goto err_sa;
}
- /* Wait on BO move */
- if (bo) {
- err = xe_sched_job_add_deps(job, bo->ttm.base.resv,
- DMA_RESV_USAGE_KERNEL);
- if (err)
- goto err_job;
- }
-
- /*
- * Munmap style VM unbind, need to wait for all jobs to be complete /
- * trigger preempts before moving forward
- */
- if (first_munmap_rebind) {
- err = xe_sched_job_add_deps(job, xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP);
- if (err)
- goto err_job;
- }
-
- err = xe_sched_job_last_fence_add_dep(job, vm);
- for (i = 0; !err && i < num_syncs; i++)
- err = xe_sync_entry_add_deps(&syncs[i], job);
-
- if (err)
- goto err_job;
-
if (ops->pre_commit) {
pt_update->job = job;
err = ops->pre_commit(pt_update);
if (err)
goto err_job;
}
- if (!q)
+ if (is_migrate)
mutex_lock(&m->job_mutex);
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
- if (!q)
+ if (is_migrate)
mutex_unlock(&m->job_mutex);
xe_bb_free(bb, fence);
@@ -1466,6 +1484,40 @@ err_bb:
}
/**
+ * xe_migrate_update_pgtables() - Pipelined page-table update
+ * @m: The migrate context.
+ * @pt_update: PT update arguments
+ *
+ * Perform a pipelined page-table update. The update descriptors are typically
+ * built under the same lock critical section as a call to this function. If
+ * using the default engine for the updates, they will be performed in the
+ * order they grab the job_mutex. If different engines are used, external
+ * synchronization is needed for overlapping updates to maintain page-table
+ * consistency. Note that the meaing of "overlapping" is that the updates
+ * touch the same page-table, which might be a higher-level page-directory.
+ * If no pipelining is needed, then updates may be performed by the cpu.
+ *
+ * Return: A dma_fence that, when signaled, indicates the update completion.
+ */
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+ struct xe_migrate_pt_update *pt_update)
+
+{
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &pt_update->vops->pt_update_ops[pt_update->tile_id];
+ struct dma_fence *fence;
+
+ fence = xe_migrate_update_pgtables_cpu(m, pt_update);
+
+ /* -ETIME indicates a job is needed, anything else is legit error */
+ if (!IS_ERR(fence) || PTR_ERR(fence) != -ETIME)
+ return fence;
+
+ return __xe_migrate_update_pgtables(m, pt_update, pt_update_ops);
+}
+
+/**
* xe_migrate_wait() - Complete all operations using the xe_migrate context
* @m: Migrate context to wait for.
*
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 951f19318ea4..453e0ecf5034 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -47,6 +47,24 @@ struct xe_migrate_pt_update_ops {
struct xe_tile *tile, struct iosys_map *map,
void *pos, u32 ofs, u32 num_qwords,
const struct xe_vm_pgtable_update *update);
+ /**
+ * @clear: Clear a command buffer or page-table with ptes.
+ * @pt_update: Embeddable callback argument.
+ * @tile: The tile for the current operation.
+ * @map: struct iosys_map into the memory to be populated.
+ * @pos: If @map is NULL, map into the memory to be populated.
+ * @ofs: qword offset into @map, unused if @map is NULL.
+ * @num_qwords: Number of qwords to write.
+ * @update: Information about the PTEs to be inserted.
+ *
+ * This interface is intended to be used as a callback into the
+ * page-table system to populate command buffers or shared
+ * page-tables with PTEs.
+ */
+ void (*clear)(struct xe_migrate_pt_update *pt_update,
+ struct xe_tile *tile, struct iosys_map *map,
+ void *pos, u32 ofs, u32 num_qwords,
+ const struct xe_vm_pgtable_update *update);
/**
* @pre_commit: Callback to be called just before arming the
@@ -67,14 +85,10 @@ struct xe_migrate_pt_update_ops {
struct xe_migrate_pt_update {
/** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */
const struct xe_migrate_pt_update_ops *ops;
- /** @vma: The vma we're updating the pagetable for. */
- struct xe_vma *vma;
+ /** @vops: VMA operations */
+ struct xe_vma_ops *vops;
/** @job: The job if a GPU page-table update. NULL otherwise */
struct xe_sched_job *job;
- /** @start: Start of update for the range fence */
- u64 start;
- /** @last: Last of update for the range fence */
- u64 last;
/** @tile_id: Tile ID of the update */
u8 tile_id;
};
@@ -96,15 +110,9 @@ struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
struct dma_fence *
xe_migrate_update_pgtables(struct xe_migrate *m,
- struct xe_vm *vm,
- struct xe_bo *bo,
- struct xe_exec_queue *q,
- const struct xe_vm_pgtable_update *updates,
- u32 num_updates,
- struct xe_sync_entry *syncs, u32 num_syncs,
struct xe_migrate_pt_update *pt_update);
void xe_migrate_wait(struct xe_migrate *m);
-struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile);
+struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile);
#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f92faad4b96d..bdcc7282385c 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -33,29 +33,56 @@ static void tiles_fini(void *arg)
tile->mmio.regs = NULL;
}
-int xe_mmio_probe_tiles(struct xe_device *xe)
+/*
+ * On multi-tile devices, partition the BAR space for MMIO on each tile,
+ * possibly accounting for register override on the number of tiles available.
+ * Resulting memory layout is like below:
+ *
+ * .----------------------. <- tile_count * tile_mmio_size
+ * | .... |
+ * |----------------------| <- 2 * tile_mmio_size
+ * | tile1->mmio.regs |
+ * |----------------------| <- 1 * tile_mmio_size
+ * | tile0->mmio.regs |
+ * '----------------------' <- 0MB
+ */
+static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
{
- size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
- u8 id, tile_count = xe->info.tile_count;
- struct xe_gt *gt = xe_root_mmio_gt(xe);
struct xe_tile *tile;
void __iomem *regs;
- u32 mtcfg;
+ u8 id;
- if (tile_count == 1)
- goto add_mmio_ext;
+ /*
+ * Nothing to be done as tile 0 has already been setup earlier with the
+ * entire BAR mapped - see xe_mmio_init()
+ */
+ if (xe->info.tile_count == 1)
+ return;
+ /* Possibly override number of tile based on configuration register */
if (!xe->info.skip_mtcfg) {
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ u8 tile_count;
+ u32 mtcfg;
+
+ /*
+ * Although the per-tile mmio regs are not yet initialized, this
+ * is fine as it's going to the root gt, that's guaranteed to be
+ * initialized earlier in xe_mmio_init()
+ */
mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+
if (tile_count < xe->info.tile_count) {
drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
xe->info.tile_count, tile_count);
xe->info.tile_count = tile_count;
/*
- * FIXME: Needs some work for standalone media, but should be impossible
- * with multi-tile for now.
+ * FIXME: Needs some work for standalone media, but
+ * should be impossible with multi-tile for now:
+ * multi-tile platform with standalone media doesn't
+ * exist
*/
xe->info.gt_count = xe->info.tile_count;
}
@@ -67,23 +94,51 @@ int xe_mmio_probe_tiles(struct xe_device *xe)
tile->mmio.regs = regs;
regs += tile_mmio_size;
}
+}
-add_mmio_ext:
- /*
- * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
- * When supported, there could be an additional contiguous multi-tile MMIO extension
- * space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
- */
- if (xe->info.has_mmio_ext) {
- regs = xe->mmio.regs + tile_mmio_size * tile_count;
+/*
+ * On top of all the multi-tile MMIO space there can be a platform-dependent
+ * extension for each tile, resulting in a layout like below:
+ *
+ * .----------------------. <- ext_base + tile_count * tile_mmio_ext_size
+ * | .... |
+ * |----------------------| <- ext_base + 2 * tile_mmio_ext_size
+ * | tile1->mmio_ext.regs |
+ * |----------------------| <- ext_base + 1 * tile_mmio_ext_size
+ * | tile0->mmio_ext.regs |
+ * |======================| <- ext_base = tile_count * tile_mmio_size
+ * | |
+ * | mmio.regs |
+ * | |
+ * '----------------------' <- 0MB
+ *
+ * Set up the tile[]->mmio_ext pointers/sizes.
+ */
+static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size,
+ size_t tile_mmio_ext_size)
+{
+ struct xe_tile *tile;
+ void __iomem *regs;
+ u8 id;
- for_each_tile(tile, xe, id) {
- tile->mmio_ext.size = tile_mmio_ext_size;
- tile->mmio_ext.regs = regs;
+ if (!xe->info.has_mmio_ext)
+ return;
- regs += tile_mmio_ext_size;
- }
+ regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count;
+ for_each_tile(tile, xe, id) {
+ tile->mmio_ext.size = tile_mmio_ext_size;
+ tile->mmio_ext.regs = regs;
+ regs += tile_mmio_ext_size;
}
+}
+
+int xe_mmio_probe_tiles(struct xe_device *xe)
+{
+ size_t tile_mmio_size = SZ_16M;
+ size_t tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
+
+ mmio_multi_tile_setup(xe, tile_mmio_size);
+ mmio_extension_setup(xe, tile_mmio_size, tile_mmio_ext_size);
return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe);
}
@@ -121,12 +176,29 @@ int xe_mmio_init(struct xe_device *xe)
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
+static void mmio_flush_pending_writes(struct xe_gt *gt)
+{
+#define DUMMY_REG_OFFSET 0x130030
+ struct xe_tile *tile = gt_to_tile(gt);
+ int i;
+
+ if (tile->xe->info.platform != XE_LUNARLAKE)
+ return;
+
+ /* 4 dummy writes */
+ for (i = 0; i < 4; i++)
+ writel(0, tile->mmio.regs + DUMMY_REG_OFFSET);
+}
+
u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u8 val;
+ /* Wa_15015404425 */
+ mmio_flush_pending_writes(gt);
+
val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
@@ -139,6 +211,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u16 val;
+ /* Wa_15015404425 */
+ mmio_flush_pending_writes(gt);
+
val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
@@ -151,7 +226,11 @@ void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
trace_xe_reg_rw(gt, true, addr, val, sizeof(val));
- writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
+
+ if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
+ xe_gt_sriov_vf_write32(gt, reg, val);
+ else
+ writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}
u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
@@ -160,6 +239,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u32 val;
+ /* Wa_15015404425 */
+ mmio_flush_pending_writes(gt);
+
if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
val = xe_gt_sriov_vf_read32(gt, reg);
else
@@ -251,37 +333,24 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
return (u64)udw << 32 | ldw;
}
-/**
- * xe_mmio_wait32() - Wait for a register to match the desired masked value
- * @gt: MMIO target GT
- * @reg: register to read value from
- * @mask: mask to be applied to the value read from the register
- * @val: desired value after applying the mask
- * @timeout_us: time out after this period of time. Wait logic tries to be
- * smart, applying an exponential backoff until @timeout_us is reached.
- * @out_val: if not NULL, points where to store the last unmasked value
- * @atomic: needs to be true if calling from an atomic context
- *
- * This function polls for the desired masked value and returns zero on success
- * or -ETIMEDOUT if timed out.
- *
- * Note that @timeout_us represents the minimum amount of time to wait before
- * giving up. The actual time taken by this function can be a little more than
- * @timeout_us for different reasons, specially in non-atomic contexts. Thus,
- * it is possible that this function succeeds even after @timeout_us has passed.
- */
-int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
- u32 *out_val, bool atomic)
+static int __xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+ u32 *out_val, bool atomic, bool expect_match)
{
ktime_t cur = ktime_get_raw();
const ktime_t end = ktime_add_us(cur, timeout_us);
int ret = -ETIMEDOUT;
s64 wait = 10;
u32 read;
+ bool check;
for (;;) {
read = xe_mmio_read32(gt, reg);
- if ((read & mask) == val) {
+
+ check = (read & mask) == val;
+ if (!expect_match)
+ check = !check;
+
+ if (check) {
ret = 0;
break;
}
@@ -302,7 +371,12 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t
if (ret != 0) {
read = xe_mmio_read32(gt, reg);
- if ((read & mask) == val)
+
+ check = (read & mask) == val;
+ if (!expect_match)
+ check = !check;
+
+ if (check)
ret = 0;
}
@@ -313,62 +387,45 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t
}
/**
- * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
+ * xe_mmio_wait32() - Wait for a register to match the desired masked value
* @gt: MMIO target GT
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
- * @val: value to match after applying the mask
+ * @val: desired value after applying the mask
* @timeout_us: time out after this period of time. Wait logic tries to be
* smart, applying an exponential backoff until @timeout_us is reached.
* @out_val: if not NULL, points where to store the last unmasked value
* @atomic: needs to be true if calling from an atomic context
*
- * This function polls for a masked value to change from a given value and
- * returns zero on success or -ETIMEDOUT if timed out.
+ * This function polls for the desired masked value and returns zero on success
+ * or -ETIMEDOUT if timed out.
*
* Note that @timeout_us represents the minimum amount of time to wait before
* giving up. The actual time taken by this function can be a little more than
* @timeout_us for different reasons, specially in non-atomic contexts. Thus,
* it is possible that this function succeeds even after @timeout_us has passed.
*/
+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+ u32 *out_val, bool atomic)
+{
+ return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, true);
+}
+
+/**
+ * xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
+ * @gt: MMIO target GT
+ * @reg: register to read value from
+ * @mask: mask to be applied to the value read from the register
+ * @val: value not to be matched after applying the mask
+ * @timeout_us: time out after this period of time
+ * @out_val: if not NULL, points where to store the last unmasked value
+ * @atomic: needs to be true if calling from an atomic context
+ *
+ * This function works exactly like xe_mmio_wait32() with the exception that
+ * @val is expected not to be matched.
+ */
int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
- ktime_t cur = ktime_get_raw();
- const ktime_t end = ktime_add_us(cur, timeout_us);
- int ret = -ETIMEDOUT;
- s64 wait = 10;
- u32 read;
-
- for (;;) {
- read = xe_mmio_read32(gt, reg);
- if ((read & mask) != val) {
- ret = 0;
- break;
- }
-
- cur = ktime_get_raw();
- if (!ktime_before(cur, end))
- break;
-
- if (ktime_after(ktime_add_us(cur, wait), end))
- wait = ktime_us_delta(end, cur);
-
- if (atomic)
- udelay(wait);
- else
- usleep_range(wait, wait << 1);
- wait <<= 1;
- }
-
- if (ret != 0) {
- read = xe_mmio_read32(gt, reg);
- if ((read & mask) != val)
- ret = 0;
- }
-
- if (out_val)
- *out_val = read;
-
- return ret;
+ return __xe_mmio_wait32(gt, reg, mask, val, timeout_us, out_val, atomic, false);
}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 6ae0cc32c651..26551410ecc8 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -22,7 +22,6 @@ u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval);
bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg);
-int xe_mmio_probe_vram(struct xe_device *xe);
u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic);
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6d69f751bf78..3ef92eb8fbb1 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -641,7 +641,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
u32 offset = xe_bo_ggtt_addr(lrc->bo);
do {
- bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
bb->cs[bb->len++] = 0;
bb->cs[bb->len++] = flex->value;
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 4ee32ee1cc88..722278cc23fc 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -7,6 +7,8 @@
#include <drm/xe_drm.h>
+#include <generated/xe_wa_oob.h>
+
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
@@ -15,6 +17,7 @@
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_wa.h"
#define _PAT_ATS 0x47fc
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
@@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
if (GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
- xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
+ /* Wa_16023588340. XXX: Should use XE_WA */
+ if (GRAPHICS_VERx100(xe) == 2001)
+ xe->pat.n_entries = 28; /* Disable CLOS3 */
+ else
+ xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
xe->pat.idx[XE_CACHE_NONE] = 3;
xe->pat.idx[XE_CACHE_WT] = 15;
xe->pat.idx[XE_CACHE_WB] = 2;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 732ee0d02124..3c4a3c91377a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -59,6 +59,7 @@ struct xe_device_desc {
u8 has_display:1;
u8 has_heci_gscfi:1;
+ u8 has_heci_cscfi:1;
u8 has_llc:1;
u8 has_mmio_ext:1;
u8 has_sriov:1;
@@ -345,6 +346,7 @@ static const struct xe_device_desc bmg_desc = {
PLATFORM(BATTLEMAGE),
.has_display = true,
.require_force_probe = true,
+ .has_heci_cscfi = 1,
};
#undef PLATFORM
@@ -606,6 +608,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.is_dgfx = desc->is_dgfx;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
+ xe->info.has_heci_cscfi = desc->has_heci_cscfi;
xe->info.has_llc = desc->has_llc;
xe->info.has_mmio_ext = desc->has_mmio_ext;
xe->info.has_sriov = desc->has_sriov;
@@ -815,7 +818,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d",
+ drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d cscfi:%d",
desc->platform_name,
subplatform_desc ? subplatform_desc->name : "",
xe->info.devid, xe->info.revid,
@@ -828,7 +831,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
xe->info.media_verx100 % 100,
str_yes_no(xe->info.enable_display),
xe->info.dma_mask_size, xe->info.tile_count,
- xe->info.has_heci_gscfi);
+ xe->info.has_heci_gscfi, xe->info.has_heci_cscfi);
drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
xe_step_name(xe->info.step.graphics),
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index de3b5df65e48..9f3c14fd9f33 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -20,6 +20,7 @@
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"
+#include "xe_trace.h"
#include "xe_wa.h"
/**
@@ -87,6 +88,7 @@ int xe_pm_suspend(struct xe_device *xe)
int err;
drm_dbg(&xe->drm, "Suspending device\n");
+ trace_xe_pm_suspend(xe, __builtin_return_address(0));
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
@@ -131,6 +133,7 @@ int xe_pm_resume(struct xe_device *xe)
int err;
drm_dbg(&xe->drm, "Resuming device\n");
+ trace_xe_pm_resume(xe, __builtin_return_address(0));
for_each_tile(tile, xe, id)
xe_wa_apply_tile_workarounds(tile);
@@ -326,6 +329,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
u8 id;
int err = 0;
+ trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
@@ -399,6 +403,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
u8 id;
int err = 0;
+ trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
@@ -463,6 +468,7 @@ static void pm_runtime_lockdep_prime(void)
*/
void xe_pm_runtime_get(struct xe_device *xe)
{
+ trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
pm_runtime_get_noresume(xe->drm.dev);
if (xe_pm_read_callback_task(xe) == current)
@@ -478,6 +484,7 @@ void xe_pm_runtime_get(struct xe_device *xe)
*/
void xe_pm_runtime_put(struct xe_device *xe)
{
+ trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
if (xe_pm_read_callback_task(xe) == current) {
pm_runtime_put_noidle(xe->drm.dev);
} else {
@@ -495,6 +502,7 @@ void xe_pm_runtime_put(struct xe_device *xe)
*/
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
+ trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
if (WARN_ON(xe_pm_read_callback_task(xe) == current))
return -ELOOP;
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index e8b8ae5c6485..56e709d2fb30 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -17,10 +17,16 @@ static void preempt_fence_work_func(struct work_struct *w)
container_of(w, typeof(*pfence), preempt_work);
struct xe_exec_queue *q = pfence->q;
- if (pfence->error)
+ if (pfence->error) {
dma_fence_set_error(&pfence->base, pfence->error);
- else
- q->ops->suspend_wait(q);
+ } else if (!q->ops->reset_status(q)) {
+ int err = q->ops->suspend_wait(q);
+
+ if (err)
+ dma_fence_set_error(&pfence->base, err);
+ } else {
+ dma_fence_set_error(&pfence->base, -ENOENT);
+ }
dma_fence_signal(&pfence->base);
/*
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ade9e7a3a0ad..97a6a0b0b8ba 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -9,12 +9,15 @@
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
@@ -325,6 +328,7 @@ xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
entry->pt = parent;
entry->flags = 0;
entry->qwords = 0;
+ entry->pt_bo->update_index = -1;
if (alloc_entries) {
entry->pt_entries = kmalloc_array(XE_PDES,
@@ -842,19 +846,27 @@ xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *t
}
}
-static void xe_pt_abort_bind(struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries,
- u32 num_entries)
+static void xe_pt_cancel_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
{
u32 i, j;
for (i = 0; i < num_entries; i++) {
- if (!entries[i].pt_entries)
+ struct xe_pt *pt = entries[i].pt;
+
+ if (!pt)
continue;
- for (j = 0; j < entries[i].qwords; j++)
- xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL);
+ if (pt->level) {
+ for (j = 0; j < entries[i].qwords; j++)
+ xe_pt_destroy(entries[i].pt_entries[j].pt,
+ xe_vma_vm(vma)->flags, NULL);
+ }
+
kfree(entries[i].pt_entries);
+ entries[i].pt_entries = NULL;
+ entries[i].qwords = 0;
}
}
@@ -864,18 +876,15 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
lockdep_assert_held(&vm->lock);
- if (xe_vma_is_userptr(vma))
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
- else if (!xe_vma_is_null(vma))
+ if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma))
dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
xe_vm_assert_held(vm);
}
-static void xe_pt_commit_bind(struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries,
- u32 num_entries, bool rebind,
- struct llist_head *deferred)
+static void xe_pt_commit(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries, struct llist_head *deferred)
{
u32 i, j;
@@ -883,31 +892,90 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
for (i = 0; i < num_entries; i++) {
struct xe_pt *pt = entries[i].pt;
+
+ if (!pt->level)
+ continue;
+
+ for (j = 0; j < entries[i].qwords; j++) {
+ struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
+
+ xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred);
+ }
+ }
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries, bool rebind)
+{
+ int i, j;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (i = num_entries - 1; i >= 0; --i) {
+ struct xe_pt *pt = entries[i].pt;
struct xe_pt_dir *pt_dir;
if (!rebind)
- pt->num_live += entries[i].qwords;
+ pt->num_live -= entries[i].qwords;
- if (!pt->level) {
- kfree(entries[i].pt_entries);
+ if (!pt->level)
continue;
+
+ pt_dir = as_xe_pt_dir(pt);
+ for (j = 0; j < entries[i].qwords; j++) {
+ u32 j_ = j + entries[i].ofs;
+ struct xe_pt *newpte = xe_pt_entry(pt_dir, j_);
+ struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
+
+ pt_dir->children[j_] = oldpte ? &oldpte->base : 0;
+ xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL);
}
+ }
+}
+
+static void xe_pt_commit_prepare_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries, bool rebind)
+{
+ u32 i, j;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (i = 0; i < num_entries; i++) {
+ struct xe_pt *pt = entries[i].pt;
+ struct xe_pt_dir *pt_dir;
+
+ if (!rebind)
+ pt->num_live += entries[i].qwords;
+
+ if (!pt->level)
+ continue;
pt_dir = as_xe_pt_dir(pt);
for (j = 0; j < entries[i].qwords; j++) {
u32 j_ = j + entries[i].ofs;
struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+ struct xe_pt *oldpte = NULL;
if (xe_pt_entry(pt_dir, j_))
- xe_pt_destroy(xe_pt_entry(pt_dir, j_),
- xe_vma_vm(vma)->flags, deferred);
+ oldpte = xe_pt_entry(pt_dir, j_);
pt_dir->children[j_] = &newpte->base;
+ entries[i].pt_entries[j].pt = oldpte;
}
- kfree(entries[i].pt_entries);
}
}
+static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
+{
+ u32 i;
+
+ for (i = 0; i < num_entries; i++)
+ kfree(entries[i].pt_entries);
+}
+
static int
xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_vm_pgtable_update *entries, u32 *num_entries)
@@ -918,20 +986,19 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
err = xe_pt_stage_bind(tile, vma, entries, num_entries);
if (!err)
xe_tile_assert(tile, *num_entries);
- else /* abort! */
- xe_pt_abort_bind(vma, entries, *num_entries);
return err;
}
static void xe_vm_dbg_print_entries(struct xe_device *xe,
const struct xe_vm_pgtable_update *entries,
- unsigned int num_entries)
+ unsigned int num_entries, bool bind)
#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
{
unsigned int i;
- vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
+ vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind",
+ num_entries);
for (i = 0; i < num_entries; i++) {
const struct xe_vm_pgtable_update *entry = &entries[i];
struct xe_pt *xe_pt = entry->pt;
@@ -952,66 +1019,108 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe,
{}
#endif
-#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
-
-static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
+static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
{
- u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2;
- static u32 count;
+ int i;
- if (count++ % divisor == divisor - 1) {
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+ for (i = 0; i < num_syncs; i++) {
+ struct dma_fence *fence = syncs[i].fence;
- uvma->userptr.divisor = divisor << 1;
- spin_lock(&vm->userptr.invalidated_lock);
- list_move_tail(&uvma->userptr.invalidate_link,
- &vm->userptr.invalidated);
- spin_unlock(&vm->userptr.invalidated_lock);
- return true;
+ if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &fence->flags))
+ return false;
}
- return false;
+ return true;
}
-#else
+static int job_test_add_deps(struct xe_sched_job *job,
+ struct dma_resv *resv,
+ enum dma_resv_usage usage)
+{
+ if (!job) {
+ if (!dma_resv_test_signaled(resv, usage))
+ return -ETIME;
-static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
+ return 0;
+ }
+
+ return xe_sched_job_add_deps(job, resv, usage);
+}
+
+static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job)
{
- return false;
+ struct xe_bo *bo = xe_vma_bo(vma);
+
+ xe_bo_assert_held(bo);
+
+ if (bo && !bo->vm)
+ return job_test_add_deps(job, bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL);
+
+ return 0;
}
-#endif
+static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
+ struct xe_sched_job *job)
+{
+ int err = 0;
-/**
- * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
- * @base: Base we derive from.
- * @bind: Whether this is a bind or an unbind operation. A bind operation
- * makes the pre-commit callback error with -EAGAIN if it detects a
- * pending invalidation.
- * @locked: Whether the pre-commit callback locked the userptr notifier lock
- * and it needs unlocking.
- */
-struct xe_pt_migrate_pt_update {
- struct xe_migrate_pt_update base;
- bool bind;
- bool locked;
-};
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+ break;
+
+ err = vma_add_deps(op->map.vma, job);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ if (op->remap.prev)
+ err = vma_add_deps(op->remap.prev, job);
+ if (!err && op->remap.next)
+ err = vma_add_deps(op->remap.next, job);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job);
+ break;
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+ }
+
+ return err;
+}
-/*
- * This function adds the needed dependencies to a page-table update job
- * to make sure racing jobs for separate bind engines don't race writing
- * to the same page-table range, wreaking havoc. Initially use a single
- * fence for the entire VM. An optimization would use smaller granularity.
- */
static int xe_pt_vm_dependencies(struct xe_sched_job *job,
- struct xe_range_fence_tree *rftree,
- u64 start, u64 last)
+ struct xe_vm *vm,
+ struct xe_vma_ops *vops,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_range_fence_tree *rftree)
{
struct xe_range_fence *rtfence;
struct dma_fence *fence;
- int err;
+ struct xe_vma_op *op;
+ int err = 0, i;
+
+ xe_vm_assert_held(vm);
- rtfence = xe_range_fence_tree_first(rftree, start, last);
+ if (!job && !no_in_syncs(vops->syncs, vops->num_syncs))
+ return -ETIME;
+
+ if (!job && !xe_exec_queue_is_idle(pt_update_ops->q))
+ return -ETIME;
+
+ if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) {
+ err = job_test_add_deps(job, xe_vm_resv(vm),
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_BOOKKEEP :
+ DMA_RESV_USAGE_KERNEL);
+ if (err)
+ return err;
+ }
+
+ rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start,
+ pt_update_ops->last);
while (rtfence) {
fence = rtfence->fence;
@@ -1029,80 +1138,173 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
return err;
}
- rtfence = xe_range_fence_tree_next(rtfence, start, last);
+ rtfence = xe_range_fence_tree_next(rtfence,
+ pt_update_ops->start,
+ pt_update_ops->last);
}
- return 0;
+ list_for_each_entry(op, &vops->list, link) {
+ err = op_add_deps(vm, op, job);
+ if (err)
+ return err;
+ }
+
+ if (job)
+ err = xe_sched_job_last_fence_add_dep(job, vm);
+ else
+ err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm);
+
+ for (i = 0; job && !err && i < vops->num_syncs; i++)
+ err = xe_sync_entry_add_deps(&vops->syncs[i], job);
+
+ return err;
}
static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
{
- struct xe_range_fence_tree *rftree =
- &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id];
+ struct xe_vma_ops *vops = pt_update->vops;
+ struct xe_vm *vm = vops->vm;
+ struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id];
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[pt_update->tile_id];
+
+ return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops,
+ pt_update_ops, rftree);
+}
- return xe_pt_vm_dependencies(pt_update->job, rftree,
- pt_update->start, pt_update->last);
+#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
+
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
+{
+ u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2;
+ static u32 count;
+
+ if (count++ % divisor == divisor - 1) {
+ uvma->userptr.divisor = divisor << 1;
+ return true;
+ }
+
+ return false;
}
-static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+#else
+
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
{
- struct xe_pt_migrate_pt_update *userptr_update =
- container_of(pt_update, typeof(*userptr_update), base);
- struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma);
- unsigned long notifier_seq = uvma->userptr.notifier_seq;
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
- int err = xe_pt_vm_dependencies(pt_update->job,
- &vm->rftree[pt_update->tile_id],
- pt_update->start,
- pt_update->last);
+ return false;
+}
- if (err)
- return err;
+#endif
- userptr_update->locked = false;
+static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_vm_pgtable_update_ops *pt_update)
+{
+ struct xe_userptr_vma *uvma;
+ unsigned long notifier_seq;
- /*
- * Wait until nobody is running the invalidation notifier, and
- * since we're exiting the loop holding the notifier lock,
- * nobody can proceed invalidating either.
- *
- * Note that we don't update the vma->userptr.notifier_seq since
- * we don't update the userptr pages.
- */
- do {
- down_read(&vm->userptr.notifier_lock);
- if (!mmu_interval_read_retry(&uvma->userptr.notifier,
- notifier_seq))
- break;
+ lockdep_assert_held_read(&vm->userptr.notifier_lock);
- up_read(&vm->userptr.notifier_lock);
+ if (!xe_vma_is_userptr(vma))
+ return 0;
- if (userptr_update->bind)
- return -EAGAIN;
+ uvma = to_userptr_vma(vma);
+ notifier_seq = uvma->userptr.notifier_seq;
- notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier);
- } while (true);
+ if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm))
+ return 0;
- /* Inject errors to test_whether they are handled correctly */
- if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) {
- up_read(&vm->userptr.notifier_lock);
+ if (!mmu_interval_read_retry(&uvma->userptr.notifier,
+ notifier_seq) &&
+ !xe_pt_userptr_inject_eagain(uvma))
+ return 0;
+
+ if (xe_vm_in_fault_mode(vm)) {
return -EAGAIN;
- }
+ } else {
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_move_tail(&uvma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ spin_unlock(&vm->userptr.invalidated_lock);
- userptr_update->locked = true;
+ if (xe_vm_in_preempt_fence_mode(vm)) {
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ long err;
+
+ dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ dma_fence_enable_sw_signaling(fence);
+ dma_resv_iter_end(&cursor);
+
+ err = dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ XE_WARN_ON(err <= 0);
+ }
+ }
return 0;
}
-static const struct xe_migrate_pt_update_ops bind_ops = {
- .populate = xe_vm_populate_pgtable,
- .pre_commit = xe_pt_pre_commit,
-};
+static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
+ struct xe_vm_pgtable_update_ops *pt_update)
+{
+ int err = 0;
-static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
- .populate = xe_vm_populate_pgtable,
- .pre_commit = xe_pt_userptr_pre_commit,
-};
+ lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+ break;
+
+ err = vma_check_userptr(vm, op->map.vma, pt_update);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ if (op->remap.prev)
+ err = vma_check_userptr(vm, op->remap.prev, pt_update);
+ if (!err && op->remap.next)
+ err = vma_check_userptr(vm, op->remap.next, pt_update);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va),
+ pt_update);
+ break;
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+ }
+
+ return err;
+}
+
+static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+ struct xe_vm *vm = pt_update->vops->vm;
+ struct xe_vma_ops *vops = pt_update->vops;
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[pt_update->tile_id];
+ struct xe_vma_op *op;
+ int err;
+
+ err = xe_pt_pre_commit(pt_update);
+ if (err)
+ return err;
+
+ down_read(&vm->userptr.notifier_lock);
+
+ list_for_each_entry(op, &vops->list, link) {
+ err = op_check_userptr(vm, op, pt_update_ops);
+ if (err) {
+ up_read(&vm->userptr.notifier_lock);
+ break;
+ }
+ }
+
+ return err;
+}
struct invalidation_fence {
struct xe_gt_tlb_invalidation_fence base;
@@ -1115,23 +1317,6 @@ struct invalidation_fence {
u32 asid;
};
-static const char *
-invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
-{
- return "xe";
-}
-
-static const char *
-invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
-{
- return "invalidation_fence";
-}
-
-static const struct dma_fence_ops invalidation_fence_ops = {
- .get_driver_name = invalidation_fence_get_driver_name,
- .get_timeline_name = invalidation_fence_get_timeline_name,
-};
-
static void invalidation_fence_cb(struct dma_fence *fence,
struct dma_fence_cb *cb)
{
@@ -1161,24 +1346,17 @@ static void invalidation_fence_work_func(struct work_struct *w)
ifence->end, ifence->asid);
}
-static int invalidation_fence_init(struct xe_gt *gt,
- struct invalidation_fence *ifence,
- struct dma_fence *fence,
- u64 start, u64 end, u32 asid)
+static void invalidation_fence_init(struct xe_gt *gt,
+ struct invalidation_fence *ifence,
+ struct dma_fence *fence,
+ u64 start, u64 end, u32 asid)
{
int ret;
trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
- spin_lock_irq(&gt->tlb_invalidation.lock);
- dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
- &gt->tlb_invalidation.lock,
- dma_fence_context_alloc(1), 1);
- spin_unlock_irq(&gt->tlb_invalidation.lock);
-
- INIT_LIST_HEAD(&ifence->base.link);
+ xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
- dma_fence_get(&ifence->base.base); /* Ref for caller */
ifence->fence = fence;
ifence->gt = gt;
ifence->start = start;
@@ -1196,192 +1374,6 @@ static int invalidation_fence_init(struct xe_gt *gt,
}
xe_gt_assert(gt, !ret || ret == -ENOENT);
-
- return ret && ret != -ENOENT ? ret : 0;
-}
-
-static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
- struct xe_pt_migrate_pt_update *update,
- struct xe_vm_pgtable_update *entries,
- u32 num_entries)
-{
- int i, level = 0;
-
- for (i = 0; i < num_entries; i++) {
- const struct xe_vm_pgtable_update *entry = &entries[i];
-
- if (entry->pt->level > level)
- level = entry->pt->level;
- }
-
- /* Greedy (non-optimal) calculation but simple */
- update->base.start = ALIGN_DOWN(xe_vma_start(vma),
- 0x1ull << xe_pt_shift(level));
- update->base.last = ALIGN(xe_vma_end(vma),
- 0x1ull << xe_pt_shift(level)) - 1;
-}
-
-/**
- * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
- * address range.
- * @tile: The tile to bind for.
- * @vma: The vma to bind.
- * @q: The exec_queue with which to do pipelined page-table updates.
- * @syncs: Entries to sync on before binding the built tree to the live vm tree.
- * @num_syncs: Number of @sync entries.
- * @rebind: Whether we're rebinding this vma to the same address range without
- * an unbind in-between.
- *
- * This function builds a page-table tree (see xe_pt_stage_bind() for more
- * information on page-table building), and the xe_vm_pgtable_update entries
- * abstracting the operations needed to attach it to the main vm tree. It
- * then takes the relevant locks and updates the metadata side of the main
- * vm tree and submits the operations for pipelined attachment of the
- * gpu page-table to the vm main tree, (which can be done either by the
- * cpu and the GPU).
- *
- * Return: A valid dma-fence representing the pipelined attachment operation
- * on success, an error pointer on error.
- */
-struct dma_fence *
-__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs,
- bool rebind)
-{
- struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
- struct xe_pt_migrate_pt_update bind_pt_update = {
- .base = {
- .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
- .vma = vma,
- .tile_id = tile->id,
- },
- .bind = true,
- };
- struct xe_vm *vm = xe_vma_vm(vma);
- u32 num_entries;
- struct dma_fence *fence;
- struct invalidation_fence *ifence = NULL;
- struct xe_range_fence *rfence;
- int err;
-
- bind_pt_update.locked = false;
- xe_bo_assert_held(xe_vma_bo(vma));
- xe_vm_assert_held(vm);
-
- vm_dbg(&xe_vma_vm(vma)->xe->drm,
- "Preparing bind, with range [%llx...%llx) engine %p.\n",
- xe_vma_start(vma), xe_vma_end(vma), q);
-
- err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
- if (err)
- goto err;
-
- err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
- if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
- err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
- if (err)
- goto err;
-
- xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
-
- xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
- xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
- num_entries);
-
- /*
- * If rebind, we have to invalidate TLB on !LR vms to invalidate
- * cached PTEs point to freed memory. on LR vms this is done
- * automatically when the context is re-enabled by the rebind worker,
- * or in fault mode it was invalidated on PTE zapping.
- *
- * If !rebind, and scratch enabled VMs, there is a chance the scratch
- * PTE is already cached in the TLB so it needs to be invalidated.
- * on !LR VMs this is done in the ring ops preceding a batch, but on
- * non-faulting LR, in particular on user-space batch buffer chaining,
- * it needs to be done here.
- */
- if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
- ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
- if (!ifence)
- return ERR_PTR(-ENOMEM);
- } else if (rebind && !xe_vm_in_lr_mode(vm)) {
- /* We bump also if batch_invalidate_tlb is true */
- vm->tlb_flush_seqno++;
- }
-
- rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
- if (!rfence) {
- kfree(ifence);
- return ERR_PTR(-ENOMEM);
- }
-
- fence = xe_migrate_update_pgtables(tile->migrate,
- vm, xe_vma_bo(vma), q,
- entries, num_entries,
- syncs, num_syncs,
- &bind_pt_update.base);
- if (!IS_ERR(fence)) {
- bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND;
- LLIST_HEAD(deferred);
- int err;
-
- err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
- &xe_range_fence_kfree_ops,
- bind_pt_update.base.start,
- bind_pt_update.base.last, fence);
- if (err)
- dma_fence_wait(fence, false);
-
- /* TLB invalidation must be done before signaling rebind */
- if (ifence) {
- int err = invalidation_fence_init(tile->primary_gt,
- ifence, fence,
- xe_vma_start(vma),
- xe_vma_end(vma),
- xe_vma_vm(vma)->usm.asid);
- if (err) {
- dma_fence_put(fence);
- kfree(ifence);
- return ERR_PTR(err);
- }
- fence = &ifence->base.base;
- }
-
- /* add shared fence now for pagetable delayed destroy */
- dma_resv_add_fence(xe_vm_resv(vm), fence, rebind ||
- last_munmap_rebind ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
-
- if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
- dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
- DMA_RESV_USAGE_BOOKKEEP);
- xe_pt_commit_bind(vma, entries, num_entries, rebind,
- bind_pt_update.locked ? &deferred : NULL);
-
- /* This vma is live (again?) now */
- vma->tile_present |= BIT(tile->id);
-
- if (bind_pt_update.locked) {
- to_userptr_vma(vma)->userptr.initial_bind = true;
- up_read(&vm->userptr.notifier_lock);
- xe_bo_put_commit(&deferred);
- }
- if (!rebind && last_munmap_rebind &&
- xe_vm_in_preempt_fence_mode(vm))
- xe_vm_queue_rebind_worker(vm);
- } else {
- kfree(rfence);
- kfree(ifence);
- if (bind_pt_update.locked)
- up_read(&vm->userptr.notifier_lock);
- xe_pt_abort_bind(vma, entries, num_entries);
- }
-
- return fence;
-
-err:
- return ERR_PTR(err);
}
struct xe_pt_stage_unbind_walk {
@@ -1466,6 +1458,7 @@ xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
pgoff_t end_offset;
u64 size = 1ull << walk->shifts[--level];
+ int err;
if (!IS_ALIGNED(addr, size))
addr = xe_walk->modified_start;
@@ -1481,7 +1474,10 @@ xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
&end_offset))
return 0;
- (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
+ err = xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, true);
+ if (err)
+ return err;
+
xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
return 0;
@@ -1534,8 +1530,8 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
void *ptr, u32 qword_ofs, u32 num_qwords,
const struct xe_vm_pgtable_update *update)
{
- struct xe_vma *vma = pt_update->vma;
- u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level);
+ struct xe_vm *vm = pt_update->vops->vm;
+ u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level);
int i;
if (map && map->is_iomem)
@@ -1549,181 +1545,571 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
memset64(ptr, empty, num_qwords);
}
+static void xe_pt_abort_unbind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
+{
+ int i, j;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (i = num_entries - 1; i >= 0; --i) {
+ struct xe_vm_pgtable_update *entry = &entries[i];
+ struct xe_pt *pt = entry->pt;
+ struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+ pt->num_live += entry->qwords;
+
+ if (!pt->level)
+ continue;
+
+ for (j = entry->ofs; j < entry->ofs + entry->qwords; j++)
+ pt_dir->children[j] =
+ entries[i].pt_entries[j - entry->ofs].pt ?
+ &entries[i].pt_entries[j - entry->ofs].pt->base : NULL;
+ }
+}
+
static void
-xe_pt_commit_unbind(struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries, u32 num_entries,
- struct llist_head *deferred)
+xe_pt_commit_prepare_unbind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
{
- u32 j;
+ int i, j;
xe_pt_commit_locks_assert(vma);
- for (j = 0; j < num_entries; ++j) {
- struct xe_vm_pgtable_update *entry = &entries[j];
+ for (i = 0; i < num_entries; ++i) {
+ struct xe_vm_pgtable_update *entry = &entries[i];
struct xe_pt *pt = entry->pt;
+ struct xe_pt_dir *pt_dir;
pt->num_live -= entry->qwords;
- if (pt->level) {
- struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
- u32 i;
+ if (!pt->level)
+ continue;
+
+ pt_dir = as_xe_pt_dir(pt);
+ for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) {
+ entry->pt_entries[j - entry->ofs].pt =
+ xe_pt_entry(pt_dir, j);
+ pt_dir->children[j] = NULL;
+ }
+ }
+}
+
+static void
+xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma *vma)
+{
+ u32 current_op = pt_update_ops->current_op;
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+ int i, level = 0;
+ u64 start, last;
- for (i = entry->ofs; i < entry->ofs + entry->qwords;
- i++) {
- if (xe_pt_entry(pt_dir, i))
- xe_pt_destroy(xe_pt_entry(pt_dir, i),
- xe_vma_vm(vma)->flags, deferred);
+ for (i = 0; i < pt_op->num_entries; i++) {
+ const struct xe_vm_pgtable_update *entry = &pt_op->entries[i];
- pt_dir->children[i] = NULL;
- }
+ if (entry->pt->level > level)
+ level = entry->pt->level;
+ }
+
+ /* Greedy (non-optimal) calculation but simple */
+ start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level));
+ last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1;
+
+ if (start < pt_update_ops->start)
+ pt_update_ops->start = start;
+ if (last > pt_update_ops->last)
+ pt_update_ops->last = last;
+}
+
+static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
+{
+ if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv,
+ xe->info.tile_count);
+
+ return 0;
+}
+
+static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma *vma)
+{
+ u32 current_op = pt_update_ops->current_op;
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+ int err;
+
+ xe_bo_assert_held(xe_vma_bo(vma));
+
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "Preparing bind, with range [%llx...%llx)\n",
+ xe_vma_start(vma), xe_vma_end(vma) - 1);
+
+ pt_op->vma = NULL;
+ pt_op->bind = true;
+ pt_op->rebind = BIT(tile->id) & vma->tile_present;
+
+ err = vma_reserve_fences(tile_to_xe(tile), vma);
+ if (err)
+ return err;
+
+ err = xe_pt_prepare_bind(tile, vma, pt_op->entries,
+ &pt_op->num_entries);
+ if (!err) {
+ xe_tile_assert(tile, pt_op->num_entries <=
+ ARRAY_SIZE(pt_op->entries));
+ xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+ pt_op->num_entries, true);
+
+ xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+ ++pt_update_ops->current_op;
+ pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+
+ /*
+ * If rebind, we have to invalidate TLB on !LR vms to invalidate
+ * cached PTEs point to freed memory. On LR vms this is done
+ * automatically when the context is re-enabled by the rebind worker,
+ * or in fault mode it was invalidated on PTE zapping.
+ *
+ * If !rebind, and scratch enabled VMs, there is a chance the scratch
+ * PTE is already cached in the TLB so it needs to be invalidated.
+ * On !LR VMs this is done in the ring ops preceding a batch, but on
+ * non-faulting LR, in particular on user-space batch buffer chaining,
+ * it needs to be done here.
+ */
+ if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
+ xe_vm_in_preempt_fence_mode(vm)))
+ pt_update_ops->needs_invalidation = true;
+ else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
+ /* We bump also if batch_invalidate_tlb is true */
+ vm->tlb_flush_seqno++;
+
+ vma->tile_staged |= BIT(tile->id);
+ pt_op->vma = vma;
+ xe_pt_commit_prepare_bind(vma, pt_op->entries,
+ pt_op->num_entries, pt_op->rebind);
+ } else {
+ xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries);
+ }
+
+ return err;
+}
+
+static int unbind_op_prepare(struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma *vma)
+{
+ u32 current_op = pt_update_ops->current_op;
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
+ int err;
+
+ if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id)))
+ return 0;
+
+ xe_bo_assert_held(xe_vma_bo(vma));
+
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "Preparing unbind, with range [%llx...%llx)\n",
+ xe_vma_start(vma), xe_vma_end(vma) - 1);
+
+ /*
+ * Wait for invalidation to complete. Can corrupt internal page table
+ * state if an invalidation is running while preparing an unbind.
+ */
+ if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma)))
+ mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier);
+
+ pt_op->vma = vma;
+ pt_op->bind = false;
+ pt_op->rebind = false;
+
+ err = vma_reserve_fences(tile_to_xe(tile), vma);
+ if (err)
+ return err;
+
+ pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries);
+
+ xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
+ pt_op->num_entries, false);
+ xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
+ ++pt_update_ops->current_op;
+ pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+ pt_update_ops->needs_invalidation = true;
+
+ xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);
+
+ return 0;
+}
+
+static int op_prepare(struct xe_vm *vm,
+ struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma_op *op)
+{
+ int err = 0;
+
+ xe_vm_assert_held(vm);
+
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+ break;
+
+ err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
+ pt_update_ops->wait_vm_kernel = true;
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ err = unbind_op_prepare(tile, pt_update_ops,
+ gpuva_to_vma(op->base.remap.unmap->va));
+
+ if (!err && op->remap.prev) {
+ err = bind_op_prepare(vm, tile, pt_update_ops,
+ op->remap.prev);
+ pt_update_ops->wait_vm_bookkeep = true;
}
+ if (!err && op->remap.next) {
+ err = bind_op_prepare(vm, tile, pt_update_ops,
+ op->remap.next);
+ pt_update_ops->wait_vm_bookkeep = true;
+ }
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ err = unbind_op_prepare(tile, pt_update_ops,
+ gpuva_to_vma(op->base.unmap.va));
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ err = bind_op_prepare(vm, tile, pt_update_ops,
+ gpuva_to_vma(op->base.prefetch.va));
+ pt_update_ops->wait_vm_kernel = true;
+ break;
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
+
+ return err;
+}
+
+static void
+xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
+{
+ init_llist_head(&pt_update_ops->deferred);
+ pt_update_ops->start = ~0x0ull;
+ pt_update_ops->last = 0x0ull;
}
-static const struct xe_migrate_pt_update_ops unbind_ops = {
- .populate = xe_migrate_clear_pgtable_callback,
+/**
+ * xe_pt_update_ops_prepare() - Prepare PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operationa
+ *
+ * Prepare PT update operations which includes updating internal PT state,
+ * allocate memory for page tables, populate page table being pruned in, and
+ * create PT update operations for leaf insertion / removal.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
+{
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[tile->id];
+ struct xe_vma_op *op;
+ int err;
+
+ lockdep_assert_held(&vops->vm->lock);
+ xe_vm_assert_held(vops->vm);
+
+ xe_pt_update_ops_init(pt_update_ops);
+
+ err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
+ tile_to_xe(tile)->info.tile_count);
+ if (err)
+ return err;
+
+ list_for_each_entry(op, &vops->list, link) {
+ err = op_prepare(vops->vm, tile, pt_update_ops, op);
+
+ if (err)
+ return err;
+ }
+
+ xe_tile_assert(tile, pt_update_ops->current_op <=
+ pt_update_ops->num_ops);
+
+#ifdef TEST_VM_OPS_ERROR
+ if (vops->inject_error &&
+ vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE)
+ return -ENOSPC;
+#endif
+
+ return 0;
+}
+
+static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma *vma, struct dma_fence *fence)
+{
+ if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+ vma->tile_present |= BIT(tile->id);
+ vma->tile_staged &= ~BIT(tile->id);
+ if (xe_vma_is_userptr(vma)) {
+ lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ to_userptr_vma(vma)->userptr.initial_bind = true;
+ }
+
+ /*
+ * Kick rebind worker if this bind triggers preempt fences and not in
+ * the rebind worker
+ */
+ if (pt_update_ops->wait_vm_bookkeep &&
+ xe_vm_in_preempt_fence_mode(vm) &&
+ !current->mm)
+ xe_vm_queue_rebind_worker(vm);
+}
+
+static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma *vma, struct dma_fence *fence)
+{
+ if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+ vma->tile_present &= ~BIT(tile->id);
+ if (!vma->tile_present) {
+ list_del_init(&vma->combined_links.rebind);
+ if (xe_vma_is_userptr(vma)) {
+ lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ }
+ }
+}
+
+static void op_commit(struct xe_vm *vm,
+ struct xe_tile *tile,
+ struct xe_vm_pgtable_update_ops *pt_update_ops,
+ struct xe_vma_op *op, struct dma_fence *fence)
+{
+ xe_vm_assert_held(vm);
+
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ if (!op->map.immediate && xe_vm_in_fault_mode(vm))
+ break;
+
+ bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ unbind_op_commit(vm, tile, pt_update_ops,
+ gpuva_to_vma(op->base.remap.unmap->va), fence);
+
+ if (op->remap.prev)
+ bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
+ fence);
+ if (op->remap.next)
+ bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
+ fence);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ unbind_op_commit(vm, tile, pt_update_ops,
+ gpuva_to_vma(op->base.unmap.va), fence);
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ bind_op_commit(vm, tile, pt_update_ops,
+ gpuva_to_vma(op->base.prefetch.va), fence);
+ break;
+ default:
+ drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+ }
+}
+
+static const struct xe_migrate_pt_update_ops migrate_ops = {
+ .populate = xe_vm_populate_pgtable,
+ .clear = xe_migrate_clear_pgtable_callback,
.pre_commit = xe_pt_pre_commit,
};
-static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
- .populate = xe_migrate_clear_pgtable_callback,
+static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
+ .populate = xe_vm_populate_pgtable,
+ .clear = xe_migrate_clear_pgtable_callback,
.pre_commit = xe_pt_userptr_pre_commit,
};
/**
- * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
- * address range.
- * @tile: The tile to unbind for.
- * @vma: The vma to unbind.
- * @q: The exec_queue with which to do pipelined page-table updates.
- * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
- * @num_syncs: Number of @sync entries.
+ * xe_pt_update_ops_run() - Run PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operationa
*
- * This function builds a the xe_vm_pgtable_update entries abstracting the
- * operations needed to detach the page-table tree to be destroyed from the
- * man vm tree.
- * It then takes the relevant locks and submits the operations for
- * pipelined detachment of the gpu page-table from the vm main tree,
- * (which can be done either by the cpu and the GPU), Finally it frees the
- * detached page-table tree.
+ * Run PT update operations which includes committing internal PT state changes,
+ * creating job for PT update operations for leaf insertion / removal, and
+ * installing job fence in various places.
*
- * Return: A valid dma-fence representing the pipelined detachment operation
- * on success, an error pointer on error.
+ * Return: fence on success, negative ERR_PTR on error.
*/
struct dma_fence *
-__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs)
+xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
{
- struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
- struct xe_pt_migrate_pt_update unbind_pt_update = {
- .base = {
- .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
- &unbind_ops,
- .vma = vma,
- .tile_id = tile->id,
- },
- };
- struct xe_vm *vm = xe_vma_vm(vma);
- u32 num_entries;
- struct dma_fence *fence = NULL;
- struct invalidation_fence *ifence;
+ struct xe_vm *vm = vops->vm;
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[tile->id];
+ struct dma_fence *fence;
+ struct invalidation_fence *ifence = NULL;
struct xe_range_fence *rfence;
- int err;
-
- LLIST_HEAD(deferred);
+ struct xe_vma_op *op;
+ int err = 0, i;
+ struct xe_migrate_pt_update update = {
+ .ops = pt_update_ops->needs_userptr_lock ?
+ &userptr_migrate_ops :
+ &migrate_ops,
+ .vops = vops,
+ .tile_id = tile->id,
+ };
- xe_bo_assert_held(xe_vma_bo(vma));
+ lockdep_assert_held(&vm->lock);
xe_vm_assert_held(vm);
- vm_dbg(&xe_vma_vm(vma)->xe->drm,
- "Preparing unbind, with range [%llx...%llx) engine %p.\n",
- xe_vma_start(vma), xe_vma_end(vma), q);
-
- num_entries = xe_pt_stage_unbind(tile, vma, entries);
- xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
+ if (!pt_update_ops->current_op) {
+ xe_tile_assert(tile, xe_vm_in_fault_mode(vm));
- xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
- xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
- num_entries);
+ return dma_fence_get_stub();
+ }
- err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
- if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
- err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
- if (err)
- return ERR_PTR(err);
+#ifdef TEST_VM_OPS_ERROR
+ if (vops->inject_error &&
+ vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN)
+ return ERR_PTR(-ENOSPC);
+#endif
- ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
- if (!ifence)
- return ERR_PTR(-ENOMEM);
+ if (pt_update_ops->needs_invalidation) {
+ ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+ if (!ifence) {
+ err = -ENOMEM;
+ goto kill_vm_tile1;
+ }
+ }
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
if (!rfence) {
- kfree(ifence);
- return ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
+ goto free_ifence;
}
- /*
- * Even if we were already evicted and unbind to destroy, we need to
- * clear again here. The eviction may have updated pagetables at a
- * lower level, because it needs to be more conservative.
- */
- fence = xe_migrate_update_pgtables(tile->migrate,
- vm, NULL, q ? q :
- vm->q[tile->id],
- entries, num_entries,
- syncs, num_syncs,
- &unbind_pt_update.base);
- if (!IS_ERR(fence)) {
- int err;
-
- err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
- &xe_range_fence_kfree_ops,
- unbind_pt_update.base.start,
- unbind_pt_update.base.last, fence);
- if (err)
- dma_fence_wait(fence, false);
+ fence = xe_migrate_update_pgtables(tile->migrate, &update);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_rfence;
+ }
- /* TLB invalidation must be done before signaling unbind */
- err = invalidation_fence_init(tile->primary_gt, ifence, fence,
- xe_vma_start(vma),
- xe_vma_end(vma),
- xe_vma_vm(vma)->usm.asid);
- if (err) {
- dma_fence_put(fence);
- kfree(ifence);
- return ERR_PTR(err);
- }
- fence = &ifence->base.base;
+ /* Point of no return - VM killed if failure after this */
+ for (i = 0; i < pt_update_ops->current_op; ++i) {
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];
- /* add shared fence now for pagetable delayed destroy */
- dma_resv_add_fence(xe_vm_resv(vm), fence,
- DMA_RESV_USAGE_BOOKKEEP);
+ xe_pt_commit(pt_op->vma, pt_op->entries,
+ pt_op->num_entries, &pt_update_ops->deferred);
+ pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */
+ }
- /* This fence will be installed by caller when doing eviction */
- if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
- dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
- DMA_RESV_USAGE_BOOKKEEP);
- xe_pt_commit_unbind(vma, entries, num_entries,
- unbind_pt_update.locked ? &deferred : NULL);
- vma->tile_present &= ~BIT(tile->id);
- } else {
- kfree(rfence);
- kfree(ifence);
+ if (xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+ &xe_range_fence_kfree_ops,
+ pt_update_ops->start,
+ pt_update_ops->last, fence))
+ dma_fence_wait(fence, false);
+
+ /* tlb invalidation must be done before signaling rebind */
+ if (ifence) {
+ invalidation_fence_init(tile->primary_gt, ifence, fence,
+ pt_update_ops->start,
+ pt_update_ops->last, vm->usm.asid);
+ fence = &ifence->base.base;
}
- if (!vma->tile_present)
- list_del_init(&vma->combined_links.rebind);
+ dma_resv_add_fence(xe_vm_resv(vm), fence,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
- if (unbind_pt_update.locked) {
- xe_tile_assert(tile, xe_vma_is_userptr(vma));
+ list_for_each_entry(op, &vops->list, link)
+ op_commit(vops->vm, tile, pt_update_ops, op, fence);
- if (!vma->tile_present) {
- spin_lock(&vm->userptr.invalidated_lock);
- list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
- spin_unlock(&vm->userptr.invalidated_lock);
- }
+ if (pt_update_ops->needs_userptr_lock)
up_read(&vm->userptr.notifier_lock);
- xe_bo_put_commit(&deferred);
- }
return fence;
+
+free_rfence:
+ kfree(rfence);
+free_ifence:
+ kfree(ifence);
+kill_vm_tile1:
+ if (err != -EAGAIN && tile->id)
+ xe_vm_kill(vops->vm, false);
+
+ return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_update_ops_fini() - Finish PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operations
+ *
+ * Finish PT update operations by committing to destroy page table memory
+ */
+void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
+{
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[tile->id];
+ int i;
+
+ lockdep_assert_held(&vops->vm->lock);
+ xe_vm_assert_held(vops->vm);
+
+ for (i = 0; i < pt_update_ops->current_op; ++i) {
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];
+
+ xe_pt_free_bind(pt_op->entries, pt_op->num_entries);
+ }
+ xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
+}
+
+/**
+ * xe_pt_update_ops_abort() - Abort PT update operations
+ * @tile: Tile of PT update operations
+ * @vops: VMA operationa
+ *
+ * Abort PT update operations by unwinding internal PT state
+ */
+void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
+{
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[tile->id];
+ int i;
+
+ lockdep_assert_held(&vops->vm->lock);
+ xe_vm_assert_held(vops->vm);
+
+ for (i = pt_update_ops->num_ops - 1; i >= 0; --i) {
+ struct xe_vm_pgtable_update_op *pt_op =
+ &pt_update_ops->ops[i];
+
+ if (!pt_op->vma || i >= pt_update_ops->current_op)
+ continue;
+
+ if (pt_op->bind)
+ xe_pt_abort_bind(pt_op->vma, pt_op->entries,
+ pt_op->num_entries,
+ pt_op->rebind);
+ else
+ xe_pt_abort_unbind(pt_op->vma, pt_op->entries,
+ pt_op->num_entries);
+ }
+
+ xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 71a4fbfcff43..9ab386431cad 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -17,6 +17,7 @@ struct xe_sync_entry;
struct xe_tile;
struct xe_vm;
struct xe_vma;
+struct xe_vma_ops;
/* Largest huge pte is currently 1GiB. May become device dependent. */
#define MAX_HUGEPTE_LEVEL 2
@@ -34,14 +35,11 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
-struct dma_fence *
-__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs,
- bool rebind);
-
-struct dma_fence *
-__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs);
+int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops);
+struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile,
+ struct xe_vma_ops *vops);
+void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops);
+void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops);
bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index cee70cb0f014..384cc04de719 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -74,4 +74,52 @@ struct xe_vm_pgtable_update {
u32 flags;
};
+/** struct xe_vm_pgtable_update_op - Page table update operation */
+struct xe_vm_pgtable_update_op {
+ /** @entries: entries to update for this operation */
+ struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+ /** @vma: VMA for operation, operation not valid if NULL */
+ struct xe_vma *vma;
+ /** @num_entries: number of entries for this update operation */
+ u32 num_entries;
+ /** @bind: is a bind */
+ bool bind;
+ /** @rebind: is a rebind */
+ bool rebind;
+};
+
+/** struct xe_vm_pgtable_update_ops: page table update operations */
+struct xe_vm_pgtable_update_ops {
+ /** @ops: operations */
+ struct xe_vm_pgtable_update_op *ops;
+ /** @deferred: deferred list to destroy PT entries */
+ struct llist_head deferred;
+ /** @q: exec queue for PT operations */
+ struct xe_exec_queue *q;
+ /** @start: start address of ops */
+ u64 start;
+ /** @last: last address of ops */
+ u64 last;
+ /** @num_ops: number of operations */
+ u32 num_ops;
+ /** @current_op: current operations */
+ u32 current_op;
+ /** @needs_userptr_lock: Needs userptr lock */
+ bool needs_userptr_lock;
+ /** @needs_invalidation: Needs invalidation */
+ bool needs_invalidation;
+ /**
+ * @wait_vm_bookkeep: PT operations need to wait until VM is idle
+ * (bookkeep dma-resv slots are idle) and stage all future VM activity
+ * behind these operations (install PT operations into VM kernel
+ * dma-resv slot).
+ */
+ bool wait_vm_bookkeep;
+ /**
+ * @wait_vm_kernel: PT operations need to wait until VM kernel dma-resv
+ * slots are idle.
+ */
+ bool wait_vm_kernel;
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 4e01df6b1b7a..73ef6e4c2dc9 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -518,7 +518,9 @@ static int query_gt_topology(struct xe_device *xe,
if (err)
return err;
- topo.type = DRM_XE_TOPO_EU_PER_DSS;
+ topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
+ DRM_XE_TOPO_SIMD16_EU_PER_DSS :
+ DRM_XE_TOPO_EU_PER_DSS;
err = copy_mask(&query_ptr, &topo,
gt->fuse_topo.eu_mask_per_dss,
sizeof(gt->fuse_topo.eu_mask_per_dss));
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 02e28274282f..e78ba324dd18 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -217,21 +217,19 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
ctx->active_entries = active_entries;
ctx->n_entries = n_entries;
}
+EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_ctx_enable_active_tracking);
static void rtp_mark_active(struct xe_device *xe,
struct xe_rtp_process_ctx *ctx,
- unsigned int first, unsigned int last)
+ unsigned int idx)
{
if (!ctx->active_entries)
return;
- if (drm_WARN_ON(&xe->drm, last > ctx->n_entries))
+ if (drm_WARN_ON(&xe->drm, idx >= ctx->n_entries))
return;
- if (first == last)
- bitmap_set(ctx->active_entries, first, 1);
- else
- bitmap_set(ctx->active_entries, first, last - first + 2);
+ bitmap_set(ctx->active_entries, idx, 1);
}
/**
@@ -276,8 +274,7 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
}
if (match)
- rtp_mark_active(xe, ctx, entry - entries,
- entry - entries);
+ rtp_mark_active(xe, ctx, entry - entries);
}
}
EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
@@ -288,44 +285,29 @@ EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
* @entries: Table with RTP definitions
*
* Walk the table pointed by @entries (with an empty sentinel), executing the
- * rules. A few differences from xe_rtp_process_to_sr():
- *
- * 1. There is no action associated with each entry since this uses
- * struct xe_rtp_entry. Its main use is for marking active workarounds via
- * xe_rtp_process_ctx_enable_active_tracking().
- * 2. There is support for OR operations by having entries with no name.
+ * rules. One difference from xe_rtp_process_to_sr(): there is no action
+ * associated with each entry since this uses struct xe_rtp_entry. Its main use
+ * is for marking active workarounds via
+ * xe_rtp_process_ctx_enable_active_tracking().
*/
void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
const struct xe_rtp_entry *entries)
{
- const struct xe_rtp_entry *entry, *first_entry;
+ const struct xe_rtp_entry *entry;
struct xe_hw_engine *hwe;
struct xe_gt *gt;
struct xe_device *xe;
rtp_get_context(ctx, &hwe, &gt, &xe);
- first_entry = entries;
- if (drm_WARN_ON(&xe->drm, !first_entry->name))
- return;
-
for (entry = entries; entry && entry->rules; entry++) {
- if (entry->name)
- first_entry = entry;
-
if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
continue;
- /* Fast-forward entry, eliminating the OR'ed entries */
- for (entry++; entry && entry->rules; entry++)
- if (entry->name)
- break;
- entry--;
-
- rtp_mark_active(xe, ctx, first_entry - entries,
- entry - entries);
+ rtp_mark_active(xe, ctx, entry - entries);
}
}
+EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process);
bool xe_rtp_match_even_instance(const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index ad446731192c..827d932b6908 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -374,7 +374,7 @@ struct xe_reg_sr;
* XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry
* @...: Rules
*
- * At least one rule is needed and up to 6 are supported. Multiple rules are
+ * At least one rule is needed and up to 12 are supported. Multiple rules are
* AND'ed together, i.e. all the rules must evaluate to true for the entry to
* be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
*
@@ -399,7 +399,7 @@ struct xe_reg_sr;
* XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr
* @...: Actions to be taken
*
- * At least one action is needed and up to 6 are supported. See XE_RTP_ACTION_*
+ * At least one action is needed and up to 12 are supported. See XE_RTP_ACTION_*
* for the possible actions. Example:
*
* .. code-block:: c
diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h
index c59e40fd7fff..a33b0ae98bbc 100644
--- a/drivers/gpu/drm/xe/xe_rtp_helpers.h
+++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h
@@ -60,6 +60,12 @@
#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_5(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_4(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_6(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_5(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_7(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_6(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_8(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_7(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_9(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_8(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_10(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_9(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_11(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_10(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_12(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_11(prefix_, sep_, _XE_TUPLE_TAIL args_)
/*
* XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 8941522b7705..f3060979e63f 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -84,6 +84,13 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
unsigned int size)
{
+ /*
+ * BB to large, return -ENOBUFS indicating user should split
+ * array of binds into smaller chunks.
+ */
+ if (size > sa_manager->base.size)
+ return ERR_PTR(-ENOBUFS);
+
return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
}
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index a274a5fb1401..5a1d65e4f19f 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -5,7 +5,7 @@
#include <drm/drm_managed.h>
-#include "regs/xe_sriov_regs.h"
+#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_device.h"
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 2883d9aca404..533246f42256 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
u64 value)
{
struct xe_user_fence *ufence;
+ u64 __user *ptr = u64_to_user_ptr(addr);
+
+ if (!access_ok(ptr, sizeof(ptr)))
+ return ERR_PTR(-EFAULT);
ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
if (!ufence)
- return NULL;
+ return ERR_PTR(-ENOMEM);
ufence->xe = xe;
kref_init(&ufence->refcount);
- ufence->addr = u64_to_user_ptr(addr);
+ ufence->addr = ptr;
ufence->value = value;
ufence->mm = current->mm;
mmgrab(ufence->mm);
@@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
} else {
sync->ufence = user_fence_create(xe, sync_in.addr,
sync_in.timeline_value);
- if (XE_IOCTL_DBG(xe, !sync->ufence))
- return -ENOMEM;
+ if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
+ return PTR_ERR(sync->ufence);
}
break;
@@ -200,14 +204,6 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
return 0;
}
-int xe_sync_entry_wait(struct xe_sync_entry *sync)
-{
- if (sync->fence)
- dma_fence_wait(sync->fence, true);
-
- return 0;
-}
-
int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
{
int err;
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 006dbf780793..256ffc1e54dc 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -22,7 +22,6 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
struct xe_sync_entry *sync,
struct drm_xe_sync __user *sync_user,
unsigned int flags);
-int xe_sync_entry_wait(struct xe_sync_entry *sync);
int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
struct xe_sched_job *job);
void xe_sync_entry_signal(struct xe_sync_entry *sync,
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index baba14fb1e32..1abdb30cb7ad 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -369,6 +369,58 @@ TRACE_EVENT(xe_reg_rw,
(u32)(__entry->val >> 32))
);
+DECLARE_EVENT_CLASS(xe_pm_runtime,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller),
+
+ TP_STRUCT__entry(
+ __string(dev, __dev_name_xe(xe))
+ __field(void *, caller)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev);
+ __entry->caller = caller;
+ ),
+
+ TP_printk("dev=%s caller_function=%pS", __get_str(dev), __entry->caller)
+);
+
+DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller)
+);
+
+DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_put,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller)
+);
+
+DEFINE_EVENT(xe_pm_runtime, xe_pm_resume,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller)
+);
+
+DEFINE_EVENT(xe_pm_runtime, xe_pm_suspend,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller)
+);
+
+DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_resume,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller)
+);
+
+DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_suspend,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller)
+);
+
+DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get_ioctl,
+ TP_PROTO(struct xe_device *xe, void *caller),
+ TP_ARGS(xe, caller)
+);
+
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index f39f09ed3495..9b1a1d4304ae 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -117,11 +117,6 @@ DEFINE_EVENT(xe_vma, xe_vma_acc,
TP_ARGS(vma)
);
-DEFINE_EVENT(xe_vma, xe_vma_fail,
- TP_PROTO(struct xe_vma *vma),
- TP_ARGS(vma)
-);
-
DEFINE_EVENT(xe_vma, xe_vma_bind,
TP_PROTO(struct xe_vma *vma),
TP_ARGS(vma)
@@ -237,6 +232,11 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
TP_ARGS(vm)
);
+DEFINE_EVENT(xe_vm, xe_vm_ops_fail,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index d4e6fa918942..77d4eec0118d 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -93,6 +93,14 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
+ /* Xe2_HPG */
+
+ { XE_RTP_NAME("Tuning: vs hit max value"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
+ REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
+ },
+
{}
};
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 5f23ecd98376..5b70d23724c4 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -116,6 +116,8 @@ struct fw_blobs_by_type {
fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2))
#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
+ fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \
+ fw_def(LUNARLAKE, no_ver(xe, huc, lnl)) \
fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \
fw_def(DG1, no_ver(i915, huc, dg1)) \
fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \
@@ -125,6 +127,7 @@ struct fw_blobs_by_type {
/* for the GSC FW we match the compatibility version and not the release one */
#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \
+ fw_def(LUNARLAKE, major_ver(xe, gsc, lnl, 1, 0, 0)) \
fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0, 0))
#define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 5b166fa03684..f225107bdd65 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -133,8 +133,10 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm)
if (q->lr.pfence) {
long timeout = dma_fence_wait(q->lr.pfence, false);
- if (timeout < 0)
+ /* Only -ETIME on fence indicates VM needs to be killed */
+ if (timeout < 0 || q->lr.pfence->error == -ETIME)
return -ETIME;
+
dma_fence_put(q->lr.pfence);
q->lr.pfence = NULL;
}
@@ -311,7 +313,15 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
-static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
+/**
+ * xe_vm_kill() - VM Kill
+ * @vm: The VM.
+ * @unlocked: Flag indicates the VM's dma-resv is not held
+ *
+ * Kill the VM by setting banned flag indicated VM is no longer available for
+ * use. If in preempt fence mode, also kill all exec queue attached to the VM.
+ */
+void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
struct xe_exec_queue *q;
@@ -708,6 +718,42 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}
+static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
+{
+ int i;
+
+ for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
+ if (!vops->pt_update_ops[i].num_ops)
+ continue;
+
+ vops->pt_update_ops[i].ops =
+ kmalloc_array(vops->pt_update_ops[i].num_ops,
+ sizeof(*vops->pt_update_ops[i].ops),
+ GFP_KERNEL);
+ if (!vops->pt_update_ops[i].ops)
+ return array_of_binds ? -ENOBUFS : -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void xe_vma_ops_fini(struct xe_vma_ops *vops)
+{
+ int i;
+
+ for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
+ kfree(vops->pt_update_ops[i].ops);
+}
+
+static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
+{
+ int i;
+
+ for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
+ if (BIT(i) & tile_mask)
+ ++vops->pt_update_ops[i].num_ops;
+}
+
static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
u8 tile_mask)
{
@@ -735,6 +781,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
xe_vm_populate_rebind(op, vma, tile_mask);
list_add_tail(&op->link, &vops->list);
+ xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
return 0;
}
@@ -751,7 +798,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
struct xe_vma *vma, *next;
struct xe_vma_ops vops;
struct xe_vma_op *op, *next_op;
- int err;
+ int err, i;
lockdep_assert_held(&vm->lock);
if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
@@ -759,6 +806,8 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
return 0;
xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
+ vops.pt_update_ops[i].wait_vm_bookkeep = true;
xe_vm_assert_held(vm);
list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
@@ -775,6 +824,10 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
goto free_ops;
}
+ err = xe_vma_ops_alloc(&vops, false);
+ if (err)
+ goto free_ops;
+
fence = ops_execute(vm, &vops);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
@@ -789,6 +842,7 @@ free_ops:
list_del(&op->link);
kfree(op);
}
+ xe_vma_ops_fini(&vops);
return err;
}
@@ -798,6 +852,8 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
struct dma_fence *fence = NULL;
struct xe_vma_ops vops;
struct xe_vma_op *op, *next_op;
+ struct xe_tile *tile;
+ u8 id;
int err;
lockdep_assert_held(&vm->lock);
@@ -805,17 +861,30 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ for_each_tile(tile, vm->xe, id) {
+ vops.pt_update_ops[id].wait_vm_bookkeep = true;
+ vops.pt_update_ops[tile->id].q =
+ xe_tile_migrate_exec_queue(tile);
+ }
err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
if (err)
return ERR_PTR(err);
+ err = xe_vma_ops_alloc(&vops, false);
+ if (err) {
+ fence = ERR_PTR(err);
+ goto free_ops;
+ }
+
fence = ops_execute(vm, &vops);
+free_ops:
list_for_each_entry_safe(op, next_op, &vops.list, link) {
list_del(&op->link);
kfree(op);
}
+ xe_vma_ops_fini(&vops);
return fence;
}
@@ -1601,6 +1670,10 @@ static void vm_destroy_work_func(struct work_struct *w)
XE_WARN_ON(vm->pt_root[id]);
trace_xe_vm_free(vm);
+
+ if (vm->xef)
+ xe_file_put(vm->xef);
+
kfree(vm);
}
@@ -1637,147 +1710,6 @@ to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
return q ? q : vm->q[0];
}
-static struct dma_fence *
-xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs,
- bool first_op, bool last_op)
-{
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
- struct xe_tile *tile;
- struct dma_fence *fence = NULL;
- struct dma_fence **fences = NULL;
- struct dma_fence_array *cf = NULL;
- int cur_fence = 0;
- int number_tiles = hweight8(vma->tile_present);
- int err;
- u8 id;
-
- trace_xe_vma_unbind(vma);
-
- if (number_tiles > 1) {
- fences = kmalloc_array(number_tiles, sizeof(*fences),
- GFP_KERNEL);
- if (!fences)
- return ERR_PTR(-ENOMEM);
- }
-
- for_each_tile(tile, vm->xe, id) {
- if (!(vma->tile_present & BIT(id)))
- goto next;
-
- fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
- first_op ? syncs : NULL,
- first_op ? num_syncs : 0);
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
- goto err_fences;
- }
-
- if (fences)
- fences[cur_fence++] = fence;
-
-next:
- if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
- q = list_next_entry(q, multi_gt_list);
- }
-
- if (fences) {
- cf = dma_fence_array_create(number_tiles, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- if (!cf) {
- --vm->composite_fence_seqno;
- err = -ENOMEM;
- goto err_fences;
- }
- }
-
- fence = cf ? &cf->base : !fence ?
- xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
-
- return fence;
-
-err_fences:
- if (fences) {
- while (cur_fence)
- dma_fence_put(fences[--cur_fence]);
- kfree(fences);
- }
-
- return ERR_PTR(err);
-}
-
-static struct dma_fence *
-xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_sync_entry *syncs, u32 num_syncs,
- u8 tile_mask, bool first_op, bool last_op)
-{
- struct xe_tile *tile;
- struct dma_fence *fence;
- struct dma_fence **fences = NULL;
- struct dma_fence_array *cf = NULL;
- struct xe_vm *vm = xe_vma_vm(vma);
- int cur_fence = 0;
- int number_tiles = hweight8(tile_mask);
- int err;
- u8 id;
-
- trace_xe_vma_bind(vma);
-
- if (number_tiles > 1) {
- fences = kmalloc_array(number_tiles, sizeof(*fences),
- GFP_KERNEL);
- if (!fences)
- return ERR_PTR(-ENOMEM);
- }
-
- for_each_tile(tile, vm->xe, id) {
- if (!(tile_mask & BIT(id)))
- goto next;
-
- fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
- first_op ? syncs : NULL,
- first_op ? num_syncs : 0,
- vma->tile_present & BIT(id));
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
- goto err_fences;
- }
-
- if (fences)
- fences[cur_fence++] = fence;
-
-next:
- if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
- q = list_next_entry(q, multi_gt_list);
- }
-
- if (fences) {
- cf = dma_fence_array_create(number_tiles, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- if (!cf) {
- --vm->composite_fence_seqno;
- err = -ENOMEM;
- goto err_fences;
- }
- }
-
- return cf ? &cf->base : fence;
-
-err_fences:
- if (fences) {
- while (cur_fence)
- dma_fence_put(fences[--cur_fence]);
- kfree(fences);
- }
-
- return ERR_PTR(err);
-}
-
static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
{
@@ -1793,48 +1725,6 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
return NULL;
}
-static struct dma_fence *
-xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
- struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
- u8 tile_mask, bool immediate, bool first_op, bool last_op)
-{
- struct dma_fence *fence;
- struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
-
- xe_vm_assert_held(vm);
- xe_bo_assert_held(bo);
-
- if (immediate) {
- fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask,
- first_op, last_op);
- if (IS_ERR(fence))
- return fence;
- } else {
- xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
-
- fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
- }
-
- return fence;
-}
-
-static struct dma_fence *
-xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, struct xe_sync_entry *syncs,
- u32 num_syncs, bool first_op, bool last_op)
-{
- struct dma_fence *fence;
-
- xe_vm_assert_held(vm);
- xe_bo_assert_held(xe_vma_bo(vma));
-
- fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
- if (IS_ERR(fence))
- return fence;
-
- return fence;
-}
-
#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
DRM_XE_VM_CREATE_FLAG_LR_MODE | \
DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
@@ -1916,7 +1806,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
}
args->vm_id = id;
- vm->xef = xef;
+ vm->xef = xe_file_get(xef);
/* Record BO memory for VM pagetable created against client */
for_each_tile(tile, xe, id)
@@ -1975,21 +1865,6 @@ static const u32 region_to_mem_type[] = {
XE_PL_VRAM1,
};
-static struct dma_fence *
-xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_exec_queue *q, struct xe_sync_entry *syncs,
- u32 num_syncs, bool first_op, bool last_op)
-{
- struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
-
- if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
- return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
- vma->tile_mask, true, first_op, last_op);
- } else {
- return xe_exec_queue_last_fence_get(wait_exec_queue, vm);
- }
-}
-
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
bool post_commit)
{
@@ -2277,14 +2152,10 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
return err;
}
-
-static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
- struct drm_gpuva_ops *ops,
- struct xe_sync_entry *syncs, u32 num_syncs,
- struct xe_vma_ops *vops, bool last)
+static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
+ struct xe_vma_ops *vops)
{
struct xe_device *xe = vm->xe;
- struct xe_vma_op *last_op = NULL;
struct drm_gpuva_op *__op;
struct xe_tile *tile;
u8 id, tile_mask = 0;
@@ -2298,19 +2169,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
drm_gpuva_for_each_op(__op, ops) {
struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
struct xe_vma *vma;
- bool first = list_empty(&vops->list);
unsigned int flags = 0;
INIT_LIST_HEAD(&op->link);
list_add_tail(&op->link, &vops->list);
-
- if (first) {
- op->flags |= XE_VMA_OP_FIRST;
- op->num_syncs = num_syncs;
- op->syncs = syncs;
- }
-
- op->q = q;
op->tile_mask = tile_mask;
switch (op->base.op) {
@@ -2329,6 +2191,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
return PTR_ERR(vma);
op->map.vma = vma;
+ if (op->map.immediate || !xe_vm_in_fault_mode(vm))
+ xe_vma_ops_incr_pt_update_ops(vops,
+ op->tile_mask);
break;
}
case DRM_GPUVA_OP_REMAP:
@@ -2373,6 +2238,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
(ULL)op->remap.start,
(ULL)op->remap.range);
+ } else {
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
}
}
@@ -2409,203 +2276,30 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
(ULL)op->remap.start,
(ULL)op->remap.range);
+ } else {
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
}
}
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
break;
}
case DRM_GPUVA_OP_UNMAP:
case DRM_GPUVA_OP_PREFETCH:
- /* Nothing to do */
+ /* FIXME: Need to skip some prefetch ops */
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
- last_op = op;
-
err = xe_vma_op_commit(vm, op);
if (err)
return err;
}
- /* FIXME: Unhandled corner case */
- XE_WARN_ON(!last_op && last && !list_empty(&vops->list));
-
- if (!last_op)
- return 0;
-
- if (last) {
- last_op->flags |= XE_VMA_OP_LAST;
- last_op->num_syncs = num_syncs;
- last_op->syncs = syncs;
- }
-
return 0;
}
-static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_vma_op *op)
-{
- struct dma_fence *fence = NULL;
-
- lockdep_assert_held(&vm->lock);
-
- xe_vm_assert_held(vm);
- xe_bo_assert_held(xe_vma_bo(vma));
-
- switch (op->base.op) {
- case DRM_GPUVA_OP_MAP:
- fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
- op->syncs, op->num_syncs,
- op->tile_mask,
- op->map.immediate || !xe_vm_in_fault_mode(vm),
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
- break;
- case DRM_GPUVA_OP_REMAP:
- {
- bool prev = !!op->remap.prev;
- bool next = !!op->remap.next;
-
- if (!op->remap.unmap_done) {
- if (prev || next)
- vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
- fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
- op->num_syncs,
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST &&
- !prev && !next);
- if (IS_ERR(fence))
- break;
- op->remap.unmap_done = true;
- }
-
- if (prev) {
- op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
- dma_fence_put(fence);
- fence = xe_vm_bind(vm, op->remap.prev, op->q,
- xe_vma_bo(op->remap.prev), op->syncs,
- op->num_syncs,
- op->remap.prev->tile_mask, true,
- false,
- op->flags & XE_VMA_OP_LAST && !next);
- op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
- if (IS_ERR(fence))
- break;
- op->remap.prev = NULL;
- }
-
- if (next) {
- op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
- dma_fence_put(fence);
- fence = xe_vm_bind(vm, op->remap.next, op->q,
- xe_vma_bo(op->remap.next),
- op->syncs, op->num_syncs,
- op->remap.next->tile_mask, true,
- false, op->flags & XE_VMA_OP_LAST);
- op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
- if (IS_ERR(fence))
- break;
- op->remap.next = NULL;
- }
-
- break;
- }
- case DRM_GPUVA_OP_UNMAP:
- fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
- op->num_syncs, op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
- break;
- case DRM_GPUVA_OP_PREFETCH:
- fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs,
- op->flags & XE_VMA_OP_FIRST,
- op->flags & XE_VMA_OP_LAST);
- break;
- default:
- drm_warn(&vm->xe->drm, "NOT POSSIBLE");
- }
-
- if (IS_ERR(fence))
- trace_xe_vma_fail(vma);
-
- return fence;
-}
-
-static struct dma_fence *
-__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_vma_op *op)
-{
- struct dma_fence *fence;
- int err;
-
-retry_userptr:
- fence = op_execute(vm, vma, op);
- if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
- lockdep_assert_held_write(&vm->lock);
-
- if (op->base.op == DRM_GPUVA_OP_REMAP) {
- if (!op->remap.unmap_done)
- vma = gpuva_to_vma(op->base.remap.unmap->va);
- else if (op->remap.prev)
- vma = op->remap.prev;
- else
- vma = op->remap.next;
- }
-
- if (xe_vma_is_userptr(vma)) {
- err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
- if (!err)
- goto retry_userptr;
-
- fence = ERR_PTR(err);
- trace_xe_vma_fail(vma);
- }
- }
-
- return fence;
-}
-
-static struct dma_fence *
-xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
-{
- struct dma_fence *fence = ERR_PTR(-ENOMEM);
-
- lockdep_assert_held(&vm->lock);
-
- switch (op->base.op) {
- case DRM_GPUVA_OP_MAP:
- fence = __xe_vma_op_execute(vm, op->map.vma, op);
- break;
- case DRM_GPUVA_OP_REMAP:
- {
- struct xe_vma *vma;
-
- if (!op->remap.unmap_done)
- vma = gpuva_to_vma(op->base.remap.unmap->va);
- else if (op->remap.prev)
- vma = op->remap.prev;
- else
- vma = op->remap.next;
-
- fence = __xe_vma_op_execute(vm, vma, op);
- break;
- }
- case DRM_GPUVA_OP_UNMAP:
- fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
- op);
- break;
- case DRM_GPUVA_OP_PREFETCH:
- fence = __xe_vma_op_execute(vm,
- gpuva_to_vma(op->base.prefetch.va),
- op);
- break;
- default:
- drm_warn(&vm->xe->drm, "NOT POSSIBLE");
- }
-
- return fence;
-}
-
static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
bool post_commit, bool prev_post_commit,
bool next_post_commit)
@@ -2788,26 +2482,157 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
return err;
}
+#ifdef TEST_VM_OPS_ERROR
+ if (vops->inject_error &&
+ vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
+ return -ENOSPC;
+#endif
+
return 0;
}
+static void op_trace(struct xe_vma_op *op)
+{
+ switch (op->base.op) {
+ case DRM_GPUVA_OP_MAP:
+ trace_xe_vma_bind(op->map.vma);
+ break;
+ case DRM_GPUVA_OP_REMAP:
+ trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
+ if (op->remap.prev)
+ trace_xe_vma_bind(op->remap.prev);
+ if (op->remap.next)
+ trace_xe_vma_bind(op->remap.next);
+ break;
+ case DRM_GPUVA_OP_UNMAP:
+ trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
+ break;
+ case DRM_GPUVA_OP_PREFETCH:
+ trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
+ break;
+ default:
+ XE_WARN_ON("NOT POSSIBLE");
+ }
+}
+
+static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
+{
+ struct xe_vma_op *op;
+
+ list_for_each_entry(op, &vops->list, link)
+ op_trace(op);
+}
+
+static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
+{
+ struct xe_exec_queue *q = vops->q;
+ struct xe_tile *tile;
+ int number_tiles = 0;
+ u8 id;
+
+ for_each_tile(tile, vm->xe, id) {
+ if (vops->pt_update_ops[id].num_ops)
+ ++number_tiles;
+
+ if (vops->pt_update_ops[id].q)
+ continue;
+
+ if (q) {
+ vops->pt_update_ops[id].q = q;
+ if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+ q = list_next_entry(q, multi_gt_list);
+ } else {
+ vops->pt_update_ops[id].q = vm->q[id];
+ }
+ }
+
+ return number_tiles;
+}
+
static struct dma_fence *ops_execute(struct xe_vm *vm,
struct xe_vma_ops *vops)
{
- struct xe_vma_op *op, *next;
+ struct xe_tile *tile;
struct dma_fence *fence = NULL;
+ struct dma_fence **fences = NULL;
+ struct dma_fence_array *cf = NULL;
+ int number_tiles = 0, current_fence = 0, err;
+ u8 id;
- list_for_each_entry_safe(op, next, &vops->list, link) {
- dma_fence_put(fence);
- fence = xe_vma_op_execute(vm, op);
- if (IS_ERR(fence)) {
- drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
- op->base.op, PTR_ERR(fence));
- fence = ERR_PTR(-ENOSPC);
- break;
+ number_tiles = vm_ops_setup_tile_args(vm, vops);
+ if (number_tiles == 0)
+ return ERR_PTR(-ENODATA);
+
+ if (number_tiles > 1) {
+ fences = kmalloc_array(number_tiles, sizeof(*fences),
+ GFP_KERNEL);
+ if (!fences) {
+ fence = ERR_PTR(-ENOMEM);
+ goto err_trace;
+ }
+ }
+
+ for_each_tile(tile, vm->xe, id) {
+ if (!vops->pt_update_ops[id].num_ops)
+ continue;
+
+ err = xe_pt_update_ops_prepare(tile, vops);
+ if (err) {
+ fence = ERR_PTR(err);
+ goto err_out;
}
}
+ trace_xe_vm_ops_execute(vops);
+
+ for_each_tile(tile, vm->xe, id) {
+ if (!vops->pt_update_ops[id].num_ops)
+ continue;
+
+ fence = xe_pt_update_ops_run(tile, vops);
+ if (IS_ERR(fence))
+ goto err_out;
+
+ if (fences)
+ fences[current_fence++] = fence;
+ }
+
+ if (fences) {
+ cf = dma_fence_array_create(number_tiles, fences,
+ vm->composite_fence_ctx,
+ vm->composite_fence_seqno++,
+ false);
+ if (!cf) {
+ --vm->composite_fence_seqno;
+ fence = ERR_PTR(-ENOMEM);
+ goto err_out;
+ }
+ fence = &cf->base;
+ }
+
+ for_each_tile(tile, vm->xe, id) {
+ if (!vops->pt_update_ops[id].num_ops)
+ continue;
+
+ xe_pt_update_ops_fini(tile, vops);
+ }
+
+ return fence;
+
+err_out:
+ for_each_tile(tile, vm->xe, id) {
+ if (!vops->pt_update_ops[id].num_ops)
+ continue;
+
+ xe_pt_update_ops_abort(tile, vops);
+ }
+ while (current_fence)
+ dma_fence_put(fences[--current_fence]);
+ kfree(fences);
+ kfree(cf);
+
+err_trace:
+ trace_xe_vm_ops_fail(vm);
return fence;
}
@@ -2888,12 +2713,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
fence = ops_execute(vm, vops);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
- /* FIXME: Killing VM rather than proper error handling */
- xe_vm_kill(vm, false);
goto unlock;
- } else {
- vm_bind_ioctl_ops_fini(vm, vops, fence);
}
+
+ vm_bind_ioctl_ops_fini(vm, vops, fence);
}
unlock:
@@ -2901,11 +2724,18 @@ unlock:
return err;
}
-#define SUPPORTED_FLAGS \
+#define SUPPORTED_FLAGS_STUB \
(DRM_XE_VM_BIND_FLAG_READONLY | \
DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
DRM_XE_VM_BIND_FLAG_NULL | \
DRM_XE_VM_BIND_FLAG_DUMPABLE)
+
+#ifdef TEST_VM_OPS_ERROR
+#define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
+#else
+#define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
+#endif
+
#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
@@ -2931,7 +2761,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
sizeof(struct drm_xe_vm_bind_op),
GFP_KERNEL | __GFP_ACCOUNT);
if (!*bind_ops)
- return -ENOMEM;
+ return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
err = __copy_from_user(*bind_ops, bind_user,
sizeof(struct drm_xe_vm_bind_op) *
@@ -3250,10 +3080,18 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto unwind_ops;
}
- err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
- &vops, i == args->num_binds - 1);
+ err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
if (err)
goto unwind_ops;
+
+#ifdef TEST_VM_OPS_ERROR
+ if (flags & FORCE_OP_ERROR) {
+ vops.inject_error = true;
+ vm->xe->vm_inject_error_position =
+ (vm->xe->vm_inject_error_position + 1) %
+ FORCE_OP_ERROR_COUNT;
+ }
+#endif
}
/* Nothing to do */
@@ -3262,11 +3100,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto unwind_ops;
}
+ err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
+ if (err)
+ goto unwind_ops;
+
err = vm_bind_ioctl_ops_execute(vm, &vops);
unwind_ops:
if (err && err != -ENODATA)
vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+ xe_vma_ops_fini(&vops);
for (i = args->num_binds - 1; i >= 0; --i)
if (ops[i])
drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
@@ -3337,10 +3180,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
{
struct xe_device *xe = xe_vma_vm(vma)->xe;
struct xe_tile *tile;
+ struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE];
u32 tile_needs_invalidate = 0;
- int seqno[XE_MAX_TILES_PER_DEVICE];
u8 id;
- int ret;
+ int ret = 0;
xe_assert(xe, !xe_vma_is_null(vma));
trace_xe_vma_invalidate(vma);
@@ -3365,29 +3208,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
for_each_tile(tile, xe, id) {
if (xe_pt_zap_ptes(tile, vma)) {
- tile_needs_invalidate |= BIT(id);
xe_device_wmb(xe);
+ xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+ &fence[id], true);
+
/*
* FIXME: We potentially need to invalidate multiple
* GTs within the tile
*/
- seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
- if (seqno[id] < 0)
- return seqno[id];
- }
- }
+ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
+ &fence[id], vma);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence[id]);
+ goto wait;
+ }
- for_each_tile(tile, xe, id) {
- if (tile_needs_invalidate & BIT(id)) {
- ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
- if (ret < 0)
- return ret;
+ tile_needs_invalidate |= BIT(id);
}
}
+wait:
+ for_each_tile(tile, xe, id)
+ if (tile_needs_invalidate & BIT(id))
+ xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
vma->tile_invalidated = vma->tile_mask;
- return 0;
+ return ret;
}
struct xe_vm_snapshot {
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index b481608b12f1..c864dba35e1d 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -259,6 +259,8 @@ static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm)
return drm_gpuvm_resv(&vm->gpuvm);
}
+void xe_vm_kill(struct xe_vm *vm, bool unlocked);
+
/**
* xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
* @vm: The vm
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index ce1a63a5e3e7..7f9a303e51d8 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -21,18 +21,27 @@ struct xe_bo;
struct xe_sync_entry;
struct xe_user_fence;
struct xe_vm;
+struct xe_vm_pgtable_update_op;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define TEST_VM_OPS_ERROR
+#define FORCE_OP_ERROR BIT(31)
+
+#define FORCE_OP_ERROR_LOCK 0
+#define FORCE_OP_ERROR_PREPARE 1
+#define FORCE_OP_ERROR_RUN 2
+#define FORCE_OP_ERROR_COUNT 3
+#endif
#define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS
#define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1)
#define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2)
-#define XE_VMA_FIRST_REBIND (DRM_GPUVA_USERBITS << 3)
-#define XE_VMA_LAST_REBIND (DRM_GPUVA_USERBITS << 4)
-#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5)
-#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6)
-#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7)
-#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8)
-#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9)
-#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 10)
+#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 3)
+#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 4)
+#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 5)
+#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 6)
+#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7)
+#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8)
/** struct xe_userptr - User pointer */
struct xe_userptr {
@@ -99,6 +108,9 @@ struct xe_vma {
*/
u8 tile_present;
+ /** @tile_staged: bind is staged for this VMA */
+ u8 tile_staged;
+
/**
* @pat_index: The pat index to use when encoding the PTEs for this vma.
*/
@@ -314,31 +326,18 @@ struct xe_vma_op_prefetch {
/** enum xe_vma_op_flags - flags for VMA operation */
enum xe_vma_op_flags {
- /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
- XE_VMA_OP_FIRST = BIT(0),
- /** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
- XE_VMA_OP_LAST = BIT(1),
/** @XE_VMA_OP_COMMITTED: VMA operation committed */
- XE_VMA_OP_COMMITTED = BIT(2),
+ XE_VMA_OP_COMMITTED = BIT(0),
/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */
- XE_VMA_OP_PREV_COMMITTED = BIT(3),
+ XE_VMA_OP_PREV_COMMITTED = BIT(1),
/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */
- XE_VMA_OP_NEXT_COMMITTED = BIT(4),
+ XE_VMA_OP_NEXT_COMMITTED = BIT(2),
};
/** struct xe_vma_op - VMA operation */
struct xe_vma_op {
/** @base: GPUVA base operation */
struct drm_gpuva_op base;
- /** @q: exec queue for this operation */
- struct xe_exec_queue *q;
- /**
- * @syncs: syncs for this operation, only used on first and last
- * operation
- */
- struct xe_sync_entry *syncs;
- /** @num_syncs: number of syncs */
- u32 num_syncs;
/** @link: async operation link */
struct list_head link;
/** @flags: operation flags */
@@ -362,12 +361,18 @@ struct xe_vma_ops {
struct list_head list;
/** @vm: VM */
struct xe_vm *vm;
- /** @q: exec queue these operations */
+ /** @q: exec queue for VMA operations */
struct xe_exec_queue *q;
/** @syncs: syncs these operation */
struct xe_sync_entry *syncs;
/** @num_syncs: number of syncs */
u32 num_syncs;
+ /** @pt_update_ops: page table update operations */
+ struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE];
+#ifdef TEST_VM_OPS_ERROR
+ /** @inject_error: inject error to test error handling */
+ bool inject_error;
+#endif
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index c7bf0862b231..564e32e44e3b 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE))
},
+ { XE_RTP_NAME("14021402888"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
+ },
/* Xe2_HPG */
@@ -539,6 +543,16 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
+ /* Xe2_LPM */
+
+ { XE_RTP_NAME("16021639441"),
+ XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+ GHWSP_CSB_REPORT_DIS |
+ PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+ XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+ },
+
/* Xe2_HPM */
{ XE_RTP_NAME("16021639441"),
@@ -741,6 +755,7 @@ void xe_wa_process_oob(struct xe_gt *gt)
xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob,
ARRAY_SIZE(oob_was));
+ gt->wa_active.oob_initialized = true;
xe_rtp_process(&ctx, oob_was);
}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index db9ddeaf69bf..52337405b5bc 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -6,6 +6,8 @@
#ifndef _XE_WA_
#define _XE_WA_
+#include "xe_assert.h"
+
struct drm_printer;
struct xe_gt;
struct xe_hw_engine;
@@ -25,6 +27,9 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
* @gt__: gt instance
* @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
*/
-#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob)
+#define XE_WA(gt__, id__) ({ \
+ xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \
+ test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \
+})
#endif
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 26066beb4f6f..540d38603f32 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -29,3 +29,5 @@
13011645652 GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
+22019338487_display PLATFORM(LUNARLAKE)
+16023588340 GRAPHICS_VERSION(2001)