author    Dave Airlie <airlied@redhat.com>  2024-08-30 05:41:05 +0200
committer Dave Airlie <airlied@redhat.com>  2024-08-30 05:41:06 +0200
commit    8bdb468dd7a5d17f8556afdd4c8d046939ff965f (patch)
tree      b3c579119359e8161bf0956431d5ab33d215eecd /drivers/gpu/drm/xe/xe_vm.c
parent    Merge tag 'drm-misc-next-2024-08-29' of https://gitlab.freedesktop.org/drm/mi... (diff)
parent    drm/xe/bmg: Drop force_probe requirement (diff)
Merge tag 'drm-xe-next-2024-08-28' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes:
- Fix OA format masks which were breaking build with gcc-5

Cross-subsystem Changes:

Driver Changes:
- Use dma_fence_chain_free for a chain fence left unused as a sync (Matthew Brost)
- Refactor hw engine lookup and mmio access to be used in more places (Dominik, Matt Auld, Mika Kuoppala)
- Enable priority mem read for Xe2 and later (Pallavi Mishra)
- Fix PL1 disable flow in xe_hwmon_power_max_write (Karthik)
- Fix refcount and speed up devcoredump (Matthew Brost)
- Add performance tuning changes to Xe2 (Akshata, Shekhar)
- Fix OA sysfs entry (Ashutosh)
- Add first GuC firmware support for BMG (Julia)
- Bump minimum GuC firmware for platforms under force_probe to match LNL and BMG (Julia)
- Fix access check on user fence creation (Nirmoy)
- Add/document workarounds for Xe2 (Julia, Daniele, John, Tejas)
- Document workaround and use proper WA infra (Matt Roper)
- Fix VF configuration on media GT (Michal Wajdeczko)
- Fix VM dma-resv lock (Matthew Brost)
- Allow suspend/resume exec queue backend op to be called multiple times (Matthew Brost)
- Add GT stats to debugfs (Nirmoy)
- Add hwconfig to debugfs (Matt Roper)
- Compile out all debugfs code with CONFIG_DEBUG_FS=n (Lucas)
- Remove dead kunit code (Jani Nikula)
- Refactor drvdata storing to help display (Jani Nikula)
- Clean up unused xe parameter in pte handling (Himal)
- Rename s/enable_display/probe_display/ for clarity (Lucas)
- Fix missing MCR annotation in a couple of registers (Tejas)
- Fix DGFX display suspend/resume (Maarten)
- Prepare exec_queue_kill for PXP handling (Daniele)
- Fix devm/drmm issues (Daniele, Matthew Brost)
- Fix tile and ggtt fini sequences (Matthew Brost)
- Fix crashes when probing without firmware in place (Daniele, Matthew Brost)
- Use xe_managed for kernel BOs (Daniele, Matthew Brost)
- Future-proof dss_per_group calculation by using hwconfig (Matt Roper)
- Use reserved copy engine for user binds on faulting devices (Matthew Brost)
- Allow mixing dma-fence jobs and long-running faulting jobs (Francois)
- Clean up redundant arg when creating user BO (Nirmoy)
- Prevent UAF around preempt fence (Auld)
- Fix display suspend/resume (Maarten)
- Use vma_pages() helper (Thorsten)
- Calculate pagefault queue size (Stuart, Matthew Auld)
- Fix missing pagefault wq destroy (Stuart)
- Fix lifetime handling of HW fence ctx (Matthew Brost)
- Fix destroy order for jobs (Matthew Brost)
- Fix TLB invalidation for media GT (Matthew Brost)
- Document GGTT (Rodrigo Vivi)
- Refactor GGTT layering and fix runtime outer protection (Rodrigo Vivi)
- Handle HPD polling on display pm runtime suspend/resume (Imre, Vinod)
- Drop unneeded NULL checks (Apoorva, Himal)
- Use separate rpm lockdep map for non-d3cold-capable devices (Thomas Hellström)
- Support "nomodeset" kernel command-line option (Thomas Zimmermann)
- Drop force_probe requirement for LNL and BMG (Lucas, Balasubramani)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/wd42jsh4i3q5zlrmi2cljejohdsrqc6hvtxf76lbxsp3ibrgmz@y54fa7wwxgsd
Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c  90
1 file changed, 37 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index dab2a3b2e17f..4cc13eddb6b3 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -275,6 +275,8 @@ out_up_write:
* xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
* @vm: The VM.
* @q: The exec_queue
+ *
+ * Note that this function might be called multiple times on the same queue.
*/
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
@@ -282,8 +284,10 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
return;
down_write(&vm->lock);
- list_del(&q->lr.link);
- --vm->preempt.num_exec_queues;
+ if (!list_empty(&q->lr.link)) {
+ list_del_init(&q->lr.link);
+ --vm->preempt.num_exec_queues;
+ }
if (q->lr.pfence) {
dma_fence_enable_sw_signaling(q->lr.pfence);
dma_fence_put(q->lr.pfence);
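The switch from list_del() to a guarded list_del_init() is what backs the new kernel-doc note above: list_del_init() re-points the entry at itself, so a repeated call sees list_empty() and skips the accounting. A minimal standalone sketch of the same idiom, with simplified stand-ins for the kernel's <linux/list.h> helpers (struct and variable names here are illustrative, not the driver's; it also assumes the link was initialized before first use, as the queue's link would be at creation):

    #include <stdio.h>

    struct list_head { struct list_head *prev, *next; };

    static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
    static int list_empty(const struct list_head *h) { return h->next == h; }
    static void list_add(struct list_head *n, struct list_head *h)
    {
            n->next = h->next; n->prev = h;
            h->next->prev = n; h->next = n;
    }
    /* Unlink and re-point the entry at itself, so list_empty() is true after. */
    static void list_del_init(struct list_head *e)
    {
            e->prev->next = e->next;
            e->next->prev = e->prev;
            INIT_LIST_HEAD(e);
    }

    int main(void)
    {
            struct list_head queues, link;
            int num_exec_queues = 0;

            INIT_LIST_HEAD(&queues);
            list_add(&link, &queues);
            num_exec_queues++;

            /* Removal is now safe to run any number of times. */
            for (int i = 0; i < 3; i++) {
                    if (!list_empty(&link)) {
                            list_del_init(&link);
                            --num_exec_queues;
                    }
            }
            printf("num_exec_queues = %d\n", num_exec_queues); /* 0, not -2 */
            return 0;
    }

With plain list_del(), the second pass through the loop would corrupt the list and drive the counter negative; the guarded form keeps both invariants.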
@@ -1191,7 +1195,7 @@ static const struct drm_gpuvm_ops gpuvm_ops = {
.vm_free = xe_vm_free,
};
-static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
+static u64 pde_encode_pat_index(u16 pat_index)
{
u64 pte = 0;
@@ -1204,8 +1208,7 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
return pte;
}
-static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
- u32 pt_level)
+static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
{
u64 pte = 0;
@@ -1246,12 +1249,11 @@ static u64 pte_encode_ps(u32 pt_level)
static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
const u16 pat_index)
{
- struct xe_device *xe = xe_bo_device(bo);
u64 pde;
pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pde |= pde_encode_pat_index(xe, pat_index);
+ pde |= pde_encode_pat_index(pat_index);
return pde;
}
@@ -1259,12 +1261,11 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
u16 pat_index, u32 pt_level)
{
- struct xe_device *xe = xe_bo_device(bo);
u64 pte;
pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+ pte |= pte_encode_pat_index(pat_index, pt_level);
pte |= pte_encode_ps(pt_level);
if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
@@ -1276,14 +1277,12 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
u16 pat_index, u32 pt_level)
{
- struct xe_device *xe = xe_vma_vm(vma)->xe;
-
pte |= XE_PAGE_PRESENT;
if (likely(!xe_vma_read_only(vma)))
pte |= XE_PAGE_RW;
- pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+ pte |= pte_encode_pat_index(pat_index, pt_level);
pte |= pte_encode_ps(pt_level);
if (unlikely(xe_vma_is_null(vma)))
@@ -1303,7 +1302,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
pte = addr;
pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+ pte |= pte_encode_pat_index(pat_index, pt_level);
pte |= pte_encode_ps(pt_level);
if (devmem)
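The hunks above drop the now-unused struct xe_device argument from the PAT-index encoders: once the encoding depends only on pat_index and pt_level, both passed in, the helpers are pure functions of their inputs. For readers outside the driver, these encoders follow the usual page-table idiom of OR-ing field bits into the physical address, as in this standalone sketch (the macro names and bit positions are invented for illustration and do not match the Xe hardware layout):

    #include <stdint.h>

    /* Illustrative bit layout only -- not the Xe hardware encoding. */
    #define PAGE_PRESENT  (1ull << 0)
    #define PAGE_RW       (1ull << 1)
    #define PAT_SHIFT     3

    static uint64_t pte_encode(uint64_t phys_addr, uint16_t pat_index, int writable)
    {
            uint64_t pte = phys_addr;                 /* address bits */

            pte |= PAGE_PRESENT;                      /* entry is valid */
            if (writable)
                    pte |= PAGE_RW;
            pte |= (uint64_t)pat_index << PAT_SHIFT;  /* cacheability selector */
            return pte;
    }

As the surrounding hunks show, the real helpers additionally OR in pte_encode_ps(pt_level) for the page size and a device-memory bit for VRAM/stolen backing.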
@@ -1483,19 +1482,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
/* Kernel migration VM shouldn't have a circular loop.. */
if (!(flags & XE_VM_FLAG_MIGRATION)) {
for_each_tile(tile, xe, id) {
- struct xe_gt *gt = tile->primary_gt;
- struct xe_vm *migrate_vm;
struct xe_exec_queue *q;
u32 create_flags = EXEC_QUEUE_FLAG_VM;
if (!vm->pt_root[id])
continue;
- migrate_vm = xe_migrate_get_vm(tile->migrate);
- q = xe_exec_queue_create_class(xe, gt, migrate_vm,
- XE_ENGINE_CLASS_COPY,
- create_flags);
- xe_vm_put(migrate_vm);
+ q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
if (IS_ERR(q)) {
err = PTR_ERR(q);
goto err_close;
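Here four lines of boilerplate per tile collapse into a single xe_exec_queue_create_bind() call. Judging only from the removed call site, the helper presumably wraps the same get-VM/create-queue/put-VM sequence; the sketch below is an assumption reconstructed from this diff, not the driver's actual implementation (which, per the changelog, also selects a reserved copy engine for user binds on faulting devices):

    /* Assumption: reconstructed from the removed call sites in this diff,
     * not the real xe_exec_queue_create_bind(). */
    static struct xe_exec_queue *
    sketch_exec_queue_create_bind(struct xe_device *xe, struct xe_tile *tile,
                                  u32 flags, u64 extensions /* unused in sketch */)
    {
            struct xe_vm *migrate_vm = xe_migrate_get_vm(tile->migrate);
            struct xe_exec_queue *q;

            q = xe_exec_queue_create_class(xe, tile->primary_gt, migrate_vm,
                                           XE_ENGINE_CLASS_COPY, flags);
            xe_vm_put(migrate_vm);  /* the queue holds its own VM reference */
            return q;
    }

Dropping the temporary reference right after queue creation is safe because the queue pins the VM for its own lifetime; centralizing that dance in one helper removes the chance of a leaked or double-dropped reference at each call site.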
@@ -1508,13 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
if (number_tiles > 1)
vm->composite_fence_ctx = dma_fence_context_alloc(1);
- mutex_lock(&xe->usm.lock);
- if (flags & XE_VM_FLAG_FAULT_MODE)
- xe->usm.num_vm_in_fault_mode++;
- else if (!(flags & XE_VM_FLAG_MIGRATION))
- xe->usm.num_vm_in_non_fault_mode++;
- mutex_unlock(&xe->usm.lock);
-
trace_xe_vm_create(vm);
return vm;
@@ -1628,11 +1614,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
up_write(&vm->lock);
mutex_lock(&xe->usm.lock);
- if (vm->flags & XE_VM_FLAG_FAULT_MODE)
- xe->usm.num_vm_in_fault_mode--;
- else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
- xe->usm.num_vm_in_non_fault_mode--;
-
if (vm->usm.asid) {
void *lookup;
@@ -1770,14 +1751,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
- xe_device_in_non_fault_mode(xe)))
- return -EINVAL;
-
- if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
- xe_device_in_fault_mode(xe)))
- return -EINVAL;
-
if (XE_IOCTL_DBG(xe, args->extensions))
return -EINVAL;
@@ -3185,9 +3158,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
{
struct xe_device *xe = xe_vma_vm(vma)->xe;
struct xe_tile *tile;
- struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE];
- u32 tile_needs_invalidate = 0;
+ struct xe_gt_tlb_invalidation_fence
+ fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
u8 id;
+ u32 fence_id = 0;
int ret = 0;
xe_assert(xe, !xe_vma_is_null(vma));
@@ -3215,27 +3189,37 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
if (xe_pt_zap_ptes(tile, vma)) {
xe_device_wmb(xe);
xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
- &fence[id], true);
+ &fence[fence_id],
+ true);
- /*
- * FIXME: We potentially need to invalidate multiple
- * GTs within the tile
- */
ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
- &fence[id], vma);
+ &fence[fence_id], vma);
if (ret < 0) {
- xe_gt_tlb_invalidation_fence_fini(&fence[id]);
+ xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
goto wait;
}
+ ++fence_id;
+
+ if (!tile->media_gt)
+ continue;
- tile_needs_invalidate |= BIT(id);
+ xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+ &fence[fence_id],
+ true);
+
+ ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
+ &fence[fence_id], vma);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+ goto wait;
+ }
+ ++fence_id;
}
}
wait:
- for_each_tile(tile, xe, id)
- if (tile_needs_invalidate & BIT(id))
- xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+ for (id = 0; id < fence_id; ++id)
+ xe_gt_tlb_invalidation_fence_wait(&fence[id]);
vma->tile_invalidated = vma->tile_mask;
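This rewrite retires the per-tile tile_needs_invalidate bitmask in favor of a plain issue counter: each successfully initialized fence takes the next array slot (sized for every GT on every tile, media GTs included), and the wait loop then covers exactly [0, fence_id). A standalone sketch of that collect-then-wait pattern with stub fences (everything below is illustrative; only the counter idiom mirrors the diff):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_UNITS 4     /* stands in for tiles * GTs-per-tile */

    struct fence { bool issued; };

    static int issue_invalidation(struct fence *f, int unit)
    {
            /* Stub: pretend the invalidation was submitted successfully. */
            f->issued = true;
            printf("issued invalidation on unit %d\n", unit);
            return 0;
    }

    static void fence_wait(struct fence *f)
    {
            /* Stub: a real fence would block until the HW signals completion. */
            f->issued = false;
    }

    int main(void)
    {
            struct fence fence[MAX_UNITS];
            int fence_id = 0;       /* count of fences actually issued */

            for (int unit = 0; unit < MAX_UNITS; unit++) {
                    if (issue_invalidation(&fence[fence_id], unit) < 0)
                            break;  /* on error, wait only on what was issued */
                    ++fence_id;
            }

            /* Wait on exactly the fences issued -- no bitmask bookkeeping. */
            for (int id = 0; id < fence_id; ++id)
                    fence_wait(&fence[id]);
            return 0;
    }

The counter also resolves the removed FIXME: with the bitmask keyed by tile id, a tile could only account for one fence, whereas the counter lets the primary and media GT of the same tile each occupy their own slot.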