diff options
author | Jani Nikula <jani.nikula@intel.com> | 2023-01-02 10:31:03 +0100 |
---|---|---|
committer | Jani Nikula <jani.nikula@intel.com> | 2023-01-02 10:31:03 +0100 |
commit | 0d8eae7b124e2ddaee00f186fe922450faad0ed7 (patch) | |
tree | 5acf7a7a4300665c26b128d9afd1843b402e8b8e /drivers/gpu/drm/i915 | |
parent | drm/i915/mtl: Add support of Tile4 to MTL (diff) | |
parent | Linux 6.2-rc1 (diff) | |
download | linux-0d8eae7b124e2ddaee00f186fe922450faad0ed7.tar.xz linux-0d8eae7b124e2ddaee00f186fe922450faad0ed7.zip |
Merge drm/drm-next into drm-intel-next
Sync up with v6.2-rc1.
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915')
110 files changed, 1234 insertions, 728 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 0ed5985c03b5..7046e435a155 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -130,9 +130,11 @@ gt-y += \ gt/intel_sseu.o \ gt/intel_sseu_debugfs.o \ gt/intel_timeline.o \ + gt/intel_wopcm.o \ gt/intel_workarounds.o \ gt/shmem_utils.o \ gt/sysfs_engines.o + # x86 intel-gtt module support gt-$(CONFIG_X86) += gt/intel_ggtt_gmch.o # autogenerated null render state @@ -186,8 +188,7 @@ i915-y += \ i915_trace_points.o \ i915_ttm_buddy_manager.o \ i915_vma.o \ - i915_vma_resource.o \ - intel_wopcm.o + i915_vma_resource.o # general-purpose microcontroller (GuC) support i915-y += gt/uc/intel_uc.o \ diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index ab385d18ddcc..5575d7abdc09 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -124,6 +124,8 @@ static const struct fb_ops intelfb_ops = { .owner = THIS_MODULE, DRM_FB_HELPER_DEFAULT_OPS, .fb_set_par = intel_fbdev_set_par, + .fb_read = drm_fb_helper_cfb_read, + .fb_write = drm_fb_helper_cfb_write, .fb_fillrect = drm_fb_helper_cfb_fillrect, .fb_copyarea = drm_fb_helper_cfb_copyarea, .fb_imageblit = drm_fb_helper_cfb_imageblit, @@ -254,7 +256,7 @@ static int intelfb_create(struct drm_fb_helper *helper, goto out_unlock; } - info = drm_fb_helper_alloc_fbi(helper); + info = drm_fb_helper_alloc_info(helper); if (IS_ERR(info)) { drm_err(&dev_priv->drm, "Failed to allocate fb_info (%pe)\n", info); ret = PTR_ERR(info); @@ -584,7 +586,7 @@ void intel_fbdev_unregister(struct drm_i915_private *dev_priv) if (!current_is_async()) intel_fbdev_sync(ifbdev); - drm_fb_helper_unregister_fbi(&ifbdev->helper); + drm_fb_helper_unregister_info(&ifbdev->helper); } void intel_fbdev_fini(struct drm_i915_private *dev_priv) @@ -627,7 +629,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous if (!ifbdev || !ifbdev->vma) goto set_suspend; - info = ifbdev->helper.fbdev; + info = ifbdev->helper.info; if (synchronous) { /* Flush any pending work to turn the console on, and then diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 01402f3c58f6..7f2831efc798 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -546,7 +546,7 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) } if (intel_engine_uses_guc(master)) { - DRM_DEBUG("bonding extension not supported with GuC submission"); + drm_dbg(&i915->drm, "bonding extension not supported with GuC submission"); return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index ec6f7ae47783..fd556a076d05 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -97,6 +97,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * struct drm_i915_private *i915 = to_i915(obj->base.dev); int ret; + dma_resv_assert_held(dma_buf->resv); + if (obj->base.size < vma->vm_end - vma->vm_start) return -EINVAL; @@ -238,7 +240,6 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *sgt; - unsigned int sg_page_sizes; assert_object_held(obj); @@ -262,8 +263,7 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) (!HAS_LLC(i915) && !IS_DG1(i915))) wbinvd_on_all_cpus(); - sg_page_sizes = i915_sg_dma_sizes(sgt->sgl); - __i915_gem_object_set_pages(obj, sgt, sg_page_sizes); + __i915_gem_object_set_pages(obj, sgt); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index ffa30d2dd47f..da09767fda07 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -54,13 +54,13 @@ enum { #define DBG_FORCE_RELOC 0 /* choose one of the above! */ }; -/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */ -#define __EXEC_OBJECT_HAS_PIN BIT(30) -#define __EXEC_OBJECT_HAS_FENCE BIT(29) -#define __EXEC_OBJECT_USERPTR_INIT BIT(28) -#define __EXEC_OBJECT_NEEDS_MAP BIT(27) -#define __EXEC_OBJECT_NEEDS_BIAS BIT(26) -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */ +/* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */ +#define __EXEC_OBJECT_HAS_PIN BIT(29) +#define __EXEC_OBJECT_HAS_FENCE BIT(28) +#define __EXEC_OBJECT_USERPTR_INIT BIT(27) +#define __EXEC_OBJECT_NEEDS_MAP BIT(26) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(25) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 25) /* all of the above + */ #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) @@ -2102,7 +2102,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) eb->composite_fence ? eb->composite_fence : &eb->requests[j]->fence, - flags | __EXEC_OBJECT_NO_RESERVE); + flags | __EXEC_OBJECT_NO_RESERVE | + __EXEC_OBJECT_NO_REQUEST_AWAIT); } } @@ -2149,7 +2150,8 @@ err_skip: return err; } -static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +static int i915_gem_check_execbuffer(struct drm_i915_private *i915, + struct drm_i915_gem_execbuffer2 *exec) { if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) return -EINVAL; @@ -2162,7 +2164,7 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) } if (exec->DR4 == 0xffffffff) { - DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n"); exec->DR4 = 0; } if (exec->DR1 || exec->DR4) @@ -2425,7 +2427,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) file_priv->bsd_engine = - prandom_u32_max(num_vcs_engines(dev_priv)); + get_random_u32_below(num_vcs_engines(dev_priv)); return file_priv->bsd_engine; } @@ -2800,7 +2802,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb, syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { - DRM_DEBUG("Invalid syncobj handle provided\n"); + drm_dbg(&eb->i915->drm, + "Invalid syncobj handle provided\n"); return -ENOENT; } @@ -2808,7 +2811,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb, if (!fence && user_fence.flags && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { - DRM_DEBUG("Syncobj handle has no fence\n"); + drm_dbg(&eb->i915->drm, + "Syncobj handle has no fence\n"); drm_syncobj_put(syncobj); return -EINVAL; } @@ -2817,7 +2821,9 @@ add_timeline_fence_array(struct i915_execbuffer *eb, err = dma_fence_chain_find_seqno(&fence, point); if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { - DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); + drm_dbg(&eb->i915->drm, + "Syncobj handle missing requested point %llu\n", + point); dma_fence_put(fence); drm_syncobj_put(syncobj); return err; @@ -2843,7 +2849,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb, * 0) would break the timeline. */ if (user_fence.flags & I915_EXEC_FENCE_WAIT) { - DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); + drm_dbg(&eb->i915->drm, + "Trying to wait & signal the same timeline point.\n"); dma_fence_put(fence); drm_syncobj_put(syncobj); return -EINVAL; @@ -2914,14 +2921,16 @@ static int add_fence_array(struct i915_execbuffer *eb) syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { - DRM_DEBUG("Invalid syncobj handle provided\n"); + drm_dbg(&eb->i915->drm, + "Invalid syncobj handle provided\n"); return -ENOENT; } if (user_fence.flags & I915_EXEC_FENCE_WAIT) { fence = drm_syncobj_fence_get(syncobj); if (!fence) { - DRM_DEBUG("Syncobj handle has no fence\n"); + drm_dbg(&eb->i915->drm, + "Syncobj handle has no fence\n"); drm_syncobj_put(syncobj); return -EINVAL; } @@ -3516,7 +3525,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - err = i915_gem_check_execbuffer(args); + err = i915_gem_check_execbuffer(i915, args); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 629acb403a2c..f66bcefc09ec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -35,7 +35,6 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; - unsigned int sg_page_sizes; unsigned int npages; int max_order = MAX_ORDER; unsigned int max_segment; @@ -64,7 +63,6 @@ create_st: sg = st->sgl; st->nents = 0; - sg_page_sizes = 0; do { int order = min(fls(npages) - 1, max_order); @@ -83,7 +81,6 @@ create_st: } while (1); sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; st->nents++; npages -= 1 << order; @@ -105,7 +102,7 @@ create_st: goto err; } - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + __i915_gem_object_set_pages(obj, st); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index e63329bc8065..c29efdef8313 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -330,7 +330,7 @@ retry: if (ret) goto err_rpm; - ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + ret = intel_gt_reset_lock_interruptible(ggtt->vm.gt, &srcu); if (ret) goto err_pages; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 733696057761..1a0886b8aaa1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -785,6 +785,9 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) if (!HAS_FLAT_CCS(to_i915(obj->base.dev))) return false; + if (obj->flags & I915_BO_ALLOC_CCS_AUX) + return true; + for (i = 0; i < obj->mm.n_placements; i++) { /* Compression is not allowed for the objects with smem placement */ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 6b9ecff42bb5..3db53769864c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -403,8 +403,7 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages, - unsigned int sg_page_sizes); + struct sg_table *pages); int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d0d6772e6f36..ab4c2f90a564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -327,16 +327,18 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +#define I915_BO_ALLOC_CCS_AUX BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ - I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) + I915_BO_ALLOC_GPU_ONLY | \ + I915_BO_ALLOC_CCS_AUX) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 16f845663ff2..05a27723ebb8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -16,8 +16,7 @@ #include "i915_gem_mman.h" void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages, - unsigned int sg_page_sizes) + struct sg_table *pages) { struct drm_i915_private *i915 = to_i915(obj->base.dev); unsigned long supported = RUNTIME_INFO(i915)->page_sizes; @@ -45,8 +44,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.pages = pages; - GEM_BUG_ON(!sg_page_sizes); - obj->mm.page_sizes.phys = sg_page_sizes; + obj->mm.page_sizes.phys = i915_sg_dma_sizes(pages->sgl); + GEM_BUG_ON(!obj->mm.page_sizes.phys); /* * Calculate the supported page-sizes which fit into the given diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 0d0e46dae559..68453572275b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -79,7 +79,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) /* We're no longer struct page backed */ obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; - __i915_gem_object_set_pages(obj, st, sg->length); + __i915_gem_object_set_pages(obj, st); return 0; @@ -209,11 +209,8 @@ static int i915_gem_object_shmem_to_phys(struct drm_i915_gem_object *obj) return 0; err_xfer: - if (!IS_ERR_OR_NULL(pages)) { - unsigned int sg_page_sizes = i915_sg_dma_sizes(pages->sgl); - - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); - } + if (!IS_ERR_OR_NULL(pages)) + __i915_gem_object_set_pages(obj, pages); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 2f7804492cd5..9c759df700ca 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -247,7 +247,7 @@ rebuild_st: if (i915_gem_object_can_bypass_llc(obj)) obj->cache_dirty = true; - __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + __i915_gem_object_set_pages(obj, st); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 0c70711818ed..bc9521078807 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -628,7 +628,7 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) sg_dma_len(pages->sgl), POISON_INUSE); - __i915_gem_object_set_pages(obj, pages, obj->stolen->size); + __i915_gem_object_set_pages(obj, pages); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 25129af70f70..1e50fb0d6bfc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -603,6 +603,10 @@ static int i915_ttm_truncate(struct drm_i915_gem_object *obj) WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + err = ttm_bo_wait(bo, true, false); + if (err) + return err; + err = i915_ttm_move_notify(bo); if (err) return err; @@ -815,8 +819,7 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, GEM_BUG_ON(obj->mm.rsgt); obj->mm.rsgt = rsgt; - __i915_gem_object_set_pages(obj, &rsgt->table, - i915_sg_dma_sizes(rsgt->table.sgl)); + __i915_gem_object_set_pages(obj, &rsgt->table); } GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages)); @@ -1031,9 +1034,6 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) vm_fault_t ret; int idx; - if (i915_ttm_is_ghost_object(bo)) - return VM_FAULT_SIGBUS; - /* Sanity check that we allow writing into this object */ if (unlikely(i915_gem_object_is_readonly(obj) && area->vm_flags & VM_WRITE)) @@ -1048,9 +1048,6 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - if (i915_ttm_cpu_maps_iomem(bo->resource)) - wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); - if (!i915_ttm_resource_mappable(bo->resource)) { int err = -ENODEV; int i; @@ -1078,6 +1075,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) } } + if (i915_ttm_cpu_maps_iomem(bo->resource)) + wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); + if (drm_dev_enter(dev, &idx)) { ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, TTM_BO_VM_NUM_PREFAULT); @@ -1098,6 +1098,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list); spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + + GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(bo->resource)); } if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) @@ -1180,6 +1182,8 @@ static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj) } } + GEM_WARN_ON(obj->userfault_count); + ttm_bo_unmap_virtual(i915_gem_to_ttm(obj)); if (wakeref) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 07e49f22f2de..7e67742bc65e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -50,6 +50,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, container_of(bo->bdev, typeof(*i915), bdev); struct drm_i915_gem_object *backup; struct ttm_operation_ctx ctx = {}; + unsigned int flags; int err = 0; if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup) @@ -65,7 +66,22 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, if (obj->flags & I915_BO_ALLOC_PM_VOLATILE) return 0; - backup = i915_gem_object_create_shmem(i915, obj->base.size); + /* + * It seems that we might have some framebuffers still pinned at this + * stage, but for such objects we might also need to deal with the CCS + * aux state. Make sure we force the save/restore of the CCS state, + * otherwise we might observe display corruption, when returning from + * suspend. + */ + flags = 0; + if (i915_gem_object_needs_ccs_pages(obj)) { + WARN_ON_ONCE(!i915_gem_object_is_framebuffer(obj)); + WARN_ON_ONCE(!pm_apply->allow_gpu); + + flags = I915_BO_ALLOC_CCS_AUX; + } + backup = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + obj->base.size, 0, flags); if (IS_ERR(backup)) return PTR_ERR(backup); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1b1a22716722..9348b1804d53 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -131,7 +131,6 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev); struct sg_table *st; - unsigned int sg_page_sizes; struct page **pvec; int ret; @@ -170,8 +169,7 @@ alloc_table: if (i915_gem_object_can_bypass_llc(obj)) obj->cache_dirty = true; - sg_page_sizes = i915_sg_dma_sizes(st->sgl); - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + __i915_gem_object_set_pages(obj, st); return 0; @@ -427,9 +425,10 @@ probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { VMA_ITERATOR(vmi, mm, addr); struct vm_area_struct *vma; + unsigned long end = addr + len; mmap_read_lock(mm); - for_each_vma_range(vmi, vma, addr + len) { + for_each_vma_range(vmi, vma, end) { /* Check for holes, note that we also update the addr below */ if (vma->vm_start > addr) break; @@ -441,7 +440,7 @@ probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) } mmap_read_unlock(mm); - if (vma) + if (vma || addr < end) return -EFAULT; return 0; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index f963b8e1e37b..cbd9b624a788 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -68,7 +68,7 @@ static int huge_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, pages)) goto err; - __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); + __i915_gem_object_set_pages(obj, pages); return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 0cb99e75b0bc..beaf27e09e8a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -136,7 +136,7 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) goto err; GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + __i915_gem_object_set_pages(obj, st); return 0; @@ -210,7 +210,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) const u64 max_len = rounddown_pow_of_two(UINT_MAX); struct sg_table *st; struct scatterlist *sg; - unsigned int sg_page_sizes; u64 rem; st = kmalloc(sizeof(*st), GFP); @@ -226,7 +225,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) rem = obj->base.size; sg = st->sgl; st->nents = 0; - sg_page_sizes = 0; do { unsigned int page_size = get_largest_page_size(i915, rem); unsigned int len = min(page_size * div_u64(rem, page_size), @@ -239,8 +237,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) sg_dma_len(sg) = len; sg_dma_address(sg) = page_size; - sg_page_sizes |= len; - st->nents++; rem -= len; @@ -254,7 +250,7 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) i915_sg_trim(st); - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + __i915_gem_object_set_pages(obj, st); return 0; } @@ -286,7 +282,7 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; sg_dma_address(sg) = page_size; - __i915_gem_object_set_pages(obj, st, sg->length); + __i915_gem_object_set_pages(obj, st); return 0; #undef GFP diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 9a6a6b5b722b..692a16914ca0 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -13,6 +13,7 @@ #include "gt/intel_gt_regs.h" #include "gem/i915_gem_lmem.h" +#include "gem/selftests/igt_gem_utils.h" #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" #include "selftests/i915_random.h" @@ -457,21 +458,6 @@ static int verify_buffer(const struct tiled_blits *t, return ret; } -static int move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) -{ - int err; - - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, flags); - i915_vma_unlock(vma); - - return err; -} - static int pin_buffer(struct i915_vma *vma, u64 addr) { int err; @@ -525,11 +511,11 @@ tiled_blit(struct tiled_blits *t, goto err_bb; } - err = move_to_active(t->batch, rq, 0); + err = igt_vma_move_to_active_unlocked(t->batch, rq, 0); if (!err) - err = move_to_active(src->vma, rq, 0); + err = igt_vma_move_to_active_unlocked(src->vma, rq, 0); if (!err) - err = move_to_active(dst->vma, rq, 0); + err = igt_vma_move_to_active_unlocked(dst->vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, t->batch->node.start, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index a666d7e610f5..c228fe4aba50 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -239,9 +239,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) } intel_ring_advance(rq, cs); - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); out_rq: i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d8864444432b..a0ff51d71d07 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -984,15 +984,11 @@ retry: goto err_batch; } - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_vma_move_to_active(batch, rq, 0); if (err) goto skip_request; - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); if (err) goto skip_request; @@ -1553,9 +1549,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, } i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); i915_vma_unlock(vma); if (err) goto skip_request; @@ -1689,9 +1683,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, } i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3c486efe0bac..3f658d5717d8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -566,10 +566,8 @@ retry: goto err_unpin; } - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, - EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, + EXEC_OBJECT_WRITE); i915_request_add(rq); err_unpin: @@ -1609,9 +1607,7 @@ retry: goto out_unpin; } - err = i915_request_await_object(rq, vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); err = engine->emit_bb_start(rq, vma->node.start, 0, 0); i915_request_get(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 3c55e77b0f1b..374b10ac430e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -131,17 +131,13 @@ int igt_gpu_fill_dw(struct intel_context *ce, } i915_vma_lock(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 4221cf84d175..1379fbc14431 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -9,6 +9,8 @@ #include <linux/types.h> +#include "i915_vma.h" + struct i915_request; struct i915_gem_context; struct i915_vma; @@ -29,4 +31,16 @@ int igt_gpu_fill_dw(struct intel_context *ce, struct i915_vma *vma, u64 offset, unsigned long count, u32 val); +static inline int __must_check +igt_vma_move_to_active_unlocked(struct i915_vma *vma, struct i915_request *rq, + unsigned int flags) +{ + int err; + + i915_vma_lock(vma); + err = _i915_vma_move_to_active(vma, rq, &rq->fence, flags); + i915_vma_unlock(vma); + return err; +} + #endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 6ae8b07cfaa1..c33e0d72d670 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -246,6 +246,13 @@ static const struct engine_info intel_engines[] = { { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE } } }, + [GSC0] = { + .class = OTHER_CLASS, + .instance = OTHER_GSC_INSTANCE, + .mmio_bases = { + { .graphics_ver = 12, .base = MTL_GSC_RING_BASE } + } + }, }; /** @@ -326,6 +333,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: + case OTHER_CLASS: if (GRAPHICS_VER(gt->i915) < 8) return 0; return GEN8_LR_CONTEXT_OTHER_SIZE; @@ -417,6 +425,7 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id id) [CCS1] = GEN11_GRDOM_RENDER, [CCS2] = GEN11_GRDOM_RENDER, [CCS3] = GEN11_GRDOM_RENDER, + [GSC0] = GEN12_GRDOM_GSC, }; GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || !engine_reset_domains[id]); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 6b5d4ea22b67..4fd54fb8810f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -136,6 +136,7 @@ enum intel_engine_id { CCS2, CCS3, #define _CCS(n) (CCS0 + (n)) + GSC0, I915_NUM_ENGINES #define INVALID_ENGINE ((enum intel_engine_id)-1) }; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 46a174f8aa00..cd4f1b126f75 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -140,6 +140,7 @@ const char *intel_engine_class_repr(u8 class) [COPY_ENGINE_CLASS] = "bcs", [VIDEO_DECODE_CLASS] = "vcs", [VIDEO_ENHANCEMENT_CLASS] = "vecs", + [OTHER_CLASS] = "other", [COMPUTE_CLASS] = "ccs", }; @@ -190,6 +191,15 @@ static void add_legacy_ring(struct legacy_ring *ring, ring->instance++; } +static void engine_rename(struct intel_engine_cs *engine, const char *name, u16 instance) +{ + char old[sizeof(engine->name)]; + + memcpy(old, engine->name, sizeof(engine->name)); + scnprintf(engine->name, sizeof(engine->name), "%s%u", name, instance); + drm_dbg(&engine->i915->drm, "renamed %s to %s\n", old, engine->name); +} + void intel_engines_driver_register(struct drm_i915_private *i915) { struct legacy_ring ring = {}; @@ -205,11 +215,19 @@ void intel_engines_driver_register(struct drm_i915_private *i915) struct intel_engine_cs *engine = container_of((struct rb_node *)it, typeof(*engine), uabi_node); - char old[sizeof(engine->name)]; if (intel_gt_has_unrecoverable_error(engine->gt)) continue; /* ignore incomplete engines */ + /* + * We don't want to expose the GSC engine to the users, but we + * still rename it so it is easier to identify in the debug logs + */ + if (engine->id == GSC0) { + engine_rename(engine, "gsc", 0); + continue; + } + GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; @@ -219,11 +237,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915) i915->engine_uabi_class_count[engine->uabi_class]++; /* Replace the internal name with the final user facing name */ - memcpy(old, engine->name, sizeof(engine->name)); - scnprintf(engine->name, sizeof(engine->name), "%s%u", - intel_engine_class_repr(engine->class), - engine->uabi_instance); - DRM_DEBUG_DRIVER("renamed %s to %s\n", old, engine->name); + engine_rename(engine, + intel_engine_class_repr(engine->class), + engine->uabi_instance); rb_link_node(&engine->uabi_node, prev, p); rb_insert_color(&engine->uabi_node, &i915->uabi_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index b23ef1faa501..2daffa7c7dfd 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3690,7 +3690,7 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) * NB This does not force us to execute on this engine, it will just * typically be the first we inspect for submission. */ - swp = prandom_u32_max(ve->num_siblings); + swp = get_random_u32_below(ve->num_siblings); if (swp) swap(ve->siblings[swp], ve->siblings[0]); } @@ -3922,6 +3922,7 @@ static struct intel_context * execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, unsigned long flags) { + struct drm_i915_private *i915 = siblings[0]->i915; struct virtual_engine *ve; unsigned int n; int err; @@ -3930,7 +3931,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, if (!ve) return ERR_PTR(-ENOMEM); - ve->base.i915 = siblings[0]->i915; + ve->base.i915 = i915; ve->base.gt = siblings[0]->gt; ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; @@ -3989,8 +3990,9 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, GEM_BUG_ON(!is_power_of_2(sibling->mask)); if (sibling->mask & ve->base.mask) { - DRM_DEBUG("duplicate %s entry in load balancer\n", - sibling->name); + drm_dbg(&i915->drm, + "duplicate %s entry in load balancer\n", + sibling->name); err = -EINVAL; goto err_put; } @@ -4024,8 +4026,9 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, */ if (ve->base.class != OTHER_CLASS) { if (ve->base.class != sibling->class) { - DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", - sibling->class, ve->base.class); + drm_dbg(&i915->drm, + "invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); err = -EINVAL; goto err_put; } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 2518cebbf931..8145851ad23d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -560,7 +560,7 @@ static int init_ggtt(struct i915_ggtt *ggtt) * why. */ ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, - intel_wopcm_guc_size(&ggtt->vm.i915->wopcm)); + intel_wopcm_guc_size(&ggtt->vm.gt->wopcm)); ret = intel_vgt_balloon(ggtt); if (ret) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index ea775e601686..995082d45cb2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -816,8 +816,8 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, if (obj->bit_17 == NULL) { obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL); if (obj->bit_17 == NULL) { - DRM_ERROR("Failed to allocate memory for bit 17 " - "record\n"); + drm_err(&to_i915(obj->base.dev)->drm, + "Failed to allocate memory for bit 17 record\n"); return; } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 53b8dcf7b34d..767e329e1cc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -55,6 +55,7 @@ void intel_gt_common_init_early(struct intel_gt *gt) seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); intel_gt_pm_init_early(gt); + intel_wopcm_init_early(>->wopcm); intel_uc_init_early(>->uc); intel_rps_init_early(>->rps); } @@ -191,7 +192,7 @@ int intel_gt_init_hw(struct intel_gt *gt) ret = i915_ppgtt_init_hw(gt); if (ret) { - DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); + drm_err(&i915->drm, "Enabling PPGTT failed (%d)\n", ret); goto out; } @@ -263,7 +264,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, * some errors might have become stuck, * mask them. */ - DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); + drm_dbg(>->i915->drm, "EIR stuck: 0x%08x, masking\n", eir); rmw_set(uncore, EMR, eir); intel_uncore_write(uncore, GEN2_IIR, I915_MASTER_ERROR_INTERRUPT); @@ -676,8 +677,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) @@ -1034,9 +1040,9 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb) { if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0, - TLB_INVAL_TIMEOUT_US, - TLB_INVAL_TIMEOUT_MS); + return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS); else return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0, TLB_INVAL_TIMEOUT_US, @@ -1111,6 +1117,11 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; + if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); } awake |= engine->mask; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index f26882fdc24c..6f6b9e04d916 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -17,6 +17,9 @@ static void guc_irq_handler(struct intel_guc *guc, u16 iir) { + if (unlikely(!guc->interrupts.enabled)) + return; + if (iir & GUC_INTR_GUC2HOST) intel_guc_to_host_event_handler(guc); } @@ -44,8 +47,9 @@ gen11_gt_engine_identity(struct intel_gt *gt, !time_after32(local_clock() >> 10, timeout_ts)); if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { - DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", - bank, bit, ident); + drm_err(>->i915->drm, + "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); return 0; } @@ -81,35 +85,27 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, instance, iir); } -static void -gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, - const u8 instance, const u16 iir) +static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance) { - struct intel_engine_cs *engine; - - /* - * Platforms with standalone media have their media engines in another - * GT. - */ - if (MEDIA_VER(gt->i915) >= 13 && - (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) { - if (!gt->i915->media_gt) - goto err; + struct intel_gt *media_gt = gt->i915->media_gt; - gt = gt->i915->media_gt; + /* we expect the non-media gt to be passed in */ + GEM_BUG_ON(gt == media_gt); + + if (!media_gt) + return gt; + + switch (class) { + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + return media_gt; + case OTHER_CLASS: + if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, GSC0)) + return media_gt; + fallthrough; + default: + return gt; } - - if (instance <= MAX_ENGINE_INSTANCE) - engine = gt->engine_class[class][instance]; - else - engine = NULL; - - if (likely(engine)) - return intel_engine_cs_irq(engine, iir); - -err: - WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", - class, instance); } static void @@ -122,8 +118,17 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity) if (unlikely(!intr)) return; - if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS) - return gen11_engine_irq_handler(gt, class, instance, intr); + /* + * Platforms with standalone media have the media and GSC engines in + * another GT. + */ + gt = pick_gt(gt, class, instance); + + if (class <= MAX_ENGINE_CLASS && instance <= MAX_ENGINE_INSTANCE) { + struct intel_engine_cs *engine = gt->engine_class[class][instance]; + if (engine) + return intel_engine_cs_irq(engine, intr); + } if (class == OTHER_CLASS) return gen11_other_irq_handler(gt, instance, intr); @@ -206,7 +211,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0); if (CCS_MASK(gt)) intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0); - if (HAS_HECI_GSC(gt->i915)) + if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0)) intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, 0); /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ @@ -233,7 +238,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0); if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0); - if (HAS_HECI_GSC(gt->i915)) + if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0)) intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); @@ -249,7 +254,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) { struct intel_uncore *uncore = gt->uncore; u32 irqs = GT_RENDER_USER_INTERRUPT; - const u32 gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1); + u32 guc_mask = intel_uc_wants_guc(>->uc) ? GUC_INTR_GUC2HOST : 0; + u32 gsc_mask = 0; u32 dmask; u32 smask; @@ -261,6 +267,11 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) dmask = irqs << 16 | irqs; smask = irqs << 16; + if (HAS_ENGINE(gt, GSC0)) + gsc_mask = irqs; + else if (HAS_HECI_GSC(gt->i915)) + gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1); + BUILD_BUG_ON(irqs & 0xffff0000); /* Enable RCS, BCS, VCS and VECS class interrupts. */ @@ -268,9 +279,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask); if (CCS_MASK(gt)) intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask); - if (HAS_HECI_GSC(gt->i915)) - intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, - gsc_mask); + if (gsc_mask) + intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, gsc_mask); /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); @@ -296,9 +306,22 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask); if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask); - if (HAS_HECI_GSC(gt->i915)) + if (gsc_mask) intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask); + if (guc_mask) { + /* the enable bit is common for both GTs but the masks are separate */ + u32 mask = gt->type == GT_MEDIA ? + REG_FIELD_PREP(ENGINE0_MASK, guc_mask) : + REG_FIELD_PREP(ENGINE1_MASK, guc_mask); + + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, + REG_FIELD_PREP(ENGINE1_MASK, guc_mask)); + + /* we might not be the first GT to write this reg */ + intel_uncore_rmw(uncore, MTL_GUC_MGUC_INTR_MASK, mask, 0); + } + /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. @@ -307,10 +330,6 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) gt->pm_imr = ~gt->pm_ier; intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); - - /* Same thing for GuC interrupts */ - intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0); - intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0); } void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) @@ -359,7 +378,8 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | GT_CS_MASTER_ERROR_INTERRUPT)) - DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir); + drm_dbg(>->i915->drm, "Command parser error, gt_iir 0x%08x\n", + gt_iir); if (gt_iir & GT_PARITY_ERROR(gt->i915)) gen7_parity_error_irq_handler(gt, gt_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 830edffe88cc..ea86c1ab5dc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -702,7 +702,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, } /** - * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state + * intel_gt_mcr_wait_for_reg - wait until MCR register matches expected state * @gt: GT structure * @reg: the register to read * @mask: mask to apply to register value @@ -730,17 +730,19 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, * * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. */ -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms) +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms) { - u32 reg_value = 0; -#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value) int ret; + lockdep_assert_not_held(>->uncore->lock); + +#define done ((intel_gt_mcr_read_any(gt, reg) & mask) == value) + /* Catch any overuse of this function */ might_sleep_if(slow_timeout_ms); GEM_BUG_ON(fast_timeout_us > 20000); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 3fb0502bff22..ae93b20e1c17 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -37,12 +37,12 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, unsigned int *group, unsigned int *instance); -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms); +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms); /* * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 3fbf70a58747..16db85fab0b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -20,10 +20,31 @@ #include "intel_rc6.h" #include "intel_rps.h" #include "intel_wakeref.h" +#include "intel_pcode.h" #include "pxp/intel_pxp_pm.h" #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2) +static void mtl_media_busy(struct intel_gt *gt) +{ + /* Wa_14017073508: mtl */ + if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && + gt->type == GT_MEDIA) + snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, + PCODE_MBOX_GT_STATE_MEDIA_BUSY, + PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); +} + +static void mtl_media_idle(struct intel_gt *gt) +{ + /* Wa_14017073508: mtl */ + if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && + gt->type == GT_MEDIA) + snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, + PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY, + PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); +} + static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(>->user_wakeref); @@ -71,6 +92,9 @@ static int __gt_unpark(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); + /* Wa_14017073508: mtl */ + mtl_media_busy(gt); + /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -120,6 +144,9 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); + /* Wa_14017073508: mtl */ + mtl_media_idle(gt); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 40d0a3be42ac..83df4cd5e06c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); -static void print_rc6_res(struct seq_file *m, - const char *title, - const i915_reg_t reg) -{ - struct intel_gt *gt = m->private; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - seq_printf(m, "%s %u (%llu us)\n", title, - intel_uncore_read(gt->uncore, reg), - intel_rc6_residency_us(>->rc6, reg)); -} - static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; @@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m) seq_printf(m, "Media Power Well: %s\n", (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + intel_rc6_print_residency(m, "Render RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "Media RC6 residency since boot:", INTEL_RC6_RES_VLV_MEDIA); return fw_domains_show(m, NULL); } @@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m) } /* Not exactly sure what this is */ - print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", - GEN6_GT_GFX_RC6_LOCKED); - print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); - print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since boot:", + INTEL_RC6_RES_RC6_LOCKED); + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "RC6+ residency since boot:", INTEL_RC6_RES_RC6p); + intel_rc6_print_residency(m, "RC6++ residency since boot:", INTEL_RC6_RES_RC6pp); if (GRAPHICS_VER(i915) <= 7) { seq_printf(m, "RC6 voltage: %dmV\n", @@ -269,6 +256,61 @@ static int ilk_drpc(struct seq_file *m) return 0; } +static int mtl_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, mt_fwake_req; + u32 mtl_powergate_enable = 0, mtl_powergate_status = 0; + + mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT); + gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + mtl_powergate_status = intel_uncore_read(uncore, + GEN9_PWRGT_DOMAIN_STATUS); + + seq_printf(m, "RC6 Enabled: %s\n", + str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (gt->type == GT_MEDIA) { + seq_printf(m, "Media Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } else { + seq_printf(m, "Render Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_RENDER_PG_ENABLE)); + } + + seq_puts(m, "Current RC state: "); + switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) { + case MTL_CC0: + seq_puts(m, "RC0\n"); + break; + case MTL_CC6: + seq_puts(m, "RC6\n"); + break; + default: + MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status)); + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req); + if (gt->type == GT_MEDIA) + seq_printf(m, "Media Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + else + seq_printf(m, "Render Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + + /* Works for both render and media gt's */ + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + + return fw_domains_show(m, NULL); +} + static int drpc_show(struct seq_file *m, void *unused) { struct intel_gt *gt = m->private; @@ -277,7 +319,9 @@ static int drpc_show(struct seq_file *m, void *unused) int err = -ENODEV; with_intel_runtime_pm(gt->uncore->rpm, wakeref) { - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + err = mtl_drpc(m); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_drpc(m); else if (GRAPHICS_VER(i915) >= 6) err = gen6_drpc(m); @@ -307,7 +351,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> MEMSTAT_VID_SHIFT); drm_printf(p, "Current P-state: %d\n", - (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rgvstat)); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { u32 rpmodectl, freq_sts; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 5051d8ac7ae9..c3cd92691795 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -22,6 +22,13 @@ */ #define PERF_REG(offset) _MMIO(offset) +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_CAGF_MASK REG_GENMASK(8, 0) +#define MTL_CC0 0x0 +#define MTL_CC6 0x3 +#define MTL_CC_MASK REG_GENMASK(12, 9) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 @@ -644,6 +651,7 @@ #define XEHPC_GRDOM_BLT3 REG_BIT(26) #define XEHPC_GRDOM_BLT2 REG_BIT(25) #define XEHPC_GRDOM_BLT1 REG_BIT(24) +#define GEN12_GRDOM_GSC REG_BIT(21) #define GEN11_GRDOM_SFC3 REG_BIT(20) #define GEN11_GRDOM_SFC2 REG_BIT(19) #define GEN11_GRDOM_SFC1 REG_BIT(18) @@ -799,12 +807,9 @@ #define GEN6_RP_DOWN_TIMEOUT _MMIO(0xa010) #define GEN6_RP_INTERRUPT_LIMITS _MMIO(0xa014) #define GEN6_RPSTAT1 _MMIO(0xa01c) -#define GEN6_CAGF_SHIFT 8 -#define HSW_CAGF_SHIFT 7 -#define GEN9_CAGF_SHIFT 23 -#define GEN6_CAGF_MASK (0x7f << GEN6_CAGF_SHIFT) -#define HSW_CAGF_MASK (0x7f << HSW_CAGF_SHIFT) -#define GEN9_CAGF_MASK (0x1ff << GEN9_CAGF_SHIFT) +#define GEN6_CAGF_MASK REG_GENMASK(14, 8) +#define HSW_CAGF_MASK REG_GENMASK(13, 7) +#define GEN9_CAGF_MASK REG_GENMASK(31, 23) #define GEN6_RP_CONTROL _MMIO(0xa024) #define GEN6_RP_MEDIA_TURBO (1 << 11) #define GEN6_RP_MEDIA_MODE_MASK (3 << 9) @@ -1376,8 +1381,7 @@ #define MEMSTAT_ILK _MMIO(0x111f8) #define MEMSTAT_VID_MASK 0x7f00 #define MEMSTAT_VID_SHIFT 8 -#define MEMSTAT_PSTATE_MASK 0x00f8 -#define MEMSTAT_PSTATE_SHIFT 3 +#define MEMSTAT_PSTATE_MASK REG_GENMASK(7, 3) #define MEMSTAT_MON_ACTV (1 << 2) #define MEMSTAT_SRC_CTL_MASK 0x0003 #define MEMSTAT_SRC_CTL_CORE 0 @@ -1518,6 +1522,8 @@ #define FORCEWAKE_MEDIA_VLV _MMIO(0x1300b8) #define FORCEWAKE_ACK_MEDIA_VLV _MMIO(0x1300bc) +#define MTL_MEDIA_MC6 _MMIO(0x138048) + #define GEN6_GT_THREAD_STATUS_REG _MMIO(0x13805c) #define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7 @@ -1549,11 +1555,13 @@ #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) +#define GEN12_CAGF_MASK REG_GENMASK(19, 11) #define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) #define GEN11_GUNIT (28) #define GEN11_GUC (25) +#define MTL_MGUC (24) #define GEN11_WDPERF (20) #define GEN11_KCR (19) #define GEN11_GTPM (16) @@ -1608,6 +1616,7 @@ #define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) #define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4) #define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) +#define MTL_GUC_MGUC_INTR_MASK _MMIO(0x1900e8) /* MTL+ */ #define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) #define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0) #define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 2b5f05b31187..cf71305ad586 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -165,13 +165,13 @@ sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr, INTEL_GT_ATTR_RO(_name) #ifdef CONFIG_PM -static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) +static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id) { intel_wakeref_t wakeref; u64 res = 0; with_intel_runtime_pm(gt->uncore->rpm, wakeref) - res = intel_rc6_residency_us(>->rc6, reg); + res = intel_rc6_residency_us(>->rc6, id); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -210,22 +210,22 @@ static ssize_t rc6_enable_dev_show(struct device *dev, static u32 __rc6_residency_ms_show(struct intel_gt *gt) { - return get_residency(gt, GEN6_GT_GFX_RC6); + return get_residency(gt, INTEL_RC6_RES_RC6); } static u32 __rc6p_residency_ms_show(struct intel_gt *gt) { - return get_residency(gt, GEN6_GT_GFX_RC6p); + return get_residency(gt, INTEL_RC6_RES_RC6p); } static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) { - return get_residency(gt, GEN6_GT_GFX_RC6pp); + return get_residency(gt, INTEL_RC6_RES_RC6pp); } static u32 __media_rc6_residency_ms_show(struct intel_gt *gt) { - return get_residency(gt, VLV_GT_MEDIA_RC6); + return get_residency(gt, INTEL_RC6_RES_VLV_MEDIA); } INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a0cc73b401ef..c1d9cd255e06 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -31,6 +31,7 @@ #include "intel_migrate_types.h" #include "intel_wakeref.h" #include "pxp/intel_pxp_types.h" +#include "intel_wopcm.h" struct drm_i915_private; struct i915_ggtt; @@ -101,6 +102,7 @@ struct intel_gt { struct intel_uc uc; struct intel_gsc gsc; + struct intel_wopcm wopcm; struct { /* Serialize global tlb invalidations */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index b405a04135ca..5fb74e71f27b 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -342,6 +342,16 @@ static int emit_no_arbitration(struct i915_request *rq) return 0; } +static int max_pte_pkt_size(struct i915_request *rq, int pkt) +{ + struct intel_ring *ring = rq->ring; + + pkt = min_t(int, pkt, (ring->space - rq->reserved_space) / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + return pkt; +} + static int emit_pte(struct i915_request *rq, struct sgt_dma *it, enum i915_cache_level cache_level, @@ -388,8 +398,7 @@ static int emit_pte(struct i915_request *rq, return PTR_ERR(cs); /* Pack as many PTE updates as possible into a single MI command */ - pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_length); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ @@ -422,8 +431,7 @@ static int emit_pte(struct i915_request *rq, } } - pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_rem); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); @@ -829,14 +837,35 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - /* - * While we can't always restore/manage the CCS state, - * we still need to ensure we don't leak the CCS state - * from the previous user, so make sure we overwrite it - * with something. - */ - err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS, - dst_offset, DIRECT_ACCESS, len); + if (src_is_lmem) { + /* + * If the src is already in lmem, then we must + * be doing an lmem -> lmem transfer, and so + * should be safe to directly copy the CCS + * state. In this case we have either + * initialised the CCS aux state when first + * clearing the pages (since it is already + * allocated in lmem), or the user has + * potentially populated it, in which case we + * need to copy the CCS state as-is. + */ + err = emit_copy_ccs(rq, + dst_offset, INDIRECT_ACCESS, + src_offset, INDIRECT_ACCESS, + len); + } else { + /* + * While we can't always restore/manage the CCS + * state, we still need to ensure we don't leak + * the CCS state from the previous user, so make + * sure we overwrite it with something. + */ + err = emit_copy_ccs(rq, + dst_offset, INDIRECT_ACCESS, + dst_offset, DIRECT_ACCESS, + len); + } + if (err) goto out_rq; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index f8d0523f4c18..2ee4051e4d96 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -551,6 +551,23 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); } +static void rc6_res_reg_init(struct intel_rc6 *rc6) +{ + memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); + + switch (rc6_to_gt(rc6)->type) { + case GT_MEDIA: + rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; + break; + default: + rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; + rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; + rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; + rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; + break; + } +} + void intel_rc6_init(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); @@ -562,6 +579,8 @@ void intel_rc6_init(struct intel_rc6 *rc6) if (!rc6_supported(rc6)) return; + rc6_res_reg_init(rc6); + if (IS_CHERRYVIEW(i915)) err = chv_rc6_init(rc6); else if (IS_VALLEYVIEW(i915)) @@ -736,31 +755,19 @@ static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) return lower | (u64)upper << 8; } -u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, enum intel_rc6_res_type id) { struct drm_i915_private *i915 = rc6_to_i915(rc6); struct intel_uncore *uncore = rc6_to_uncore(rc6); u64 time_hw, prev_hw, overflow_hw; + i915_reg_t reg = rc6->res_reg[id]; unsigned int fw_domains; unsigned long flags; - unsigned int i; u32 mul, div; if (!rc6->supported) return 0; - /* - * Store previous hw counter values for counter wrap-around handling. - * - * There are only four interesting registers and they live next to each - * other so we can use the relative address, compared to the smallest - * one as the index into driver storage. - */ - i = (i915_mmio_reg_offset(reg) - - i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); - if (drm_WARN_ON_ONCE(&i915->drm, i >= ARRAY_SIZE(rc6->cur_residency))) - return 0; - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); spin_lock_irqsave(&uncore->lock, flags); @@ -789,11 +796,11 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) /* * Counter wrap handling. * - * But relying on a sufficient frequency of queries otherwise counters - * can still wrap. + * Store previous hw counter values for counter wrap-around handling. But + * relying on a sufficient frequency of queries otherwise counters can still wrap. */ - prev_hw = rc6->prev_hw_residency[i]; - rc6->prev_hw_residency[i] = time_hw; + prev_hw = rc6->prev_hw_residency[id]; + rc6->prev_hw_residency[id] = time_hw; /* RC6 delta from last sample. */ if (time_hw >= prev_hw) @@ -802,8 +809,8 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) time_hw += overflow_hw - prev_hw; /* Add delta to RC6 extended raw driver copy. */ - time_hw += rc6->cur_residency[i]; - rc6->cur_residency[i] = time_hw; + time_hw += rc6->cur_residency[id]; + rc6->cur_residency[id] = time_hw; intel_uncore_forcewake_put__locked(uncore, fw_domains); spin_unlock_irqrestore(&uncore->lock, flags); @@ -811,9 +818,22 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) return mul_u64_u32_div(time_hw, mul, div); } -u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, enum intel_rc6_res_type id) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, id), 1000); +} + +void intel_rc6_print_residency(struct seq_file *m, const char *title, + enum intel_rc6_res_type id) { - return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); + struct intel_gt *gt = m->private; + i915_reg_t reg = gt->rc6.res_reg[id]; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(gt->uncore, reg), + intel_rc6_residency_us(>->rc6, id)); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index b6fea71afc22..456fa668a276 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -6,10 +6,11 @@ #ifndef INTEL_RC6_H #define INTEL_RC6_H -#include "i915_reg_defs.h" +#include <linux/types.h> -struct intel_engine_cs; +enum intel_rc6_res_type; struct intel_rc6; +struct seq_file; void intel_rc6_init(struct intel_rc6 *rc6); void intel_rc6_fini(struct intel_rc6 *rc6); @@ -21,7 +22,9 @@ void intel_rc6_sanitize(struct intel_rc6 *rc6); void intel_rc6_enable(struct intel_rc6 *rc6); void intel_rc6_disable(struct intel_rc6 *rc6); -u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); -u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, enum intel_rc6_res_type id); +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, enum intel_rc6_res_type id); +void intel_rc6_print_residency(struct seq_file *m, const char *title, + enum intel_rc6_res_type id); #endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index e747492b2f46..fa23c4dce00b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -13,9 +13,20 @@ struct drm_i915_gem_object; +/* RC6 residency types */ +enum intel_rc6_res_type { + INTEL_RC6_RES_RC6_LOCKED, + INTEL_RC6_RES_RC6, + INTEL_RC6_RES_RC6p, + INTEL_RC6_RES_RC6pp, + INTEL_RC6_RES_MAX, + INTEL_RC6_RES_VLV_MEDIA = INTEL_RC6_RES_RC6p, +}; + struct intel_rc6 { - u64 prev_hw_residency[4]; - u64 cur_residency[4]; + i915_reg_t res_reg[INTEL_RC6_RES_MAX]; + u64 prev_hw_residency[INTEL_RC6_RES_MAX]; + u64 cur_residency[INTEL_RC6_RES_MAX]; u32 ctl_enable; diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 5121e6dc2fa5..9c1ae070ee7b 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -215,9 +215,7 @@ int intel_renderstate_emit(struct intel_renderstate *so, if (!so->vma) return 0; - err = i915_request_await_object(rq, so->vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(so->vma, rq, 0); + err = i915_vma_move_to_active(so->vma, rq, 0); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 3159df6cdd49..24736ebee17c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1407,15 +1407,19 @@ out: intel_runtime_pm_put(gt->uncore->rpm, wakeref); } -int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) +static int _intel_gt_reset_lock(struct intel_gt *gt, int *srcu, bool retry) { might_lock(>->reset.backoff_srcu); - might_sleep(); + if (retry) + might_sleep(); rcu_read_lock(); while (test_bit(I915_RESET_BACKOFF, >->reset.flags)) { rcu_read_unlock(); + if (!retry) + return -EBUSY; + if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, >->reset.flags))) @@ -1429,6 +1433,16 @@ int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) return 0; } +int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) +{ + return _intel_gt_reset_lock(gt, srcu, false); +} + +int intel_gt_reset_lock_interruptible(struct intel_gt *gt, int *srcu) +{ + return _intel_gt_reset_lock(gt, srcu, true); +} + void intel_gt_reset_unlock(struct intel_gt *gt, int tag) __releases(>->reset.backoff_srcu) { diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index adc734e67387..25c975b6e8fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -39,6 +39,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, void __i915_request_reset(struct i915_request *rq, bool guilty); int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu); +int __must_check intel_gt_reset_lock_interruptible(struct intel_gt *gt, int *srcu); void intel_gt_reset_unlock(struct intel_gt *gt, int tag); void intel_gt_set_wedged(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 6c34a83c24b3..9ad3bc7201cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -430,7 +430,8 @@ static int __gen5_rps_set(struct intel_rps *rps, u8 val) rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { - DRM_DEBUG("gpu busy, RCS change rejected\n"); + drm_dbg(&rps_to_i915(rps)->drm, + "gpu busy, RCS change rejected\n"); return -EBUSY; /* still busy with another command */ } @@ -1953,7 +1954,8 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); + drm_dbg(&rps_to_i915(rps)->drm, + "Command parser error, pm_iir 0x%08x\n", pm_iir); } void gen5_rps_irq_handler(struct intel_rps *rps) @@ -2072,22 +2074,45 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } +u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); +} + +u32 intel_rps_read_rpstat(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); +} + u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - cagf = (rpstat >> 8) & 0xff; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat); + else if (GRAPHICS_VER(i915) >= 12) + cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + cagf = REG_FIELD_GET(RPE_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 6) - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat); else - cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >> - MEMSTAT_PSTATE_SHIFT); + cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat)); return cagf; } @@ -2098,7 +2123,15 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + /* + * For Gen12+ reading freq from HW does not need a forcewake and + * registers will return 0 freq when GT is in RC6 + */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + } else if (GRAPHICS_VER(i915) >= 12) { + freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); @@ -2264,7 +2297,7 @@ static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpstat = intel_rps_read_rpstat(rps); rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; @@ -2399,7 +2432,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) drm_printf(p, "PM MASK=0x%08x\n", pm_mask); drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); - drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); + drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps)); drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, caps.min_freq)); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 110300dfd438..9e1cad9ba0e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -48,6 +48,8 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); +u32 intel_rps_read_rpstat(struct intel_rps *rps); +u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); void intel_rps_raise_unslice(struct intel_rps *rps); void intel_rps_lower_unslice(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/gt/intel_wopcm.c index 322fb9eeb880..7ebbcc191c2d 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/gt/intel_wopcm.c @@ -64,9 +64,9 @@ #define GEN9_GUC_FW_RESERVED SZ_128K #define GEN9_GUC_WOPCM_OFFSET (GUC_WOPCM_RESERVED + GEN9_GUC_FW_RESERVED) -static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm) +static inline struct intel_gt *wopcm_to_gt(struct intel_wopcm *wopcm) { - return container_of(wopcm, struct drm_i915_private, wopcm); + return container_of(wopcm, struct intel_gt, wopcm); } /** @@ -77,7 +77,8 @@ static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm) */ void intel_wopcm_init_early(struct intel_wopcm *wopcm) { - struct drm_i915_private *i915 = wopcm_to_i915(wopcm); + struct intel_gt *gt = wopcm_to_gt(wopcm); + struct drm_i915_private *i915 = gt->i915; if (!HAS_GT_UC(i915)) return; @@ -157,10 +158,11 @@ static bool check_hw_restrictions(struct drm_i915_private *i915, return true; } -static bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size, +static bool __check_layout(struct intel_gt *gt, u32 wopcm_size, u32 guc_wopcm_base, u32 guc_wopcm_size, u32 guc_fw_size, u32 huc_fw_size) { + struct drm_i915_private *i915 = gt->i915; const u32 ctx_rsvd = context_reserved_size(i915); u32 size; @@ -181,12 +183,14 @@ static bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size, return false; } - size = huc_fw_size + WOPCM_RESERVED_SIZE; - if (unlikely(guc_wopcm_base < size)) { - drm_err(&i915->drm, "WOPCM: no space for %s: %uK < %uK\n", - intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), - guc_wopcm_base / SZ_1K, size / SZ_1K); - return false; + if (intel_uc_supports_huc(>->uc)) { + size = huc_fw_size + WOPCM_RESERVED_SIZE; + if (unlikely(guc_wopcm_base < size)) { + drm_err(&i915->drm, "WOPCM: no space for %s: %uK < %uK\n", + intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), + guc_wopcm_base / SZ_1K, size / SZ_1K); + return false; + } } return check_hw_restrictions(i915, guc_wopcm_base, guc_wopcm_size, @@ -228,8 +232,8 @@ static bool __wopcm_regs_writable(struct intel_uncore *uncore) */ void intel_wopcm_init(struct intel_wopcm *wopcm) { - struct drm_i915_private *i915 = wopcm_to_i915(wopcm); - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt = wopcm_to_gt(wopcm); + struct drm_i915_private *i915 = gt->i915; u32 guc_fw_size = intel_uc_fw_get_upload_size(>->uc.guc.fw); u32 huc_fw_size = intel_uc_fw_get_upload_size(>->uc.huc.fw); u32 ctx_rsvd = context_reserved_size(i915); @@ -275,6 +279,19 @@ void intel_wopcm_init(struct intel_wopcm *wopcm) } /* + * On platforms with a media GT, the WOPCM is partitioned between the + * two GTs, so we would have to take that into account when doing the + * math below. There is also a new section reserved for the GSC context + * that would have to be factored in. However, all platforms with a + * media GT also have GuC depriv enabled, so the WOPCM regs are + * pre-locked and therefore we don't have to do the math ourselves. + */ + if (unlikely(i915->media_gt)) { + drm_err(&i915->drm, "Unlocked WOPCM regs with media GT\n"); + return; + } + + /* * Aligned value of guc_wopcm_base will determine available WOPCM space * for HuC firmware and mandatory reserved area. */ @@ -295,7 +312,7 @@ void intel_wopcm_init(struct intel_wopcm *wopcm) guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); check: - if (__check_layout(i915, wopcm_size, guc_wopcm_base, guc_wopcm_size, + if (__check_layout(gt, wopcm_size, guc_wopcm_base, guc_wopcm_size, guc_fw_size, huc_fw_size)) { wopcm->guc.base = guc_wopcm_base; wopcm->guc.size = guc_wopcm_size; diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/gt/intel_wopcm.h index 17d6aa86008a..17d6aa86008a 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.h +++ b/drivers/gpu/drm/i915/gt/intel_wopcm.h diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ffc80d2e9952..2afb4f80a954 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -18,46 +18,68 @@ /** * DOC: Hardware workarounds * - * This file is intended as a central place to implement most [1]_ of the - * required workarounds for hardware to work as originally intended. They fall - * in five basic categories depending on how/when they are applied: + * Hardware workarounds are register programming documented to be executed in + * the driver that fall outside of the normal programming sequences for a + * platform. There are some basic categories of workarounds, depending on + * how/when they are applied: * - * - Workarounds that touch registers that are saved/restored to/from the HW - * context image. The list is emitted (via Load Register Immediate commands) - * everytime a new context is created. - * - GT workarounds. The list of these WAs is applied whenever these registers - * revert to default values (on GPU reset, suspend/resume [2]_, etc..). - * - Display workarounds. The list is applied during display clock-gating - * initialization. - * - Workarounds that whitelist a privileged register, so that UMDs can manage - * them directly. This is just a special case of a MMMIO workaround (as we - * write the list of these to/be-whitelisted registers to some special HW - * registers). - * - Workaround batchbuffers, that get executed automatically by the hardware - * on every HW context restore. + * - Context workarounds: workarounds that touch registers that are + * saved/restored to/from the HW context image. The list is emitted (via Load + * Register Immediate commands) once when initializing the device and saved in + * the default context. That default context is then used on every context + * creation to have a "primed golden context", i.e. a context image that + * already contains the changes needed to all the registers. * - * .. [1] Please notice that there are other WAs that, due to their nature, - * cannot be applied from a central place. Those are peppered around the rest - * of the code, as needed. + * - Engine workarounds: the list of these WAs is applied whenever the specific + * engine is reset. It's also possible that a set of engine classes share a + * common power domain and they are reset together. This happens on some + * platforms with render and compute engines. In this case (at least) one of + * them need to keeep the workaround programming: the approach taken in the + * driver is to tie those workarounds to the first compute/render engine that + * is registered. When executing with GuC submission, engine resets are + * outside of kernel driver control, hence the list of registers involved in + * written once, on engine initialization, and then passed to GuC, that + * saves/restores their values before/after the reset takes place. See + * ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference. * - * .. [2] Technically, some registers are powercontext saved & restored, so they - * survive a suspend/resume. In practice, writing them again is not too - * costly and simplifies things. We can revisit this in the future. + * - GT workarounds: the list of these WAs is applied whenever these registers + * revert to their default values: on GPU reset, suspend/resume [1]_, etc. + * + * - Register whitelist: some workarounds need to be implemented in userspace, + * but need to touch privileged registers. The whitelist in the kernel + * instructs the hardware to allow the access to happen. From the kernel side, + * this is just a special case of a MMIO workaround (as we write the list of + * these to/be-whitelisted registers to some special HW registers). + * + * - Workaround batchbuffers: buffers that get executed automatically by the + * hardware on every HW context restore. These buffers are created and + * programmed in the default context so the hardware always go through those + * programming sequences when switching contexts. The support for workaround + * batchbuffers is enabled these hardware mechanisms: * - * Layout - * ~~~~~~ + * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default + * context, pointing the hardware to jump to that location when that offset + * is reached in the context restore. Workaround batchbuffer in the driver + * currently uses this mechanism for all platforms. * - * Keep things in this file ordered by WA type, as per the above (context, GT, - * display, register whitelist, batchbuffer). Then, inside each type, keep the - * following order: + * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context, + * pointing the hardware to a buffer to continue executing after the + * engine registers are restored in a context restore sequence. This is + * currently not used in the driver. * - * - Infrastructure functions and macros - * - WAs per platform in standard gen/chrono order - * - Public functions to init or apply the given workaround type. + * - Other: There are WAs that, due to their nature, cannot be applied from a + * central place. Those are peppered around the rest of the code, as needed. + * Workarounds related to the display IP are the main example. + * + * .. [1] Technically, some registers are powercontext saved & restored, so they + * survive a suspend/resume. In practice, writing them again is not too + * costly and simplifies things, so it's the approach taken in the driver. */ -static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name) +static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt, + const char *name, const char *engine_name) { + wal->gt = gt; wal->name = name; wal->engine_name = engine_name; } @@ -81,13 +103,14 @@ static void wa_init_finish(struct i915_wa_list *wal) if (!wal->count) return; - DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n", - wal->wa_count, wal->name, wal->engine_name); + drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n", + wal->wa_count, wal->name, wal->engine_name); } static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) { unsigned int addr = i915_mmio_reg_offset(wa->reg); + struct drm_i915_private *i915 = wal->gt->i915; unsigned int start = 0, end = wal->count; const unsigned int grow = WA_LIST_CHUNK; struct i915_wa *wa_; @@ -100,7 +123,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), GFP_KERNEL); if (!list) { - DRM_ERROR("No space for workaround init!\n"); + drm_err(&i915->drm, "No space for workaround init!\n"); return; } @@ -123,9 +146,10 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) wa_ = &wal->list[mid]; if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) { - DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n", - i915_mmio_reg_offset(wa_->reg), - wa_->clr, wa_->set); + drm_err(&i915->drm, + "Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n", + i915_mmio_reg_offset(wa_->reg), + wa_->clr, wa_->set); wa_->set &= ~wa->clr; } @@ -827,7 +851,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, { struct drm_i915_private *i915 = engine->i915; - wa_init_start(wal, name, engine->name); + wa_init_start(wal, engine->gt, name, engine->name); /* Applies to all engines */ /* @@ -1677,7 +1701,7 @@ void intel_gt_init_workarounds(struct intel_gt *gt) { struct i915_wa_list *wal = >->wa_list; - wa_init_start(wal, "GT", "global"); + wa_init_start(wal, gt, "GT", "global"); gt_init_workarounds(gt, wal); wa_init_finish(wal); } @@ -1699,12 +1723,14 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) } static bool -wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) +wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur, + const char *name, const char *from) { if ((cur ^ wa->set) & wa->read) { - DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), - cur, cur & wa->read, wa->set & wa->read); + drm_err(>->i915->drm, + "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n", + name, from, i915_mmio_reg_offset(wa->reg), + cur, cur & wa->read, wa->set & wa->read); return false; } @@ -1712,9 +1738,9 @@ wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) return true; } -static void -wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) +static void wa_list_apply(const struct i915_wa_list *wal) { + struct intel_gt *gt = wal->gt; struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw; unsigned long flags; @@ -1750,7 +1776,7 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : intel_uncore_read_fw(uncore, wa->reg); - wa_verify(wa, val, wal->name, "application"); + wa_verify(gt, wa, val, wal->name, "application"); } } @@ -1760,7 +1786,7 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(gt, >->wa_list); + wa_list_apply(>->wa_list); } static bool wa_list_verify(struct intel_gt *gt, @@ -1780,7 +1806,7 @@ static bool wa_list_verify(struct intel_gt *gt, intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - ok &= wa_verify(wa, wa->is_mcr ? + ok &= wa_verify(wal->gt, wa, wa->is_mcr ? intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : intel_uncore_read_fw(uncore, wa->reg), wal->name, from); @@ -2128,7 +2154,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct i915_wa_list *w = &engine->whitelist; - wa_init_start(w, "whitelist", engine->name); + wa_init_start(w, engine->gt, "whitelist", engine->name); if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); @@ -2985,7 +3011,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) + if (GRAPHICS_VER(engine->i915) < 4) return; engine_fake_wa_init(engine, wal); @@ -3010,17 +3036,14 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (GRAPHICS_VER(engine->i915) < 4) - return; - - wa_init_start(wal, "engine", engine->name); + wa_init_start(wal, engine->gt, "engine", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); } void intel_engine_apply_workarounds(struct intel_engine_cs *engine) { - wa_list_apply(engine->gt, &engine->wa_list); + wa_list_apply(&engine->wa_list); } static const struct i915_range mcr_ranges_gen8[] = { @@ -3164,9 +3187,7 @@ retry: goto err_vma; } - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); if (err == 0) err = wa_list_srm(rq, wal, vma); @@ -3194,7 +3215,7 @@ retry: if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg))) continue; - if (!wa_verify(wa, results[i], wal->name, from)) + if (!wa_verify(wal->gt, wa, results[i], wal->name, from)) err = -ENXIO; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index 7c8b01d00043..e14188120e66 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -10,6 +10,8 @@ #include "i915_reg_defs.h" +struct intel_gt; + struct i915_wa { union { i915_reg_t reg; @@ -24,6 +26,7 @@ struct i915_wa { }; struct i915_wa_list { + struct intel_gt *gt; const char *name; const char *engine_name; struct i915_wa *list; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 0dcb3ed44a73..87c94314cf67 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -317,7 +317,7 @@ static int live_engine_busy_stats(void *arg) ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); de = intel_engine_get_busy_time(engine, &t[0]); - mdelay(10); + mdelay(100); de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); dt = ktime_sub(t[1], t[0]); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 2c7c053a8808..ab2e9a6a2452 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2764,9 +2764,7 @@ static int create_gang(struct intel_engine_cs *engine, i915_request_get(rq); i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, vma->node.start, @@ -3180,15 +3178,11 @@ create_gpr_client(struct intel_engine_cs *engine, } i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); i915_vma_unlock(vma); i915_vma_lock(batch); if (!err) - err = i915_request_await_object(rq, batch->obj, false); - if (!err) err = i915_vma_move_to_active(batch, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, @@ -3521,9 +3515,7 @@ static int smoke_submit(struct preempt_smoke *smoke, if (vma) { i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, vma->node.start, diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 71263058a7b0..bc05ef48c194 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -99,22 +99,6 @@ static u64 hws_address(const struct i915_vma *hws, return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); } -static int move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) -{ - int err; - - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, - flags & EXEC_OBJECT_WRITE); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, flags); - i915_vma_unlock(vma); - - return err; -} - static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { @@ -175,11 +159,11 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) goto unpin_hws; } - err = move_to_active(vma, rq, 0); + err = igt_vma_move_to_active_unlocked(vma, rq, 0); if (err) goto cancel_rq; - err = move_to_active(hws, rq, 0); + err = igt_vma_move_to_active_unlocked(hws, rq, 0); if (err) goto cancel_rq; @@ -1519,18 +1503,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, } } - i915_vma_lock(arg.vma); - err = i915_request_await_object(rq, arg.vma->obj, - flags & EXEC_OBJECT_WRITE); - if (err == 0) { - err = i915_vma_move_to_active(arg.vma, rq, flags); - if (err) - pr_err("[%s] Move to active failed: %d!\n", engine->name, err); - } else { - pr_err("[%s] Request await failed: %d!\n", engine->name, err); - } - - i915_vma_unlock(arg.vma); + err = igt_vma_move_to_active_unlocked(arg.vma, rq, flags); + if (err) + pr_err("[%s] Move to active failed: %d!\n", engine->name, err); if (flags & EXEC_OBJECT_NEEDS_FENCE) i915_vma_unpin_fence(arg.vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 82d3f8058995..7c56ffd2c659 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -452,9 +452,7 @@ retry: *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); *cs++ = 0; - err = i915_request_await_object(rq, scratch->obj, true); - if (!err) - err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); i915_request_get(rq); i915_request_add(rq); @@ -602,9 +600,7 @@ __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) } i915_vma_lock(scratch); - err = i915_request_await_object(rq, scratch->obj, true); - if (!err) - err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(scratch); i915_request_get(rq); @@ -1053,21 +1049,6 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) return batch; } -static int move_to_active(struct i915_request *rq, - struct i915_vma *vma, - unsigned int flags) -{ - int err; - - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, flags); - if (!err) - err = i915_vma_move_to_active(vma, rq, flags); - i915_vma_unlock(vma); - - return err; -} - static struct i915_request * record_registers(struct intel_context *ce, struct i915_vma *before, @@ -1093,19 +1074,19 @@ record_registers(struct intel_context *ce, if (IS_ERR(rq)) goto err_after; - err = move_to_active(rq, before, EXEC_OBJECT_WRITE); + err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE); if (err) goto err_rq; - err = move_to_active(rq, b_before, 0); + err = igt_vma_move_to_active_unlocked(b_before, rq, 0); if (err) goto err_rq; - err = move_to_active(rq, after, EXEC_OBJECT_WRITE); + err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE); if (err) goto err_rq; - err = move_to_active(rq, b_after, 0); + err = igt_vma_move_to_active_unlocked(b_after, rq, 0); if (err) goto err_rq; @@ -1243,7 +1224,7 @@ static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) goto err_batch; } - err = move_to_active(rq, batch, 0); + err = igt_vma_move_to_active_unlocked(batch, rq, 0); if (err) goto err_rq; diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index c1d861333c44..f27cc28608d4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -7,6 +7,7 @@ #include "gt/intel_gpu_commands.h" #include "i915_selftest.h" +#include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" @@ -228,9 +229,7 @@ static int check_mocs_engine(struct live_mocs *arg, return PTR_ERR(rq); i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, true); - if (!err) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); /* Read the mocs tables back using SRM */ diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 8c70b7e12074..2ceeadecc639 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -19,11 +19,11 @@ static u64 rc6_residency(struct intel_rc6 *rc6) /* XXX VLV_GT_MEDIA_RC6? */ - result = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + result = intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6); if (HAS_RC6p(rc6_to_i915(rc6))) - result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6p); + result += intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6p); if (HAS_RC6pp(rc6_to_i915(rc6))) - result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6pp); + result += intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6pp); return result; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 99a372486fb7..39f1b7564170 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -652,9 +652,7 @@ int live_rps_frequency_cs(void *arg) goto err_vma; } - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, vma->node.start, @@ -793,9 +791,7 @@ int live_rps_frequency_srm(void *arg) goto err_vma; } - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, vma->node.start, diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 82ec95a299f6..bd44ce73a504 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -13,6 +13,14 @@ enum test_type { VARY_MAX, MAX_GRANTED, SLPC_POWER, + TILE_INTERACTION, +}; + +struct slpc_thread { + struct kthread_worker *worker; + struct kthread_work work; + struct intel_gt *gt; + int result; }; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) @@ -212,7 +220,8 @@ static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, *max_act_freq = intel_rps_read_actual_frequency(rps); if (*max_act_freq != slpc->rp0_freq) { /* Check if there was some throttling by pcode */ - perf_limit_reasons = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS); + perf_limit_reasons = intel_uncore_read(gt->uncore, + intel_gt_perf_limit_reasons_reg(gt)); /* If not, this is an error */ if (!(perf_limit_reasons & GT0_PERF_LIMIT_REASONS_MASK)) { @@ -310,9 +319,10 @@ static int run_test(struct intel_gt *gt, int test_type) break; case MAX_GRANTED: + case TILE_INTERACTION: /* Media engines have a different RP0 */ - if (engine->class == VIDEO_DECODE_CLASS || - engine->class == VIDEO_ENHANCEMENT_CLASS) { + if (gt->type != GT_MEDIA && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS)) { igt_spinner_end(&spin); st_engine_heartbeat_enable(engine); err = 0; @@ -335,7 +345,8 @@ static int run_test(struct intel_gt *gt, int test_type) if (max_act_freq <= slpc->min_freq) { pr_err("Actual freq did not rise above min\n"); pr_err("Perf Limit Reasons: 0x%x\n", - intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); + intel_uncore_read(gt->uncore, + intel_gt_perf_limit_reasons_reg(gt))); err = -EINVAL; } } @@ -426,6 +437,56 @@ static int live_slpc_power(void *arg) return ret; } +static void slpc_spinner_thread(struct kthread_work *work) +{ + struct slpc_thread *thread = container_of(work, typeof(*thread), work); + + thread->result = run_test(thread->gt, TILE_INTERACTION); +} + +static int live_slpc_tile_interaction(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt; + struct slpc_thread *threads; + int i = 0, ret = 0; + + threads = kcalloc(I915_MAX_GT, sizeof(*threads), GFP_KERNEL); + if (!threads) + return -ENOMEM; + + for_each_gt(gt, i915, i) { + threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id); + + if (IS_ERR(threads[i].worker)) { + ret = PTR_ERR(threads[i].worker); + break; + } + + threads[i].gt = gt; + kthread_init_work(&threads[i].work, slpc_spinner_thread); + kthread_queue_work(threads[i].worker, &threads[i].work); + } + + for_each_gt(gt, i915, i) { + int status; + + if (IS_ERR_OR_NULL(threads[i].worker)) + continue; + + kthread_flush_work(&threads[i].work); + status = READ_ONCE(threads[i].result); + if (status && !ret) { + pr_err("%s GT %d failed ", __func__, gt->info.id); + ret = status; + } + kthread_destroy_worker(threads[i].worker); + } + + kfree(threads); + return ret; +} + int intel_slpc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -433,6 +494,7 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_slpc_vary_min), SUBTEST(live_slpc_max_granted), SUBTEST(live_slpc_power), + SUBTEST(live_slpc_tile_interaction), }; struct intel_gt *gt; diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 21b1edc052f8..96e3861706d6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -66,14 +66,14 @@ reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) memset(lists, 0, sizeof(*lists)); - wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); + wa_init_start(&lists->gt_wa_list, gt, "GT_REF", "global"); gt_init_workarounds(gt, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); for_each_engine(engine, gt, id) { struct i915_wa_list *wal = &lists->engine[id].wa_list; - wa_init_start(wal, "REF", engine->name); + wa_init_start(wal, gt, "REF", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); @@ -139,9 +139,7 @@ read_nonprivs(struct intel_context *ce) } i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto err_req; @@ -632,16 +630,12 @@ retry: goto err_request; } - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_vma_move_to_active(batch, rq, 0); if (err) goto err_request; - err = i915_request_await_object(rq, scratch->obj, true); - if (err == 0) - err = i915_vma_move_to_active(scratch, rq, - EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(scratch, rq, + EXEC_OBJECT_WRITE); if (err) goto err_request; @@ -860,9 +854,7 @@ static int read_whitelisted_registers(struct intel_context *ce, return PTR_ERR(rq); i915_vma_lock(results); - err = i915_request_await_object(rq, results->obj, true); - if (err == 0) - err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); + err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(results); if (err) goto err_req; @@ -944,9 +936,7 @@ static int scrub_whitelisted_registers(struct intel_context *ce) } i915_vma_lock(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto err_request; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 27b09ba1d295..52aede324788 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -98,6 +98,8 @@ static void gen9_enable_guc_interrupts(struct intel_guc *guc) gt->pm_guc_events); gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); spin_unlock_irq(gt->irq_lock); + + guc->interrupts.enabled = true; } static void gen9_disable_guc_interrupts(struct intel_guc *guc) @@ -105,6 +107,7 @@ static void gen9_disable_guc_interrupts(struct intel_guc *guc) struct intel_gt *gt = guc_to_gt(guc); assert_rpm_wakelock_held(>->i915->runtime_pm); + guc->interrupts.enabled = false; spin_lock_irq(gt->irq_lock); @@ -116,39 +119,39 @@ static void gen9_disable_guc_interrupts(struct intel_guc *guc) gen9_reset_guc_interrupts(guc); } +static bool __gen11_reset_guc_interrupts(struct intel_gt *gt) +{ + u32 irq = gt->type == GT_MEDIA ? MTL_MGUC : GEN11_GUC; + + lockdep_assert_held(gt->irq_lock); + return gen11_gt_reset_one_iir(gt, 0, irq); +} + static void gen11_reset_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); spin_lock_irq(gt->irq_lock); - gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); + __gen11_reset_guc_interrupts(gt); spin_unlock_irq(gt->irq_lock); } static void gen11_enable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); spin_lock_irq(gt->irq_lock); - WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); - intel_uncore_write(gt->uncore, - GEN11_GUC_SG_INTR_ENABLE, events); - intel_uncore_write(gt->uncore, - GEN11_GUC_SG_INTR_MASK, ~events); + __gen11_reset_guc_interrupts(gt); spin_unlock_irq(gt->irq_lock); + + guc->interrupts.enabled = true; } static void gen11_disable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - spin_lock_irq(gt->irq_lock); - - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); - - spin_unlock_irq(gt->irq_lock); + guc->interrupts.enabled = false; intel_synchronize_irq(gt->i915); gen11_reset_guc_interrupts(guc); @@ -156,7 +159,8 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc) void intel_guc_init_early(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC); intel_guc_ct_init_early(&guc->ct); @@ -168,12 +172,17 @@ void intel_guc_init_early(struct intel_guc *guc) mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); if (GRAPHICS_VER(i915) >= 11) { - guc->notify_reg = GEN11_GUC_HOST_INTERRUPT; guc->interrupts.reset = gen11_reset_guc_interrupts; guc->interrupts.enable = gen11_enable_guc_interrupts; guc->interrupts.disable = gen11_disable_guc_interrupts; - guc->send_regs.base = - i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0)); + if (gt->type == GT_MEDIA) { + guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT; + guc->send_regs.base = i915_mmio_reg_offset(MEDIA_SOFT_SCRATCH(0)); + } else { + guc->notify_reg = GEN11_GUC_HOST_INTERRUPT; + guc->send_regs.base = i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0)); + } + guc->send_regs.count = GEN11_SOFT_SCRATCH_COUNT; } else { @@ -871,14 +880,14 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) u32 status = intel_uncore_read(uncore, GUC_STATUS); u32 i; - drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "GuC status 0x%08x:\n", status); drm_printf(p, "\tBootrom status = 0x%x\n", (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); drm_printf(p, "\tuKernel status = 0x%x\n", (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); drm_printf(p, "\tMIA Core status = 0x%x\n", (status & GS_MIA_MASK) >> GS_MIA_SHIFT); - drm_puts(p, "\nScratch registers:\n"); + drm_puts(p, "Scratch registers:\n"); for (i = 0; i < 16; i++) { drm_printf(p, "\t%2d: \t0x%x\n", i, intel_uncore_read(uncore, SOFT_SCRATCH(i))); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 357873ef692b..1bb3f9829286 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -78,6 +78,7 @@ struct intel_guc { /** @interrupts: pointers to GuC interrupt-managing functions. */ struct { + bool enabled; void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); @@ -330,9 +331,11 @@ retry: return err; } +/* Only call this from the interrupt handler code */ static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) { - intel_guc_ct_event_handler(&guc->ct); + if (guc->interrupts.enabled) + intel_guc_ct_event_handler(&guc->ct); } /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index a419d60166c8..a7f737c4792e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -488,6 +488,11 @@ static void fill_engine_enable_masks(struct intel_gt *gt, info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], BCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt)); + + /* The GSC engine is an instance (6) of OTHER_CLASS */ + if (gt->engine[GSC0]) + info_map_write(info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS], + BIT(gt->engine[GSC0]->instance)); } #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) @@ -529,9 +534,6 @@ static int guc_prep_golden_context(struct intel_guc *guc) } for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { - if (engine_class == OTHER_CLASS) - continue; - guc_class = engine_class_to_guc_class(engine_class); if (!info_map_read(&info_map, engine_enabled_masks[guc_class])) @@ -609,9 +611,6 @@ static void guc_init_golden_context(struct intel_guc *guc) addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { - if (engine_class == OTHER_CLASS) - continue; - guc_class = engine_class_to_guc_class(engine_class); if (!ads_blob_read(guc, system_info.engine_enabled_masks[guc_class])) continue; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 4e6dca707d94..1c1b85073b4b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -132,6 +132,11 @@ static const struct __guc_mmio_reg_descr xe_lpd_blt_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; +/* XE_LPD - GSC Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe_lpd_gsc_inst_regs[] = { + COMMON_BASE_ENGINE_INSTANCE, +}; + /* GEN9 - Global */ static const struct __guc_mmio_reg_descr default_global_regs[] = { COMMON_BASE_GLOBAL, @@ -165,7 +170,7 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = { } /* List of lists */ -static struct __guc_mmio_reg_descr_group default_lists[] = { +static const struct __guc_mmio_reg_descr_group default_lists[] = { MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), @@ -177,6 +182,8 @@ static struct __guc_mmio_reg_descr_group default_lists[] = { MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS), + MAKE_REGLIST(xe_lpd_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), {} }; @@ -192,6 +199,8 @@ static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = { MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS), + MAKE_REGLIST(xe_lpd_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), {} }; @@ -454,6 +463,8 @@ __stringify_engclass(u32 class) return "Blitter"; case GUC_COMPUTE_CLASS: return "Compute"; + case GUC_GSC_OTHER_CLASS: + return "GSC-Other"; default: break; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 968ebd79dce7..4ae5fc2f6002 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -47,7 +47,8 @@ #define GUC_VIDEOENHANCE_CLASS 2 #define GUC_BLITTER_CLASS 3 #define GUC_COMPUTE_CLASS 4 -#define GUC_LAST_ENGINE_CLASS GUC_COMPUTE_CLASS +#define GUC_GSC_OTHER_CLASS 5 +#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32 @@ -169,6 +170,7 @@ static u8 engine_class_guc_class_map[] = { [COPY_ENGINE_CLASS] = GUC_BLITTER_CLASS, [VIDEO_DECODE_CLASS] = GUC_VIDEO_CLASS, [VIDEO_ENHANCEMENT_CLASS] = GUC_VIDEOENHANCE_CLASS, + [OTHER_CLASS] = GUC_GSC_OTHER_CLASS, [COMPUTE_CLASS] = GUC_COMPUTE_CLASS, }; @@ -178,12 +180,13 @@ static u8 guc_class_engine_class_map[] = { [GUC_VIDEO_CLASS] = VIDEO_DECODE_CLASS, [GUC_VIDEOENHANCE_CLASS] = VIDEO_ENHANCEMENT_CLASS, [GUC_COMPUTE_CLASS] = COMPUTE_CLASS, + [GUC_GSC_OTHER_CLASS] = OTHER_CLASS, }; static inline u8 engine_class_to_guc_class(u8 class) { BUILD_BUG_ON(ARRAY_SIZE(engine_class_guc_class_map) != MAX_ENGINE_CLASS + 1); - GEM_BUG_ON(class > MAX_ENGINE_CLASS || class == OTHER_CLASS); + GEM_BUG_ON(class > MAX_ENGINE_CLASS); return engine_class_guc_class_map[class]; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c index 8f8dd05835c5..b5855091cf6a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -11,9 +11,20 @@ static bool __guc_rc_supported(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + + /* + * Wa_14017073508: mtl + * Do not enable gucrc to avoid additional interrupts which + * may disrupt pcode wa. + */ + if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && + gt->type == GT_MEDIA) + return false; + /* GuC RC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; + GRAPHICS_VER(gt->i915) >= 12; } static bool __guc_rc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index a7092f711e9c..9915de32e894 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -36,6 +36,7 @@ #define SOFT_SCRATCH_COUNT 16 #define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4) +#define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4) #define GEN11_SOFT_SCRATCH_COUNT 4 #define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) @@ -101,6 +102,7 @@ #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) #define GUC_SEND_TRIGGER (1<<0) #define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) +#define MEDIA_GUC_HOST_INTERRUPT _MMIO(0x190304) #define GEN12_GUC_SEM_INTR_ENABLES _MMIO(0xc71c) #define GUC_SEM_INTR_ROUTE_TO_GUC BIT(31) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1b8b8ad27f26..0a42f1807f52 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1402,7 +1402,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) /* * Synchronize with gt reset to make sure the worker does not - * corrupt the engine/guc stats. + * corrupt the engine/guc stats. NB: can't actually block waiting + * for a reset to complete as the reset requires flushing out + * this worker thread if started. So waiting would deadlock. */ ret = intel_gt_reset_trylock(gt, &srcu); if (ret) @@ -4112,6 +4114,9 @@ static inline void guc_kernel_context_pin(struct intel_guc *guc, if (context_guc_id_invalid(ce)) pin_guc_id(guc, ce); + if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) + guc_context_init(ce); + try_context_registration(ce, true); } @@ -4902,7 +4907,7 @@ void intel_guc_submission_print_info(struct intel_guc *guc, drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", atomic_read(&guc->outstanding_submission_g2h)); - drm_printf(p, "GuC tasklet count: %u\n\n", + drm_printf(p, "GuC tasklet count: %u\n", atomic_read(&sched_engine->tasklet.count)); spin_lock_irqsave(&sched_engine->lock, flags); @@ -4950,7 +4955,7 @@ static inline void guc_log_context(struct drm_printer *p, atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", atomic_read(&ce->guc_id.ref)); - drm_printf(p, "\t\tSchedule State: 0x%x\n\n", + drm_printf(p, "\t\tSchedule State: 0x%x\n", ce->guc_state.sched_state); } @@ -4979,7 +4984,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, READ_ONCE(*ce->parallel.guc.wq_head)); drm_printf(p, "\t\tWQI Tail: %u\n", READ_ONCE(*ce->parallel.guc.wq_tail)); - drm_printf(p, "\t\tWQI Status: %u\n\n", + drm_printf(p, "\t\tWQI Status: %u\n", READ_ONCE(*ce->parallel.guc.wq_status)); } @@ -4987,7 +4992,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, emit_bb_start_parent_no_preempt_mid_batch) { u8 i; - drm_printf(p, "\t\tChildren Go: %u\n\n", + drm_printf(p, "\t\tChildren Go: %u\n", get_children_go_value(ce)); for (i = 0; i < ce->parallel.number_children; ++i) drm_printf(p, "\t\tChildren Join: %u\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index fbc8bae14f76..410905da8e97 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -211,12 +211,74 @@ void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *b huc->delayed_load.nb.notifier_call = NULL; } +static void delayed_huc_load_init(struct intel_huc *huc) +{ + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a delayed HuC load in progress. + */ + i915_sw_fence_init(&huc->delayed_load.fence, + sw_fence_dummy_notify); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + huc->delayed_load.timer.function = huc_delayed_load_timer_callback; +} + +static void delayed_huc_load_fini(struct intel_huc *huc) +{ + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ + delayed_huc_load_complete(huc); + i915_sw_fence_fini(&huc->delayed_load.fence); +} + +static bool vcs_supported(struct intel_gt *gt) +{ + intel_engine_mask_t mask = gt->info.engine_mask; + + /* + * We reach here from i915_driver_early_probe for the primary GT before + * its engine mask is set, so we use the device info engine mask for it; + * this means we're not taking VCS fusing into account, but if the + * primary GT supports VCS engines we expect at least one of them to + * remain unfused so we're fine. + * For other GTs we expect the GT-specific mask to be set before we + * call this function. + */ + GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask); + + if (gt_is_root(gt)) + mask = RUNTIME_INFO(gt->i915)->platform_engine_mask; + else + mask = gt->info.engine_mask; + + return __ENGINE_INSTANCES_MASK(mask, VCS0, I915_MAX_VCS); +} + void intel_huc_init_early(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; + struct intel_gt *gt = huc_to_gt(huc); intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); + /* + * we always init the fence as already completed, even if HuC is not + * supported. This way we don't have to distinguish between HuC not + * supported/disabled or already loaded, and can focus on if the load + * is currently in progress (fence not complete) or not, which is what + * we care about for stalling userspace submissions. + */ + delayed_huc_load_init(huc); + + if (!vcs_supported(gt)) { + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); + return; + } + if (GRAPHICS_VER(i915) >= 11) { huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; huc->status.mask = HUC_LOAD_SUCCESSFUL; @@ -226,17 +288,6 @@ void intel_huc_init_early(struct intel_huc *huc) huc->status.mask = HUC_FW_VERIFIED; huc->status.value = HUC_FW_VERIFIED; } - - /* - * Initialize fence to be complete as this is expected to be complete - * unless there is a delayed HuC reload in progress. - */ - i915_sw_fence_init(&huc->delayed_load.fence, - sw_fence_dummy_notify); - i915_sw_fence_commit(&huc->delayed_load.fence); - - hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - huc->delayed_load.timer.function = huc_delayed_load_timer_callback; } #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") @@ -300,13 +351,14 @@ out: void intel_huc_fini(struct intel_huc *huc) { - if (!intel_uc_fw_is_loadable(&huc->fw)) - return; - - delayed_huc_load_complete(huc); + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ + delayed_huc_load_fini(huc); - i915_sw_fence_fini(&huc->delayed_load.fence); - intel_uc_fw_fini(&huc->fw); + if (intel_uc_fw_is_loadable(&huc->fw)) + intel_uc_fw_fini(&huc->fw); } void intel_huc_suspend(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index dbd048b77e19..2a508b137e90 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -357,8 +357,8 @@ static int uc_init_wopcm(struct intel_uc *uc) { struct intel_gt *gt = uc_to_gt(uc); struct intel_uncore *uncore = gt->uncore; - u32 base = intel_wopcm_guc_base(>->i915->wopcm); - u32 size = intel_wopcm_guc_size(>->i915->wopcm); + u32 base = intel_wopcm_guc_base(>->wopcm); + u32 size = intel_wopcm_guc_size(>->wopcm); u32 huc_agent = intel_uc_uses_huc(uc) ? HUC_LOADING_AGENT_GUC : 0; u32 mask; int err; @@ -636,8 +636,10 @@ void intel_uc_runtime_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - if (!intel_guc_is_ready(guc)) + if (!intel_guc_is_ready(guc)) { + guc->interrupts.enabled = false; return; + } /* * Wait for any outstanding CTB before tearing down communication /w the @@ -657,8 +659,10 @@ void intel_uc_suspend(struct intel_uc *uc) intel_wakeref_t wakeref; int err; - if (!intel_guc_is_ready(guc)) + if (!intel_guc_is_ready(guc)) { + guc->interrupts.enabled = false; return; + } with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) { err = intel_guc_suspend(guc); @@ -718,6 +722,7 @@ int intel_uc_runtime_resume(struct intel_uc *uc) static const struct intel_uc_ops uc_ops_off = { .init_hw = __uc_check_hw, + .fini = __uc_fini, /* to clean-up the init_early initialization */ }; static const struct intel_uc_ops uc_ops_on = { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index de2843dc1307..0c80ba51a4bd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -478,10 +478,11 @@ static int check_gsc_manifest(const struct firmware *fw, return 0; } -static int check_ccs_header(struct drm_i915_private *i915, +static int check_ccs_header(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { + struct drm_i915_private *i915 = gt->i915; struct uc_css_header *css; size_t size; @@ -523,10 +524,10 @@ static int check_ccs_header(struct drm_i915_private *i915, /* Sanity check whether this fw is not larger than whole WOPCM memory */ size = __intel_uc_fw_get_upload_size(uc_fw); - if (unlikely(size >= i915->wopcm.size)) { + if (unlikely(size >= gt->wopcm.size)) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - size, (size_t)i915->wopcm.size); + size, (size_t)gt->wopcm.size); return -E2BIG; } @@ -554,7 +555,8 @@ static int check_ccs_header(struct drm_i915_private *i915, */ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) { - struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct drm_i915_private *i915 = gt->i915; struct intel_uc_fw_file file_ideal; struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; @@ -562,7 +564,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) bool old_ver = false; int err; - GEM_BUG_ON(!i915->wopcm.size); + GEM_BUG_ON(!gt->wopcm.size); GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); err = i915_inject_probe_error(i915, -ENXIO); @@ -575,6 +577,17 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); memcpy(&file_ideal, &uc_fw->file_wanted, sizeof(file_ideal)); + if (!err && fw->size > INTEL_UC_RSVD_GGTT_PER_FW) { + drm_err(&i915->drm, + "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + fw->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); + + /* try to find another blob to load */ + release_firmware(fw); + err = -ENOENT; + } + /* Any error is terminal if overriding. Don't bother searching for older versions */ if (err && intel_uc_fw_is_overridden(uc_fw)) goto fail; @@ -604,7 +617,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->loaded_via_gsc) err = check_gsc_manifest(fw, uc_fw); else - err = check_ccs_header(i915, fw, uc_fw); + err = check_ccs_header(gt, fw, uc_fw); if (err) goto fail; @@ -677,14 +690,30 @@ fail: static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) { - struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct i915_ggtt *ggtt = gt->ggtt; struct drm_mm_node *node = &ggtt->uc_fw; + u32 offset = uc_fw->type * INTEL_UC_RSVD_GGTT_PER_FW; + + /* + * The media GT shares the GGTT with the root GT, which means that + * we need to use different offsets for the binaries on the media GT. + * To keep the math simple, we use 8MB for the root tile and 8MB for + * the media one. This will need to be updated if we ever have more + * than 1 media GT. + */ + BUILD_BUG_ON(INTEL_UC_FW_NUM_TYPES * INTEL_UC_RSVD_GGTT_PER_FW > SZ_8M); + GEM_BUG_ON(gt->type == GT_MEDIA && gt->info.id > 1); + if (gt->type == GT_MEDIA) + offset += SZ_8M; GEM_BUG_ON(!drm_mm_node_allocated(node)); GEM_BUG_ON(upper_32_bits(node->start)); GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); + GEM_BUG_ON(offset + uc_fw->obj->base.size > node->size); + GEM_BUG_ON(uc_fw->obj->base.size > INTEL_UC_RSVD_GGTT_PER_FW); - return lower_32_bits(node->start); + return lower_32_bits(node->start + offset); } static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) @@ -699,7 +728,6 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) dummy->bi.pages = obj->mm.pages; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(dummy->node_size > ggtt->uc_fw.size); /* uc_fw->obj cache domains were not controlled across suspend */ if (i915_gem_object_has_struct_page(obj)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index cb586f7df270..bc898ba5355d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -6,6 +6,7 @@ #ifndef _INTEL_UC_FW_H_ #define _INTEL_UC_FW_H_ +#include <linux/sizes.h> #include <linux/types.h> #include "intel_uc_fw_abi.h" #include "intel_device_info.h" @@ -114,6 +115,19 @@ struct intel_uc_fw { (uc)->fw.file_selected.minor_ver, \ (uc)->fw.file_selected.patch_ver)) +/* + * When we load the uC binaries, we pin them in a reserved section at the top of + * the GGTT, which is ~18 MBs. On multi-GT systems where the GTs share the GGTT, + * we also need to make sure that each binary is pinned to a unique location + * during load, because the different GT can go through the FW load at the same + * time (see uc_fw_ggtt_offset() for details). + * Given that the available space is much greater than what is required by the + * binaries, to keep things simple instead of dynamically partitioning the + * reserved section to make space for all the blobs we can just reserve a static + * chunk for each binary. + */ +#define INTEL_UC_RSVD_GGTT_PER_FW SZ_2M + #ifdef CONFIG_DRM_I915_DEBUG_GUC void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, enum intel_uc_fw_status status); diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c index 01f8cd3c3134..d91b58f70403 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c @@ -35,11 +35,14 @@ static int intel_hang_guc(void *arg) struct i915_request *rq; intel_wakeref_t wakeref; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine; + struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt); unsigned int reset_count; u32 guc_status; u32 old_beat; + if (!engine) + return 0; + ctx = kernel_context(gt->i915, NULL); if (IS_ERR(ctx)) { drm_err(>->i915->drm, "Failed get kernel context: %ld\n", PTR_ERR(ctx)); @@ -48,14 +51,13 @@ static int intel_hang_guc(void *arg) wakeref = intel_runtime_pm_get(gt->uncore->rpm); - ce = intel_context_create(gt->engine[BCS0]); + ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); drm_err(>->i915->drm, "Failed to create spinner request: %d\n", ret); goto err; } - engine = ce->engine; reset_count = i915_reset_count(global); old_beat = engine->props.heartbeat_interval_ms; diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 01e54b45c5c1..355f1c0e8664 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -88,7 +88,7 @@ static int vgpu_gem_get_pages( sg_dma_address(sg) = dma_addr; } - __i915_gem_object_set_pages(obj, st, PAGE_SIZE); + __i915_gem_object_set_pages(obj, st); out: if (ret) { dma_addr_t dma_addr; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 897b6fdbbaed..f5451adcd489 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -664,18 +664,14 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) return -ESRCH; } - kvm_get_kvm(vgpu->vfio_device.kvm); - if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; vgpu->attached = true; - kvmgt_protect_table_init(vgpu); - gvt_cache_init(vgpu); - vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); @@ -711,15 +707,19 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->vfio_device.kvm); + kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); + WARN_ON(vgpu->nr_cache_entries); + + vgpu->gfn_cache = RB_ROOT; + vgpu->dma_addr_cache = RB_ROOT; + intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; - - if (vgpu->vfio_device.kvm) - kvm_put_kvm(vgpu->vfio_device.kvm); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) @@ -1447,9 +1447,17 @@ static int intel_vgpu_init_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); struct intel_vgpu_type *type = container_of(mdev->type, struct intel_vgpu_type, type); + int ret; vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt; - return intel_gvt_create_vgpu(vgpu, type->conf); + ret = intel_gvt_create_vgpu(vgpu, type->conf); + if (ret) + return ret; + + kvmgt_protect_table_init(vgpu); + gvt_cache_init(vgpu); + + return 0; } static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) @@ -1457,7 +1465,6 @@ static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); intel_gvt_destroy_vgpu(vgpu); - vfio_free_device(vfio_dev); } static const struct vfio_device_ops intel_vgpu_dev_ops = { @@ -1470,6 +1477,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, .dma_unmap = intel_vgpu_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d6fe94cd0fdb..9cd8fcbf7cad 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -570,9 +570,8 @@ retry: if (gmadr_bytes == 8) bb->bb_start_cmd_va[2] = 0; - ret = i915_vma_move_to_active(bb->vma, - workload->req, - 0); + ret = i915_vma_move_to_active(bb->vma, workload->req, + __EXEC_OBJECT_NO_REQUEST_AWAIT); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ae987e92251d..6c7ac73b69a5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -688,8 +688,8 @@ i915_drop_caches_set(void *data, u64 val) unsigned int flags; int ret; - DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", - val, val & DROP_ALL); + drm_dbg(&i915->drm, "Dropping caches: 0x%08llx [0x%08llx]\n", + val, val & DROP_ALL); ret = gt_drop_caches(to_gt(i915), val); if (ret) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 9ceea52ad9db..ef94baebb337 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -373,8 +373,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ttm; - intel_wopcm_init_early(&dev_priv->wopcm); - ret = intel_root_gt_init_early(dev_priv); if (ret < 0) goto err_rootgt; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 05b84196216c..360743a8a163 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -63,7 +63,6 @@ #include "intel_runtime_pm.h" #include "intel_step.h" #include "intel_uncore.h" -#include "intel_wopcm.h" struct drm_i915_clock_gating_funcs; struct drm_i915_gem_object; @@ -236,8 +235,6 @@ struct drm_i915_private { struct intel_gvt *gvt; - struct intel_wopcm wopcm; - struct pci_dev *bridge_dev; struct rb_root uabi_engines; @@ -727,6 +724,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) +#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \ + (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \ + IS_GRAPHICS_STEP(__i915, since, until)) + #define IS_MTL_DISPLAY_STEP(__i915, since, until) \ (IS_METEORLAKE(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) @@ -769,12 +770,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) #define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) -#define ENGINE_INSTANCES_MASK(gt, first, count) ({ \ +#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ \ unsigned int first__ = (first); \ unsigned int count__ = (count); \ - ((gt)->info.engine_mask & \ - GENMASK(first__ + count__ - 1, first__)) >> first__; \ + ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \ }) + +#define ENGINE_INSTANCES_MASK(gt, first, count) \ + __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count) + #define RCS_MASK(gt) \ ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) #define BCS_MASK(gt) \ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 299f94a9fb87..8468ca9885fd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1140,8 +1140,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; - intel_uc_fetch_firmwares(&to_gt(dev_priv)->uc); - intel_wopcm_init(&dev_priv->wopcm); + for_each_gt(gt, dev_priv, i) { + intel_uc_fetch_firmwares(>->uc); + intel_wopcm_init(>->wopcm); + } ret = i915_init_ggtt(dev_priv); if (ret) { @@ -1286,7 +1288,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) struct i915_drm_client *client; int ret = -ENOMEM; - DRM_DEBUG("\n"); + drm_dbg(&i915->drm, "\n"); file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); if (!file_priv) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 3047e80e1163..61ef2d9cfa62 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -179,7 +179,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = i915_perf_oa_timestamp_frequency(i915); break; default: - DRM_DEBUG("Unknown parameter %d\n", param->param); + drm_dbg(&i915->drm, "Unknown parameter %d\n", param->param); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c4e46da6502c..54ea28cf8a1a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1085,8 +1085,9 @@ static void ivb_parity_work(struct work_struct *work) kobject_uevent_env(&dev_priv->drm.primary->kdev->kobj, KOBJ_CHANGE, parity_event); - DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n", - slice, row, bank, subbank); + drm_dbg(&dev_priv->drm, + "Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n", + slice, row, bank, subbank); kfree(parity_event[4]); kfree(parity_event[3]); @@ -2776,7 +2777,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl); } else { - DRM_ERROR("Tile not supported: 0x%08x\n", master_tile_ctl); + drm_err(&i915->drm, "Tile not supported: 0x%08x\n", + master_tile_ctl); dg1_master_intr_enable(regs); return IRQ_NONE; } @@ -3942,7 +3944,7 @@ static void i8xx_error_irq_ack(struct drm_i915_private *i915, static void i8xx_error_irq_handler(struct drm_i915_private *dev_priv, u16 eir, u16 eir_stuck) { - DRM_DEBUG("Master Error: EIR 0x%04x\n", eir); + drm_dbg(&dev_priv->drm, "Master Error: EIR 0x%04x\n", eir); if (eir_stuck) drm_dbg(&dev_priv->drm, "EIR stuck: 0x%04x, masked\n", @@ -3977,7 +3979,7 @@ static void i9xx_error_irq_ack(struct drm_i915_private *dev_priv, static void i9xx_error_irq_handler(struct drm_i915_private *dev_priv, u32 eir, u32 eir_stuck) { - DRM_DEBUG("Master Error, EIR 0x%08x\n", eir); + drm_dbg(&dev_priv->drm, "Master Error, EIR 0x%08x\n", eir); if (eir_stuck) drm_dbg(&dev_priv->drm, "EIR stuck: 0x%08x, masked\n", diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index b28ccbebc821..48225bf6efe7 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1145,6 +1145,7 @@ static const struct intel_device_info mtl_info = { .extra_gt_list = xelpmp_extra_gt, .has_flat_ccs = 0, .has_gmd_id = 1, + .has_guc_deprivilege = 1, .has_mslice_steering = 0, .has_snoop = 1, .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b07b5a40630a..125b6ca25a75 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -531,9 +531,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) if (OA_TAKEN(hw_tail, tail) > report_size && __ratelimit(&stream->perf->tail_pointer_race)) - DRM_NOTE("unlanded report(s) head=0x%x " - "tail=0x%x hw_tail=0x%x\n", - head, tail, hw_tail); + drm_notice(&stream->uncore->i915->drm, + "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", + head, tail, hw_tail); stream->oa_buffer.tail = gtt_offset + tail; stream->oa_buffer.aging_tail = gtt_offset + hw_tail; @@ -1016,7 +1016,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, */ if (report32[0] == 0) { if (__ratelimit(&stream->perf->spurious_report_rs)) - DRM_NOTE("Skipping spurious, invalid OA report\n"); + drm_notice(&uncore->i915->drm, + "Skipping spurious, invalid OA report\n"); continue; } @@ -1382,6 +1383,9 @@ static u32 oa_context_image_offset(struct intel_context *ce, u32 reg) u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4; u32 *state = ce->lrc_reg_state; + if (drm_WARN_ON(&ce->engine->i915->drm, !state)) + return U32_MAX; + for (offset = 0; offset < len; ) { if (IS_MI_LRI_CMD(state[offset])) { /* @@ -1446,7 +1450,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) if (IS_ERR(ce)) return PTR_ERR(ce); - if (engine_supports_mi_query(stream->engine)) { + if (engine_supports_mi_query(stream->engine) && + HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) { /* * We are enabling perf query here. If we don't find the context * offset here, just return an error. @@ -1603,8 +1608,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) free_noa_wait(stream); if (perf->spurious_report_rs.missed) { - DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", - perf->spurious_report_rs.missed); + drm_notice(>->i915->drm, + "%d spurious OA report notices suppressed due to ratelimiting\n", + perf->spurious_report_rs.missed); } } @@ -2252,9 +2258,7 @@ retry: goto err_add_request; } - err = i915_request_await_object(rq, vma->obj, 0); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_vma_move_to_active(vma, rq, 0); if (err) goto err_add_request; diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index e0c96b44eda8..ca150b7af3f2 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -146,8 +146,8 @@ struct i915_perf_stream { */ struct intel_engine_cs *engine; - /* - * Lock associated with operations on stream + /** + * @lock: Lock associated with operations on stream */ struct mutex lock; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 958b37123bf1..52531ab28c5f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -148,13 +148,13 @@ static u64 __get_rc6(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; u64 val; - val = intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6); + val = intel_rc6_residency_ns(>->rc6, INTEL_RC6_RES_RC6); if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6p); + val += intel_rc6_residency_ns(>->rc6, INTEL_RC6_RES_RC6p); if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6pp); + val += intel_rc6_residency_ns(>->rc6, INTEL_RC6_RES_RC6pp); return val; } @@ -371,7 +371,6 @@ static void frequency_sample(struct intel_gt *gt, unsigned int period_ns) { struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; struct i915_pmu *pmu = &i915->pmu; struct intel_rps *rps = >->rps; @@ -394,7 +393,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * case we assume the system is running at the intended * frequency. Fortunately, the read should rarely fail! */ - val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1); + val = intel_rps_read_rpstat_fw(rps); if (val) val = intel_rps_get_cagf(rps, val); else diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 6ec9c9fb7b0d..00871ef99792 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -250,8 +250,9 @@ static int query_perf_config_data(struct drm_i915_private *i915, return total_size; if (query_item->length < total_size) { - DRM_DEBUG("Invalid query config data item size=%u expected=%u\n", - query_item->length, total_size); + drm_dbg(&i915->drm, + "Invalid query config data item size=%u expected=%u\n", + query_item->length, total_size); return -EINVAL; } @@ -418,9 +419,10 @@ static int query_perf_config_list(struct drm_i915_private *i915, } while (n_configs > alloc); if (query_item->length < sizeof_perf_config_list(n_configs)) { - DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n", - query_item->length, - sizeof_perf_config_list(n_configs)); + drm_dbg(&i915->drm, + "Invalid query config list item size=%u expected=%zu\n", + query_item->length, + sizeof_perf_config_list(n_configs)); kfree(oa_config_ids); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 37452140cb93..c76a02c6d22c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -902,6 +902,7 @@ #define GEN11_VEBOX2_RING_BASE 0x1d8000 #define XEHP_VEBOX3_RING_BASE 0x1e8000 #define XEHP_VEBOX4_RING_BASE 0x1f8000 +#define MTL_GSC_RING_BASE 0x11a000 #define GEN12_COMPUTE0_RING_BASE 0x1a000 #define GEN12_COMPUTE1_RING_BASE 0x1c000 #define GEN12_COMPUTE2_RING_BASE 0x1e000 @@ -6591,6 +6592,15 @@ /* XEHP_PCODE_FREQUENCY_CONFIG param2 */ #define PCODE_MBOX_DOMAIN_NONE 0x0 #define PCODE_MBOX_DOMAIN_MEDIAFF 0x3 + +/* Wa_14017210380: mtl */ +#define PCODE_MBOX_GT_STATE 0x50 +/* sub-commands (param1) */ +#define PCODE_MBOX_GT_STATE_MEDIA_BUSY 0x1 +#define PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY 0x2 +/* param2 */ +#define PCODE_MBOX_GT_STATE_DOMAIN_MEDIA 0x1 + #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index cc2a8821d22a..8a9aad523eec 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -465,7 +465,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) struct i915_sw_dma_fence_cb_timer *cb = container_of(wrk, typeof(*cb), work); - del_timer_sync(&cb->timer); + timer_shutdown_sync(&cb->timer); dma_fence_put(cb->dma); kfree_rcu(cb, rcu); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 1e2750210831..595e8b574990 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -218,7 +218,8 @@ static const struct bin_attribute error_state_attr = { static void i915_setup_error_capture(struct device *kdev) { if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr)) - DRM_ERROR("error_state sysfs setup failed\n"); + drm_err(&kdev_minor_to_i915(kdev)->drm, + "error_state sysfs setup failed\n"); } static void i915_teardown_error_capture(struct device *kdev) diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c index c822d0aafd2d..e3f808372c47 100644 --- a/drivers/gpu/drm/i915/i915_user_extensions.c +++ b/drivers/gpu/drm/i915/i915_user_extensions.c @@ -51,7 +51,7 @@ int i915_user_extensions(struct i915_user_extension __user *ext, return err; if (get_user(next, &ext->next_extension) || - overflows_type(next, ext)) + overflows_type(next, uintptr_t)) return -EFAULT; ext = u64_to_user_ptr(next); diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 6c14d13364bf..67a66d4d5c70 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -111,10 +111,6 @@ bool i915_error_injected(void); #define range_overflows_end_t(type, start, size, max) \ range_overflows_end((type)(start), (type)(size), (type)(max)) -/* Note we don't consider signbits :| */ -#define overflows_type(x, T) \ - (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T)) - #define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ (typeof(ptr))(__v & -BIT(n)); \ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index c39488eb9eeb..703fee6b5f75 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -73,14 +73,16 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) char buf[512]; if (!vma->node.stack) { - DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: unknown owner\n", - vma->node.start, vma->node.size, reason); + drm_dbg(&to_i915(vma->obj->base.dev)->drm, + "vma.node [%08llx + %08llx] %s: unknown owner\n", + vma->node.start, vma->node.size, reason); return; } stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0); - DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n", - vma->node.start, vma->node.size, reason, buf); + drm_dbg(&to_i915(vma->obj->base.dev)->drm, + "vma.node [%08llx + %08llx] %s: inserted at %s\n", + vma->node.start, vma->node.size, reason, buf); } #else @@ -782,9 +784,9 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n", - size, flags & PIN_MAPPABLE ? "mappable" : "total", - end); + drm_dbg(&to_i915(vma->obj->base.dev)->drm, + "Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n", + size, flags & PIN_MAPPABLE ? "mappable" : "total", end); return -ENOSPC; } @@ -1842,6 +1844,11 @@ int _i915_vma_move_to_active(struct i915_vma *vma, GEM_BUG_ON(!vma->pages); + if (!(flags & __EXEC_OBJECT_NO_REQUEST_AWAIT)) { + err = i915_request_await_object(rq, vma->obj, flags & EXEC_OBJECT_WRITE); + if (unlikely(err)) + return err; + } err = __i915_vma_move_to_active(vma, rq); if (unlikely(err)) return err; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index aecd9c64486b..0757977a489b 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,6 +55,7 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) /* do not reserve memory to prevent deadlocks */ #define __EXEC_OBJECT_NO_RESERVE BIT(31) +#define __EXEC_OBJECT_NO_REQUEST_AWAIT BIT(30) int __must_check _i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 9a4a7fb55582..b9a164efd6ae 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -38,7 +38,7 @@ static int __iopagetest(struct intel_memory_region *mem, u8 value, resource_size_t offset, const void *caller) { - int byte = prandom_u32_max(pagesize); + int byte = get_random_u32_below(pagesize); u8 result[3]; memset_io(va, value, pagesize); /* or GPF! */ @@ -92,7 +92,7 @@ static int iopagetest(struct intel_memory_region *mem, static resource_size_t random_page(resource_size_t last) { /* Limited to low 44b (16TiB), but should suffice for a spot check */ - return prandom_u32_max(last >> PAGE_SHIFT) << PAGE_SHIFT; + return get_random_u32_below(last >> PAGE_SHIFT) << PAGE_SHIFT; } static int iomemtest(struct intel_memory_region *mem, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9995f478152c..614013745fca 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -179,8 +179,9 @@ static inline void fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) { if (wait_ack_clear(d, FORCEWAKE_KERNEL)) { - DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", - intel_uncore_forcewake_domain_to_str(d->id)); + drm_err(&d->uncore->i915->drm, + "%s: timed out waiting for forcewake ack to clear.\n", + intel_uncore_forcewake_domain_to_str(d->id)); add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */ } } @@ -227,11 +228,12 @@ fw_domain_wait_ack_with_fallback(const struct intel_uncore_forcewake_domain *d, fw_clear(d, FORCEWAKE_KERNEL_FALLBACK); } while (!ack_detected && pass++ < 10); - DRM_DEBUG_DRIVER("%s had to use fallback to %s ack, 0x%x (passes %u)\n", - intel_uncore_forcewake_domain_to_str(d->id), - type == ACK_SET ? "set" : "clear", - fw_ack(d), - pass); + drm_dbg(&d->uncore->i915->drm, + "%s had to use fallback to %s ack, 0x%x (passes %u)\n", + intel_uncore_forcewake_domain_to_str(d->id), + type == ACK_SET ? "set" : "clear", + fw_ack(d), + pass); return ack_detected ? 0 : -ETIMEDOUT; } @@ -256,8 +258,9 @@ static inline void fw_domain_wait_ack_set(const struct intel_uncore_forcewake_domain *d) { if (wait_ack_set(d, FORCEWAKE_KERNEL)) { - DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", - intel_uncore_forcewake_domain_to_str(d->id)); + drm_err(&d->uncore->i915->drm, + "%s: timed out waiting for forcewake ack request.\n", + intel_uncore_forcewake_domain_to_str(d->id)); add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */ } } diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 5449146a0624..e9e38490815d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -382,20 +382,6 @@ __uncore_write(write_notrace, 32, l, false) */ __uncore_read(read64, 64, q, true) -static inline u64 -intel_uncore_read64_2x32(struct intel_uncore *uncore, - i915_reg_t lower_reg, i915_reg_t upper_reg) -{ - u32 upper, lower, old_upper, loop = 0; - upper = intel_uncore_read(uncore, upper_reg); - do { - old_upper = upper; - lower = intel_uncore_read(uncore, lower_reg); - upper = intel_uncore_read(uncore, upper_reg); - } while (upper != old_upper && loop++ < 2); - return (u64)upper << 32 | lower; -} - #define intel_uncore_posting_read(...) ((void)intel_uncore_read_notrace(__VA_ARGS__)) #define intel_uncore_posting_read16(...) ((void)intel_uncore_read16_notrace(__VA_ARGS__)) @@ -455,6 +441,36 @@ static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore, intel_uncore_write_fw(uncore, reg, val); } +static inline u64 +intel_uncore_read64_2x32(struct intel_uncore *uncore, + i915_reg_t lower_reg, i915_reg_t upper_reg) +{ + u32 upper, lower, old_upper, loop = 0; + enum forcewake_domains fw_domains; + unsigned long flags; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, lower_reg, + FW_REG_READ); + + fw_domains |= intel_uncore_forcewake_for_reg(uncore, upper_reg, + FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + upper = intel_uncore_read_fw(uncore, upper_reg); + do { + old_upper = upper; + lower = intel_uncore_read_fw(uncore, lower_reg); + upper = intel_uncore_read_fw(uncore, upper_reg); + } while (upper != old_upper && loop++ < 2); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return (u64)upper << 32 | lower; +} + static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore, i915_reg_t reg, u32 val, u32 mask, u32 expected_val) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_42.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_42.h new file mode 100644 index 000000000000..739f9072fa5f --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_42.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2020, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_FW_INTERFACE_42_H__ +#define __INTEL_PXP_FW_INTERFACE_42_H__ + +#include <linux/types.h> +#include "intel_pxp_cmd_interface_cmn.h" + +/* PXP-Opcode for Init Session */ +#define PXP42_CMDID_INIT_SESSION 0x1e + +/* PXP-Input-Packet: Init Session (Arb-Session) */ +struct pxp42_create_arb_in { + struct pxp_cmd_header header; + u32 protection_mode; +#define PXP42_ARB_SESSION_MODE_HEAVY 0x2 + u32 session_id; +} __packed; + +/* PXP-Output-Packet: Init Session */ +struct pxp42_create_arb_out { + struct pxp_cmd_header header; +} __packed; + +#endif /* __INTEL_PXP_FW_INTERFACE_42_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h new file mode 100644 index 000000000000..ad67e3f49c20 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2022, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_FW_INTERFACE_43_H__ +#define __INTEL_PXP_FW_INTERFACE_43_H__ + +#include <linux/types.h> +#include "intel_pxp_cmd_interface_cmn.h" + +/* PXP-Cmd-Op definitions */ +#define PXP43_CMDID_START_HUC_AUTH 0x0000003A + +/* PXP-Input-Packet: HUC-Authentication */ +struct pxp43_start_huc_auth_in { + struct pxp_cmd_header header; + __le64 huc_base_address; +} __packed; + +/* PXP-Output-Packet: HUC-Authentication */ +struct pxp43_start_huc_auth_out { + struct pxp_cmd_header header; +} __packed; + +#endif /* __INTEL_PXP_FW_INTERFACE_43_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h new file mode 100644 index 000000000000..c2f23394f9b8 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2022, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_FW_INTERFACE_CMN_H__ +#define __INTEL_PXP_FW_INTERFACE_CMN_H__ + +#include <linux/types.h> + +#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF)) + +/* + * there are a lot of status codes for PXP, but we only define the cross-API + * common ones that we actually can handle in the kernel driver. Other failure + * codes should be printed to error msg for debug. + */ +enum pxp_status { + PXP_STATUS_SUCCESS = 0x0, + PXP_STATUS_OP_NOT_PERMITTED = 0x4013 +}; + +/* Common PXP FW message header */ +struct pxp_cmd_header { + u32 api_version; + u32 command_id; + union { + u32 status; /* out */ + u32 stream_id; /* in */ + }; + /* Length of the message (excluding the header) */ + u32 buffer_len; +} __packed; + +#endif /* __INTEL_PXP_FW_INTERFACE_CMN_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c index 7ec36d94e758..2e1165522950 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c @@ -3,7 +3,8 @@ * Copyright(c) 2021-2022, Intel Corporation. All rights reserved. */ -#include "drm/i915_drm.h" +#include <drm/i915_drm.h> + #include "i915_drv.h" #include "gem/i915_gem_region.h" @@ -13,14 +14,14 @@ #include "intel_pxp_huc.h" #include "intel_pxp_tee.h" #include "intel_pxp_types.h" -#include "intel_pxp_tee_interface.h" +#include "intel_pxp_cmd_interface_43.h" int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp) { struct intel_gt *gt = pxp_to_gt(pxp); struct intel_huc *huc = >->uc.huc; - struct pxp_tee_start_huc_auth_in huc_in = {0}; - struct pxp_tee_start_huc_auth_out huc_out = {0}; + struct pxp43_start_huc_auth_in huc_in = {0}; + struct pxp43_start_huc_auth_out huc_out = {0}; dma_addr_t huc_phys_addr; u8 client_id = 0; u8 fence_id = 0; @@ -32,8 +33,8 @@ int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp) huc_phys_addr = i915_gem_object_get_dma_address(huc->fw.obj, 0); /* write the PXP message into the lmem (the sg list) */ - huc_in.header.api_version = PXP_TEE_43_APIVER; - huc_in.header.command_id = PXP_TEE_43_START_HUC_AUTH; + huc_in.header.api_version = PXP_APIVER(4, 3); + huc_in.header.command_id = PXP43_CMDID_START_HUC_AUTH; huc_in.header.status = 0; huc_in.header.buffer_len = sizeof(huc_in.huc_base_address); huc_in.huc_base_address = huc_phys_addr; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index 052fd2f9a583..b0c9170b1395 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -14,7 +14,7 @@ #include "intel_pxp.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" -#include "intel_pxp_tee_interface.h" +#include "intel_pxp_cmd_interface_42.h" #include "intel_pxp_huc.h" static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev) @@ -286,14 +286,14 @@ int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, int arb_session_id) { struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; - struct pxp_tee_create_arb_in msg_in = {0}; - struct pxp_tee_create_arb_out msg_out = {0}; + struct pxp42_create_arb_in msg_in = {0}; + struct pxp42_create_arb_out msg_out = {0}; int ret; - msg_in.header.api_version = PXP_TEE_APIVER; - msg_in.header.command_id = PXP_TEE_ARB_CMDID; + msg_in.header.api_version = PXP_APIVER(4, 2); + msg_in.header.command_id = PXP42_CMDID_INIT_SESSION; msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header); - msg_in.protection_mode = PXP_TEE_ARB_PROTECTION_MODE; + msg_in.protection_mode = PXP42_ARB_SESSION_MODE_HEAVY; msg_in.session_id = arb_session_id; ret = intel_pxp_tee_io_message(pxp, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h deleted file mode 100644 index 7edc1760f142..000000000000 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright(c) 2020-2022, Intel Corporation. All rights reserved. - */ - -#ifndef __INTEL_PXP_TEE_INTERFACE_H__ -#define __INTEL_PXP_TEE_INTERFACE_H__ - -#include <linux/types.h> - -#define PXP_TEE_APIVER 0x40002 -#define PXP_TEE_43_APIVER 0x00040003 -#define PXP_TEE_ARB_CMDID 0x1e -#define PXP_TEE_ARB_PROTECTION_MODE 0x2 -#define PXP_TEE_43_START_HUC_AUTH 0x0000003A - -/* - * there are a lot of status codes for PXP, but we only define the ones we - * actually can handle in the driver. other failure codes will be printed to - * error msg for debug. - */ -enum pxp_status { - PXP_STATUS_SUCCESS = 0x0, - PXP_STATUS_OP_NOT_PERMITTED = 0x4013 -}; - -/* PXP TEE message header */ -struct pxp_tee_cmd_header { - u32 api_version; - u32 command_id; - u32 status; - /* Length of the message (excluding the header) */ - u32 buffer_len; -} __packed; - -/* PXP TEE message input to create a arbitrary session */ -struct pxp_tee_create_arb_in { - struct pxp_tee_cmd_header header; - u32 protection_mode; - u32 session_id; -} __packed; - -/* PXP TEE message output to create a arbitrary session */ -struct pxp_tee_create_arb_out { - struct pxp_tee_cmd_header header; -} __packed; - -struct pxp_tee_start_huc_auth_in { - struct pxp_tee_cmd_header header; - __le64 huc_base_address; -}; - -struct pxp_tee_start_huc_auth_out { - struct pxp_tee_cmd_header header; -}; - -#endif /* __INTEL_PXP_TEE_INTERFACE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 27c733b00976..eae7d947d7de 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -61,7 +61,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) #define PFN_BIAS 0x1000 struct sg_table *pages; struct scatterlist *sg; - unsigned int sg_page_sizes; typeof(obj->base.size) rem; pages = kmalloc(sizeof(*pages), GFP); @@ -74,7 +73,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) return -ENOMEM; } - sg_page_sizes = 0; rem = obj->base.size; for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); @@ -83,13 +81,12 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; - sg_page_sizes |= len; rem -= len; } GEM_BUG_ON(rem); - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + __i915_gem_object_set_pages(obj, pages); return 0; #undef GFP diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index a46350c37e9d..0daa8669181d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1223,9 +1223,7 @@ static int live_all_engines(void *arg) goto out_request; } - err = i915_request_await_object(request[idx], batch->obj, 0); - if (err == 0) - err = i915_vma_move_to_active(batch, request[idx], 0); + err = i915_vma_move_to_active(batch, request[idx], 0); GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], @@ -1352,10 +1350,7 @@ static int live_sequential_engines(void *arg) } } - err = i915_request_await_object(request[idx], - batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, request[idx], 0); + err = i915_vma_move_to_active(batch, request[idx], 0); GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], @@ -1710,7 +1705,8 @@ static int live_breadcrumbs_smoketest(void *arg) { struct drm_i915_private *i915 = arg; const unsigned int nengines = num_uabi_engines(i915); - const unsigned int ncpus = num_online_cpus(); + const unsigned int ncpus = /* saturate with nengines * ncpus */ + max_t(int, 2, DIV_ROUND_UP(num_online_cpus(), nengines)); unsigned long num_waits, num_fences; struct intel_engine_cs *engine; struct smoke_thread *threads; @@ -1782,7 +1778,7 @@ static int live_breadcrumbs_smoketest(void *arg) goto out_flush; } /* One ring interleaved between requests from all cpus */ - smoke[idx].max_batch /= num_online_cpus() + 1; + smoke[idx].max_batch /= ncpus + 1; pr_debug("Limiting batches to %d requests on %s\n", smoke[idx].max_batch, engine->name); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0c22594ae274..16978ac59797 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -119,22 +119,6 @@ static u64 hws_address(const struct i915_vma *hws, return hws->node.start + seqno_offset(rq->fence.context); } -static int move_to_active(struct i915_vma *vma, - struct i915_request *rq, - unsigned int flags) -{ - int err; - - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, - flags & EXEC_OBJECT_WRITE); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, flags); - i915_vma_unlock(vma); - - return err; -} - struct i915_request * igt_spinner_create_request(struct igt_spinner *spin, struct intel_context *ce, @@ -165,11 +149,11 @@ igt_spinner_create_request(struct igt_spinner *spin, if (IS_ERR(rq)) return ERR_CAST(rq); - err = move_to_active(vma, rq, 0); + err = igt_vma_move_to_active_unlocked(vma, rq, 0); if (err) goto cancel_rq; - err = move_to_active(hws, rq, 0); + err = igt_vma_move_to_active_unlocked(hws, rq, 0); if (err) goto cancel_rq; diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index bac21fe84ca5..6324eb32d4dd 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -41,7 +41,7 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) } pages = &obj->mm.rsgt->table; - __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); + __i915_gem_object_set_pages(obj, pages); return 0; |