author     Dave Airlie <airlied@redhat.com>    2022-11-04 08:20:12 +0100
committer  Dave Airlie <airlied@redhat.com>    2022-11-04 08:33:34 +0100
commit     60ba8c5bd94e17ab4b024f5cecf8b48e2cf36412 (patch)
tree       7e03a3b457f942c7eb3b865f535bcbe55bb72d11 /drivers
parent     Merge tag 'drm-misc-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm... (diff)
parent     drm/i915/hwmon: Fix a build error used with clang compiler (diff)
Merge tag 'drm-intel-gt-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes:
- Fix for #7306: [Arc A380] white flickering when using arc as a
secondary gpu (Matt A)
- Add Wa_18017747507 for DG2 (Wayne)
- Avoid spurious WARN on DG1 due to incorrect cache_dirty flag
(Niranjana, Matt A)
- Corrections to CS timestamp support for Gen5 and earlier (Ville)
- Fix a build error with the clang compiler in hwmon (GG)
- Improvements to LMEM handling with RPM (Anshuman, Matt A)
- Cleanups in dmabuf code (Mike)
- Selftest improvements (Matt A)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Y2N11wu175p6qeEN@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers')
145 files changed, 6574 insertions, 2035 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 39328567c200..7cc38d25ee5c 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT default 640 # milliseconds help How long to wait (in milliseconds) for a preemption event to occur - when submitting a new context via execlists. If the current context - does not hit an arbitration point and yield to HW before the timer - expires, the HW will be reset to allow the more important context - to execute. + when submitting a new context. If the current context does not hit + an arbitration point and yield to HW before the timer expires, the + HW will be reset to allow the more important context to execute. + + This is adjustable via + /sys/class/drm/card?/engine/*/preempt_timeout_ms + + May be 0 to disable the timeout. + + The compiled in default may get overridden at driver probe time on + certain platforms and certain engines which will be reflected in the + sysfs control. + +config DRM_I915_PREEMPT_TIMEOUT_COMPUTE + int "Preempt timeout for compute engines (ms, jiffy granularity)" + default 7500 # milliseconds + help + How long to wait (in milliseconds) for a preemption event to occur + when submitting a new context to a compute capable engine. If the + current context does not hit an arbitration point and yield to HW + before the timer expires, the HW will be reset to allow the more + important context to execute. This is adjustable via /sys/class/drm/card?/engine/*/preempt_timeout_ms diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index cea00aaca04b..51704b54317c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -209,6 +209,9 @@ i915-y += gt/uc/intel_uc.o \ # graphics system controller (GSC) support i915-y += gt/intel_gsc.o +# graphics hardware monitoring (HWMON) support +i915-$(CONFIG_HWMON) += i915_hwmon.o + # modesetting core code i915-y += \ display/hsw_ips.o \ @@ -310,15 +313,18 @@ i915-y += \ i915-y += i915_perf.o -# Protected execution platform (PXP) support -i915-$(CONFIG_DRM_I915_PXP) += \ +# Protected execution platform (PXP) support. Base support is required for HuC +i915-y += \ pxp/intel_pxp.o \ + pxp/intel_pxp_tee.o \ + pxp/intel_pxp_huc.o + +i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp_cmd.o \ pxp/intel_pxp_debugfs.o \ pxp/intel_pxp_irq.o \ pxp/intel_pxp_pm.o \ - pxp/intel_pxp_session.o \ - pxp/intel_pxp_tee.o + pxp/intel_pxp_session.o # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index ac587647e1f5..ad1a37b515fb 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -5,6 +5,7 @@ #include "gem/i915_gem_domain.h" #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" #include "gt/gen8_ppgtt.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 1dddd6abd77b..6900acbb1381 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -167,7 +167,6 @@ retry: ret = i915_gem_object_attach_phys(obj, alignment); else if (!ret && HAS_LMEM(dev_priv)) ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0); - /* TODO: Do we need to sync when migration becomes async? 
*/ if (!ret) ret = i915_gem_object_pin_pages(obj); if (ret) diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index 6a7ac60e4f76..22ca8754ea96 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -100,9 +100,9 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) rsc[0].flags = IORESOURCE_IRQ; rsc[0].name = "hdmi-lpe-audio-irq"; - rsc[1].start = pci_resource_start(pdev, GTTMMADR_BAR) + + rsc[1].start = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + I915_HDMI_LPE_AUDIO_BASE; - rsc[1].end = pci_resource_start(pdev, GTTMMADR_BAR) + + rsc[1].end = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + I915_HDMI_LPE_AUDIO_BASE + I915_HDMI_LPE_AUDIO_SIZE - 1; rsc[1].flags = IORESOURCE_MEM; rsc[1].name = "hdmi-lpe-audio-mmio"; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1e29b1e6d186..01402f3c58f6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1452,7 +1452,7 @@ static void engines_idle_release(struct i915_gem_context *ctx, int err; /* serialises with execbuf */ - set_bit(CONTEXT_CLOSED_BIT, &ce->flags); + intel_context_close(ce); if (!intel_context_pin_if_active(ce)) continue; @@ -2298,7 +2298,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, } args->ctx_id = id; - drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 07eee1c09aaf..ec6f7ae47783 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -25,43 +25,44 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) return to_intel_bo(buf->priv); } -static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, +static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - struct sg_table *st; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf); + struct sg_table *sgt; struct scatterlist *src, *dst; int ret, i; - /* Copy sg so that we make an independent mapping */ - st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (st == NULL) { + /* + * Make a copy of the object's sgt, so that we can make an independent + * mapping + */ + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { ret = -ENOMEM; goto err; } - ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); + ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL); if (ret) goto err_free; - src = obj->mm.pages->sgl; - dst = st->sgl; - for (i = 0; i < obj->mm.pages->nents; i++) { + dst = sgt->sgl; + for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) { sg_set_page(dst, sg_page(src), src->length, 0); dst = sg_next(dst); - src = sg_next(src); } - ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC); + ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC); if (ret) goto err_free_sg; - return st; + return sgt; err_free_sg: - sg_free_table(st); + sg_free_table(sgt); err_free: - kfree(st); + kfree(sgt); err: return ERR_PTR(ret); } @@ -236,15 +237,15 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { 
struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *pages; + struct sg_table *sgt; unsigned int sg_page_sizes; assert_object_held(obj); - pages = dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + sgt = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); /* * DG1 is special here since it still snoops transactions even with @@ -261,16 +262,16 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) (!HAS_LLC(i915) && !IS_DG1(i915))) wbinvd_on_all_cpus(); - sg_page_sizes = i915_sg_dma_sizes(pages->sgl); - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + sg_page_sizes = i915_sg_dma_sizes(sgt->sgl); + __i915_gem_object_set_pages(obj, sgt, sg_page_sizes); return 0; } static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *sgt) { - dma_buf_unmap_attachment(obj->base.import_attach, pages, + dma_buf_unmap_attachment(obj->base.import_attach, sgt, DMA_BIDIRECTIONAL); } @@ -313,7 +314,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, get_dma_buf(dma_buf); obj = i915_gem_object_alloc(); - if (obj == NULL) { + if (!obj) { ret = -ENOMEM; goto fail_detach; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 845023c14eb3..1160723c9d2d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2954,11 +2954,6 @@ await_fence_array(struct i915_execbuffer *eb, int err; for (n = 0; n < eb->num_fences; n++) { - struct drm_syncobj *syncobj; - unsigned int flags; - - syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); - if (!eb->fences[n].dma_fence) continue; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..629acb403a2c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include <linux/scatterlist.h> #include <linux/slab.h> -#include <linux/swiotlb.h> #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, get_order(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 73d9eda1d6b7..e63329bc8065 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -413,7 +413,7 @@ retry: vma->mmo = mmo; if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) - intel_wakeref_auto(&to_gt(i915)->userfault_wakeref, + intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); if 
(write) { @@ -557,11 +557,13 @@ void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object * drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping); - if (obj->userfault_count) { - /* rpm wakeref provide exclusive access */ - list_del(&obj->userfault_link); - obj->userfault_count = 0; - } + /* + * We have exclusive access here via runtime suspend. All other callers + * must first grab the rpm wakeref. + */ + GEM_BUG_ON(!obj->userfault_count); + list_del(&obj->userfault_link); + obj->userfault_count = 0; } void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) @@ -587,13 +589,6 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) spin_lock(&obj->mmo.lock); } spin_unlock(&obj->mmo.lock); - - if (obj->userfault_count) { - mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock); - list_del(&obj->userfault_link); - mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock); - obj->userfault_count = 0; - } } static struct i915_mmap_offset * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 6b8710ba8ded..733696057761 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -458,6 +458,16 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset io_mapping_unmap(src_map); } +static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj) +{ + GEM_BUG_ON(!i915_gem_object_has_iomem(obj)); + + if (IS_DGFX(to_i915(obj->base.dev))) + return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource); + + return true; +} + /** * i915_gem_object_read_from_page - read data from the page of a GEM object * @obj: GEM object to read from @@ -480,7 +490,7 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, if (i915_gem_object_has_struct_page(obj)) i915_gem_object_read_from_page_kmap(obj, offset, dst, size); - else if (i915_gem_object_has_iomem(obj)) + else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj)) i915_gem_object_read_from_page_iomap(obj, offset, dst, size); else return -ENODEV; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 1723af9b0f6a..6b9ecff42bb5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -482,6 +482,10 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, enum i915_map_type type); +enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + bool always_coherent); + void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, unsigned long offset, unsigned long size); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 4df50b049cea..16f845663ff2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -466,6 +466,18 @@ void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, return ret; } +enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + bool always_coherent) +{ + if (i915_gem_object_is_lmem(obj)) + return I915_MAP_WC; + if (HAS_LLC(i915) || always_coherent) + return I915_MAP_WB; + else + return I915_MAP_WC; +} + void __i915_gem_object_flush_map(struct drm_i915_gem_object 
*obj, unsigned long offset, unsigned long size) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 3428f735e786..0d812f4d787d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -22,9 +22,12 @@ void i915_gem_suspend(struct drm_i915_private *i915) { + struct intel_gt *gt; + unsigned int i; + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); - intel_wakeref_auto(&to_gt(i915)->userfault_wakeref, 0); + intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0); flush_workqueue(i915->wq); /* @@ -36,7 +39,8 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - intel_gt_suspend_prepare(to_gt(i915)); + for_each_gt(gt, i915, i) + intel_gt_suspend_prepare(gt); i915_gem_drain_freed_objects(i915); } @@ -131,7 +135,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) &i915->mm.purge_list, NULL }, **phase; + struct intel_gt *gt; unsigned long flags; + unsigned int i; bool flush = false; /* @@ -154,7 +160,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ - intel_gt_suspend_late(to_gt(i915)); + for_each_gt(gt, i915, i) + intel_gt_suspend_late(gt); spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { @@ -212,7 +219,8 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { - int ret; + struct intel_gt *gt; + int ret, i, j; GEM_TRACE("%s\n", dev_name(i915->drm.dev)); @@ -224,8 +232,25 @@ void i915_gem_resume(struct drm_i915_private *i915) * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(to_gt(i915)); + for_each_gt(gt, i915, i) + if (intel_gt_resume(gt)) + goto err_wedged; ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU); GEM_WARN_ON(ret); + + return; + +err_wedged: + for_each_gt(gt, i915, j) { + if (!intel_gt_is_wedged(gt)) { + dev_err(i915->drm.dev, + "Failed to re-initialize GPU[%u], declaring it wedged!\n", + j); + intel_gt_set_wedged(gt); + } + + if (j == i) + break; + } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index f42ca1179f37..2f7804492cd5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; @@ -369,14 +369,14 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, __start_cpu_write(obj); /* - * On non-LLC platforms, force the flush-on-acquire if this is ever + * On non-LLC igfx platforms, force the flush-on-acquire if this is ever * swapped-in. Our async flush path is not trust worthy enough yet(and * happens in the wrong order), and with some tricks it's conceivable * for userspace to change the cache-level to I915_CACHE_NONE after the * pages are swapped-in, and since execbuf binds the object before doing * the async flush, we have a race window. 
*/ - if (!HAS_LLC(i915)) + if (!HAS_LLC(i915) && !IS_DGFX(i915)) obj->cache_dirty = true; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index acc561c0f0aa..0c70711818ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915, mutex_unlock(&i915->mm.stolen_lock); } -static int i915_adjust_stolen(struct drm_i915_private *i915, - struct resource *dsm) +static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm) +{ + return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start; +} + +static int adjust_stolen(struct drm_i915_private *i915, + struct resource *dsm) { struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct intel_uncore *uncore = ggtt->vm.gt->uncore; - struct resource *r; - if (dsm->start == 0 || dsm->end <= dsm->start) + if (!valid_stolen_size(i915, dsm)) return -EINVAL; /* + * Make sure we don't clobber the GTT if it's within stolen memory + * * TODO: We have yet too encounter the case where the GTT wasn't at the * end of stolen. With that assumption we could simplify this. */ - - /* Make sure we don't clobber the GTT if it's within stolen memory */ if (GRAPHICS_VER(i915) <= 4 && !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { struct resource stolen[2] = {*dsm, *dsm}; @@ -131,12 +135,25 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, } } + if (!valid_stolen_size(i915, dsm)) + return -EINVAL; + + return 0; +} + +static int request_smem_stolen(struct drm_i915_private *i915, + struct resource *dsm) +{ + struct resource *r; + /* - * With stolen lmem, we don't need to check if the address range - * overlaps with the non-stolen system memory range, since lmem is local - * to the gpu. + * With stolen lmem, we don't need to request system memory for the + * address range since it's local to the gpu. + * + * Starting MTL, in IGFX devices the stolen memory is exposed via + * LMEMBAR and shall be considered similar to stolen lmem. */ - if (HAS_LMEM(i915)) + if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915)) return 0; /* @@ -371,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); - *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; - switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { case GEN8_STOLEN_RESERVED_1M: *size = 1024 * 1024; @@ -390,41 +405,30 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, *size = 8 * 1024 * 1024; MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); } + + if (HAS_LMEMBAR_SMEM_STOLEN(i915)) + /* the base is initialized to stolen top so subtract size to get base */ + *base -= *size; + else + *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; } -static int i915_gem_init_stolen(struct intel_memory_region *mem) +/* + * Initialize i915->dsm_reserved to contain the reserved space within the Data + * Stolen Memory. This is a range on the top of DSM that is reserved, not to + * be used by driver, so must be excluded from the region passed to the + * allocator later. In the spec this is also called as WOPCM. + * + * Our expectation is that the reserved space is at the top of the stolen + * region, as it has been the case for every platform, and *never* at the + * bottom, so the calculation here can be simplified. 
+ */ +static int init_reserved_stolen(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = mem->i915; struct intel_uncore *uncore = &i915->uncore; resource_size_t reserved_base, stolen_top; - resource_size_t reserved_total, reserved_size; - - mutex_init(&i915->mm.stolen_lock); - - if (intel_vgpu_active(i915)) { - drm_notice(&i915->drm, - "%s, disabling use of stolen memory\n", - "iGVT-g active"); - return 0; - } - - if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { - drm_notice(&i915->drm, - "%s, disabling use of stolen memory\n", - "DMAR active"); - return 0; - } - - if (resource_size(&mem->region) == 0) - return 0; - - i915->dsm = mem->region; - - if (i915_adjust_stolen(i915, &i915->dsm)) - return 0; - - GEM_BUG_ON(i915->dsm.start == 0); - GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); + resource_size_t reserved_size; + int ret = 0; stolen_top = i915->dsm.end + 1; reserved_base = stolen_top; @@ -455,17 +459,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) &reserved_base, &reserved_size); } - /* - * Our expectation is that the reserved space is at the top of the - * stolen region and *never* at the bottom. If we see !reserved_base, - * it likely means we failed to read the registers correctly. - */ + /* No reserved stolen */ + if (reserved_base == stolen_top) + goto bail_out; + if (!reserved_base) { drm_err(&i915->drm, "inconsistent reservation %pa + %pa; ignoring\n", &reserved_base, &reserved_size); - reserved_base = stolen_top; - reserved_size = 0; + ret = -EINVAL; + goto bail_out; } i915->dsm_reserved = @@ -475,19 +478,55 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) drm_err(&i915->drm, "Stolen reserved area %pR outside stolen memory %pR\n", &i915->dsm_reserved, &i915->dsm); - return 0; + ret = -EINVAL; + goto bail_out; } + return 0; + +bail_out: + i915->dsm_reserved = + (struct resource)DEFINE_RES_MEM(reserved_base, 0); + + return ret; +} + +static int i915_gem_init_stolen(struct intel_memory_region *mem) +{ + struct drm_i915_private *i915 = mem->i915; + + mutex_init(&i915->mm.stolen_lock); + + if (intel_vgpu_active(i915)) { + drm_notice(&i915->drm, + "%s, disabling use of stolen memory\n", + "iGVT-g active"); + return -ENOSPC; + } + + if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { + drm_notice(&i915->drm, + "%s, disabling use of stolen memory\n", + "DMAR active"); + return -ENOSPC; + } + + if (adjust_stolen(i915, &mem->region)) + return -ENOSPC; + + if (request_smem_stolen(i915, &mem->region)) + return -ENOSPC; + + i915->dsm = mem->region; + + if (init_reserved_stolen(i915)) + return -ENOSPC; + /* Exclude the reserved region from driver use */ - mem->region.end = reserved_base - 1; + mem->region.end = i915->dsm_reserved.start - 1; mem->io_size = min(mem->io_size, resource_size(&mem->region)); - /* It is possible for the reserved area to end before the end of stolen - * memory, so just consider the start. */ - reserved_total = stolen_top - reserved_base; - - i915->stolen_usable_size = - resource_size(&i915->dsm) - reserved_total; + i915->stolen_usable_size = resource_size(&mem->region); drm_dbg(&i915->drm, "Memory reserved for graphics device: %lluK, usable: %lluK\n", @@ -495,7 +534,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) (u64)i915->stolen_usable_size >> 10); if (i915->stolen_usable_size == 0) - return 0; + return -ENOSPC; /* Basic memrange allocator for stolen space. 
*/ drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); @@ -733,11 +772,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915, static int init_stolen_smem(struct intel_memory_region *mem) { + int err; + /* * Initialise stolen early so that we may reserve preallocated * objects for the BIOS to KMS transition. */ - return i915_gem_init_stolen(mem); + err = i915_gem_init_stolen(mem); + if (err) + drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n"); + + return 0; } static int release_stolen_smem(struct intel_memory_region *mem) @@ -754,26 +799,25 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = { static int init_stolen_lmem(struct intel_memory_region *mem) { + struct drm_i915_private *i915 = mem->i915; int err; if (GEM_WARN_ON(resource_size(&mem->region) == 0)) - return -ENODEV; + return 0; - /* - * TODO: For stolen lmem we mostly just care about populating the dsm - * related bits and setting up the drm_mm allocator for the range. - * Perhaps split up i915_gem_init_stolen() for this. - */ err = i915_gem_init_stolen(mem); - if (err) - return err; + if (err) { + drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n"); + return 0; + } - if (mem->io_size && !io_mapping_init_wc(&mem->iomap, - mem->io_start, - mem->io_size)) { - err = -EIO; + if (mem->io_size && + !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size)) goto err_cleanup; - } + + drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", + &mem->io_start); + drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start); return 0; @@ -796,6 +840,29 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = { .init_object = _i915_gem_object_stolen_init, }; +static int mtl_get_gms_size(struct intel_uncore *uncore) +{ + u16 ggc, gms; + + ggc = intel_uncore_read16(uncore, GGC); + + /* check GGMS, should be fixed 0x3 (8MB) */ + if ((ggc & GGMS_MASK) != GGMS_MASK) + return -EIO; + + /* return valid GMS value, -EIO if invalid */ + gms = REG_FIELD_GET(GMS_MASK, ggc); + switch (gms) { + case 0x0 ... 0x04: + return gms * 32; + case 0xf0 ... 0xfe: + return (gms - 0xf0 + 1) * 4; + default: + MISSING_CASE(gms); + return -EIO; + } +} + struct intel_memory_region * i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, u16 instance) @@ -806,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, struct intel_memory_region *mem; resource_size_t io_start, io_size; resource_size_t min_page_size; + int ret; if (WARN_ON_ONCE(instance)) return ERR_PTR(-ENODEV); @@ -813,12 +881,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR)) return ERR_PTR(-ENXIO); - /* Use DSM base address instead for stolen memory */ - dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE); - if (IS_DG1(uncore->i915)) { + if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) { lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - if (WARN_ON(lmem_size < dsm_base)) - return ERR_PTR(-ENODEV); } else { resource_size_t lmem_range; @@ -827,13 +891,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, lmem_size *= SZ_1G; } - dsm_size = lmem_size - dsm_base; - if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) { + if (HAS_LMEMBAR_SMEM_STOLEN(i915)) { + /* + * MTL dsm size is in GGC register. + * Also MTL uses offset to DSMBASE in ptes, so i915 + * uses dsm_base = 0 to setup stolen region. 
+ */ + ret = mtl_get_gms_size(uncore); + if (ret < 0) { + drm_err(&i915->drm, "invalid MTL GGC register setting\n"); + return ERR_PTR(ret); + } + + dsm_base = 0; + dsm_size = (resource_size_t)(ret * SZ_1M); + + GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M); + GEM_BUG_ON((dsm_size + SZ_8M) > lmem_size); + } else { + /* Use DSM base address instead for stolen memory */ + dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK; + if (WARN_ON(lmem_size < dsm_base)) + return ERR_PTR(-ENODEV); + dsm_size = lmem_size - dsm_base; + } + + io_size = dsm_size; + if (HAS_LMEMBAR_SMEM_STOLEN(i915)) { + io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + SZ_8M; + } else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) { io_start = 0; io_size = 0; } else { io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base; - io_size = dsm_size; } min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : @@ -847,16 +937,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, if (IS_ERR(mem)) return mem; - /* - * TODO: consider creating common helper to just print all the - * interesting stuff from intel_memory_region, which we can use for all - * our probed regions. - */ - - drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", - &mem->io_start); - drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base); - intel_memory_region_set_name(mem, "stolen-local"); mem->private = true; @@ -881,6 +961,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, intel_memory_region_set_name(mem, "stolen-system"); mem->private = true; + return mem; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 7a1e92c11946..25129af70f70 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -279,7 +279,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, struct i915_ttm_tt *i915_tt; int ret; - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return NULL; i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); @@ -362,7 +362,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return false; /* @@ -509,18 +509,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags) static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - intel_wakeref_t wakeref = 0; - - if (bo->resource && likely(obj)) { - /* ttm_bo_release() already has dma_resv_lock */ - if (i915_ttm_cpu_maps_iomem(bo->resource)) - wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); + if (bo->resource && !i915_ttm_is_ghost_object(bo)) { __i915_gem_object_pages_fini(obj); - - if (wakeref) - intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref); - i915_ttm_free_cached_io_rsgt(obj); } } @@ -538,7 
+529,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); @@ -624,7 +615,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return; ret = i915_ttm_move_notify(bo); @@ -657,7 +648,7 @@ static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo); bool unknown_state; - if (!obj) + if (i915_ttm_is_ghost_object(mem->bo)) return -EINVAL; if (!kref_get_unless_zero(&obj->base.refcount)) @@ -690,7 +681,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long base; unsigned int ofs; - GEM_BUG_ON(!obj); + GEM_BUG_ON(i915_ttm_is_ghost_object(bo)); GEM_WARN_ON(bo->ttm); base = obj->mm.region->iomap.base - obj->mm.region->region.start; @@ -699,6 +690,50 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; } +static int i915_ttm_access_memory(struct ttm_buffer_object *bo, + unsigned long offset, void *buf, + int len, int write) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + resource_size_t iomap = obj->mm.region->iomap.base - + obj->mm.region->region.start; + unsigned long page = offset >> PAGE_SHIFT; + unsigned long bytes_left = len; + + /* + * TODO: For now just let it fail if the resource is non-mappable, + * otherwise we need to perform the memcpy from the gpu here, without + * interfering with the object (like moving the entire thing). + */ + if (!i915_ttm_resource_mappable(bo->resource)) + return -EIO; + + offset -= page << PAGE_SHIFT; + do { + unsigned long bytes = min(bytes_left, PAGE_SIZE - offset); + void __iomem *ptr; + dma_addr_t daddr; + + daddr = i915_gem_object_get_dma_address(obj, page); + ptr = ioremap_wc(iomap + daddr + offset, bytes); + if (!ptr) + return -EIO; + + if (write) + memcpy_toio(ptr, buf, bytes); + else + memcpy_fromio(buf, ptr, bytes); + iounmap(ptr); + + page++; + buf += bytes; + bytes_left -= bytes; + offset = 0; + } while (bytes_left); + + return len; +} + /* * All callbacks need to take care not to downcast a struct ttm_buffer_object * without checking its subclass, since it might be a TTM ghost object. 
@@ -715,6 +750,7 @@ static struct ttm_device_funcs i915_ttm_bo_driver = { .delete_mem_notify = i915_ttm_delete_mem_notify, .io_mem_reserve = i915_ttm_io_mem_reserve, .io_mem_pfn = i915_ttm_io_mem_pfn, + .access_memory = i915_ttm_access_memory, }; /** @@ -990,13 +1026,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) struct vm_area_struct *area = vmf->vma; struct ttm_buffer_object *bo = area->vm_private_data; struct drm_device *dev = bo->base.dev; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); intel_wakeref_t wakeref = 0; vm_fault_t ret; int idx; - obj = i915_ttm_to_gem(bo); - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return VM_FAULT_SIGBUS; /* Sanity check that we allow writing into this object */ @@ -1035,7 +1070,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) } if (err) { - drm_dbg(dev, "Unable to make resource CPU accessible\n"); + drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n", + ERR_PTR(err)); dma_resv_unlock(bo->base.resv); ret = VM_FAULT_SIGBUS; goto out_rpm; @@ -1053,16 +1089,19 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) goto out_rpm; - /* ttm_bo_vm_reserve() already has dma_resv_lock */ + /* + * ttm_bo_vm_reserve() already has dma_resv_lock. + * userfault_count is protected by dma_resv lock and rpm wakeref. + */ if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) { obj->userfault_count = 1; - mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock); - list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list); - mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock); + spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list); + spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); } if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) - intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref, + intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); i915_ttm_adjust_lru(obj); @@ -1094,7 +1133,7 @@ static void ttm_vm_open(struct vm_area_struct *vma) struct drm_i915_gem_object *obj = i915_ttm_to_gem(vma->vm_private_data); - GEM_BUG_ON(!obj); + GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data)); i915_gem_object_get(obj); } @@ -1103,7 +1142,7 @@ static void ttm_vm_close(struct vm_area_struct *vma) struct drm_i915_gem_object *obj = i915_ttm_to_gem(vma->vm_private_data); - GEM_BUG_ON(!obj); + GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data)); i915_gem_object_put(obj); } @@ -1124,7 +1163,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj) { + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + intel_wakeref_t wakeref = 0; + + assert_object_held_shared(obj); + + if (i915_ttm_cpu_maps_iomem(bo->resource)) { + wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); + + /* userfault_count is protected by obj lock and rpm wakeref. 
*/ + if (obj->userfault_count) { + spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + list_del(&obj->userfault_link); + spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + obj->userfault_count = 0; + } + } + ttm_bo_unmap_virtual(i915_gem_to_ttm(obj)); + + if (wakeref) + intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref); } static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index e4842b4296fc..2a94a99ef76b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -28,18 +28,26 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); /** + * i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object. + * @bo: Pointer to the ttm buffer object + * + * Return: True if the ttm bo is not a i915 object but a ghost ttm object, + * False otherwise. + */ +static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo) +{ + return bo->destroy != i915_ttm_bo_destroy; +} + +/** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding * struct drm_i915_gem_object. * - * Return: Pointer to the embedding struct ttm_buffer_object, or NULL - * if the object was not an i915 ttm object. + * Return: Pointer to the embedding struct ttm_buffer_object. */ static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (bo->destroy != i915_ttm_bo_destroy) - return NULL; - return container_of(bo, struct drm_i915_gem_object, __do_not_access); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 9a7e50534b84..f59f812dc6d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, bool clear; int ret; - if (GEM_WARN_ON(!obj)) { + if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) { ttm_bo_move_null(bo, dst_mem); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index d4398948f016..1b1a22716722 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev); struct sg_table *st; unsigned int sg_page_sizes; struct page **pvec; @@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_readonly(obj)) gup_flags |= FOLL_WRITE; - pinned = ret = 0; + pinned = 0; while (pinned < num_pages) { ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE, num_pages - pinned, gup_flags, @@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) pinned += ret; } - ret = 0; ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index c570cf780079..0cb99e75b0bc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ 
b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915, GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); size = obj->base.size; - if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + !HAS_64K_PAGES(i915)) size = round_up(size, I915_GTT_PAGE_SIZE_2M); n = 0; @@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915, * size and ensure the vma offset is at the start of the pt * boundary, however to improve coverage we opt for testing both * aligned and unaligned offsets. + * + * With PS64 this is no longer the case, but to ensure we + * sometimes get the compact layout for smaller objects, apply + * the round_up anyway. */ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) offset_low = round_down(offset_low, @@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg) { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, + { SZ_2M + SZ_64K, SZ_64K }, }; int i, j; int err; @@ -1540,6 +1546,154 @@ out_put: return err; } +static int igt_ppgtt_mixed(void *arg) +{ + struct drm_i915_private *i915 = arg; + const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct file *file; + I915_RND_STATE(prng); + LIST_HEAD(objects); + struct intel_memory_region *mr; + struct i915_vma *vma; + unsigned int count; + u32 i, addr; + int *order; + int n, err; + + /* + * Sanity check mixing 4K and 64K pages within the same page-table via + * the new PS64 TLB hint. 
+ */ + + if (!HAS_64K_PAGES(i915)) { + pr_info("device lacks PS64, skipping\n"); + return 0; + } + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + + i = 0; + addr = 0; + do { + u32 sz; + + sz = i915_prandom_u32_max_state(SZ_4M, &prng); + sz = max_t(u32, sz, SZ_4K); + + mr = i915->mm.regions[INTEL_REGION_LMEM_0]; + if (i & 1) + mr = i915->mm.regions[INTEL_REGION_SMEM]; + + obj = i915_gem_object_create_region(mr, sz, 0, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } + + list_add_tail(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_put; + } + + addr = round_up(addr, mr->min_page_size); + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) + goto err_put; + + if (mr->type == INTEL_MEMORY_LOCAL && + (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) { + err = -EINVAL; + goto err_put; + } + + addr += obj->base.size; + i++; + } while (addr <= SZ_16M); + + n = 0; + count = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + n++; + } + i915_gem_context_unlock_engines(ctx); + if (!n) + goto err_put; + + order = i915_random_order(count * count, &prng); + if (!order) { + err = -ENOMEM; + goto err_put; + } + + i = 0; + addr = 0; + engines = i915_gem_context_lock_engines(ctx); + list_for_each_entry(obj, &objects, st_link) { + u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng); + + addr = round_up(addr, obj->mm.region->min_page_size); + + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + offset_in_page(rnd) / sizeof(u32), rnd + 1); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + (PAGE_SIZE / sizeof(u32)) - 1, + rnd + 2); + if (err) + break; + + addr += obj->base.size; + + cond_resched(); + } + + i915_gem_context_unlock_engines(ctx); + kfree(order); +err_put: + list_for_each_entry_safe(obj, on, &objects, st_link) { + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +out_vm: + i915_vm_put(vm); +out: + fput(file); + return err; +} + static int igt_tmpfs_fallback(void *arg) { struct drm_i915_private *i915 = arg; @@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), SUBTEST(igt_ppgtt_compact), + SUBTEST(igt_ppgtt_mixed), }; if (!HAS_PPGTT(i915)) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index c6ad67b90e8a..d8864444432b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -179,97 +179,108 @@ out_file: } struct parallel_switch { - struct task_struct *tsk; + struct kthread_worker *worker; + struct kthread_work work; struct intel_context *ce[2]; + int result; }; -static int __live_parallel_switch1(void *data) +static void __live_parallel_switch1(struct kthread_work *work) { - struct parallel_switch *arg = data; + struct parallel_switch *arg = + container_of(work, typeof(*arg), work); IGT_TIMEOUT(end_time); 
unsigned long count; count = 0; + arg->result = 0; do { struct i915_request *rq = NULL; - int err, n; + int n; - err = 0; - for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { + for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { struct i915_request *prev = rq; rq = i915_request_create(arg->ce[n]); if (IS_ERR(rq)) { i915_request_put(prev); - return PTR_ERR(rq); + arg->result = PTR_ERR(rq); + break; } i915_request_get(rq); if (prev) { - err = i915_request_await_dma_fence(rq, &prev->fence); + arg->result = + i915_request_await_dma_fence(rq, + &prev->fence); i915_request_put(prev); } i915_request_add(rq); } + + if (IS_ERR_OR_NULL(rq)) + break; + if (i915_request_wait(rq, 0, HZ) < 0) - err = -ETIME; + arg->result = -ETIME; + i915_request_put(rq); - if (err) - return err; count++; - } while (!__igt_timeout(end_time, NULL)); + } while (!arg->result && !__igt_timeout(end_time, NULL)); - pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); - return 0; + pr_info("%s: %lu switches (sync) <%d>\n", + arg->ce[0]->engine->name, count, arg->result); } -static int __live_parallel_switchN(void *data) +static void __live_parallel_switchN(struct kthread_work *work) { - struct parallel_switch *arg = data; + struct parallel_switch *arg = + container_of(work, typeof(*arg), work); struct i915_request *rq = NULL; IGT_TIMEOUT(end_time); unsigned long count; int n; count = 0; + arg->result = 0; do { - for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { struct i915_request *prev = rq; - int err = 0; rq = i915_request_create(arg->ce[n]); if (IS_ERR(rq)) { i915_request_put(prev); - return PTR_ERR(rq); + arg->result = PTR_ERR(rq); + break; } i915_request_get(rq); if (prev) { - err = i915_request_await_dma_fence(rq, &prev->fence); + arg->result = + i915_request_await_dma_fence(rq, + &prev->fence); i915_request_put(prev); } i915_request_add(rq); - if (err) { - i915_request_put(rq); - return err; - } } count++; - } while (!__igt_timeout(end_time, NULL)); - i915_request_put(rq); + } while (!arg->result && !__igt_timeout(end_time, NULL)); - pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); - return 0; + if (!IS_ERR_OR_NULL(rq)) + i915_request_put(rq); + + pr_info("%s: %lu switches (many) <%d>\n", + arg->ce[0]->engine->name, count, arg->result); } static int live_parallel_switch(void *arg) { struct drm_i915_private *i915 = arg; - static int (* const func[])(void *arg) = { + static void (* const func[])(struct kthread_work *) = { __live_parallel_switch1, __live_parallel_switchN, NULL, @@ -277,7 +288,7 @@ static int live_parallel_switch(void *arg) struct parallel_switch *data = NULL; struct i915_gem_engines *engines; struct i915_gem_engines_iter it; - int (* const *fn)(void *arg); + void (* const *fn)(struct kthread_work *); struct i915_gem_context *ctx; struct intel_context *ce; struct file *file; @@ -348,9 +359,22 @@ static int live_parallel_switch(void *arg) } } + for (n = 0; n < count; n++) { + struct kthread_worker *worker; + + if (!data[n].ce[0]) + continue; + + worker = kthread_create_worker(0, "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(worker)) + goto out; + + data[n].worker = worker; + } + for (fn = func; !err && *fn; fn++) { struct igt_live_test t; - int n; err = igt_live_test_begin(&t, i915, __func__, ""); if (err) @@ -360,30 +384,17 @@ static int live_parallel_switch(void *arg) if (!data[n].ce[0]) continue; - data[n].tsk = kthread_run(*fn, &data[n], - "igt/parallel:%s", - 
data[n].ce[0]->engine->name); - if (IS_ERR(data[n].tsk)) { - err = PTR_ERR(data[n].tsk); - break; - } - get_task_struct(data[n].tsk); + data[n].result = 0; + kthread_init_work(&data[n].work, *fn); + kthread_queue_work(data[n].worker, &data[n].work); } - yield(); /* start all threads before we kthread_stop() */ - for (n = 0; n < count; n++) { - int status; - - if (IS_ERR_OR_NULL(data[n].tsk)) - continue; - - status = kthread_stop(data[n].tsk); - if (status && !err) - err = status; - - put_task_struct(data[n].tsk); - data[n].tsk = NULL; + if (data[n].ce[0]) { + kthread_flush_work(&data[n].work); + if (data[n].result && !err) + err = data[n].result; + } } if (igt_live_test_end(&t)) @@ -399,6 +410,9 @@ out: intel_context_unpin(data[n].ce[m]); intel_context_put(data[n].ce[m]); } + + if (data[n].worker) + kthread_destroy_worker(data[n].worker); } kfree(data); out_file: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index f2f3cfad807b..e57f9390076c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -6,8 +6,12 @@ #include "i915_drv.h" #include "i915_selftest.h" +#include "gem/i915_gem_context.h" +#include "mock_context.h" #include "mock_dmabuf.h" +#include "igt_gem_utils.h" +#include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" static int igt_dmabuf_export(void *arg) @@ -140,6 +144,75 @@ out_ret: return err; } +static int verify_access(struct drm_i915_private *i915, + struct drm_i915_gem_object *native_obj, + struct drm_i915_gem_object *import_obj) +{ + struct i915_gem_engines_iter it; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct i915_vma *vma; + struct file *file; + u32 *vaddr; + int err = 0, i; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (intel_engine_can_store_dword(ce->engine)) + break; + } + i915_gem_context_unlock_engines(ctx); + if (!ce) + goto out_file; + + vma = i915_vma_instance(import_obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_file; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_file; + + err = igt_gpu_fill_dw(ce, vma, 0, + vma->size >> PAGE_SHIFT, 0xdeadbeaf); + i915_vma_unpin(vma); + if (err) + goto out_file; + + err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT); + if (err) + goto out_file; + + vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_file; + } + + for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) { + if (vaddr[i] != 0xdeadbeaf) { + pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]); + err = -EINVAL; + goto out_file; + } + } + +out_file: + fput(file); + return err; +} + static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, struct intel_memory_region **regions, unsigned int num_regions) @@ -154,7 +227,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, force_different_devices = true; - obj = __i915_gem_object_create_user(i915, PAGE_SIZE, + obj = __i915_gem_object_create_user(i915, SZ_8M, regions, num_regions); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", @@ -206,6 +279,10 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private 
*i915, i915_gem_object_unlock(import_obj); + err = verify_access(i915, obj, import_obj); + if (err) + goto out_import; + /* Now try a fake an importer */ import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev); if (IS_ERR(import_attach)) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b73c91aa5450..1cae24349a96 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -8,6 +8,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index e49fa6fa6aee..e1c76e5bfa82 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -396,15 +396,17 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } -static int __gen125_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags, - u32 arb) +static int __xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags, + u32 arb) { struct intel_context *ce = rq->context; u32 wa_offset = lrc_indirect_bb(ce); u32 *cs; + GEM_BUG_ON(!ce->wa_bb_page); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -435,18 +437,18 @@ static int __gen125_emit_bb_start(struct i915_request *rq, return 0; } -int gen125_emit_bb_start_noarb(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) +int xehp_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) { - return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE); + return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE); } -int gen125_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) +int xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) { - return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE); + return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE); } int gen8_emit_bb_start_noarb(struct i915_request *rq, @@ -583,6 +585,8 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE, @@ -600,15 +604,21 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen8_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); return gen8_emit_fini_breadcrumb_tail(rq, cs); } @@ -715,6 +725,7 @@ u32 
*gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->engine->i915; u32 flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -731,11 +742,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) else if (rq->engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0); + + /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen12_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - flags); + 0, + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); return gen12_emit_fini_breadcrumb_tail(rq, cs); } diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index e4d24c811dd6..655e5c00ddc2 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -32,12 +32,12 @@ int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags); -int gen125_emit_bb_start_noarb(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags); -int gen125_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags); +int xehp_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +int xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 2128b7a72a25..4daaa6f55668 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -476,6 +476,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; + u64 end = start + vma_res->vma_size; GEM_BUG_ON(!i915_vm_is_4lvl(vm)); @@ -489,9 +490,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, gen8_pte_t encode = pte_encode; unsigned int page_size; gen8_pte_t *vaddr; - u16 index, max; + u16 index, max, nent, i; max = I915_PDES; + nent = 1; if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && @@ -503,25 +505,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, vaddr = px_vaddr(pd); } else { - if (encode & GEN12_PPGTT_PTE_LM) { - GEM_BUG_ON(__gen8_pte_index(start, 0) % 16); - GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K); - GEM_BUG_ON(!IS_ALIGNED(iter->dma, - I915_GTT_PAGE_SIZE_64K)); - - index = __gen8_pte_index(start, 0) / 16; - page_size = I915_GTT_PAGE_SIZE_64K; - - max /= 16; - - vaddr = px_vaddr(pd); - vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; + index = __gen8_pte_index(start, 0); + page_size = I915_GTT_PAGE_SIZE; - pt->is_compact = true; - } else { - GEM_BUG_ON(pt->is_compact); - index = __gen8_pte_index(start, 0); - page_size = I915_GTT_PAGE_SIZE; + if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + /* + * Device local-memory on these platforms should + * always use 64K pages or larger (including GTT + * alignment), therefore if we know the whole + * page-table needs to be filled we can always + * safely use the compact-layout. 
Otherwise fall + * back to the TLB hint with PS64. If this is + * system memory we only bother with PS64. + */ + if ((encode & GEN12_PPGTT_PTE_LM) && + end - start >= SZ_2M && !index) { + index = __gen8_pte_index(start, 0) / 16; + page_size = I915_GTT_PAGE_SIZE_64K; + + max /= 16; + + vaddr = px_vaddr(pd); + vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; + + pt->is_compact = true; + } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + rem >= I915_GTT_PAGE_SIZE_64K && + !(index % 16)) { + encode |= GEN12_PTE_PS64; + page_size = I915_GTT_PAGE_SIZE_64K; + nent = 16; + } } vaddr = px_vaddr(pt); @@ -529,7 +543,12 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, do { GEM_BUG_ON(rem < page_size); - vaddr[index++] = encode | iter->dma; + + for (i = 0; i < nent; i++) { + vaddr[index++] = + encode | (iter->dma + i * + I915_GTT_PAGE_SIZE); + } start += page_size; iter->dma += page_size; @@ -745,6 +764,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K)); GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K)); + /* XXX: we don't strictly need to use this layout */ + if (!pt->is_compact) { vaddr = px_vaddr(pd); vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K; @@ -929,29 +950,18 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); - if (HAS_LMEM(gt->i915)) { + if (HAS_LMEM(gt->i915)) ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; - - /* - * On some platforms the hw has dropped support for 4K GTT pages - * when dealing with LMEM, and due to the design of 64K GTT - * pages in the hw, we can only mark the *entire* page-table as - * operating in 64K GTT mode, since the enable bit is still on - * the pde, and not the pte. And since we still need to allow - * 4K GTT pages for SMEM objects, we can't have a "normal" 4K - * page-table with scratch pointing to LMEM, since that's - * undefined from the hw pov. The simplest solution is to just - * move the 64K scratch page to SMEM on such platforms and call - * it a day, since that should work for all configurations. - */ - if (HAS_64K_PAGES(gt->i915)) - ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - else - ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem; - } else { + else ppgtt->vm.alloc_pt_dma = alloc_pt_dma; - ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - } + + /* + * Using SMEM here instead of LMEM has the advantage of not reserving + * high performance memory for a "never" used filler page. It also + * removes the device access that would be required to initialise the + * scratch page, reducing pressure on an even scarcer resource. 
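
The comment block above packs three cases into one branch; distilled into a standalone decision helper (the enum and function are invented for illustration, and the whole question only arises when the scatterlist actually carries 64K-or-larger chunks, per the surrounding page_sizes.sg check):

enum pte_mode { PTE_4K, PTE_COMPACT_64K, PTE_PS64 };

static enum pte_mode pick_pte_mode(bool lmem, u64 start, u64 end,
                                   u64 dma, u64 remaining, u16 index)
{
    /* Local memory with a fully covered 2M page table: flip the whole PT
     * into the compact 64K layout (the enable bit lives in the PDE).
     */
    if (lmem && end - start >= SZ_2M && index == 0)
        return PTE_COMPACT_64K;

    /* Otherwise a 64K-aligned chunk starting on a 16-PTE boundary can at
     * least use the PS64 TLB hint; this is also the only option for
     * system memory.
     */
    if (IS_ALIGNED(dma, SZ_64K) && remaining >= SZ_64K && !(index % 16))
        return PTE_PS64;

    return PTE_4K;
}

The PS64 case then writes 16 consecutive PTEs per 64K chunk, which is exactly what the new nent-based inner loop above does.
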
+ */ + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; ppgtt->vm.pte_encode = gen8_pte_encode; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index be09fb2e883a..fb62b7b8cbcd 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -276,6 +276,14 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); } +static inline void intel_context_close(struct intel_context *ce) +{ + set_bit(CONTEXT_CLOSED_BIT, &ce->flags); + + if (ce->ops->close) + ce->ops->close(ce); +} + static inline bool intel_context_is_closed(const struct intel_context *ce) { return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 04eacae1aca5..e36670f2e626 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -43,6 +43,8 @@ struct intel_context_ops { void (*revoke)(struct intel_context *ce, struct i915_request *rq, unsigned int preempt_timeout_ms); + void (*close)(struct intel_context *ce); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); @@ -197,8 +199,6 @@ struct intel_context { * context's submissions is complete. */ struct i915_sw_fence blocked; - /** @number_committed_requests: number of committed requests */ - int number_committed_requests; /** @requests: list of active requests on this context */ struct list_head requests; /** @prio: the context's current guc priority */ @@ -208,6 +208,11 @@ struct intel_context { * each priority bucket */ u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; + /** + * @sched_disable_delay_work: worker to disable scheduling on this + * context + */ + struct delayed_work sched_disable_delay_work; } guc_state; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 04e435bce79b..cbc8b857d5f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine) return engine->hung_ce; } +u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1f7188129cd1..3b7d750ad054 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -486,6 +486,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->logical_mask = BIT(logical_instance); __sprint_engine_name(engine); + if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && + __ffs(CCS_MASK(engine->gt)) == engine->instance) || + engine->class == RENDER_CLASS) + engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; + + /* features common between engines sharing EUs */ + if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { + engine->flags |= 
I915_ENGINE_HAS_RCS_REG_STATE; + engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; + } + engine->props.heartbeat_interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL; engine->props.max_busywait_duration_ns = @@ -497,20 +508,34 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->props.timeslice_duration_ms = CONFIG_DRM_I915_TIMESLICE_DURATION; - /* Override to uninterruptible for OpenCL workloads. */ - if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) - engine->props.preempt_timeout_ms = 0; - - if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && - __ffs(CCS_MASK(engine->gt)) == engine->instance) || - engine->class == RENDER_CLASS) - engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; - - /* features common between engines sharing EUs */ - if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { - engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; - engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; - } + /* + * Mid-thread pre-emption is not available in Gen12. Unfortunately, + * some compute workloads run quite long threads. That means they get + * reset due to not pre-empting in a timely manner. So, bump the + * pre-emption timeout value to be much higher for compute engines. + */ + if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) + engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE; + + /* Cap properties according to any system limits */ +#define CLAMP_PROP(field) \ + do { \ + u64 clamp = intel_clamp_##field(engine, engine->props.field); \ + if (clamp != engine->props.field) { \ + drm_notice(&engine->i915->drm, \ + "Warning, clamping %s to %lld to prevent overflow\n", \ + #field, clamp); \ + engine->props.field = clamp; \ + } \ + } while (0) + + CLAMP_PROP(heartbeat_interval_ms); + CLAMP_PROP(max_busywait_duration_ns); + CLAMP_PROP(preempt_timeout_ms); + CLAMP_PROP(stop_timeout_ms); + CLAMP_PROP(timeslice_duration_ms); + +#undef CLAMP_PROP engine->defaults = engine->props; /* never to change again */ @@ -534,6 +559,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, return 0; } +u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value) +{ + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value) +{ + value = min(value, jiffies_to_nsecs(2)); + + return value; +} + +u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value) +{ + /* + * NB: The GuC API only supports 32bit values. However, the limit is further + * reduced due to internal calculations which would otherwise overflow. + */ + if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + value = min_t(u64, value, guc_policy_max_preempt_timeout_ms()); + + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value) +{ + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value) +{ + /* + * NB: The GuC API only supports 32bit values. However, the limit is further + * reduced due to internal calculations which would otherwise overflow. 
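
The intel_clamp_*() helpers above are exported through intel_engine.h (see the declarations added earlier in this diff) rather than kept static, so any path that accepts a new timeout can apply the same ceilings that CLAMP_PROP() applies at setup. A hypothetical caller, for illustration only (the wrapper below is invented; only intel_clamp_preempt_timeout_ms() comes from this patch):

    static int set_preempt_timeout_ms(struct intel_engine_cs *engine, u64 ms)
    {
        u64 clamped = intel_clamp_preempt_timeout_ms(engine, ms);

        if (clamped != ms)    /* beyond the GuC/jiffies limits */
            return -EINVAL;

        WRITE_ONCE(engine->props.preempt_timeout_ms, ms);
        return 0;
    }
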
+ */ + if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + value = min_t(u64, value, guc_policy_max_exec_quantum_ms()); + + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + static void __setup_engine_capabilities(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1274,8 +1348,13 @@ int intel_engines_init(struct intel_gt *gt) return err; err = setup(engine); - if (err) + if (err) { + intel_engine_cleanup_common(engine); return err; + } + + /* The backend should now be responsible for cleanup */ + GEM_BUG_ON(engine->release == NULL); err = engine_init_common(engine); if (err) @@ -1554,11 +1633,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, for_each_ss_steering(iter, engine->gt, slice, subslice) { instdone->sampler[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, + GEN8_SAMPLER_INSTDONE, slice, subslice); instdone->row[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, + GEN8_ROW_INSTDONE, slice, subslice); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index a3698f611f45..9a527e1f5be6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -22,9 +22,37 @@ static bool next_heartbeat(struct intel_engine_cs *engine) { + struct i915_request *rq; long delay; delay = READ_ONCE(engine->props.heartbeat_interval_ms); + + rq = engine->heartbeat.systole; + + /* + * FIXME: The final period extension is disabled if the period has been + * modified from the default. This is to prevent issues with certain + * selftests which override the value and expect specific behaviour. + * Once the selftests have been updated to either cope with variable + * heartbeat periods (or to override the pre-emption timeout as well, + * or just to add a selftest specific override of the extension), the + * generic override can be removed. + */ + if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER && + delay == engine->defaults.heartbeat_interval_ms) { + long longer; + + /* + * The final try is at the highest priority possible. Up until now + * a pre-emption might not even have been attempted. So make sure + * this last attempt allows enough time for a pre-emption to occur. + */ + longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2; + longer = intel_clamp_heartbeat_interval_ms(engine, longer); + if (longer > delay) + delay = longer; + } + if (!delay) return false; @@ -288,6 +316,17 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine, if (!delay && !intel_engine_has_preempt_reset(engine)) return -ENODEV; + /* FIXME: Remove together with equally marked hack in next_heartbeat. 
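
To put numbers on the final-pulse extension above, assume (values chosen purely for illustration) a 3000 ms heartbeat interval and an 8000 ms compute preempt timeout. For the last, barrier-priority heartbeat the wait becomes max(3000, 2 * 8000) = 16000 ms, so the engine is only declared hung after the preemption requested by that heartbeat has had a full timeout window, with margin, to either complete or itself trigger a reset.
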
*/ + if (delay != engine->defaults.heartbeat_interval_ms && + delay < 2 * engine->props.preempt_timeout_ms) { + if (intel_engine_uses_guc(engine)) + drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may downgrade individual engine resets to full GPU resets!\n", + engine->name); + else + drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may cause engine resets to target innocent contexts!\n", + engine->name); + } + intel_engine_pm_get(engine); err = mutex_lock_interruptible(&ce->timeline->mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index fe1a0d5fd4b1..ee3efd06ee54 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -201,6 +201,7 @@ #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */ #define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) +#define MI_PREDICATE_RESULT_2_ENGINE(base) _MMIO((base) + 0x3bc) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) #define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index c718e6dc40b5..0187bc72310d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3471,9 +3471,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen125_emit_bb_start; + engine->emit_bb_start = xehp_emit_bb_start; else - engine->emit_bb_start = gen125_emit_bb_start_noarb; + engine->emit_bb_start = xehp_emit_bb_start_noarb; } else { if (intel_engine_has_preemption(engine)) engine->emit_bb_start = gen8_emit_bb_start; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 2049a00417af..2518cebbf931 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -871,8 +871,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) u32 pte_flags; int ret; - GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); - phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915); + GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); + phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); /* * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range @@ -931,11 +931,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) unsigned int size; u16 snb_gmch_ctl; - if (!HAS_LMEM(i915)) { - if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR)) + if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) { + if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) return -ENXIO; - ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR); + ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); ggtt->mappable_end = resource_size(&ggtt->gmadr); } @@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = gen8_ggtt_pte_encode; - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(ggtt->vm.gt); return ggtt_probe_common(ggtt, size); } @@ -1089,10 +1089,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) 
unsigned int size; u16 snb_gmch_ctl; - if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR)) + if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) return -ENXIO; - ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR); + ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); ggtt->mappable_end = resource_size(&ggtt->gmadr); /* @@ -1308,7 +1308,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) wbinvd_on_all_cpus(); if (GRAPHICS_VER(ggtt->vm.i915) >= 8) - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(ggtt->vm.gt); intel_ggtt_restore_fences(ggtt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index d4e9702d3c8e..f50ea92910d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -187,6 +187,10 @@ #define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) #define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ +#define MI_OPCODE(x) (((x) >> 23) & 0x3f) +#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0))) +#define MI_LRI_LEN(x) (((x) & 0xff) + 1) + /* * 3D instructions used by the kernel */ diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 7af6db3194dd..976fdf27e790 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -7,6 +7,7 @@ #include <linux/mei_aux.h> #include "i915_drv.h" #include "i915_reg.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gt/intel_gsc.h" #include "gt/intel_gt.h" @@ -142,8 +143,14 @@ static void gsc_destroy_one(struct drm_i915_private *i915, struct intel_gsc_intf *intf = &gsc->intf[intf_id]; if (intf->adev) { - auxiliary_device_delete(&intf->adev->aux_dev); - auxiliary_device_uninit(&intf->adev->aux_dev); + struct auxiliary_device *aux_dev = &intf->adev->aux_dev; + + if (intf_id == 0) + intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc, + aux_dev->dev.bus); + + auxiliary_device_delete(aux_dev); + auxiliary_device_uninit(aux_dev); intf->adev = NULL; } @@ -242,14 +249,24 @@ add_device: goto fail; } + intf->adev = adev; /* needed by the notifier */ + + if (intf_id == 0) + intel_huc_register_gsc_notifier(&gsc_to_gt(gsc)->uc.huc, + aux_dev->dev.bus); + ret = auxiliary_device_add(aux_dev); if (ret < 0) { drm_err(&i915->drm, "gsc aux add failed %d\n", ret); + if (intf_id == 0) + intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc, + aux_dev->dev.bus); + intf->adev = NULL; + /* adev will be freed with the put_device() and .release sequence */ auxiliary_device_uninit(aux_dev); goto fail; } - intf->adev = adev; return; fail: diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..8e914c4066ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -40,8 +40,6 @@ void intel_gt_common_init_early(struct intel_gt *gt) { spin_lock_init(gt->irq_lock); - INIT_LIST_HEAD(>->lmem_userfault_list); - mutex_init(>->lmem_userfault_lock); INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -231,6 +229,16 @@ static void gen6_clear_engine_error_register(struct intel_engine_cs *engine) GEN6_RING_FAULT_REG_POSTING_READ(engine); } +i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt) +{ + /* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */ + if (GRAPHICS_VER(gt->i915) < 11) + return INVALID_MMIO_REG; + + return gt->type == GT_MEDIA ? 
+ MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS; +} + void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask) @@ -260,7 +268,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt, I915_MASTER_ERROR_INTERRUPT); } - if (GRAPHICS_VER(i915) >= 12) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG, + RING_FAULT_VALID, 0); + intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); + } else if (GRAPHICS_VER(i915) >= 12) { rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); } else if (GRAPHICS_VER(i915) >= 8) { @@ -298,6 +310,42 @@ static void gen6_check_faults(struct intel_gt *gt) } } +static void xehp_check_faults(struct intel_gt *gt) +{ + u32 fault; + + /* + * Although the fault register now lives in an MCR register range, + * the GAM registers are special and we only truly need to read + * the "primary" GAM instance rather than handling each instance + * individually. intel_gt_mcr_read_any() will automatically steer + * toward the primary instance. + */ + fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); + if (fault & RING_FAULT_VALID) { + u32 fault_data0, fault_data1; + u64 fault_addr; + + fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0); + fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1); + + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + + drm_dbg(>->i915->drm, "Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + } +} + static void gen8_check_faults(struct intel_gt *gt) { struct intel_uncore *uncore = gt->uncore; @@ -344,7 +392,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; /* From GEN8 onwards we only have one 'All Engine Fault Register' */ - if (GRAPHICS_VER(i915) >= 8) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + xehp_check_faults(gt); + else if (GRAPHICS_VER(i915) >= 8) gen8_check_faults(gt); else if (GRAPHICS_VER(i915) >= 6) gen6_check_faults(gt); @@ -807,7 +857,6 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) } intel_uncore_init_early(gt->uncore, gt); - intel_wakeref_auto_init(>->userfault_wakeref, gt->uncore->rpm); ret = intel_uncore_setup_mmio(gt->uncore, phys_addr); if (ret) @@ -828,7 +877,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) unsigned int i; int ret; - mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; + mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915)); phys_addr = pci_resource_start(pdev, mmio_bar); /* @@ -939,7 +988,10 @@ void intel_gt_info_print(const struct intel_gt_info *info, } struct reg_and_bit { - i915_reg_t reg; + union { + i915_reg_t reg; + i915_mcr_reg_t mcr_reg; + }; u32 bit; }; @@ -965,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, return rb; } +/* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. 
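
For the fault decoding above: FAULT_TLB_DATA0 contributes the page-aligned portion of the virtual address (its 32 bits land at 43:12 after the shift by 12) and the masked part of FAULT_TLB_DATA1 supplies the bits above that via the shift by 44. A worked example with invented register values:

    /* fault_data0 = 0x00012345, (fault_data1 & FAULT_VA_HIGH_BITS) = 0x3:
     *
     *   fault_addr = (0x3ULL << 44) | (0x00012345ULL << 12)
     *              = 0x0000300012345000
     */
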
+ */ +#define TLB_INVAL_TIMEOUT_US 100 +#define TLB_INVAL_TIMEOUT_MS 4 + +/* + * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets + * but are now considered MCR registers. Since they exist within a GAM range, + * the primary instance of the register rolls up the status from each unit. + */ +static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb) +{ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS); + else + return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS, + NULL); +} + static void mmio_invalidate_full(struct intel_gt *gt) { static const i915_reg_t gen8_regs[] = { @@ -980,6 +1058,13 @@ static void mmio_invalidate_full(struct intel_gt *gt) [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR, }; + static const i915_mcr_reg_t xehp_regs[] = { + [RENDER_CLASS] = XEHP_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR, + [COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR, + }; struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; @@ -988,7 +1073,10 @@ static void mmio_invalidate_full(struct intel_gt *gt) const i915_reg_t *regs; unsigned int num = 0; - if (GRAPHICS_VER(i915) == 12) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + regs = NULL; + num = ARRAY_SIZE(xehp_regs); + } else if (GRAPHICS_VER(i915) == 12) { regs = gen12_regs; num = ARRAY_SIZE(gen12_regs); } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { @@ -1013,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!intel_engine_pm_is_awake(engine)) continue; - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (!i915_mmio_reg_offset(rb.reg)) - continue; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + intel_gt_mcr_multicast_write_fw(gt, + xehp_regs[engine->class], + BIT(engine->instance)); + } else { + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } awake |= engine->mask; } @@ -1037,22 +1131,17 @@ static void mmio_invalidate_full(struct intel_gt *gt) for_each_engine_masked(engine, gt, awake, tmp) { struct reg_and_bit rb; - /* - * HW architecture suggest typical invalidation time at 40us, - * with pessimistic cases up to 100us and a recommendation to - * cap at 1ms. We go a bit higher just in case. 
- */ - const unsigned int timeout_us = 100; - const unsigned int timeout_ms = 4; - - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (__intel_wait_for_register_fw(uncore, - rb.reg, rb.bit, 0, - timeout_us, timeout_ms, - NULL)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + rb.mcr_reg = xehp_regs[engine->class]; + rb.bit = BIT(engine->instance); + } else { + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + } + + if (wait_for_invalidate(gt, rb)) drm_err_ratelimited(>->i915->drm, "%s TLB invalidation did not complete in %ums!\n", - engine->name, timeout_ms); + engine->name, TLB_INVAL_TIMEOUT_MS); } /* diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 2ee582e287c8..e0365d556248 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -60,6 +60,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915); int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_check_and_clear_faults(struct intel_gt *gt); +i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt); void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 3f656d3dba9a..2a6a4ca7fdad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -107,7 +107,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore) return freq; } -static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) +static u32 gen6_read_clock_frequency(struct intel_uncore *uncore) { /* * PRMs say: @@ -119,7 +119,27 @@ static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) return 12500000; } -static u32 gen2_read_clock_frequency(struct intel_uncore *uncore) +static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) +{ + /* + * 63:32 increments every 1000 ns + * 31:0 mbz + */ + return 1000000000 / 1000; +} + +static u32 g4x_read_clock_frequency(struct intel_uncore *uncore) +{ + /* + * 63:20 increments every 1/4 ns + * 19:0 mbz + * + * -> 63:32 increments every 1024 ns + */ + return 1000000000 / 1024; +} + +static u32 gen4_read_clock_frequency(struct intel_uncore *uncore) { /* * PRMs say: @@ -127,8 +147,10 @@ static u32 gen2_read_clock_frequency(struct intel_uncore *uncore) * "The value in this register increments once every 16 * hclks." (through the “Clocking Configuration” * (“CLKCFG”) MCHBAR register) + * + * Testing on actual hardware has shown there is no /16. 
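
Worth noting about the reworked mmio_invalidate_full() above: invalidation remains two separate passes over the awake engines, one that only writes the per-class invalidate bits (multicast on Xe_HP and later, plain MMIO writes before that) and one that polls for completion, so the individual invalidations overlap rather than each being serialized behind its own 100 us/4 ms wait. The shape, as a skeleton only (kick_tlb_invalidate() is a stand-in, not a function from this patch):

    for_each_engine_masked(engine, gt, awake, tmp)
        kick_tlb_invalidate(gt, engine);          /* write the bit, do not wait */

    for_each_engine_masked(engine, gt, awake, tmp)
        if (wait_for_invalidate(gt, rb))          /* 100 us spin, then up to 4 ms */
            drm_err_ratelimited(&gt->i915->drm,
                                "%s TLB invalidation did not complete in %ums!\n",
                                engine->name, TLB_INVAL_TIMEOUT_MS);
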
*/ - return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16; + return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000; } static u32 read_clock_frequency(struct intel_uncore *uncore) @@ -137,10 +159,16 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) return gen11_read_clock_frequency(uncore); else if (GRAPHICS_VER(uncore->i915) >= 9) return gen9_read_clock_frequency(uncore); - else if (GRAPHICS_VER(uncore->i915) >= 5) + else if (GRAPHICS_VER(uncore->i915) >= 6) + return gen6_read_clock_frequency(uncore); + else if (GRAPHICS_VER(uncore->i915) == 5) return gen5_read_clock_frequency(uncore); + else if (IS_G4X(uncore->i915)) + return g4x_read_clock_frequency(uncore); + else if (GRAPHICS_VER(uncore->i915) == 4) + return gen4_read_clock_frequency(uncore); else - return gen2_read_clock_frequency(uncore); + return 0; } void intel_gt_init_clock_frequency(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index e79405a45312..830edffe88cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -40,6 +40,9 @@ static const char * const intel_steering_types[] = { "L3BANK", "MSLICE", "LNCF", + "GAM", + "DSS", + "OADDRM", "INSTANCE 0", }; @@ -48,14 +51,23 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = { {}, }; +/* + * Although the bspec lists more "MSLICE" ranges than shown here, some of those + * are of a "GAM" subclass that has special rules. Thus we use a separate + * GAM table farther down for those. + */ static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { - { 0x004000, 0x004AFF }, - { 0x00C800, 0x00CFFF }, { 0x00DD00, 0x00DDFF }, { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ {}, }; +static const struct intel_mmio_range xehpsdv_gam_steering_table[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + {}, +}; + static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { { 0x00B000, 0x00B0FF }, { 0x00D800, 0x00D8FF }, @@ -89,9 +101,47 @@ static const struct intel_mmio_range pvc_instance0_steering_table[] = { {}, }; +static const struct intel_mmio_range xelpg_instance0_steering_table[] = { + { 0x000B00, 0x000BFF }, /* SQIDI */ + { 0x001000, 0x001FFF }, /* SQIDI */ + { 0x004000, 0x0048FF }, /* GAM */ + { 0x008700, 0x0087FF }, /* SQIDI */ + { 0x00B000, 0x00B0FF }, /* NODE */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DDFF }, /* OAAL2 */ + {}, +}; + +static const struct intel_mmio_range xelpg_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +/* DSS steering is used for SLICE ranges as well */ +static const struct intel_mmio_range xelpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + {}, +}; + +static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = { + { 0x393200, 0x39323F }, + { 0x393400, 0x3934FF }, + {}, +}; + void intel_gt_mcr_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; + unsigned long fuse; + int i; /* * An mslice is unavailable only if both the meml3 for the slice is @@ -109,14 +159,36 @@ void intel_gt_mcr_init(struct intel_gt *gt) 
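
The conversion factors above follow directly from the stated bit positions. On g4x, bits 63:20 tick every 1/4 ns and the driver consumes the upper dword, so one increment of bits 63:32 corresponds to 2^(32-20) = 4096 ticks, i.e. 4096 * 0.25 ns = 1024 ns, hence 1000000000 / 1024 = 976562 Hz (integer division). On gen5 the upper dword ticks every 1000 ns, giving exactly 1 MHz, while gen6-8 keep the fixed 12.5 MHz value quoted from the PRM.
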
drm_warn(&i915->drm, "mslice mask all zero!\n"); } - if (IS_PONTEVECCHIO(i915)) { + if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) { + gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + fuse = REG_FIELD_GET(GT_L3_EXC_MASK, + intel_uncore_read(gt->uncore, XEHP_FUSE4)); + + /* + * Despite the register field being named "exclude mask" the + * bits actually represent enabled banks (two banks per bit). + */ + for_each_set_bit(i, &fuse, 3) + gt->info.l3bank_mask |= 0x3 << 2 * i; + + gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table; + gt->steering_table[L3BANK] = xelpg_l3bank_steering_table; + gt->steering_table[DSS] = xelpg_dss_steering_table; + } else if (IS_PONTEVECCHIO(i915)) { gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; } else if (IS_DG2(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; gt->steering_table[LNCF] = dg2_lncf_steering_table; + /* + * No need to hook up the GAM table since it has a dedicated + * steering control register on DG2 and can use implicit + * steering. + */ } else if (IS_XEHPSDV(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; + gt->steering_table[GAM] = xehpsdv_gam_steering_table; } else if (GRAPHICS_VER(i915) >= 11 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { gt->steering_table[L3BANK] = icl_l3bank_steering_table; @@ -135,6 +207,19 @@ void intel_gt_mcr_init(struct intel_gt *gt) } /* + * Although the rest of the driver should use MCR-specific functions to + * read/write MCR registers, we still use the regular intel_uncore_* functions + * internally to implement those, so we need a way for the functions in this + * file to "cast" an i915_mcr_reg_t into an i915_reg_t. + */ +static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) +{ + i915_reg_t r = { .reg = mcr.reg }; + + return r; +} + +/* * rw_with_mcr_steering_fw - Access a register with specific MCR steering * @uncore: pointer to struct intel_uncore * @reg: register being accessed @@ -148,14 +233,26 @@ void intel_gt_mcr_init(struct intel_gt *gt) * Caller needs to make sure the relevant forcewake wells are up. */ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, + i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; lockdep_assert_held(&uncore->lock); - if (GRAPHICS_VER(uncore->i915) >= 11) { + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) { + /* + * Always leave the hardware in multicast mode when doing reads + * (see comment about Wa_22013088509 below) and only change it + * to unicast mode when doing writes of a specific instance. + * + * No need to save old steering reg value. + */ + intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, + REG_FIELD_PREP(MTL_MCR_GROUPID, group) | + REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) | + (rw_flag == FW_REG_READ ? 
GEN11_MCR_MULTICAST : 0)); + } else if (GRAPHICS_VER(uncore->i915) >= 11) { mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance); @@ -173,39 +270,53 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, */ if (rw_flag == FW_REG_WRITE) mcr_mask |= GEN11_MCR_MULTICAST; + + mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr; + + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); } else { mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance); - } - old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr; - mcr &= ~mcr_mask; - mcr |= mcr_ss; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + } if (rw_flag == FW_REG_READ) - val = intel_uncore_read_fw(uncore, reg); + val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg)); else - intel_uncore_write_fw(uncore, reg, value); - - mcr &= ~mcr_mask; - mcr |= old_mcr & mcr_mask; + intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value); - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + /* + * For pre-MTL platforms, we need to restore the old value of the + * steering control register to ensure that implicit steering continues + * to behave as expected. For MTL and beyond, we need only reinstate + * the 'multicast' bit (and only if we did a write that cleared it). + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70) && rw_flag == FW_REG_WRITE) + intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + else if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70)) + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, old_mcr); return val; } static u32 rw_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, + i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { enum forcewake_domains fw_domains; u32 val; - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, + fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg), rw_flag); fw_domains |= intel_uncore_forcewake_for_reg(uncore, GEN8_MCR_SELECTOR, @@ -233,7 +344,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore, * group/instance. */ u32 intel_gt_mcr_read(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, int group, int instance) { return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0); @@ -250,7 +361,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, * Write an MCR register in unicast mode after steering toward a specific * group/instance. */ -void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value, +void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value, int group, int instance) { rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value); @@ -265,9 +376,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value, * Write an MCR register in multicast mode to update all instances. */ void intel_gt_mcr_multicast_write(struct intel_gt *gt, - i915_reg_t reg, u32 value) + i915_mcr_reg_t reg, u32 value) { - intel_uncore_write(gt->uncore, reg, value); + /* + * Ensure we have multicast behavior, just in case some non-i915 agent + * left the hardware in unicast mode. 
+ */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + + intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value); } /** @@ -281,9 +399,44 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt, * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should * be obtained automatically. */ -void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value) +void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) +{ + /* + * Ensure we have multicast behavior, just in case some non-i915 agent + * left the hardware in unicast mode. + */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + + intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value); +} + +/** + * intel_gt_mcr_multicast_rmw - Performs a multicast RMW operations + * @gt: GT structure + * @reg: the MCR register to read and write + * @clear: bits to clear during RMW + * @set: bits to set during RMW + * + * Performs a read-modify-write on an MCR register in a multicast manner. + * This operation only makes sense on MCR registers where all instances are + * expected to have the same value. The read will target any non-terminated + * instance and the write will be applied to all instances. + * + * This function assumes the caller is already holding any necessary forcewake + * domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should + * be obtained automatically. + * + * Returns the old (unmodified) value read. + */ +u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg, + u32 clear, u32 set) { - intel_uncore_write_fw(gt->uncore, reg, value); + u32 val = intel_gt_mcr_read_any(gt, reg); + + intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set); + + return val; } /* @@ -301,7 +454,7 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 va * for @type steering too. */ static bool reg_needs_read_steering(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, enum intel_steering_type type) { const u32 offset = i915_mmio_reg_offset(reg); @@ -332,6 +485,8 @@ static void get_nonterminated_steering(struct intel_gt *gt, enum intel_steering_type type, u8 *group, u8 *instance) { + u32 dss; + switch (type) { case L3BANK: *group = 0; /* unused */ @@ -351,6 +506,15 @@ static void get_nonterminated_steering(struct intel_gt *gt, *group = __ffs(gt->info.mslice_mask) << 1; *instance = 0; /* unused */ break; + case GAM: + *group = IS_DG2(gt->i915) ? 1 : 0; + *instance = 0; + break; + case DSS: + dss = intel_sseu_find_first_xehp_dss(>->info.sseu, 0, 0); + *group = dss / GEN_DSS_PER_GSLICE; + *instance = dss % GEN_DSS_PER_GSLICE; + break; case INSTANCE0: /* * There are a lot of MCR types for which instance (0, 0) @@ -359,6 +523,13 @@ static void get_nonterminated_steering(struct intel_gt *gt, *group = 0; *instance = 0; break; + case OADDRM: + if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & BIT(0)) + *group = 0; + else + *group = 1; + *instance = 0; + break; default: MISSING_CASE(type); *group = 0; @@ -380,7 +551,7 @@ static void get_nonterminated_steering(struct intel_gt *gt, * steering. 
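
The new intel_gt_mcr_multicast_rmw() above reads any one non-terminated instance and writes the modified value back to all instances, which is the right shape whenever every instance is expected to hold the same value. This patch already uses it that way in intel_gt_clear_error_registers():

    /* Clear RING_FAULT_VALID on every instance of the now-MCR fault register,
     * then post a read of the primary instance.
     */
    intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG, RING_FAULT_VALID, 0);
    intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
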
*/ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, u8 *group, u8 *instance) { int type; @@ -409,7 +580,7 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, * * Returns the value from a non-terminated instance of @reg. */ -u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg) +u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) { int type; u8 group, instance; @@ -423,7 +594,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg) } } - return intel_uncore_read_fw(gt->uncore, reg); + return intel_uncore_read_fw(gt->uncore, mcr_reg_cast(reg)); } /** @@ -436,7 +607,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg) * * Returns the value from a non-terminated instance of @reg. */ -u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg) +u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg) { int type; u8 group, instance; @@ -450,7 +621,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg) } } - return intel_uncore_read(gt->uncore, reg); + return intel_uncore_read(gt->uncore, mcr_reg_cast(reg)); } static void report_steering_type(struct drm_printer *p, @@ -483,11 +654,20 @@ static void report_steering_type(struct drm_printer *p, void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, bool dump_table) { - drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n", - gt->default_steering.groupid, - gt->default_steering.instanceid); - - if (IS_PONTEVECCHIO(gt->i915)) { + /* + * Starting with MTL we no longer have default steering; + * all ranges are explicitly steered. + */ + if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)) + drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n", + gt->default_steering.groupid, + gt->default_steering.instanceid); + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) { + for (int i = 0; i < NUM_STEERING_TYPES; i++) + if (gt->steering_table[i]) + report_steering_type(p, gt, i, dump_table); + } else if (IS_PONTEVECCHIO(gt->i915)) { report_steering_type(p, gt, INSTANCE0, dump_table); } else if (HAS_MSLICE_STEERING(gt->i915)) { report_steering_type(p, gt, MSLICE, dump_table); @@ -520,3 +700,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, return; } } + +/** + * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state + * @gt: GT structure + * @reg: the register to read + * @mask: mask to apply to register value + * @value: value to wait for + * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait + * @slow_timeout_ms: slow timeout in millisecond + * + * This routine waits until the target register @reg contains the expected + * @value after applying the @mask, i.e. it waits until :: + * + * (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value + * + * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds. + * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us + * must be not larger than 20,0000 microseconds. + * + * This function is basically an MCR-friendly version of + * __intel_wait_for_register_fw(). Generally this function will only be used + * on GAM registers which are a bit special --- although they're MCR registers, + * reads (e.g., waiting for status updates) are always directed to the primary + * instance. + * + * Note that this routine assumes the caller holds forcewake asserted, it is + * not suitable for very long waits. 
+ * + * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. + */ +int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms) +{ + u32 reg_value = 0; +#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value) + int ret; + + /* Catch any overuse of this function */ + might_sleep_if(slow_timeout_ms); + GEM_BUG_ON(fast_timeout_us > 20000); + GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms); + + ret = -ETIMEDOUT; + if (fast_timeout_us && fast_timeout_us <= 20000) + ret = _wait_for_atomic(done, fast_timeout_us, 0); + if (ret && slow_timeout_ms) + ret = wait_for(done, slow_timeout_ms); + + return ret; +#undef done +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 77a8b11c287d..3fb0502bff22 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -11,21 +11,24 @@ void intel_gt_mcr_init(struct intel_gt *gt); u32 intel_gt_mcr_read(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, int group, int instance); -u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg); -u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg); +u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg); +u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg); void intel_gt_mcr_unicast_write(struct intel_gt *gt, - i915_reg_t reg, u32 value, + i915_mcr_reg_t reg, u32 value, int group, int instance); void intel_gt_mcr_multicast_write(struct intel_gt *gt, - i915_reg_t reg, u32 value); + i915_mcr_reg_t reg, u32 value); void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, - i915_reg_t reg, u32 value); + i915_mcr_reg_t reg, u32 value); + +u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg, + u32 clear, u32 set); void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, u8 *group, u8 *instance); void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, @@ -34,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, unsigned int *group, unsigned int *instance); +int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms); + /* * Helper for for_each_ss_steering loop. 
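
In terms of usage, intel_gt_mcr_wait_for_reg_fw() above is the MCR counterpart of __intel_wait_for_register_fw(); the TLB code earlier in this diff is a caller, where wait_for_invalidate() on Xe_HP effectively boils down to:

    /* Poll the engine's bit in the per-class invalidate register until it
     * clears: a tight 100 us wait first, then sleeping waits up to 4 ms.
     */
    err = intel_gt_mcr_wait_for_reg_fw(gt, xehp_regs[engine->class],
                                       BIT(engine->instance), 0,
                                       TLB_INVAL_TIMEOUT_US,
                                       TLB_INVAL_TIMEOUT_MS);
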
On pre-Xe_HP platforms, subslice * presence is determined by using the group/instance as direct lookups in the diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 108b9e76c32e..40d0a3be42ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); } else if (GRAPHICS_VER(i915) >= 6) { - u32 rp_state_limits; - u32 gt_perf_status; - struct intel_rps_freq_caps caps; - u32 rpmodectl, rpinclimit, rpdeclimit; - u32 rpstat, cagf, reqf; - u32 rpcurupei, rpcurup, rpprevup; - u32 rpcurdownei, rpcurdown, rpprevdown; - u32 rpupei, rpupt, rpdownei, rpdownt; - u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; - - rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - gen6_rps_get_freq_caps(rps, &caps); - if (IS_GEN9_LP(i915)) - gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - else - gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - - /* RPSTAT1 is in the GT power well */ - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); - if (GRAPHICS_VER(i915) >= 9) { - reqf >>= 23; - } else { - reqf &= ~GEN6_TURBO_DISABLE; - if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - reqf >>= 24; - else - reqf >>= 25; - } - reqf = intel_gpu_freq(rps, reqf); - - rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); - rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); - rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; - rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; - rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; - rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; - rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; - rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - - rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); - rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - - rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); - rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - cagf = intel_rps_read_actual_frequency(rps); - - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - if (GRAPHICS_VER(i915) >= 11) { - pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); - pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); - /* - * The equivalent to the PM ISR & IIR cannot be read - * without affecting the current state of the system - */ - pm_isr = 0; - pm_iir = 0; - } else if (GRAPHICS_VER(i915) >= 8) { - pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); - pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); - pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); - pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); - } else { - pm_ier = intel_uncore_read(uncore, GEN6_PMIER); - pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); - pm_isr = intel_uncore_read(uncore, GEN6_PMISR); - pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); - } - pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); - - drm_printf(p, "Video Turbo Mode: %s\n", - str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO)); - 
drm_printf(p, "HW control enabled: %s\n", - str_yes_no(rpmodectl & GEN6_RP_ENABLE)); - drm_printf(p, "SW control enabled: %s\n", - str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); - - drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", - pm_ier, pm_imr, pm_mask); - if (GRAPHICS_VER(i915) <= 10) - drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", - pm_isr, pm_iir); - drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", - rps->pm_intrmsk_mbz); - drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); - drm_printf(p, "Render p-state ratio: %d\n", - (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); - drm_printf(p, "Render p-state VID: %d\n", - gt_perf_status & 0xff); - drm_printf(p, "Render p-state limit: %d\n", - rp_state_limits & 0xff); - drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); - drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); - drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); - drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); - drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); - drm_printf(p, "CAGF: %dMHz\n", cagf); - drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", - rpcurupei, - intel_gt_pm_interval_to_ns(gt, rpcurupei)); - drm_printf(p, "RP CUR UP: %d (%lldns)\n", - rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); - drm_printf(p, "RP PREV UP: %d (%lldns)\n", - rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); - drm_printf(p, "Up threshold: %d%%\n", - rps->power.up_threshold); - drm_printf(p, "RP UP EI: %d (%lldns)\n", - rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); - drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", - rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); - - drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", - rpcurdownei, - intel_gt_pm_interval_to_ns(gt, rpcurdownei)); - drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", - rpcurdown, - intel_gt_pm_interval_to_ns(gt, rpcurdown)); - drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", - rpprevdown, - intel_gt_pm_interval_to_ns(gt, rpprevdown)); - drm_printf(p, "Down threshold: %d%%\n", - rps->power.down_threshold); - drm_printf(p, "RP DOWN EI: %d (%lldns)\n", - rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); - drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", - rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); - - drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(rps, caps.min_freq)); - drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(rps, caps.rp1_freq)); - drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(rps, caps.rp0_freq)); - drm_printf(p, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(rps, rps->max_freq)); - - drm_printf(p, "Current freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); - drm_printf(p, "Actual freq: %d MHz\n", cagf); - drm_printf(p, "Idle freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); - drm_printf(p, "Min freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); - drm_printf(p, "Boost freq: %d MHz\n", - intel_gpu_freq(rps, rps->boost_freq)); - drm_printf(p, "Max freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); - drm_printf(p, - "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); + gen6_rps_frequency_dump(rps, p); } else { drm_puts(p, "no P-state info available\n"); } @@ -655,6 +500,44 @@ static bool rps_eval(void *data) DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost); +static int perf_limit_reasons_get(void *data, u64 *val) +{ + struct intel_gt *gt = data; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + *val = 
intel_uncore_read(gt->uncore, intel_gt_perf_limit_reasons_reg(gt)); + + return 0; +} + +static int perf_limit_reasons_clear(void *data, u64 val) +{ + struct intel_gt *gt = data; + intel_wakeref_t wakeref; + + /* + * Clear the upper 16 "log" bits, the lower 16 "status" bits are + * read-only. The upper 16 "log" bits are identical to the lower 16 + * "status" bits except that the "log" bits remain set until cleared. + */ + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + intel_uncore_rmw(gt->uncore, intel_gt_perf_limit_reasons_reg(gt), + GT0_PERF_LIMIT_REASONS_LOG_MASK, 0); + + return 0; +} + +static bool perf_limit_reasons_eval(void *data) +{ + struct intel_gt *gt = data; + + return i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt)); +} + +DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get, + perf_limit_reasons_clear, "%llu\n"); + void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { @@ -664,6 +547,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { "forcewake_user", &forcewake_user_fops, NULL}, { "llc", &llc_fops, llc_eval }, { "rps_boost", &rps_boost_fops, rps_eval }, + { "perf_limit_reasons", &perf_limit_reasons_fops, perf_limit_reasons_eval }, }; intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 5a3a25838fff..70177d3f2e94 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -8,6 +8,19 @@ #include "i915_reg_defs.h" +#define MCR_REG(offset) ((const i915_mcr_reg_t){ .reg = (offset) }) + +/* + * The perf control registers are technically multicast registers, but the + * driver never needs to read/write them directly; we only use them to build + * lists of registers (where they're mixed in with other non-MCR registers) + * and then operate on the offset directly. For now we'll just define them + * as non-multicast so we can place them on the same list, but we may want + * to try to come up with a better way to handle heterogeneous lists of + * registers in the future. 
+ */ +#define PERF_REG(offset) _MMIO(offset) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 @@ -39,12 +52,17 @@ #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84) #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0xd88) +#define FORCEWAKE_ACK_GSC _MMIO(0xdf8) +#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) + #define GMD_ID_GRAPHICS _MMIO(0xd8c) #define GMD_ID_MEDIA _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define MTL_MCR_SELECTOR _MMIO(0xfd4) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) +#define GAM_MCR_SELECTOR _MMIO(0xfe0) #define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26) #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) #define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24) @@ -54,6 +72,8 @@ #define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) #define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) #define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) +#define MTL_MCR_GROUPID REG_GENMASK(11, 8) +#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) #define IPEIR_I965 _MMIO(0x2064) #define IPEHR_I965 _MMIO(0x2068) @@ -329,11 +349,12 @@ #define GEN7_TLB_RD_ADDR _MMIO(0x4700) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4) -#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_TILE_LMEM_RANGE_SHIFT 8 -#define XEHP_FLAT_CCS_BASE_ADDR _MMIO(0x4910) +#define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) #define XEHP_CCS_BASE_SHIFT 8 #define GAMTARBMODE _MMIO(0x4a08) @@ -383,17 +404,18 @@ #define CHICKEN_RASTER_2 _MMIO(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) -#define VFLSKPD _MMIO(0x62a8) +#define VFLSKPD MCR_REG(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) -#define FF_MODE2 _MMIO(0x6604) +#define GEN12_FF_MODE2 _MMIO(0x6604) +#define XEHP_FF_MODE2 MCR_REG(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) -#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c) +#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c) #define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */ #define RC_OP_FLUSH_ENABLE (1 << 0) @@ -421,6 +443,7 @@ #define HIZ_CHICKEN _MMIO(0x7018) #define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) +#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) #define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE REG_BIT(3) #define GEN8_L3CNTLREG _MMIO(0x7034) @@ -442,23 +465,16 @@ #define GEN8_HDC_CHICKEN1 _MMIO(0x7304) #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) +#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) #define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) -/* GEN9 chicken */ -#define SLICE_ECO_CHICKEN0 _MMIO(0x7308) -#define PIXEL_MASK_CAMMING_DISABLE (1 << 14) - -#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308) -#define DISABLE_PIXEL_MASK_CAMMING (1 << 14) - #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) -#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) - -#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) +#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c) #define 
MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) #define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4) #define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \ @@ -485,9 +501,12 @@ #define VF_PREEMPTION _MMIO(0x83a4) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) +#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4) +#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) + #define GEN8_RC6_CTX_INFO _MMIO(0x8504) -#define GEN12_SQCM _MMIO(0x8724) +#define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) #define HSW_IDICR _MMIO(0x9008) @@ -519,6 +538,8 @@ #define GEN6_MBCTL_BOOT_FETCH_MECH (1 << 0) /* Fuse readout registers for GT */ +#define XEHP_FUSE4 _MMIO(0x9114) +#define GT_L3_EXC_MASK REG_GENMASK(6, 4) #define GEN10_MIRROR_FUSE3 _MMIO(0x9118) #define GEN10_L3BANK_PAIR_COUNT 4 #define GEN10_L3BANK_MASK 0x0F @@ -647,6 +668,9 @@ #define GEN7_MISCCPCTL _MMIO(0x9424) #define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) + +#define GEN8_MISCCPCTL MCR_REG(0x9424) +#define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0) #define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) #define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2) #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4) @@ -700,7 +724,8 @@ #define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) #define LTCDD_CLKGATE_DIS REG_BIT(10) -#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) @@ -708,27 +733,27 @@ #define L3_CLKGATE_DIS REG_BIT(16) #define L3_CR2X_CLKGATE_DIS REG_BIT(17) -#define SCCGCTL94DC _MMIO(0x94dc) +#define SCCGCTL94DC MCR_REG(0x94dc) #define CG3DDISURB REG_BIT(14) #define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) #define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) #define PSDUNIT_CLKGATE_DIS REG_BIT(5) -#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) +#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524) #define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) #define GWUNIT_CLKGATE_DIS REG_BIT(16) -#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528) +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 MCR_REG(0x9528) #define CPSSUNIT_CLKGATE_DIS REG_BIT(9) -#define SSMCGCTL9530 _MMIO(0x9530) +#define SSMCGCTL9530 MCR_REG(0x9530) #define RTFUNIT_CLKGATE_DIS REG_BIT(18) -#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550) +#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550) #define DFR_DISABLE (1 << 9) -#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) +#define INF_UNIT_LEVEL_CLKGATE MCR_REG(0x9560) #define CGPSF_CLKGATE_DIS (1 << 3) #define MICRO_BP0_0 _MMIO(0x9800) @@ -901,6 +926,8 @@ #define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) +#define FORCEWAKE_REQ_GSC _MMIO(0xa618) + #define CHV_POWER_SS0_SIG1 _MMIO(0xa720) #define CHV_POWER_SS0_SIG2 _MMIO(0xa724) #define CHV_POWER_SS1_SIG1 _MMIO(0xa728) @@ -938,7 +965,8 @@ /* MOCS (Memory Object Control State) registers */ #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ -#define GEN9_LNCFCMOCS_REG_COUNT 32 +#define XEHP_LNCFCMOCS(i) MCR_REG(0xb020 + (i) * 4) +#define LNCFCMOCS_REG_COUNT 32 #define GEN7_L3CNTLREG3 _MMIO(0xb024) @@ -954,15 +982,10 @@ #define GEN7_L3LOG(slice, i) _MMIO(0xb070 + (slice) * 0x200 + (i) * 4) #define GEN7_L3LOG_SIZE 0x80 -#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) -#define PMFLUSHDONE_LNICRSDROP (1 << 20) -#define 
PMFLUSH_GAPL3UNBLOCK (1 << 21) -#define PMFLUSHDONE_LNEBLK (1 << 22) - -#define XEHP_L3NODEARBCFG _MMIO(0xb0b4) +#define XEHP_L3NODEARBCFG MCR_REG(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) -#define GEN8_L3SQCREG1 _MMIO(0xb100) +#define GEN8_L3SQCREG1 MCR_REG(0xb100) /* * Note that on CHV the following has an off-by-one error wrt. to BSpec. * Using the formula in BSpec leads to a hang, while the formula here works @@ -973,31 +996,28 @@ #define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14) #define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14)) -#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114) -#define GEN11_I2M_WRITE_DISABLE (1 << 28) - -#define GEN8_L3SQCREG4 _MMIO(0xb118) +#define GEN8_L3SQCREG4 MCR_REG(0xb118) #define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) #define GEN8_LQSC_RO_PERF_DIS (1 << 27) #define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) #define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22) -#define GEN9_SCRATCH1 _MMIO(0xb11c) +#define GEN9_SCRATCH1 MCR_REG(0xb11c) #define EVICTION_PERF_FIX_ENABLE REG_BIT(8) -#define BDW_SCRATCH1 _MMIO(0xb11c) +#define BDW_SCRATCH1 MCR_REG(0xb11c) #define GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE (1 << 2) -#define GEN11_SCRATCH2 _MMIO(0xb140) +#define GEN11_SCRATCH2 MCR_REG(0xb140) #define GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE (1 << 19) -#define GEN11_L3SQCREG5 _MMIO(0xb158) +#define XEHP_L3SQCREG5 MCR_REG(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) -#define MLTICTXCTL _MMIO(0xb170) +#define MLTICTXCTL MCR_REG(0xb170) #define TDONRENDER REG_BIT(2) -#define XEHP_L3SCQREG7 _MMIO(0xb188) +#define XEHP_L3SCQREG7 MCR_REG(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) #define XEHPC_L3SCRUB _MMIO(0xb18c) @@ -1005,7 +1025,7 @@ #define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0) #define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6) -#define L3SQCREG1_CCS0 _MMIO(0xb200) +#define L3SQCREG1_CCS0 MCR_REG(0xb200) #define FLUSHALLNONCOH REG_BIT(5) #define GEN11_GLBLINVL _MMIO(0xb404) @@ -1030,11 +1050,14 @@ #define GEN9_BLT_MOCS(i) _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4) #define GEN12_FAULT_TLB_DATA0 _MMIO(0xceb8) +#define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8) #define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc) +#define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) #define GEN12_RING_FAULT_REG _MMIO(0xcec4) +#define XEHP_RING_FAULT_REG MCR_REG(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) #define RING_FAULT_GTTSEL_MASK (1 << 11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) @@ -1042,16 +1065,21 @@ #define RING_FAULT_VALID (1 << 0) #define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8) #define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define XEHP_VD_TLB_INV_CR MCR_REG(0xcedc) #define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define XEHP_VE_TLB_INV_CR MCR_REG(0xcee0) #define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) +#define XEHP_BLT_TLB_INV_CR MCR_REG(0xcee4) #define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04) +#define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04) -#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28) -#define RENDER_MOD_CTRL _MMIO(0xcf2c) -#define COMP_MOD_CTRL _MMIO(0xcf30) -#define VDBX_MOD_CTRL _MMIO(0xcf34) -#define VEBX_MOD_CTRL _MMIO(0xcf38) +#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28) +#define RENDER_MOD_CTRL MCR_REG(0xcf2c) +#define COMP_MOD_CTRL MCR_REG(0xcf30) +#define VDBX_MOD_CTRL MCR_REG(0xcf34) +#define VEBX_MOD_CTRL MCR_REG(0xcf38) #define FORCE_MISS_FTLB REG_BIT(3) #define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c) @@ 
-1066,48 +1094,52 @@ #define GEN12_GAM_DONE _MMIO(0xcf68) #define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV */ +#define GEN8_HALF_SLICE_CHICKEN1 MCR_REG(0xe100) #define GEN7_MAX_PS_THREAD_DEP (8 << 12) #define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1 << 10) #define GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE (1 << 4) #define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) +#define GEN8_SAMPLER_INSTDONE MCR_REG(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define GEN8_ROW_INSTDONE MCR_REG(0xe164) -#define HALF_SLICE_CHICKEN2 _MMIO(0xe180) +#define HALF_SLICE_CHICKEN2 MCR_REG(0xe180) #define GEN8_ST_PO_DISABLE (1 << 13) -#define HALF_SLICE_CHICKEN3 _MMIO(0xe184) +#define HSW_HALF_SLICE_CHICKEN3 _MMIO(0xe184) +#define GEN8_HALF_SLICE_CHICKEN3 MCR_REG(0xe184) #define HSW_SAMPLE_C_PERFORMANCE (1 << 9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8) #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5) #define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1) -#define GEN9_HALF_SLICE_CHICKEN5 _MMIO(0xe188) +#define GEN9_HALF_SLICE_CHICKEN5 MCR_REG(0xe188) #define GEN9_DG_MIRROR_FIX_ENABLE (1 << 5) #define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3) -#define GEN10_SAMPLER_MODE _MMIO(0xe18c) +#define GEN10_SAMPLER_MODE MCR_REG(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) -#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) +#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) #define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8) #define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4) #define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2) -#define GEN10_CACHE_MODE_SS _MMIO(0xe420) +#define GEN10_CACHE_MODE_SS MCR_REG(0xe420) #define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define DISABLE_ECC REG_BIT(5) #define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) -#define EU_PERF_CNTL0 _MMIO(0xe458) -#define EU_PERF_CNTL4 _MMIO(0xe45c) +#define EU_PERF_CNTL0 PERF_REG(0xe458) +#define EU_PERF_CNTL4 PERF_REG(0xe45c) -#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c) +#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c) #define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) #define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) @@ -1119,7 +1151,7 @@ #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) -#define GEN8_ROW_CHICKEN _MMIO(0xe4f0) +#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0) #define FLOW_CONTROL_ENABLE REG_BIT(15) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) @@ -1130,42 +1162,43 @@ #define DISABLE_EARLY_EOT REG_BIT(1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) + +#define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4) #define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) #define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) #define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define GEN12_DISABLE_DOP_GATING REG_BIT(0) -#define RT_CTRL _MMIO(0xe530) +#define RT_CTRL MCR_REG(0xe530) #define DIS_NULL_QUERY REG_BIT(10) #define STACKID_CTRL REG_GENMASK(6, 5) #define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2) -#define EU_PERF_CNTL1 _MMIO(0xe558) -#define EU_PERF_CNTL5 _MMIO(0xe55c) +#define EU_PERF_CNTL1 PERF_REG(0xe558) +#define EU_PERF_CNTL5 PERF_REG(0xe55c) -#define GEN12_HDC_CHICKEN0 _MMIO(0xe5f0) +#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) #define 
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) -#define ICL_HDC_MODE _MMIO(0xe5f4) +#define ICL_HDC_MODE MCR_REG(0xe5f4) -#define EU_PERF_CNTL2 _MMIO(0xe658) -#define EU_PERF_CNTL6 _MMIO(0xe65c) -#define EU_PERF_CNTL3 _MMIO(0xe758) +#define EU_PERF_CNTL2 PERF_REG(0xe658) +#define EU_PERF_CNTL6 PERF_REG(0xe65c) +#define EU_PERF_CNTL3 PERF_REG(0xe758) -#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8) +#define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) -#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4) +#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) -#define SARB_CHICKEN1 _MMIO(0xe90c) +#define SARB_CHICKEN1 MCR_REG(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) -#define GEN7_HALF_SLICE_CHICKEN1_GT2 _MMIO(0xf100) - #define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4) #define DOP_CLOCK_GATING_DISABLE (1 << 0) #define PUSH_CONSTANT_DEREF_DISABLE (1 << 8) @@ -1513,6 +1546,9 @@ #define VLV_RENDER_C0_COUNT _MMIO(0x138118) #define VLV_MEDIA_C0_COUNT _MMIO(0x13811c) +#define GEN12_RPSTAT1 _MMIO(0x1381b4) +#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) + #define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) #define GEN11_GUNIT (28) @@ -1583,6 +1619,11 @@ #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) +#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004) +#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008) +#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068) +#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c) + /* * Standalone Media's non-engine GT registers are located at their regular GT * offsets plus 0x380000. This extra offset is stored inside the intel_uncore diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index d651ccd0ab20..9486dd3bed99 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj) return !strncmp(kobj->name, "gt", 2); } -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name) { - struct kobject *kobj = &dev->kobj; - /* * We are interested at knowing from where the interface * has been called, whether it's called from gt/ or from @@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, * "struct drm_i915_private *" type. 
*/ if (!is_object_gt(kobj)) { + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); return to_gt(i915); @@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt) return &gt->i915->drm.primary->kdev->kobj; } -static ssize_t id_show(struct device *dev, - struct device_attribute *attr, +static ssize_t id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); return sysfs_emit(buf, "%u\n", gt->info.id); } -static DEVICE_ATTR_RO(id); +static struct kobj_attribute attr_id = __ATTR_RO(id); static struct attribute *id_attrs[] = { - &dev_attr_id.attr, + &attr_id.attr, NULL, }; ATTRIBUTE_GROUPS(id); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index 6232923a420d..18bab835be02 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -18,11 +18,6 @@ bool is_object_gt(struct kobject *kobj); struct drm_i915_private *kobj_to_i915(struct kobject *kobj); -struct kobject * -intel_gt_create_kobj(struct intel_gt *gt, - struct kobject *dir, - const char *name); - static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) { return container_of(kobj, struct intel_gt, sysfs_gt); @@ -30,7 +25,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) void intel_gt_sysfs_register(struct intel_gt *gt); void intel_gt_sysfs_unregister(struct intel_gt *gt); -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name); #endif /* SYSFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 180dd6f3ef57..2b5f05b31187 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -24,14 +24,15 @@ enum intel_gt_sysfs_op { }; static int -sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr, int (func)(struct intel_gt *gt, u32 val), u32 val) { struct intel_gt *gt; int ret; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, break; } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt, val); } @@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, } static u32 -sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr, u32 (func)(struct intel_gt *gt), enum intel_gt_sysfs_op op) { @@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, ret = (op == INTEL_GT_SYSFS_MAX) ?
0 : (u32) -1; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, } } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt); } @@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, #define sysfs_gt_attribute_r_max_func(d, a, f) \ sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) +#define INTEL_GT_SYSFS_SHOW(_name, _attr_type) \ + static ssize_t _name##_show_common(struct kobject *kobj, \ + struct attribute *attr, char *buff) \ + { \ + u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr, \ + __##_name##_show); \ + \ + return sysfs_emit(buff, "%u\n", val); \ + } \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buff) \ + { \ + return _name ##_show_common(kobj, &attr->attr, buff); \ + } \ + static ssize_t _name##_dev_show(struct device *dev, \ + struct device_attribute *attr, char *buff) \ + { \ + return _name##_show_common(&dev->kobj, &attr->attr, buff); \ + } + +#define INTEL_GT_SYSFS_STORE(_name, _func) \ + static ssize_t _name##_store_common(struct kobject *kobj, \ + struct attribute *attr, \ + const char *buff, size_t count) \ + { \ + int ret; \ + u32 val; \ + \ + ret = kstrtou32(buff, 0, &val); \ + if (ret) \ + return ret; \ + \ + ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val); \ + \ + return ret ?: count; \ + } \ + static ssize_t _name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, const char *buff, \ + size_t count) \ + { \ + return _name##_store_common(kobj, &attr->attr, buff, count); \ + } \ + static ssize_t _name##_dev_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buff, size_t count) \ + { \ + return _name##_store_common(&dev->kobj, &attr->attr, buff, count); \ + } + +#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max) +#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min) + +#define INTEL_GT_ATTR_RW(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RW(_name) + +#define INTEL_GT_ATTR_RO(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RO(_name) + +#define INTEL_GT_DUAL_ATTR_RW(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644, \ + _name##_dev_show, \ + _name##_dev_store); \ + INTEL_GT_ATTR_RW(_name) + +#define INTEL_GT_DUAL_ATTR_RO(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444, \ + _name##_dev_show, \ + NULL); \ + INTEL_GT_ATTR_RO(_name) + #ifdef CONFIG_PM static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) { @@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) return DIV_ROUND_CLOSEST_ULL(res, 1000); } -static ssize_t rc6_enable_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static u8 get_rc6_mask(struct intel_gt *gt) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); u8 mask = 0; if (HAS_RC6(gt->i915)) @@ -118,37 +187,35 @@ static ssize_t rc6_enable_show(struct device *dev, if (HAS_RC6pp(gt->i915)) mask |= BIT(2); - return sysfs_emit(buff, "%x\n", mask); + return mask; } -static u32 __rc6_residency_ms_show(struct intel_gt *gt) +static ssize_t rc6_enable_show(struct kobject *kobj, + struct 
kobj_attribute *attr, + char *buff) { - return get_residency(gt, GEN6_GT_GFX_RC6); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); } -static ssize_t rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t rc6_enable_dev_show(struct device *dev, + struct device_attribute *attr, + char *buff) { - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6_residency_ms_show); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name); - return sysfs_emit(buff, "%u\n", rc6_residency); + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); } -static u32 __rc6p_residency_ms_show(struct intel_gt *gt) +static u32 __rc6_residency_ms_show(struct intel_gt *gt) { - return get_residency(gt, GEN6_GT_GFX_RC6p); + return get_residency(gt, GEN6_GT_GFX_RC6); } -static ssize_t rc6p_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static u32 __rc6p_residency_ms_show(struct intel_gt *gt) { - u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6p_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6p_residency); + return get_residency(gt, GEN6_GT_GFX_RC6p); } static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) @@ -156,67 +223,69 @@ static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) return get_residency(gt, GEN6_GT_GFX_RC6pp); } -static ssize_t rc6pp_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6pp_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6pp_residency); -} - static u32 __media_rc6_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, VLV_GT_MEDIA_RC6); } -static ssize_t media_rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __media_rc6_residency_ms_show); +INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms); - return sysfs_emit(buff, "%u\n", rc6_residency); -} - -static DEVICE_ATTR_RO(rc6_enable); -static DEVICE_ATTR_RO(rc6_residency_ms); -static DEVICE_ATTR_RO(rc6p_residency_ms); -static DEVICE_ATTR_RO(rc6pp_residency_ms); -static DEVICE_ATTR_RO(media_rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6_enable); +INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms); +INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms); static struct attribute *rc6_attrs[] = { + &attr_rc6_enable.attr, + &attr_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6p_attrs[] = { + &attr_rc6p_residency_ms.attr, + &attr_rc6pp_residency_ms.attr, + NULL +}; + +static struct attribute *media_rc6_attrs[] = { + &attr_media_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6_dev_attrs[] = { &dev_attr_rc6_enable.attr, &dev_attr_rc6_residency_ms.attr, NULL }; -static struct attribute *rc6p_attrs[] = { +static struct attribute *rc6p_dev_attrs[] = { &dev_attr_rc6p_residency_ms.attr, &dev_attr_rc6pp_residency_ms.attr, NULL }; -static struct attribute *media_rc6_attrs[] = { +static struct attribute *media_rc6_dev_attrs[] = { &dev_attr_media_rc6_residency_ms.attr, NULL }; static const struct attribute_group rc6_attr_group[] = { { .attrs = rc6_attrs, }, - { 
.name = power_group_name, .attrs = rc6_attrs, }, + { .name = power_group_name, .attrs = rc6_dev_attrs, }, }; static const struct attribute_group rc6p_attr_group[] = { { .attrs = rc6p_attrs, }, - { .name = power_group_name, .attrs = rc6p_attrs, }, + { .name = power_group_name, .attrs = rc6p_dev_attrs, }, }; static const struct attribute_group media_rc6_attr_group[] = { { .attrs = media_rc6_attrs, }, - { .name = power_group_name, .attrs = media_rc6_attrs, }, + { .name = power_group_name, .attrs = media_rc6_dev_attrs, }, }; static int __intel_gt_sysfs_create_group(struct kobject *kobj, @@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt) return intel_rps_read_actual_frequency(>->rps); } -static ssize_t act_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __act_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", actual_freq); -} - static u32 __cur_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_requested_frequency(>->rps); } -static ssize_t cur_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __cur_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", cur_freq); -} - static u32 __boost_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_boost_frequency(>->rps); } -static ssize_t boost_freq_mhz_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __boost_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", boost_freq); -} - static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val) { return intel_rps_set_boost_frequency(>->rps, val); } -static ssize_t boost_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - ssize_t ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - return sysfs_gt_attribute_w_func(dev, attr, - __boost_freq_mhz_store, val) ?: count; -} - -static u32 __rp0_freq_mhz_show(struct intel_gt *gt) +static u32 __RP0_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rp0_frequency(>->rps); } -static ssize_t RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp0_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp0_freq); -} - -static u32 __rp1_freq_mhz_show(struct intel_gt *gt) -{ - return intel_rps_get_rp1_frequency(>->rps); -} - -static ssize_t RP1_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp1_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp1_freq); -} - -static u32 __rpn_freq_mhz_show(struct intel_gt *gt) +static u32 __RPn_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rpn_frequency(>->rps); } -static ssize_t RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) +static u32 __RP1_freq_mhz_show(struct intel_gt *gt) { - u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rpn_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rpn_freq); + return intel_rps_get_rp1_frequency(>->rps); } static u32 __max_freq_mhz_show(struct intel_gt *gt) @@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt) return intel_rps_get_max_frequency(>->rps); } -static ssize_t max_freq_mhz_show(struct device *dev, - struct device_attribute *attr, 
char *buff) -{ - u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __max_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", max_freq); -} - static int __set_max_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_max_frequency(>->rps, val); } -static ssize_t max_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val); - - return ret ?: count; -} - static u32 __min_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_min_frequency(>->rps); } -static ssize_t min_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr, - __min_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", min_freq); -} - static int __set_min_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_min_frequency(>->rps, val); } -static ssize_t min_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val); - - return ret ?: count; -} - static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) { struct intel_rps *rps = >->rps; @@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) return intel_gpu_freq(rps, rps->efficient_freq); } -static ssize_t vlv_rpe_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __vlv_rpe_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rpe_freq); -} - -#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \ - static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store) - -#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL) -#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store) +INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz); +INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz); +INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store); +INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq); +INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq); + +#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev) \ + static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, \ + _show_dev, _store_dev); \ + static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode, \ + _show, _store) + +#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL, \ + _name##_dev_show, NULL) +#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store, \ + _name##_dev_show, _name##_dev_store) /* The below macros generate static structures */ INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz); @@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz); 
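/*
 * Editor's note, not part of the patch: each INTEL_GT_RPS_SYSFS_ATTR_RO()/_RW()
 * invocation above and below now emits two attributes per name - a
 * device_attribute for the legacy gt_* files on the DRM device and a
 * kobj_attribute for the rps_* files on the per-gt kobject.  As a rough
 * illustration, INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz) expands to
 * approximately:
 *
 *   static struct device_attribute dev_attr_gt_act_freq_mhz =
 *           __ATTR(gt_act_freq_mhz, 0444, act_freq_mhz_dev_show, NULL);
 *   static struct kobj_attribute attr_rps_act_freq_mhz =
 *           __ATTR(rps_act_freq_mhz, 0444, act_freq_mhz_show, NULL);
 *
 * with act_freq_mhz_show()/act_freq_mhz_dev_show() generated by
 * INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz) earlier in this diff.
 */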
INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz); - -static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); - -#define GEN6_ATTR(s) { \ - &dev_attr_##s##_act_freq_mhz.attr, \ - &dev_attr_##s##_cur_freq_mhz.attr, \ - &dev_attr_##s##_boost_freq_mhz.attr, \ - &dev_attr_##s##_max_freq_mhz.attr, \ - &dev_attr_##s##_min_freq_mhz.attr, \ - &dev_attr_##s##_RP0_freq_mhz.attr, \ - &dev_attr_##s##_RP1_freq_mhz.attr, \ - &dev_attr_##s##_RPn_freq_mhz.attr, \ +INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz); + +#define GEN6_ATTR(p, s) { \ + &p##attr_##s##_act_freq_mhz.attr, \ + &p##attr_##s##_cur_freq_mhz.attr, \ + &p##attr_##s##_boost_freq_mhz.attr, \ + &p##attr_##s##_max_freq_mhz.attr, \ + &p##attr_##s##_min_freq_mhz.attr, \ + &p##attr_##s##_RP0_freq_mhz.attr, \ + &p##attr_##s##_RP1_freq_mhz.attr, \ + &p##attr_##s##_RPn_freq_mhz.attr, \ NULL, \ } -#define GEN6_RPS_ATTR GEN6_ATTR(rps) -#define GEN6_GT_ATTR GEN6_ATTR(gt) +#define GEN6_RPS_ATTR GEN6_ATTR(, rps) +#define GEN6_GT_ATTR GEN6_ATTR(dev_, gt) static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR; static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR; -static ssize_t punit_req_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t punit_req_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 preq = intel_rps_read_punit_req_frequency(>->rps); return sysfs_emit(buff, "%u\n", preq); @@ -508,20 +464,20 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev, struct intel_gt_bool_throttle_attr { struct attribute attr; - ssize_t (*show)(struct device *dev, struct device_attribute *attr, + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, char *buf); - i915_reg_t reg32; + i915_reg_t (*reg32)(struct intel_gt *gt); u32 mask; }; -static ssize_t throttle_reason_bool_show(struct device *dev, - struct device_attribute *attr, +static ssize_t throttle_reason_bool_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_gt_bool_throttle_attr *t_attr = (struct intel_gt_bool_throttle_attr *) attr; - bool val = rps_read_mask_mmio(>->rps, t_attr->reg32, t_attr->mask); + bool val = rps_read_mask_mmio(>->rps, t_attr->reg32(gt), t_attr->mask); return sysfs_emit(buff, "%u\n", val); } @@ -530,11 +486,11 @@ static ssize_t throttle_reason_bool_show(struct device *dev, struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \ .attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \ .show = throttle_reason_bool_show, \ - .reg32 = GT0_PERF_LIMIT_REASONS, \ + .reg32 = intel_gt_perf_limit_reasons_reg, \ .mask = mask__, \ } -static DEVICE_ATTR_RO(punit_req_freq_mhz); +INTEL_GT_ATTR_RO(punit_req_freq_mhz); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK); @@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = { #define U8_8_VAL_MASK 0xffff #define U8_8_SCALE_TO_VALUE "0.00390625" -static ssize_t freq_factor_scale_show(struct device *dev, - struct device_attribute *attr, 
+static ssize_t freq_factor_scale_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE); @@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode) return !mode ? mode : 256 / mode; } -static ssize_t media_freq_factor_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; intel_wakeref_t wakeref; u32 mode; @@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev, return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode)); } -static ssize_t media_freq_factor_store(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; u32 factor, mode; int err; @@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev, return err ?: count; } -static ssize_t media_RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static ssize_t media_RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static DEVICE_ATTR_RW(media_freq_factor); -static struct device_attribute dev_attr_media_freq_factor_scale = +INTEL_GT_ATTR_RW(media_freq_factor); +static struct kobj_attribute attr_media_freq_factor_scale = __ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL); -static DEVICE_ATTR_RO(media_RP0_freq_mhz); -static DEVICE_ATTR_RO(media_RPn_freq_mhz); +INTEL_GT_ATTR_RO(media_RP0_freq_mhz); +INTEL_GT_ATTR_RO(media_RPn_freq_mhz); static const struct attribute *media_perf_power_attrs[] = { - &dev_attr_media_freq_factor.attr, - &dev_attr_media_freq_factor_scale.attr, - &dev_attr_media_RP0_freq_mhz.attr, - &dev_attr_media_RPn_freq_mhz.attr, + &attr_media_freq_factor.attr, + &attr_media_freq_factor_scale.attr, + &attr_media_RP0_freq_mhz.attr, + &attr_media_RPn_freq_mhz.attr, NULL }; @@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = { NULL }; -static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, - const struct attribute * const *attrs) +static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj) { + const struct attribute * const *attrs; + struct attribute *vlv_attr; int ret; if 
(GRAPHICS_VER(gt->i915) < 6) return 0; + if (is_object_gt(kobj)) { + attrs = gen6_rps_attrs; + vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr; + } else { + attrs = gen6_gt_attrs; + vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr; + } + ret = sysfs_create_files(kobj, attrs); if (ret) return ret; if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) - ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr); + ret = sysfs_create_file(kobj, vlv_attr); return ret; } @@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) intel_sysfs_rc6_init(gt, kobj); - ret = is_object_gt(kobj) ? - intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) : - intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs); + ret = intel_sysfs_rps_init(gt, kobj); if (ret) drm_warn(>->i915->drm, "failed to create gt%u RPS sysfs files (%pe)", @@ -790,13 +753,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) if (!is_object_gt(kobj)) return; - ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr); + ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr); if (ret) drm_warn(>->i915->drm, "failed to create gt%u punit_req_freq_mhz sysfs (%pe)", gt->info.id, ERR_PTR(ret)); - if (GRAPHICS_VER(gt->i915) >= 11) { + if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) { ret = sysfs_create_files(kobj, throttle_reason_attrs); if (ret) drm_warn(>->i915->drm, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index f19c2de77ff6..a0cc73b401ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -20,6 +20,7 @@ #include "intel_gsc.h" #include "i915_vma.h" +#include "i915_perf_types.h" #include "intel_engine_types.h" #include "intel_gt_buffer_pool_types.h" #include "intel_hwconfig.h" @@ -59,6 +60,9 @@ enum intel_steering_type { L3BANK, MSLICE, LNCF, + GAM, + DSS, + OADDRM, /* * On some platforms there are multiple types of MCR registers that @@ -141,20 +145,6 @@ struct intel_gt { struct intel_wakeref wakeref; atomic_t user_wakeref; - /** - * Protects access to lmem usefault list. - * It is required, if we are outside of the runtime suspend path, - * access to @lmem_userfault_list requires always first grabbing the - * runtime pm, to ensure we can't race against runtime suspend. - * Once we have that we also need to grab @lmem_userfault_lock, - * at which point we have exclusive access. - * The runtime suspend path is special since it doesn't really hold any locks, - * but instead has exclusive access by virtue of all other accesses requiring - * holding the runtime pm wakeref. 
- */ - struct mutex lmem_userfault_lock; - struct list_head lmem_userfault_list; - struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ @@ -170,9 +160,6 @@ struct intel_gt { */ intel_wakeref_t awake; - /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */ - struct intel_wakeref_auto userfault_wakeref; - u32 clock_frequency; u32 clock_period_ns; @@ -286,6 +273,8 @@ struct intel_gt { /* sysfs defaults per gt */ struct gt_defaults defaults; struct kobject *sysfs_defaults; + + struct i915_perf_gt perf; }; struct intel_gt_definition { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 2eaeba14319e..e82a9d763e57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -15,6 +15,7 @@ #include "i915_trace.h" #include "i915_utils.h" #include "intel_gt.h" +#include "intel_gt_mcr.h" #include "intel_gt_regs.h" #include "intel_gtt.h" @@ -269,11 +270,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, ARRAY_SIZE(vm->min_alignment)); - if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) && - subclass == VM_CLASS_PPGTT) { - vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M; - vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M; - } else if (HAS_64K_PAGES(vm->i915)) { + if (HAS_64K_PAGES(vm->i915)) { vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K; vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K; } @@ -343,7 +340,8 @@ int setup_scratch_page(struct i915_address_space *vm) */ size = I915_GTT_PAGE_SIZE_4K; if (i915_vm_is_4lvl(vm) && - HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) && + !HAS_64K_PAGES(vm->i915)) size = I915_GTT_PAGE_SIZE_64K; do { @@ -385,18 +383,6 @@ skip: if (size == I915_GTT_PAGE_SIZE_4K) return -ENOMEM; - /* - * If we need 64K minimum GTT pages for device local-memory, - * like on XEHPSDV, then we need to fail the allocation here, - * otherwise we can't safely support the insertion of - * local-memory pages for this vm, since the HW expects the - * correct physical alignment and size when the page-table is - * operating in 64K GTT mode, which includes any scratch PTEs, - * since userspace can still touch them. 
- */ - if (HAS_64K_PAGES(vm->i915)) - return -ENOMEM; - size = I915_GTT_PAGE_SIZE_4K; } while (1); } @@ -493,6 +479,18 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore) intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } +static void xehp_setup_private_ppat(struct intel_gt *gt) +{ + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB); +} + static void icl_setup_private_ppat(struct intel_uncore *uncore) { intel_uncore_write(uncore, @@ -585,13 +583,16 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore) intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); } -void setup_private_pat(struct intel_uncore *uncore) +void setup_private_pat(struct intel_gt *gt) { - struct drm_i915_private *i915 = uncore->i915; + struct intel_uncore *uncore = gt->uncore; + struct drm_i915_private *i915 = gt->i915; GEM_BUG_ON(GRAPHICS_VER(i915) < 8); - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + xehp_setup_private_ppat(gt); + else if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); else if (GRAPHICS_VER(i915) >= 11) icl_setup_private_ppat(uncore); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index c0ca53cba9f0..4d75ba4bb41d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -93,6 +93,7 @@ typedef u64 gen8_pte_t; #define GEN12_GGTT_PTE_LM BIT_ULL(1) #define GEN12_PDE_64K BIT(6) +#define GEN12_PTE_PS64 BIT(8) /* * Cacheability Control is a 4-bit value. The low three bits are stored in bits @@ -667,7 +668,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, void gtt_write_workarounds(struct intel_gt *gt); -void setup_private_pat(struct intel_uncore *uncore); +void setup_private_pat(struct intel_gt *gt); int i915_vm_alloc_pt_stash(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3955292483a6..7771a19008c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -20,6 +20,30 @@ #include "intel_ring.h" #include "shmem_utils.h" +/* + * The per-platform tables are u8-encoded in @data. Decode @data and set the + * addresses' offset and commands in @regs. The following encoding is used + * for each byte. There are 2 steps: decoding commands and decoding addresses. + * + * Commands: + * [7]: create NOPs - number of NOPs are set in lower bits + * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set + * MI_LRI_FORCE_POSTED + * [5:0]: Number of NOPs or registers to set values to in case of + * MI_LOAD_REGISTER_IMM + * + * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" + * number of registers. They are set by using the REG/REG16 macros: the former + * is used for offsets smaller than 0x200 while the latter is for values bigger + * than that. 
Those macros already set all the bits documented below correctly: + * + * [7]: When a register offset needs more than 6 bits, use additional bytes, to + * follow, for the lower bits + * [6:0]: Register offset, without considering the engine base. + * + * This function only tweaks the commands and register offsets. Values are not + * filled out. + */ static void set_offsets(u32 *regs, const u8 *data, const struct intel_engine_cs *engine, @@ -264,6 +288,39 @@ static const u8 dg2_xcs_offsets[] = { END }; +static const u8 mtl_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + NOP(4), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + static const u8 gen8_rcs_offsets[] = { NOP(1), LRI(14, POSTED), @@ -606,6 +663,49 @@ static const u8 dg2_rcs_offsets[] = { END }; +static const u8 mtl_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(2), + LRI(2, POSTED), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -624,7 +724,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) + return mtl_rcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_rcs_offsets; else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) return xehp_rcs_offsets; @@ -637,7 +739,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) else return gen8_rcs_offsets; } else { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) + return mtl_xcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_xcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets; @@ -745,19 +849,18 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) static u32 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) { - switch (GRAPHICS_VER(engine->i915)) { - default: - MISSING_CASE(GRAPHICS_VER(engine->i915)); - fallthrough; - case 12: + if (GRAPHICS_VER(engine->i915) >= 12) return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 11: + else if (GRAPHICS_VER(engine->i915) >= 11) return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 9: + else if (GRAPHICS_VER(engine->i915) >= 9) return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 8: + else if (GRAPHICS_VER(engine->i915) >= 8) return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - } + + GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8); + + return 0; } static void @@ -1012,7 +1115,7 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += 
I915_GTT_PAGE_SIZE; /* for redzone */ - if (GRAPHICS_VER(engine->i915) == 12) { + if (GRAPHICS_VER(engine->i915) >= 12) { ce->wa_bb_page = context_size / PAGE_SIZE; context_size += PAGE_SIZE; } @@ -1718,24 +1821,16 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) unsigned int i; int err; - if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) + if (GRAPHICS_VER(engine->i915) >= 11 || + !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) return; - switch (GRAPHICS_VER(engine->i915)) { - case 12: - case 11: - return; - case 9: + if (GRAPHICS_VER(engine->i915) == 9) { wa_bb_fn[0] = gen9_init_indirectctx_bb; wa_bb_fn[1] = NULL; - break; - case 8: + } else if (GRAPHICS_VER(engine->i915) == 8) { wa_bb_fn[0] = gen8_init_indirectctx_bb; wa_bb_fn[1] = NULL; - break; - default: - MISSING_CASE(GRAPHICS_VER(engine->i915)); - return; } err = lrc_create_wa_ctx(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index a390f0813c8b..7111bae759f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -110,6 +110,8 @@ enum { #define XEHP_SW_CTX_ID_WIDTH 16 #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 +#define GEN12_GUC_SW_CTX_ID_SHIFT 39 +#define GEN12_GUC_SW_CTX_ID_WIDTH 16 static inline void lrc_runtime_start(struct intel_context *ce) { diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index aaaf1906026c..b405a04135ca 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -10,6 +10,7 @@ #include "intel_gtt.h" #include "intel_migrate.h" #include "intel_ring.h" +#include "gem/i915_gem_lmem.h" struct insert_pte_data { u64 offset; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 152244d7f62a..49fdd509527a 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -7,6 +7,7 @@ #include "intel_engine.h" #include "intel_gt.h" +#include "intel_gt_mcr.h" #include "intel_gt_regs.h" #include "intel_mocs.h" #include "intel_ring.h" @@ -609,14 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_uncore *uncore, +static void init_l3cc_table(struct intel_gt *gt, const struct drm_i915_mocs_table *table) { unsigned int i; u32 l3cc; for_each_l3cc(l3cc, table, i) - intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc); + else + intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc); } void intel_mocs_init_engine(struct intel_engine_cs *engine) @@ -636,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, &table); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine->uncore, &table); + init_l3cc_table(engine->gt, &table); } static u32 global_mocs_offset(void) @@ -672,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt) * memory transactions including guc transactions */ if (flags & HAS_RENDER_L3CC) - init_l3cc_table(gt->uncore, &table); + init_l3cc_table(gt, &table); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b36674356986..3159df6cdd49 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1278,7 +1278,7 @@ static void intel_gt_reset_global(struct intel_gt *gt, 
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); /* Use a watchdog to ensure that our reset completes */ - intel_wedge_on_timeout(&w, gt, 5 * HZ) { + intel_wedge_on_timeout(&w, gt, 60 * HZ) { intel_display_prepare_reset(gt->i915); intel_gt_reset(gt, engine_mask, reason); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 6b86250c31ab..6c34a83c24b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -625,9 +625,7 @@ static void gen5_rps_disable(struct intel_rps *rps) rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); /* Ack interrupts, disable EFC interrupt */ - intel_uncore_write(uncore, MEMINTREN, - intel_uncore_read(uncore, MEMINTREN) & - ~MEMINT_EVAL_CHG_EN); + intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0); intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); /* Go back to the starting frequency */ @@ -1016,9 +1014,15 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + /* Return if old value is non zero */ - if (!atomic_fetch_inc(&slpc->num_waiters)) + if (!atomic_fetch_inc(&slpc->num_waiters)) { + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", + rq->fence.context, rq->fence.seqno); schedule_work(&slpc->boost_work); + } return; } @@ -1085,15 +1089,25 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps) return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); } -/** - * gen6_rps_get_freq_caps - Get freq caps exposed by HW - * @rps: the intel_rps structure - * @caps: returned freq caps - * - * Returned "caps" frequencies should be converted to MHz using - * intel_gpu_freq() - */ -void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) +static void +mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ? + intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) : + intel_uncore_read(uncore, MTL_RP_STATE_CAP); + u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ? 
+ intel_uncore_read(uncore, MTL_MPE_FREQUENCY) : + intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY); + + /* MTL values are in units of 16.67 MHz */ + caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap); + caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap); + caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe); +} + +static void +__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 rp_state_cap; @@ -1128,6 +1142,24 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c } } +/** + * gen6_rps_get_freq_caps - Get freq caps exposed by HW + * @rps: the intel_rps structure + * @caps: returned freq caps + * + * Returned "caps" frequencies should be converted to MHz using + * intel_gpu_freq() + */ +void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (IS_METEORLAKE(i915)) + return mtl_get_freq_caps(rps, caps); + else + return __gen6_rps_get_freq_caps(rps, caps); +} + static void gen6_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -2191,6 +2223,213 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) return intel_gpu_freq(rps, rps->min_freq); } +static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) +{ + struct intel_gt *gt = rps_to_gt(rps); + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_rps_freq_caps caps; + u32 rp_state_limits; + u32 gt_perf_status; + u32 rpmodectl, rpinclimit, rpdeclimit; + u32 rpstat, cagf, reqf; + u32 rpcurupei, rpcurup, rpprevup; + u32 rpcurdownei, rpcurdown, rpprevdown; + u32 rpupei, rpupt, rpdownei, rpdownt; + u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; + + rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); + gen6_rps_get_freq_caps(rps, &caps); + if (IS_GEN9_LP(i915)) + gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); + else + gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); + + /* RPSTAT1 is in the GT power well */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); + if (GRAPHICS_VER(i915) >= 9) { + reqf >>= 23; + } else { + reqf &= ~GEN6_TURBO_DISABLE; + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + reqf >>= 24; + else + reqf >>= 25; + } + reqf = intel_gpu_freq(rps, reqf); + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; + rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; + rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; + rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + + rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); + rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + + rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); + rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + cagf = intel_rps_read_actual_frequency(rps); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + 
+ if (GRAPHICS_VER(i915) >= 11) { + pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); + pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); + /* + * The equivalent to the PM ISR & IIR cannot be read + * without affecting the current state of the system + */ + pm_isr = 0; + pm_iir = 0; + } else if (GRAPHICS_VER(i915) >= 8) { + pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); + pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); + pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); + pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); + } else { + pm_ier = intel_uncore_read(uncore, GEN6_PMIER); + pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); + pm_isr = intel_uncore_read(uncore, GEN6_PMISR); + pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); + } + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + drm_printf(p, "Video Turbo Mode: %s\n", + str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO)); + drm_printf(p, "HW control enabled: %s\n", + str_yes_no(rpmodectl & GEN6_RP_ENABLE)); + drm_printf(p, "SW control enabled: %s\n", + str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); + + drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + pm_ier, pm_imr, pm_mask); + if (GRAPHICS_VER(i915) <= 10) + drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", + pm_isr, pm_iir); + drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + drm_printf(p, "Render p-state ratio: %d\n", + (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); + drm_printf(p, "Render p-state VID: %d\n", + gt_perf_status & 0xff); + drm_printf(p, "Render p-state limit: %d\n", + rp_state_limits & 0xff); + drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); + drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); + drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); + drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); + drm_printf(p, "CAGF: %dMHz\n", cagf); + drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", + rpcurupei, + intel_gt_pm_interval_to_ns(gt, rpcurupei)); + drm_printf(p, "RP CUR UP: %d (%lldns)\n", + rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); + drm_printf(p, "RP PREV UP: %d (%lldns)\n", + rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); + drm_printf(p, "Up threshold: %d%%\n", + rps->power.up_threshold); + drm_printf(p, "RP UP EI: %d (%lldns)\n", + rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); + drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", + rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); + + drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", + rpcurdownei, + intel_gt_pm_interval_to_ns(gt, rpcurdownei)); + drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", + rpcurdown, + intel_gt_pm_interval_to_ns(gt, rpcurdown)); + drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", + rpprevdown, + intel_gt_pm_interval_to_ns(gt, rpprevdown)); + drm_printf(p, "Down threshold: %d%%\n", + rps->power.down_threshold); + drm_printf(p, "RP DOWN EI: %d (%lldns)\n", + rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); + drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", + rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); + + drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.min_freq)); + drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp1_freq)); + drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp0_freq)); + drm_printf(p, "Max overclocked frequency: %dMHz\n", + intel_gpu_freq(rps, 
rps->max_freq)); + + drm_printf(p, "Current freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + drm_printf(p, "Actual freq: %d MHz\n", cagf); + drm_printf(p, "Idle freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + drm_printf(p, "Min freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + drm_printf(p, "Boost freq: %d MHz\n", + intel_gpu_freq(rps, rps->boost_freq)); + drm_printf(p, "Max freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + drm_printf(p, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); +} + +static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) +{ + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; + struct intel_rps_freq_caps caps; + u32 pm_mask; + + gen6_rps_get_freq_caps(rps, &caps); + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + drm_printf(p, "PM MASK=0x%08x\n", pm_mask); + drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); + drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); + drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.min_freq)); + drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp1_freq)); + drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp0_freq)); + drm_printf(p, "Current freq: %d MHz\n", + intel_rps_get_requested_frequency(rps)); + drm_printf(p, "Actual freq: %d MHz\n", + intel_rps_read_actual_frequency(rps)); + drm_printf(p, "Min freq: %d MHz\n", + intel_rps_get_min_frequency(rps)); + drm_printf(p, "Boost freq: %d MHz\n", + intel_rps_get_boost_frequency(rps)); + drm_printf(p, "Max freq: %d MHz\n", + intel_rps_get_max_frequency(rps)); + drm_printf(p, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, caps.rp1_freq)); +} + +void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) +{ + if (rps_uses_slpc(rps)) + return slpc_frequency_dump(rps, p); + else + return rps_frequency_dump(rps, p); +} + static int set_max_freq(struct intel_rps *rps, u32 val) { struct drm_i915_private *i915 = rps_to_i915(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 4509dfdc52e0..110300dfd438 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -10,6 +10,7 @@ #include "i915_reg_defs.h" struct i915_request; +struct drm_printer; void intel_rps_init_early(struct intel_rps *rps); void intel_rps_init(struct intel_rps *rps); @@ -54,6 +55,8 @@ void intel_rps_lower_unslice(struct intel_rps *rps); u32 intel_rps_read_throttle_reason(struct intel_rps *rps); bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask); +void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p); + void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 66f21c735d54..6c6198a257ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -677,8 +677,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, * If i915/perf is active, we want a stable powergating configuration * on the system. Use the configuration pinned by i915/perf. 
*/ - if (i915->perf.exclusive_stream) - req_sseu = &i915->perf.sseu; + if (gt->perf.exclusive_stream) + req_sseu = &gt->perf.sseu; slices = hweight8(req_sseu->slice_mask); subslices = hweight8(req_sseu->subslice_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a821e3d405db..3cdf5c24dbc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -166,6 +166,21 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, _wa_add(wal, &wa); } +static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg, + u32 clear, u32 set, u32 read_mask, bool masked_reg) +{ + struct i915_wa wa = { + .mcr_reg = reg, + .clr = clear, + .set = set, + .read = read_mask, + .masked_reg = masked_reg, + .is_mcr = 1, + }; + + _wa_add(wal, &wa); +} + static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { @@ -173,6 +188,12 @@ wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) } static void +wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set) +{ + wa_mcr_add(wal, reg, clear, set, clear, false); +} + +static void wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { wa_write_clr_set(wal, reg, ~0, set); @@ -185,11 +206,23 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) } static void +wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set) +{ + wa_mcr_write_clr_set(wal, reg, set, set); +} + +static void wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) { wa_write_clr_set(wal, reg, clr, 0); } +static void +wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr) +{ + wa_mcr_write_clr_set(wal, reg, clr, 0); +} + /* * WA operations on "masked register". A masked register has the upper 16 bits * documented as "masked" in b-spec. Its purpose is to allow writing to just a @@ -208,18 +241,37 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) } static void +wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); +} + +static void wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); } static void +wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); +} + +static void wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); } +static void +wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, + u32 mask, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); +} + static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE); /* WaDisablePartialInstShootdown:bdw,chv */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Use Force Non-Coherent whenever executing a 3D context. 
This is a * workaround for a possible hang in the unlikely event a TLB @@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw * * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); - wa_masked_en(wal, HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); wa_masked_en(wal, HDC_CHICKEN0, /* WaForceContextSaveRestoreNonCoherent:bdw */ @@ -314,7 +366,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:chv */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* Improve HiZ throughput on CHV. */ wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); @@ -333,21 +385,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, */ wa_masked_en(wal, COMMON_SLICE_CHICKEN2, GEN9_PBE_COMPRESSED_HASH_SELECTION); - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); } /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | + GEN9_ENABLE_GPGPU_PREEMPTION); /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ @@ -356,8 +408,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); + wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ wa_masked_en(wal, HDC_CHICKEN0, @@ -386,11 +438,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, IS_KABYLAKE(i915) || IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) - wa_masked_en(wal, HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); /* * Supporting preemption with fine-granularity requires changes in the @@ -469,8 +521,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, 
gen9_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bxt */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); /* WaToEnableHwFixForPushConstHWBug:bxt */ wa_masked_en(wal, COMMON_SLICE_CHICKEN2, @@ -490,8 +542,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:kbl */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -514,8 +566,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:cfl */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -534,13 +586,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, * (the register is whitelisted in hardware now, so UMDs can opt in * for coherency if they have a good reason). */ - wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); /* WaEnableFloatBlendOptimization:icl */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), - 0 /* write-only, so skip validation */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), + 0 /* write-only, so skip validation */, + true); /* WaDisableGPGPUMidThreadPreemption:icl */ wa_masked_field_set(wal, GEN8_CS_CHICKEN1, @@ -548,8 +600,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); /* allow headerless messages for preemptible GPGPU context */ - wa_masked_en(wal, GEN10_SAMPLER_MODE, - GEN11_SAMPLER_ENABLE_HEADLESS_MSG); + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + GEN11_SAMPLER_ENABLE_HEADLESS_MSG); /* Wa_1604278689:icl,ehl */ wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); @@ -558,7 +610,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, 0xFFFFFFFF); /* Wa_1406306137:icl,ehl */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } /* @@ -569,13 +621,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); - wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, - REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); - wa_add(wal, - FF_MODE2, - FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128, - 0, false); + wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); + wa_mcr_add(wal, + XEHP_FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, + 0, false); } /* @@ -599,7 +651,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, * verification is ignored. 
*/ wa_add(wal, - FF_MODE2, + GEN12_FF_MODE2, FF_MODE2_TDS_TIMER_MASK, FF_MODE2_TDS_TIMER_128, 0, false); @@ -608,6 +660,8 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = engine->i915; + gen12_ctx_gt_tuning_init(engine, wal); /* @@ -637,10 +691,14 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, * to Wa_1608008084. */ wa_add(wal, - FF_MODE2, + GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0, false); + + if (!IS_DG1(i915)) + /* Wa_1806527549 */ + wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -664,27 +722,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_16011186671:dg2_g11 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); + wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); + wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); } if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { /* Wa_14010469329:dg2_g10 */ - wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); /* * Wa_22010465075:dg2_g10 * Wa_22010613112:dg2_g10 * Wa_14010698770:dg2_g10 */ - wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); } /* Wa_16013271637:dg2 */ - wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1, - MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || @@ -1076,18 +1134,23 @@ static void __set_mcr_steering(struct i915_wa_list *wal, wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); } -static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, - unsigned int slice, unsigned int subslice) +static void debug_dump_steering(struct intel_gt *gt) { struct drm_printer p = drm_debug_printer("MCR Steering:"); + if (drm_debug_enabled(DRM_UT_DRIVER)) + intel_gt_mcr_report_steering(&p, gt, false); +} + +static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, + unsigned int slice, unsigned int subslice) +{ __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); gt->default_steering.groupid = slice; gt->default_steering.instanceid = subslice; - if (drm_debug_enabled(DRM_UT_DRIVER)) - intel_gt_mcr_report_steering(&p, gt, false); + debug_dump_steering(gt); } static void @@ -1181,6 +1244,9 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) gt->steering_table[MSLICE] = NULL; } + if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0)) + gt->steering_table[GAM] = NULL; + slice = __ffs(slice_mask); subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) % GEN_DSS_PER_GSLICE; @@ -1198,6 +1264,13 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) */ __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2); __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2); + + /* + * On DG2, GAM registers have a dedicated steering control register + * and must always be programmed to a hardcoded groupid of "1." 
+ */ + if (IS_DG2(gt->i915)) + __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0); } static void @@ -1254,22 +1327,22 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) PSDUNIT_CLKGATE_DIS); /* Wa_1406680159:icl,ehl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, + GEN11_SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* * This is not a documented workaround, but rather an optimization * to reduce sampler power. */ - wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); + wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } /* @@ -1303,7 +1376,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_14011060649(gt, wal); /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ - wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); + wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void @@ -1315,14 +1388,14 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1409420604:tgl */ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE2, - CPSSUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, + GEN11_SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1408615072:tgl[a0] */ @@ -1341,14 +1414,14 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1607087056:dg1 */ if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, + GEN11_SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1409420604:dg1 */ if (IS_DG1(i915)) - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE2, - CPSSUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); /* Wa_1408615072:dg1 */ /* Empirical testing shows this register is unaffected by engine reset. 
*/ @@ -1365,7 +1438,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) xehp_init_mcr(gt, wal); /* Wa_1409757795:xehpsdv */ - wa_write_or(wal, SCCGCTL94DC, CG3DDISURB); + wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB); /* Wa_16011155590:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) @@ -1445,8 +1518,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) CG3DDISCFEG_CLKGATE_DIS); /* Wa_14011006942:dg2 */ - wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE, - DSS_ROUTER_CLKGATE_DIS); + wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, + DSS_ROUTER_CLKGATE_DIS); } if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { @@ -1457,7 +1530,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); /* Wa_14011371254:dg2_g10 */ - wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); + wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); /* Wa_14011431319:dg2_g10 */ wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | @@ -1493,21 +1566,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMEDIA_CLKGATE_DIS); /* Wa_14011028019:dg2_g10 */ - wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); } /* Wa_14014830051:dg2 */ - wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); /* * The following are not actually "workarounds" but rather * recommended tuning settings documented in the bspec's * performance guide section. */ - wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void @@ -1516,7 +1589,27 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) pvc_init_mcr(gt, wal); /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); +} + +static void +xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + /* FIXME: Actual workarounds will be added in future patch(es) */ + + /* + * Unlike older platforms, we no longer setup implicit steering here; + * all MCR accesses are explicitly steered. + */ + debug_dump_steering(gt); +} + +static void +xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + /* FIXME: Actual workarounds will be added in future patch(es) */ + + debug_dump_steering(gt); } static void @@ -1524,7 +1617,18 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_PONTEVECCHIO(i915)) + if (gt->type == GT_MEDIA) { + if (MEDIA_VER(i915) >= 13) + xelpmp_gt_workarounds_init(gt, wal); + else + MISSING_CASE(MEDIA_VER(i915)); + + return; + } + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + xelpg_gt_workarounds_init(gt, wal); + else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); @@ -1628,14 +1732,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) u32 val, old = 0; /* open-coded rmw due to steering */ - old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0; + if (wa->clr) + old = wa->is_mcr ? 
+ intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : + intel_uncore_read_fw(uncore, wa->reg); val = (old & ~wa->clr) | wa->set; - if (val != old || !wa->clr) - intel_uncore_write_fw(uncore, wa->reg, val); + if (val != old || !wa->clr) { + if (wa->is_mcr) + intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val); + else + intel_uncore_write_fw(uncore, wa->reg, val); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + u32 val = wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : + intel_uncore_read_fw(uncore, wa->reg); - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg), - wal->name, "application"); + wa_verify(wa, val, wal->name, "application"); + } } intel_uncore_forcewake_put__locked(uncore, fw); @@ -1664,8 +1779,9 @@ static bool wa_list_verify(struct intel_gt *gt, intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - ok &= wa_verify(wa, - intel_gt_mcr_read_any_fw(gt, wa->reg), + ok &= wa_verify(wa, wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : + intel_uncore_read_fw(uncore, wa->reg), wal->name, from); intel_uncore_forcewake_put__locked(uncore, fw); @@ -1712,11 +1828,35 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) } static void +whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags) +{ + struct i915_wa wa = { + .mcr_reg = reg, + .is_mcr = 1, + }; + + if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) + return; + + if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) + return; + + wa.mcr_reg.reg |= flags; + _wa_add(wal, &wa); +} + +static void whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) { whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); } +static void +whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg) +{ + whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); +} + static void gen9_whitelist_build(struct i915_wa_list *w) { /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ @@ -1742,7 +1882,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine) gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:skl */ - whitelist_reg(w, GEN8_L3SQCREG4); + whitelist_mcr_reg(w, GEN8_L3SQCREG4); } static void bxt_whitelist_build(struct intel_engine_cs *engine) @@ -1763,7 +1903,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine) gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:kbl */ - whitelist_reg(w, GEN8_L3SQCREG4); + whitelist_mcr_reg(w, GEN8_L3SQCREG4); } static void glk_whitelist_build(struct intel_engine_cs *engine) @@ -1828,10 +1968,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: /* WaAllowUMDToModifyHalfSliceChicken7:icl */ - whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); + whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7); /* WaAllowUMDToModifySamplerMode:icl */ - whitelist_reg(w, GEN10_SAMPLER_MODE); + whitelist_mcr_reg(w, GEN10_SAMPLER_MODE); /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); @@ -2107,24 +2247,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14013392000:dg2_g11 */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); - - /* Wa_16011620976:dg2_g11 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); } if 
(IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(i915) || IS_DG2_G12(i915)) { /* Wa_1509727124:dg2 */ - wa_masked_en(wal, GEN10_SAMPLER_MODE, - SC_DISABLE_POWER_OPTIMIZATION_EBB); + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || @@ -2133,13 +2270,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_22012826095:dg2 * Wa_22013059131:dg2 */ - wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, - MAXREQS_PER_BANK, - REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); + wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); /* Wa_22013059131:dg2 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0, - FORCE_1_SUB_MESSAGE_PER_FRAGMENT); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); } /* Wa_1308578152:dg2_g10 when first gslice is fused off */ @@ -2152,19 +2289,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(i915) || IS_DG2_G12(i915)) { /* Wa_22013037850:dg2 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, - DISABLE_128B_EVICTION_COMMAND_UDW); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); /* Wa_22012856258:dg2 */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - GEN12_DISABLE_READ_SUPPRESSION); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 */ - wa_masked_dis(wal, GEN12_HDC_CHICKEN0, - LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); + wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0, + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { @@ -2172,8 +2309,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1608949956:dg2_g10 * Wa_14010198302:dg2_g10 */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); /* * Wa_14010918519:dg2_g10 @@ -2181,31 +2318,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, * so ignoring verification. 
*/ - wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); + wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, + FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, + 0, false); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* Wa_22010430635:dg2 */ - wa_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); + wa_mcr_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); /* Wa_14010648519:dg2 */ - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); + wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); /* Wa_22012532006:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { /* Wa_14010680813:dg2_g10 */ @@ -2216,17 +2353,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012362059:dg2 */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), - 0 /* Wa_14012342262 :write-only reg, so skip - verification */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), + 0 /* Wa_14012342262 write-only reg, so skip verification */, + true); } if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || @@ -2253,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); /* * Wa_1407928979:tgl A* @@ -2282,14 +2418,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - GEN12_PUSH_CONST_DEREF_HOLD_DIS); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_PUSH_CONST_DEREF_HOLD_DIS); /* * Wa_1409085225:tgl * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || @@ -2313,9 +2449,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); + wa_mcr_masked_en(wal, + 
GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (GRAPHICS_VER(i915) == 11) { @@ -2349,9 +2485,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + wa_mcr_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* Wa_1606682166:icl */ wa_write_or(wal, @@ -2359,10 +2495,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_DISABLE_SAMPLER_PREFETCH); /* Wa_1409178092:icl */ - wa_write_clr_set(wal, - GEN11_SCRATCH2, - GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, - 0); + wa_mcr_write_clr_set(wal, + GEN11_SCRATCH2, + GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, + 0); /* WaEnable32PlaneMode:icl */ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, @@ -2389,12 +2525,64 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_GRAPHICS_VER(i915, 9, 12)) { - /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ + /* + * Intel platforms that support fine-grained preemption (i.e., gen9 and + * beyond) allow the kernel-mode driver to choose between two different + * options for controlling preemption granularity and behavior. + * + * Option 1 (hardware default): + * Preemption settings are controlled in a global manner via + * kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity + * and settings chosen by the kernel-mode driver will apply to all + * userspace clients. + * + * Option 2: + * Preemption settings are controlled on a per-context basis via + * register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on + * context switch and is writable by userspace (e.g., via + * MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer) + * which allows different userspace drivers/clients to select + * different settings, or to change those settings on the fly in + * response to runtime needs. This option was known by name + * "FtrPerCtxtPreemptionGranularityControl" at one time, although + * that name is somewhat misleading as other non-granularity + * preemption settings are also impacted by this decision. + * + * On Linux, our policy has always been to let userspace drivers + * control preemption granularity/settings (Option 2). This was + * originally mandatory on gen9 to prevent ABI breakage (old gen9 + * userspace developed before object-level preemption was enabled would + * not behave well if i915 were to go with Option 1 and enable that + * preemption in a global manner). On gen9 each context would have + * object-level preemption disabled by default (see + * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but + * userspace drivers could opt-in to object-level preemption as they + * saw fit. For post-gen9 platforms, we continue to utilize Option 2; + * even though it is no longer necessary for ABI compatibility when + * enabling a new platform, it does ensure that userspace will be able + * to implement any workarounds that show up requiring temporary + * adjustments to preemption behavior at runtime. + * + * Notes/Workarounds: + * - Wa_14015141709: On DG2 and early steppings of MTL, + * CS_CHICKEN1[0] does not disable object-level preemption as + * it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been + * using Option 1). Effectively this means userspace is unable + * to disable object-level preemption on these platforms/steppings + * despite the setting here. 
+ * + * - Wa_16013994831: May require that userspace program + * CS_CHICKEN1[10] when certain runtime conditions are true. + * Userspace requires Option 2 to be in effect for their update of + * CS_CHICKEN1[10] to be effective. + * + * Other workarounds may appear in the future that will also require + * Option 2 behavior to allow proper userspace implementation. + */ + if (GRAPHICS_VER(i915) >= 9) wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL); - } if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || @@ -2420,36 +2608,36 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ - wa_write_or(wal, - BDW_SCRATCH1, - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + wa_mcr_write_or(wal, + BDW_SCRATCH1, + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ if (IS_GEN9_LP(i915)) - wa_write_clr_set(wal, - GEN8_L3SQCREG1, - L3_PRIO_CREDITS_MASK, - L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); + wa_mcr_write_clr_set(wal, + GEN8_L3SQCREG1, + L3_PRIO_CREDITS_MASK, + L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); + wa_mcr_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* Disable atomics in L3 to prevent unrecoverable hangs */ wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1, GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0); - wa_write_clr_set(wal, GEN8_L3SQCREG4, - GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); - wa_write_clr_set(wal, GEN9_SCRATCH1, - EVICTION_PERF_FIX_ENABLE, 0); + wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4, + GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); + wa_mcr_write_clr_set(wal, GEN9_SCRATCH1, + EVICTION_PERF_FIX_ENABLE, 0); } if (IS_HASWELL(i915)) { /* WaSampleCChickenBitEnable:hsw */ wa_masked_en(wal, - HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); + HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); wa_masked_dis(wal, CACHE_MODE_0_GEN7, @@ -2657,7 +2845,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) { /* Wa_14014999345:pvc */ - wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); } } @@ -2683,8 +2871,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, } if (IS_DG2(i915)) { - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* * This is also listed as Wa_22012654132 for certain DG2 @@ -2695,10 +2883,10 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, * back for verification on DG2 (due to Wa_14012342262), so * we need to explicitly skip the readback. */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } /* @@ -2707,8 +2895,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, * platforms. 
*/ if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) - wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, - THREAD_EX_ARB_MODE_RR_AFTER_DEP); + wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, + THREAD_EX_ARB_MODE_RR_AFTER_DEP); } /* @@ -2734,30 +2922,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ - wa_masked_en(wal, - GEN8_ROW_CHICKEN, - SYSTOLIC_DOP_CLOCK_GATING_DIS); + wa_mcr_masked_en(wal, + GEN8_ROW_CHICKEN, + SYSTOLIC_DOP_CLOCK_GATING_DIS); /* Wa_1607196519 */ - wa_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); + wa_mcr_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); /* Wa_14010670810:xehpsdv */ - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); /* Wa_14010449647:xehpsdv */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); /* Wa_18011725039:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { - wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); - wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); + wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER); + wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); } /* Wa_14012362059:xehpsdv */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_14014368820:xehpsdv */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | @@ -2766,19 +2954,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { /* Wa_14015227452:dg2,pvc */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); /* Wa_22014226127:dg2,pvc */ - wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); /* Wa_16015675438:dg2,pvc */ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); /* Wa_18018781329:dg2,pvc */ - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); + } + + if (IS_DG2(i915)) { + /* + * Wa_16011620976:dg2_g11 + * Wa_22015475538:dg2 + */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + + /* Wa_18017747507:dg2 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index 8a4b6de4e754..7c8b01d00043 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -11,11 +11,16 @@ #include "i915_reg_defs.h" struct i915_wa { - i915_reg_t reg; + union { + i915_reg_t reg; + i915_mcr_reg_t mcr_reg; + }; u32 clr; u32 set; u32 read; - bool masked_reg; + + u32 masked_reg:1; + u32 is_mcr:1; }; struct i915_wa_list { diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 
1b75f478d1b8..881b64f3e7b9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -39,6 +39,16 @@ static int perf_end(struct intel_gt *gt) return igt_flush_test(gt->i915); } +static i915_reg_t timestamp_reg(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915)) + return RING_TIMESTAMP_UDW(engine->mmio_base); + else + return RING_TIMESTAMP(engine->mmio_base); +} + static int write_timestamp(struct i915_request *rq, int slot) { struct intel_timeline *tl = @@ -55,7 +65,7 @@ static int write_timestamp(struct i915_request *rq, int slot) if (GRAPHICS_VER(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; - *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine)); *cs++ = tl->hwsp_offset + slot * sizeof(u32); *cs++ = 0; @@ -125,7 +135,7 @@ static int perf_mi_bb_start(void *arg) enum intel_engine_id id; int err = 0; - if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; perf_begin(gt); @@ -135,6 +145,9 @@ static int perf_mi_bb_start(void *arg) u32 cycles[COUNT]; int i; + if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) + continue; + intel_engine_pm_get(engine); batch = create_empty_batch(ce); @@ -249,7 +262,7 @@ static int perf_mi_noop(void *arg) enum intel_engine_id id; int err = 0; - if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; perf_begin(gt); @@ -259,6 +272,9 @@ static int perf_mi_noop(void *arg) u32 cycles[COUNT]; int i; + if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) + continue; + intel_engine_pm_get(engine); base = create_empty_batch(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 1e08b2473b99..2c7c053a8808 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -85,8 +85,6 @@ static int wait_for_reset(struct intel_engine_cs *engine, break; } while (time_before(jiffies, timeout)); - flush_scheduled_work(); - if (rq->fence.error != -EIO) { pr_err("%s: hanging request %llx:%lld not reset\n", engine->name, @@ -3475,12 +3473,14 @@ static int random_priority(struct rnd_state *rnd) struct preempt_smoke { struct intel_gt *gt; + struct kthread_work work; struct i915_gem_context **contexts; struct intel_engine_cs *engine; struct drm_i915_gem_object *batch; unsigned int ncontext; struct rnd_state prng; unsigned long count; + int result; }; static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) @@ -3540,34 +3540,31 @@ unpin: return err; } -static int smoke_crescendo_thread(void *arg) +static void smoke_crescendo_work(struct kthread_work *work) { - struct preempt_smoke *smoke = arg; + struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work); IGT_TIMEOUT(end_time); unsigned long count; count = 0; do { struct i915_gem_context *ctx = smoke_context(smoke); - int err; - err = smoke_submit(smoke, - ctx, count % I915_PRIORITY_MAX, - smoke->batch); - if (err) - return err; + smoke->result = smoke_submit(smoke, ctx, + count % I915_PRIORITY_MAX, + smoke->batch); count++; - } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); + } while (!smoke->result && count < smoke->ncontext && + !__igt_timeout(end_time, NULL)); smoke->count = count; - return 0; } static int 
smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) #define BATCH BIT(0) { - struct task_struct *tsk[I915_NUM_ENGINES] = {}; + struct kthread_worker *worker[I915_NUM_ENGINES] = {}; struct preempt_smoke *arg; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -3578,6 +3575,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) if (!arg) return -ENOMEM; + memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg)); + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -3585,31 +3584,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - tsk[id] = kthread_run(smoke_crescendo_thread, arg, - "igt/smoke:%d", id); - if (IS_ERR(tsk[id])) { - err = PTR_ERR(tsk[id]); + worker[id] = kthread_create_worker(0, "igt/smoke:%d", id); + if (IS_ERR(worker[id])) { + err = PTR_ERR(worker[id]); break; } - get_task_struct(tsk[id]); - } - yield(); /* start all threads before we kthread_stop() */ + kthread_init_work(&arg[id].work, smoke_crescendo_work); + kthread_queue_work(worker[id], &arg[id].work); + } count = 0; for_each_engine(engine, smoke->gt, id) { - int status; - - if (IS_ERR_OR_NULL(tsk[id])) + if (IS_ERR_OR_NULL(worker[id])) continue; - status = kthread_stop(tsk[id]); - if (status && !err) - err = status; + kthread_flush_work(&arg[id].work); + if (arg[id].result && !err) + err = arg[id].result; count += arg[id].count; - put_task_struct(tsk[id]); + kthread_destroy_worker(worker[id]); } pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index be94f863bdef..b46425aeb2f0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -36,6 +36,19 @@ static int cmp_u32(const void *A, const void *B) return 0; } +static u32 read_timestamp(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + /* On i965 the first read tends to give a stale value */ + ENGINE_READ_FW(engine, RING_TIMESTAMP); + + if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915)) + return ENGINE_READ_FW(engine, RING_TIMESTAMP_UDW); + else + return ENGINE_READ_FW(engine, RING_TIMESTAMP); +} + static void measure_clocks(struct intel_engine_cs *engine, u32 *out_cycles, ktime_t *out_dt) { @@ -45,13 +58,13 @@ static void measure_clocks(struct intel_engine_cs *engine, for (i = 0; i < 5; i++) { local_irq_disable(); - cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP); + cycles[i] = -read_timestamp(engine); dt[i] = ktime_get(); udelay(1000); dt[i] = ktime_sub(ktime_get(), dt[i]); - cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP); + cycles[i] += read_timestamp(engine); local_irq_enable(); } @@ -78,25 +91,6 @@ static int live_gt_clocks(void *arg) if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - if (GRAPHICS_VER(gt->i915) == 5) - /* - * XXX CS_TIMESTAMP low dword is dysfunctional? - * - * Ville's experiments indicate the high dword still works, - * but at a correspondingly reduced frequency. - */ - return 0; - - if (GRAPHICS_VER(gt->i915) == 4) - /* - * XXX CS_TIMESTAMP appears gibberish - * - * Ville's experiments indicate that it mostly appears 'stuck' - * in that we see the register report the same cycle count - * for a couple of reads. 
- */ - return 0; - intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7f3bb1d34dfb..71263058a7b0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -866,10 +866,13 @@ static int igt_reset_active_engine(void *arg) } struct active_engine { - struct task_struct *task; + struct kthread_worker *worker; + struct kthread_work work; struct intel_engine_cs *engine; unsigned long resets; unsigned int flags; + bool stop; + int result; }; #define TEST_ACTIVE BIT(0) @@ -900,10 +903,10 @@ static int active_request_put(struct i915_request *rq) return err; } -static int active_engine(void *data) +static void active_engine(struct kthread_work *work) { I915_RND_STATE(prng); - struct active_engine *arg = data; + struct active_engine *arg = container_of(work, typeof(*arg), work); struct intel_engine_cs *engine = arg->engine; struct i915_request *rq[8] = {}; struct intel_context *ce[ARRAY_SIZE(rq)]; @@ -913,16 +916,17 @@ static int active_engine(void *data) for (count = 0; count < ARRAY_SIZE(ce); count++) { ce[count] = intel_context_create(engine); if (IS_ERR(ce[count])) { - err = PTR_ERR(ce[count]); - pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err); + arg->result = PTR_ERR(ce[count]); + pr_err("[%s] Create context #%ld failed: %d!\n", + engine->name, count, arg->result); while (--count) intel_context_put(ce[count]); - return err; + return; } } count = 0; - while (!kthread_should_stop()) { + while (!READ_ONCE(arg->stop)) { unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; @@ -967,7 +971,7 @@ static int active_engine(void *data) intel_context_put(ce[count]); } - return err; + arg->result = err; } static int __igt_reset_engines(struct intel_gt *gt, @@ -1022,7 +1026,7 @@ static int __igt_reset_engines(struct intel_gt *gt, memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES); for_each_engine(other, gt, tmp) { - struct task_struct *tsk; + struct kthread_worker *worker; threads[tmp].resets = i915_reset_engine_count(global, other); @@ -1036,19 +1040,21 @@ static int __igt_reset_engines(struct intel_gt *gt, threads[tmp].engine = other; threads[tmp].flags = flags; - tsk = kthread_run(active_engine, &threads[tmp], - "igt/%s", other->name); - if (IS_ERR(tsk)) { - err = PTR_ERR(tsk); - pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); + worker = kthread_create_worker(0, "igt/%s", + other->name); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + pr_err("[%s] Worker create failed: %d!\n", + engine->name, err); goto unwind; } - threads[tmp].task = tsk; - get_task_struct(tsk); - } + threads[tmp].worker = worker; - yield(); /* start all threads before we begin */ + kthread_init_work(&threads[tmp].work, active_engine); + kthread_queue_work(threads[tmp].worker, + &threads[tmp].work); + } st_engine_heartbeat_disable_no_pm(engine); GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, @@ -1197,17 +1203,20 @@ unwind: for_each_engine(other, gt, tmp) { int ret; - if (!threads[tmp].task) + if (!threads[tmp].worker) continue; - ret = kthread_stop(threads[tmp].task); + WRITE_ONCE(threads[tmp].stop, true); + kthread_flush_work(&threads[tmp].work); + ret = READ_ONCE(threads[tmp].result); if (ret) { pr_err("kthread for other engine %s failed, err=%d\n", other->name, ret); if (!err) err = ret; } - put_task_struct(threads[tmp].task); + + 
kthread_destroy_worker(threads[tmp].worker); /* GuC based resets are not logged per engine */ if (!using_guc) { diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 2b0c87999949..0dc5309c90a4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -6,6 +6,7 @@ #include <linux/sort.h> #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" #include "selftests/i915_random.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index cfb4708dd62e..99a372486fb7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -1107,21 +1107,27 @@ static u64 __measure_power(int duration_ms) return div64_u64(1000 * 1000 * dE, dt); } -static u64 measure_power_at(struct intel_rps *rps, int *freq) +static u64 measure_power(struct intel_rps *rps, int *freq) { u64 x[5]; int i; - *freq = rps_set_check(rps, *freq); for (i = 0; i < 5; i++) x[i] = __measure_power(5); - *freq = (*freq + read_cagf(rps)) / 2; + + *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; /* A simple triangle filter for better result stability */ sort(x, 5, sizeof(*x), cmp_u64, NULL); return div_u64(x[1] + 2 * x[2] + x[3], 4); } +static u64 measure_power_at(struct intel_rps *rps, int *freq) +{ + *freq = rps_set_check(rps, *freq); + return measure_power(rps, freq); +} + int live_rps_power(void *arg) { struct intel_gt *gt = arg; diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index f8a1d27df272..82ec95a299f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -11,7 +11,8 @@ enum test_type { VARY_MIN, VARY_MAX, - MAX_GRANTED + MAX_GRANTED, + SLPC_POWER, }; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) @@ -41,6 +42,39 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) return ret; } +static int slpc_set_freq(struct intel_gt *gt, u32 freq) +{ + int err; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + + err = slpc_set_max_freq(slpc, freq); + if (err) { + pr_err("Unable to update max freq"); + return err; + } + + err = slpc_set_min_freq(slpc, freq); + if (err) { + pr_err("Unable to update min freq"); + return err; + } + + return err; +} + +static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) +{ + int err = 0; + + err = slpc_set_freq(gt, *freq); + if (err) + return err; + *freq = intel_rps_read_actual_frequency(>->rps); + *power = measure_power(>->rps, freq); + + return err; +} + static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) { @@ -113,6 +147,58 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, return err; } +static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine) +{ + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct { + u64 power; + int freq; + } min, max; + int err = 0; + + /* + * Our fundamental assumption is that running at lower frequency + * actually saves power. Let's see if our RAPL measurement supports + * that theory. 
+ */ + if (!librapl_supported(gt->i915)) + return 0; + + min.freq = slpc->min_freq; + err = measure_power_at_freq(gt, &min.freq, &min.power); + + if (err) + return err; + + max.freq = slpc->rp0_freq; + err = measure_power_at_freq(gt, &max.freq, &max.power); + + if (err) + return err; + + pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", + engine->name, + min.power, min.freq, + max.power, max.freq); + + if (10 * min.freq >= 9 * max.freq) { + pr_notice("Could not control frequency, ran at [%uMHz, %uMhz]\n", + min.freq, max.freq); + } + + if (11 * min.power > 10 * max.power) { + pr_err("%s: did not conserve power when setting lower frequency!\n", + engine->name); + err = -EINVAL; + } + + /* Restore min/max frequencies */ + slpc_set_max_freq(slpc, slpc->rp0_freq); + slpc_set_min_freq(slpc, slpc->min_freq); + + return err; +} + static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) { struct intel_gt *gt = rps_to_gt(rps); @@ -153,6 +239,11 @@ static int run_test(struct intel_gt *gt, int test_type) if (!intel_uc_uses_guc_slpc(>->uc)) return 0; + if (slpc->min_freq == slpc->rp0_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + if (igt_spinner_init(&spin, gt)) return -ENOMEM; @@ -167,17 +258,14 @@ static int run_test(struct intel_gt *gt, int test_type) } /* - * FIXME: With efficient frequency enabled, GuC can request - * frequencies higher than the SLPC max. While this is fixed - * in GuC, we level set these tests with RPn as min. + * Set min frequency to RPn so that we can test the whole + * range of RPn-RP0. This also turns off efficient freq + * usage and makes results more predictable. */ err = slpc_set_min_freq(slpc, slpc->min_freq); - if (err) + if (err) { + pr_err("Unable to update min freq!"); return err; - - if (slpc->min_freq == slpc->rp0_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; } intel_gt_pm_wait_for_idle(gt); @@ -233,17 +321,23 @@ static int run_test(struct intel_gt *gt, int test_type) err = max_granted_freq(slpc, rps, &max_act_freq); break; + + case SLPC_POWER: + err = slpc_power(gt, engine); + break; } - pr_info("Max actual frequency for %s was %d\n", - engine->name, max_act_freq); + if (test_type != SLPC_POWER) { + pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); - /* Actual frequency should rise above min */ - if (max_act_freq <= slpc_min_freq) { - pr_err("Actual freq did not rise above min\n"); - pr_err("Perf Limit Reasons: 0x%x\n", - intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); - err = -EINVAL; + /* Actual frequency should rise above min */ + if (max_act_freq <= slpc->min_freq) { + pr_err("Actual freq did not rise above min\n"); + pr_err("Perf Limit Reasons: 0x%x\n", + intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); + err = -EINVAL; + } } igt_spinner_end(&spin); @@ -270,26 +364,66 @@ static int run_test(struct intel_gt *gt, int test_type) static int live_slpc_vary_min(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + unsigned int i; + int ret; + + for_each_gt(gt, i915, i) { + ret = run_test(gt, VARY_MIN); + if (ret) + return ret; + } - return run_test(gt, VARY_MIN); + return ret; } static int live_slpc_vary_max(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + unsigned int i; + int ret; + + for_each_gt(gt, i915, i) { + ret = run_test(gt, VARY_MAX); + if (ret) + return ret; + } - return 
run_test(gt, VARY_MAX); + return ret; } /* check if pcode can grant RP0 */ static int live_slpc_max_granted(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + unsigned int i; + int ret; - return run_test(gt, MAX_GRANTED); + for_each_gt(gt, i915, i) { + ret = run_test(gt, MAX_GRANTED); + if (ret) + return ret; + } + + return ret; +} + +static int live_slpc_power(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt; + unsigned int i; + int ret; + + for_each_gt(gt, i915, i) { + ret = run_test(gt, SLPC_POWER); + if (ret) + return ret; + } + + return ret; } int intel_slpc_live_selftests(struct drm_i915_private *i915) @@ -298,10 +432,16 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_slpc_vary_max), SUBTEST(live_slpc_vary_min), SUBTEST(live_slpc_max_granted), + SUBTEST(live_slpc_power), }; - if (intel_gt_is_wedged(to_gt(i915))) - return 0; + struct intel_gt *gt; + unsigned int i; + + for_each_gt(gt, i915, i) { + if (intel_gt_is_wedged(gt)) + return 0; + } return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 67a9aab801dd..21b1edc052f8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -991,7 +991,7 @@ static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) /* Alas, we must pardon some whitelists. Mistakes already made */ static const struct regmask pardon[] = { { GEN9_CTX_PREEMPT_REG, 9 }, - { GEN8_L3SQCREG4, 9 }, + { _MMIO(0xb118), 9 }, /* GEN8_L3SQCREG4 */ }; return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 967031056202..f2d9858d827c 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_nsecs(2)) + clamped = intel_clamp_max_busywait_duration_ns(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.max_busywait_duration_ns, duration); @@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_timeslice_duration_ms(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.timeslice_duration_ms, duration); @@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = 
intel_clamp_stop_timeout_ms(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.stop_timeout_ms, duration); @@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long timeout; + unsigned long long timeout, clamped; int err; /* @@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_preempt_timeout_ms(engine, timeout); + if (timeout != clamped) return -EINVAL; WRITE_ONCE(engine->props.preempt_timeout_ms, timeout); @@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long delay; + unsigned long long delay, clamped; int err; /* @@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_heartbeat_interval_ms(engine, delay); + if (delay != clamped) return -EINVAL; err = intel_engine_set_heartbeat(engine, delay); diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 29ef8afc8c2e..f359bef046e0 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -117,6 +117,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000, INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index 4c840a2639dc..811add10c30d 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -128,6 +128,15 @@ enum slpc_media_ratio_mode { SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2, }; +enum slpc_gucrc_mode { + SLPC_GUCRC_MODE_HW = 0, + SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1, + SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2, + SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3, + + SLPC_GUCRC_MODE_MAX, +}; + enum slpc_event_id { SLPC_EVENT_RESET = 0, SLPC_EVENT_SHUTDOWN = 1, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 4a59478c3b5c..58012edd4eb0 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -82,9 +82,16 @@ #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u /* + * Global scheduling policy update keys. + */ +enum { + GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD = 0x1001, +}; + +/* * Per context scheduling policy update keys. 
*/ -enum { +enum { GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001, GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002, GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index bac06e3d6f2c..27b09ba1d295 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -441,6 +441,7 @@ err_log: err_fw: intel_uc_fw_fini(&guc->fw); out: + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL); i915_probe_error(gt->i915, "failed with %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 804133df1ac9..357873ef692b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -113,6 +113,10 @@ struct intel_guc { */ struct list_head guc_id_list; /** + * @guc_ids_in_use: Number single-lrc guc_ids in use + */ + unsigned int guc_ids_in_use; + /** * @destroyed_contexts: list of contexts waiting to be destroyed * (deregistered with the GuC) */ @@ -132,6 +136,16 @@ struct intel_guc { * @reset_fail_mask: mask of engines that failed to reset */ intel_engine_mask_t reset_fail_mask; + /** + * @sched_disable_delay_ms: schedule disable delay, in ms, for + * contexts + */ + unsigned int sched_disable_delay_ms; + /** + * @sched_disable_gucid_threshold: threshold of min remaining available + * guc_ids before we start bypassing the schedule disable delay + */ + unsigned int sched_disable_gucid_threshold; } submission_state; /** @@ -466,4 +480,6 @@ void intel_guc_write_barrier(struct intel_guc *guc); void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); +int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 74cbe8eaf531..a419d60166c8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -5,6 +5,7 @@ #include <linux/bsearch.h> +#include "gem/i915_gem_lmem.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gt.h" #include "gt/intel_gt_mcr.h" @@ -277,24 +278,16 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) return slot; } -#define GUC_REGSET_STEERING(group, instance) ( \ - FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \ - FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \ - GUC_REGSET_NEEDS_STEERING \ -) - static long __must_check guc_mmio_reg_add(struct intel_gt *gt, struct temp_regset *regset, - i915_reg_t reg, u32 flags) + u32 offset, u32 flags) { u32 count = regset->storage_used - (regset->registers - regset->storage); - u32 offset = i915_mmio_reg_offset(reg); struct guc_mmio_reg entry = { .offset = offset, .flags = flags, }; struct guc_mmio_reg *slot; - u8 group, inst; /* * The mmio list is built using separate lists within the driver. @@ -306,17 +299,6 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt, sizeof(entry), guc_mmio_reg_cmp)) return 0; - /* - * The GuC doesn't have a default steering, so we need to explicitly - * steer all registers that need steering. However, we do not keep track - * of all the steering ranges, only of those that have a chance of using - * a non-default steering from the i915 pov. Instead of adding such - * tracking, it is easier to just program the default steering for all - * regs that don't need a non-default one. 
- */ - intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst); - entry.flags |= GUC_REGSET_STEERING(group, inst); - slot = __mmio_reg_add(regset, &entry); if (IS_ERR(slot)) return PTR_ERR(slot); @@ -335,6 +317,38 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt, #define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \ guc_mmio_reg_add(gt, \ regset, \ + i915_mmio_reg_offset(reg), \ + (masked) ? GUC_REGSET_MASKED : 0) + +#define GUC_REGSET_STEERING(group, instance) ( \ + FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \ + FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \ + GUC_REGSET_NEEDS_STEERING \ +) + +static long __must_check guc_mcr_reg_add(struct intel_gt *gt, + struct temp_regset *regset, + i915_mcr_reg_t reg, u32 flags) +{ + u8 group, inst; + + /* + * The GuC doesn't have a default steering, so we need to explicitly + * steer all registers that need steering. However, we do not keep track + * of all the steering ranges, only of those that have a chance of using + * a non-default steering from the i915 pov. Instead of adding such + * tracking, it is easier to just program the default steering for all + * regs that don't need a non-default one. + */ + intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst); + flags |= GUC_REGSET_STEERING(group, inst); + + return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags); +} + +#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \ + guc_mcr_reg_add(gt, \ + regset, \ (reg), \ (masked) ? GUC_REGSET_MASKED : 0) @@ -372,8 +386,21 @@ static int guc_mmio_regset_init(struct temp_regset *regset, false); /* add in local MOCS registers */ - for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++) - ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false); + for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false); + else + ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false); + + if (GRAPHICS_VER(engine->i915) >= 12) { + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false); + } return ret ? 
-1 : 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 8f1165146013..4e6dca707d94 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -169,6 +169,8 @@ static struct __guc_mmio_reg_descr_group default_lists[] = { MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), + MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), @@ -182,6 +184,8 @@ static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = { MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), + MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), @@ -240,19 +244,19 @@ static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglist struct __ext_steer_reg { const char *name; - i915_reg_t reg; + i915_mcr_reg_t reg; }; static const struct __ext_steer_reg xe_extregs[] = { - {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE}, - {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE} + {"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE}, + {"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE} }; static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, const struct __ext_steer_reg *extlist, int slice_id, int subslice_id) { - ext->reg = extlist->reg; + ext->reg = _MMIO(i915_mmio_reg_offset(extlist->reg)); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; @@ -419,6 +423,44 @@ guc_capture_get_device_reglist(struct intel_guc *guc) return default_lists; } +static const char * +__stringify_type(u32 type) +{ + switch (type) { + case GUC_CAPTURE_LIST_TYPE_GLOBAL: + return "Global"; + case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: + return "Class"; + case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: + return "Instance"; + default: + break; + } + + return "unknown"; +} + +static const char * +__stringify_engclass(u32 class) +{ + switch (class) { + case GUC_RENDER_CLASS: + return "Render"; + case GUC_VIDEO_CLASS: + return "Video"; + case GUC_VIDEOENHANCE_CLASS: + return "VideoEnhance"; + case GUC_BLITTER_CLASS: + return "Blitter"; + case GUC_COMPUTE_CLASS: + return "Compute"; + default: + break; + } + + return "unknown"; +} + static int guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid, struct guc_mmio_reg *ptr, u16 num_entries) @@ -482,32 +524,55 @@ guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u return num_regs; } -int -intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, - size_t *size) +static int +guc_capture_getlistsize(struct intel_guc 
*guc, u32 owner, u32 type, u32 classid, + size_t *size, bool is_purpose_est) { struct intel_guc_state_capture *gc = guc->capture; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid]; int num_regs; - if (!gc->reglists) + if (!gc->reglists) { + drm_warn(&i915->drm, "GuC-capture: No reglist on this device\n"); return -ENODEV; + } if (cache->is_valid) { *size = cache->size; return cache->status; } + if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF && + !guc_capture_get_one_list(gc->reglists, owner, type, classid)) { + if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL) + drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist Global!\n"); + else + drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist %s(%u):%s(%u)!\n", + __stringify_type(type), type, + __stringify_engclass(classid), classid); + return -ENODATA; + } + num_regs = guc_cap_list_num_regs(gc, owner, type, classid); + /* intentional empty lists can exist depending on hw config */ if (!num_regs) return -ENODATA; - *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + - (num_regs * sizeof(struct guc_mmio_reg))); + if (size) + *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + + (num_regs * sizeof(struct guc_mmio_reg))); return 0; } +int +intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + size_t *size) +{ + return guc_capture_getlistsize(guc, owner, type, classid, size, false); +} + static void guc_capture_create_prealloc_nodes(struct intel_guc *guc); int @@ -606,7 +671,7 @@ guc_capture_output_min_size_est(struct intel_guc *guc) struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; enum intel_engine_id id; - int worst_min_size = 0, num_regs = 0; + int worst_min_size = 0; size_t tmp = 0; if (!guc->capture) @@ -627,21 +692,19 @@ guc_capture_output_min_size_est(struct intel_guc *guc) worst_min_size += sizeof(struct guc_state_capture_group_header_t) + (3 * sizeof(struct guc_state_capture_header_t)); - if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp)) - num_regs += tmp; + if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp, true)) + worst_min_size += tmp; - if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, - engine->class, &tmp)) { - num_regs += tmp; + if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + engine->class, &tmp, true)) { + worst_min_size += tmp; } - if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, - engine->class, &tmp)) { - num_regs += tmp; + if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + engine->class, &tmp, true)) { + worst_min_size += tmp; } } - worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); - return worst_min_size; } @@ -658,15 +721,23 @@ static void check_guc_capture_size(struct intel_guc *guc) int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; u32 buffer_size = intel_guc_log_section_size_capture(&guc->log); + /* + * NOTE: min_size is much smaller than the capture region allocation (DG2: <80K vs 1MB) + * Additionally, its based on space needed to fit all engines getting reset at once + * within the same G2H handler task slot. This is very unlikely. However, if GuC really + * does run out of space for whatever reason, we will see an separate warning message + * when processing the G2H event capture-notification, search for: + * INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE. 
+ */ if (min_size < 0) drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n", min_size); else if (min_size > buffer_size) - drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n", + drm_warn(&i915->drm, "GuC error state capture buffer maybe small: %d < %d\n", buffer_size, min_size); else if (spare_size > buffer_size) - drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n", - buffer_size, spare_size, min_size); + drm_dbg(&i915->drm, "GuC error state capture buffer lacks spare size: %d < %d (min = %d)\n", + buffer_size, spare_size, min_size); } /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 25f09a420561..7269eb0bbedf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -71,12 +71,73 @@ static bool intel_eval_slpc_support(void *data) return intel_guc_slpc_is_used(guc); } +static int guc_sched_disable_delay_ms_get(void *data, u64 *val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + *val = (u64)guc->submission_state.sched_disable_delay_ms; + + return 0; +} + +static int guc_sched_disable_delay_ms_set(void *data, u64 val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + /* clamp to a practical limit, 1 minute is reasonable for a longest delay */ + guc->submission_state.sched_disable_delay_ms = min_t(u64, val, 60000); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops, + guc_sched_disable_delay_ms_get, + guc_sched_disable_delay_ms_set, "%lld\n"); + +static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + *val = guc->submission_state.sched_disable_gucid_threshold; + return 0; +} + +static int guc_sched_disable_gucid_threshold_set(void *data, u64 val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + if (val > intel_guc_sched_disable_gucid_threshold_max(guc)) + guc->submission_state.sched_disable_gucid_threshold = + intel_guc_sched_disable_gucid_threshold_max(guc); + else + guc->submission_state.sched_disable_gucid_threshold = val; + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops, + guc_sched_disable_gucid_threshold_get, + guc_sched_disable_gucid_threshold_set, "%lld\n"); + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { { "guc_info", &guc_info_fops, NULL }, { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, + { "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL }, + { "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops, + NULL }, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index a0372735cddb..5b86b2e286e0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -10,12 +10,15 @@ */ #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "intel_guc_fw.h" #include "i915_drv.h" -static void guc_prepare_xfer(struct intel_uncore *uncore) +static void 
guc_prepare_xfer(struct intel_gt *gt) { + struct intel_uncore *uncore = gt->uncore; + u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | @@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) if (GRAPHICS_VER(uncore->i915) == 9) { /* DOP Clock Gating Enable for GuC clocks */ - intel_uncore_rmw(uncore, GEN7_MISCCPCTL, - 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE); + intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL, + GEN8_DOP_CLOCK_GATE_GUC_ENABLE | + intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL)); /* allows for 5us (in 10ns units) before GT can go to RC6 */ intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); @@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc) struct intel_uncore *uncore = gt->uncore; int ret; - guc_prepare_xfer(uncore); + guc_prepare_xfer(gt); /* * Note that GuC needs the CSS header plus uKernel code to be copied diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 323b055e5db9..968ebd79dce7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -290,6 +290,25 @@ struct guc_update_context_policy { struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; } __packed; +/* Format of the UPDATE_SCHEDULING_POLICIES H2G data packet */ +struct guc_update_scheduling_policy_header { + u32 action; +} __packed; + +/* + * Can't dynmically allocate memory for the scheduling policy KLV because + * it will be sent from within the reset path. Need a fixed size lump on + * the stack instead :(. + * + * Currently, there is only one KLV defined, which has 1 word of KL + 2 words of V. + */ +#define MAX_SCHEDULING_POLICY_SIZE 3 + +struct guc_update_scheduling_policy { + struct guc_update_scheduling_policy_header header; + u32 data[MAX_SCHEDULING_POLICY_SIZE]; +} __packed; + #define GUC_POWER_UNSPECIFIED 0 #define GUC_POWER_D0 1 #define GUC_POWER_D1 2 @@ -298,6 +317,9 @@ struct guc_update_context_policy { /* Scheduling policy settings */ +#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION 100 /* in ms */ +#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO 50 /* in percent */ + #define GLOBAL_POLICY_MAX_NUM_WI 15 /* Don't reset an engine upon preemption failure */ @@ -305,6 +327,27 @@ struct guc_update_context_policy { #define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 +/* + * GuC converts the timeout to clock ticks internally. Different platforms have + * different GuC clocks. Thus, the maximum value before overflow is platform + * dependent. Current worst case scenario is about 110s. So, the spec says to + * limit to 100s to be safe. + */ +#define GUC_POLICY_MAX_EXEC_QUANTUM_US (100 * 1000 * 1000UL) +#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US (100 * 1000 * 1000UL) + +static inline u32 guc_policy_max_exec_quantum_ms(void) +{ + BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX); + return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000; +} + +static inline u32 guc_policy_max_preempt_timeout_ms(void) +{ + BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX); + return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000; +} + struct guc_policies { u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; /* In micro seconds. 
How much time to allow before DPC processing is diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 55d3ef93e86f..68331c538b0a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -16,15 +16,15 @@ #if defined(CONFIG_DRM_I915_DEBUG_GUC) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M -#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M #elif defined(CONFIG_DRM_I915_DEBUG_GEM) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M -#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M #else #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_8K #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_64K -#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_2M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M #endif static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index fdd895f73f9f..63464933cbce 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -137,6 +137,17 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } +static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -190,6 +201,15 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } +static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -263,6 +283,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->min_is_rpmax = false; slpc->boost_freq = 0; atomic_set(&slpc->num_waiters, 0); @@ -588,6 +609,39 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int slpc_min_freq; + int ret; + + ret = intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq); + if (ret) { + drm_err(&i915->drm, + "Failed to get min freq: (%d)\n", + ret); + return false; + } + + if (slpc_min_freq == SLPC_MAX_FREQ_MHZ) + return true; + else + return false; +} + +static void update_server_min_softlimit(struct intel_guc_slpc *slpc) +{ + /* For server parts, SLPC min will be at RPMax. + * Use min softlimit to clamp it to RP0 instead. 
+ */ + if (!slpc->min_freq_softlimit && + is_slpc_min_freq_rpmax(slpc)) { + slpc->min_is_rpmax = true; + slpc->min_freq_softlimit = slpc->rp0_freq; + (slpc_to_gt(slpc))->defaults.min_freq = slpc->min_freq_softlimit; + } +} + static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -610,6 +664,52 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) slpc->boost_freq = slpc->rp0_freq; } +/** + * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode + * @slpc: pointer to intel_guc_slpc. + * @mode: new value of the mode. + * + * This function will override the GUCRC mode. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode) +{ + int ret; + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + + if (mode >= SLPC_GUCRC_MODE_MAX) + return -EINVAL; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode); + if (ret) + drm_err(&i915->drm, + "Override gucrc mode %d failed %d\n", + mode, ret); + } + + return ret; +} + +int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE); + if (ret) + drm_err(&i915->drm, + "Unsetting gucrc mode failed %d\n", + ret); + } + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. @@ -647,6 +747,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); + /* Handle the case where min=max=RPmax */ + update_server_min_softlimit(slpc); + /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 82a98f78f96c..17ed515f6a85 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -9,6 +9,8 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +#define SLPC_MAX_FREQ_MHZ 4250 + struct intel_gt; struct drm_printer; @@ -42,5 +44,7 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); +int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc); +int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 73d208123528..a6ef53b04e04 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -19,6 +19,9 @@ struct intel_guc_slpc { bool supported; bool selected; + /* Indicates this is a server part */ + bool min_is_rpmax; + /* platform frequency limits */ u32 min_freq; u32 rp0_freq; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1db59eeb34db..4ccb29f9ac55 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -6,6 +6,7 @@ #include <linux/circ_buf.h> #include "gem/i915_gem_context.h" +#include 
"gem/i915_gem_lmem.h" #include "gt/gen8_engine_cs.h" #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" @@ -65,7 +66,13 @@ * corresponding G2H returns indicating the scheduling disable operation has * completed it is safe to unpin the context. While a disable is in flight it * isn't safe to resubmit the context so a fence is used to stall all future - * requests of that context until the G2H is returned. + * requests of that context until the G2H is returned. Because this interaction + * with the GuC takes a non-zero amount of time we delay the disabling of + * scheduling after the pin count goes to zero by a configurable period of time + * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of + * time to resubmit something on the context before doing this costly operation. + * This delay is only done if the context isn't closed and the guc_id usage is + * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD). * * Context deregistration: * Before a context can be destroyed or if we steal its guc_id we must @@ -163,7 +170,8 @@ guc_create_parallel(struct intel_engine_cs **engines, #define SCHED_STATE_PENDING_ENABLE BIT(5) #define SCHED_STATE_REGISTERED BIT(6) #define SCHED_STATE_POLICY_REQUIRED BIT(7) -#define SCHED_STATE_BLOCKED_SHIFT 8 +#define SCHED_STATE_CLOSED BIT(8) +#define SCHED_STATE_BLOCKED_SHIFT 9 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) @@ -173,12 +181,20 @@ static inline void init_sched_state(struct intel_context *ce) ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } +/* + * Kernel contexts can have SCHED_STATE_REGISTERED after suspend. + * A context close can race with the submission path, so SCHED_STATE_CLOSED + * can be set immediately before we try to register. + */ +#define SCHED_STATE_VALID_INIT \ + (SCHED_STATE_BLOCKED_MASK | \ + SCHED_STATE_CLOSED | \ + SCHED_STATE_REGISTERED) + __maybe_unused static bool sched_state_is_init(struct intel_context *ce) { - /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. 
*/ - return !(ce->guc_state.sched_state & - ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); + return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT); } static inline bool @@ -319,6 +335,17 @@ static inline void clr_context_policy_required(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; } +static inline bool context_close_done(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_CLOSED; +} + +static inline void set_context_close_done(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_CLOSED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -343,25 +370,6 @@ static inline void decr_context_blocked(struct intel_context *ce) ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } -static inline bool context_has_committed_requests(struct intel_context *ce) -{ - return !!ce->guc_state.number_committed_requests; -} - -static inline void incr_context_committed_requests(struct intel_context *ce) -{ - lockdep_assert_held(&ce->guc_state.lock); - ++ce->guc_state.number_committed_requests; - GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); -} - -static inline void decr_context_committed_requests(struct intel_context *ce) -{ - lockdep_assert_held(&ce->guc_state.lock); - --ce->guc_state.number_committed_requests; - GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); -} - static struct intel_context * request_to_scheduling_context(struct i915_request *rq) { @@ -1067,6 +1075,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) xa_unlock(&guc->context_lookup); + if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && + (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) { + /* successful cancel so jump straight to close it */ + intel_context_sched_disable_unpin(ce); + } + spin_lock(&ce->guc_state.lock); /* @@ -1994,6 +2008,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) if (unlikely(ret < 0)) return ret; + if (!intel_context_is_parent(ce)) + ++guc->submission_state.guc_ids_in_use; + ce->guc_id.id = ret; return 0; } @@ -2003,14 +2020,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_child(ce)); if (!context_guc_id_invalid(ce)) { - if (intel_context_is_parent(ce)) + if (intel_context_is_parent(ce)) { bitmap_release_region(guc->submission_state.guc_ids_bitmap, ce->guc_id.id, order_base_2(ce->parallel.number_children + 1)); - else + } else { + --guc->submission_state.guc_ids_in_use; ida_simple_remove(&guc->submission_state.guc_ids, ce->guc_id.id); + } clr_ctx_id_mapping(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } @@ -2429,6 +2448,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) int ret; /* NB: For both of these, zero means disabled. */ + GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, + execution_quantum)); + GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, + preemption_timeout)); execution_quantum = engine->props.timeslice_duration_ms * 1000; preemption_timeout = engine->props.preempt_timeout_ms * 1000; @@ -2462,6 +2485,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine, desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; /* NB: For both of these, zero means disabled. 
*/ + GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, + desc->execution_quantum)); + GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, + desc->preemption_timeout)); desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } @@ -2998,41 +3025,104 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq, } } -static void guc_context_sched_disable(struct intel_context *ce) +static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, + unsigned long flags) + __releases(ce->guc_state.lock) { - struct intel_guc *guc = ce_to_guc(ce); - unsigned long flags; struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; u16 guc_id; + lockdep_assert_held(&ce->guc_state.lock); + guc_id = prep_context_pending_disable(ce); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); +} + +static bool bypass_sched_disable(struct intel_guc *guc, + struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(intel_context_is_child(ce)); + if (submission_disabled(guc) || context_guc_id_invalid(ce) || + !ctx_id_mapped(guc, ce->guc_id.id)) { + clr_context_enabled(ce); + return true; + } + + return !context_enabled(ce); +} + +static void __delay_sched_disable(struct work_struct *wrk) +{ + struct intel_context *ce = + container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (bypass_sched_disable(guc, ce)) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + intel_context_sched_disable_unpin(ce); + } else { + do_sched_disable(guc, ce, flags); + } +} + +static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) +{ /* - * We have to check if the context has been disabled by another thread, - * check if submssion has been disabled to seal a race with reset and - * finally check if any more requests have been committed to the - * context ensursing that a request doesn't slip through the - * 'context_pending_disable' fence. + * parent contexts are perma-pinned, if we are unpinning do schedule + * disable immediately. */ - if (unlikely(!context_enabled(ce) || submission_disabled(guc) || - context_has_committed_requests(ce))) { - clr_context_enabled(ce); + if (intel_context_is_parent(ce)) + return true; + + /* + * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
+ */ + return guc->submission_state.guc_ids_in_use > + guc->submission_state.sched_disable_gucid_threshold; +} + +static void guc_context_sched_disable(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + u64 delay = guc->submission_state.sched_disable_delay_ms; + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + if (bypass_sched_disable(guc, ce)) { spin_unlock_irqrestore(&ce->guc_state.lock, flags); - goto unpin; + intel_context_sched_disable_unpin(ce); + } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && + delay) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + mod_delayed_work(system_unbound_wq, + &ce->guc_state.sched_disable_delay_work, + msecs_to_jiffies(delay)); + } else { + do_sched_disable(guc, ce, flags); } - guc_id = prep_context_pending_disable(ce); +} - spin_unlock_irqrestore(&ce->guc_state.lock, flags); +static void guc_context_close(struct intel_context *ce) +{ + unsigned long flags; - with_intel_runtime_pm(runtime_pm, wakeref) - __guc_context_sched_disable(guc, ce, guc_id); + if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && + cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) + __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); - return; -unpin: - intel_context_sched_disable_unpin(ce); + spin_lock_irqsave(&ce->guc_state.lock, flags); + set_context_close_done(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); } static inline void guc_lrc_desc_unpin(struct intel_context *ce) @@ -3071,7 +3161,6 @@ static void __guc_context_destroy(struct intel_context *ce) ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); - GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); intel_context_fini(ce); @@ -3340,8 +3429,6 @@ static void remove_from_context(struct i915_request *rq) guc_prio_fini(rq, ce); - decr_context_committed_requests(ce); - spin_unlock_irq(&ce->guc_state.lock); atomic_dec(&ce->guc_id.ref); @@ -3351,6 +3438,8 @@ static void remove_from_context(struct i915_request *rq) static const struct intel_context_ops guc_context_ops = { .alloc = guc_context_alloc, + .close = guc_context_close, + .pre_pin = guc_context_pre_pin, .pin = guc_context_pin, .unpin = guc_context_unpin, @@ -3433,6 +3522,10 @@ static void guc_context_init(struct intel_context *ce) rcu_read_unlock(); ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); + + INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, + __delay_sched_disable); + set_bit(CONTEXT_GUC_INIT, &ce->flags); } @@ -3471,6 +3564,26 @@ static int guc_request_alloc(struct i915_request *rq) guc_context_init(ce); /* + * If the context gets closed while the execbuf is ongoing, the context + * close code will race with the below code to cancel the delayed work. + * If the context close wins the race and cancels the work, it will + * immediately call the sched disable (see guc_context_close), so there + * is a chance we can get past this check while the sched_disable code + * is being executed. To make sure that code completes before we check + * the status further down, we wait for the close process to complete. + * Else, this code path could send a request down thinking that the + * context is still in a schedule-enable mode while the GuC ends up + * dropping the request completely because the disable did go from the + * context_close path right to GuC just prior. 
In the event the CT is + * full, we could potentially need to wait up to 1.5 seconds. + */ + if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) + intel_context_sched_disable_unpin(ce); + else if (intel_context_is_closed(ce)) + if (wait_for(context_close_done(ce), 1500)) + drm_warn(&guc_to_gt(guc)->i915->drm, + "timed out waiting on context sched close before realloc\n"); + /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the * guc_id and creating horrible race conditions. This is especially bad @@ -3524,7 +3637,6 @@ out: list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); } - incr_context_committed_requests(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); return 0; @@ -3600,6 +3712,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce) static const struct intel_context_ops virtual_guc_context_ops = { .alloc = guc_virtual_context_alloc, + .close = guc_context_close, + .pre_pin = guc_virtual_context_pre_pin, .pin = guc_virtual_context_pin, .unpin = guc_virtual_context_unpin, @@ -3689,6 +3803,8 @@ static void guc_child_context_destroy(struct kref *kref) static const struct intel_context_ops virtual_parent_context_ops = { .alloc = guc_virtual_context_alloc, + .close = guc_context_close, + .pre_pin = guc_context_pre_pin, .pin = guc_parent_context_pin, .unpin = guc_parent_context_unpin, @@ -4093,7 +4209,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->emit_bb_start = gen8_emit_bb_start; if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) - engine->emit_bb_start = gen125_emit_bb_start; + engine->emit_bb_start = xehp_emit_bb_start; } static void rcs_submission_override(struct intel_engine_cs *engine) @@ -4177,6 +4293,98 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) return 0; } +struct scheduling_policy { + /* internal data */ + u32 max_words, num_words; + u32 count; + /* API data */ + struct guc_update_scheduling_policy h2g; +}; + +static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) +{ + u32 *start = (void *)&policy->h2g; + u32 *end = policy->h2g.data + policy->num_words; + size_t delta = end - start; + + return delta; +} + +static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) +{ + policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + policy->max_words = ARRAY_SIZE(policy->h2g.data); + policy->num_words = 0; + policy->count = 0; + + return policy; +} + +static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, + u32 action, u32 *data, u32 len) +{ + u32 *klv_ptr = policy->h2g.data + policy->num_words; + + GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); + *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | + FIELD_PREP(GUC_KLV_0_LEN, len); + memcpy(klv_ptr, data, sizeof(u32) * len); + policy->num_words += 1 + len; + policy->count++; +} + +static int __guc_action_set_scheduling_policies(struct intel_guc *guc, + struct scheduling_policy *policy) +{ + int ret; + + ret = intel_guc_send(guc, (u32 *)&policy->h2g, + __guc_scheduling_policy_action_size(policy)); + if (ret < 0) + return ret; + + if (ret != policy->count) { + drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!", + ret, policy->count); + if (ret > policy->count) + return -EPROTO; + } + + return 0; +} + +static int guc_init_global_schedule_policy(struct intel_guc *guc) +{ + struct 
scheduling_policy policy; + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int ret = 0; + + if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0)) + return 0; + + __guc_scheduling_policy_start_klv(&policy); + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { + u32 yield[] = { + GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, + GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, + }; + + __guc_scheduling_policy_add_klv(&policy, + GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, + yield, ARRAY_SIZE(yield)); + + ret = __guc_action_set_scheduling_policies(guc, &policy); + if (ret) + i915_probe_error(gt->i915, + "Failed to configure global scheduling policies: %pe!\n", + ERR_PTR(ret)); + } + + return ret; +} + void intel_guc_submission_enable(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -4189,6 +4397,7 @@ void intel_guc_submission_enable(struct intel_guc *guc) guc_init_lrc_mapping(guc); guc_init_engine_stats(guc); + guc_init_global_schedule_policy(guc); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -4219,6 +4428,26 @@ static bool __guc_submission_selected(struct intel_guc *guc) return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; } +int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) +{ + return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); +} + +/* + * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher + * workloads are able to enjoy the latency reduction when delaying the schedule-disable + * operation. This matches the 30fps game-render + encode (real world) workload this + * knob was tested against. + */ +#define SCHED_DISABLE_DELAY_MS 34 + +/* + * A threshold of 75% is a reasonable starting point considering that real world apps + * generally don't get anywhere near this. + */ +#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ + (((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4) + void intel_guc_submission_init_early(struct intel_guc *guc) { xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); @@ -4235,7 +4464,10 @@ void intel_guc_submission_init_early(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); + guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; + guc->submission_state.sched_disable_gucid_threshold = + NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 3bb8838e325a..fbc8bae14f76 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -10,6 +10,9 @@ #include "intel_huc.h" #include "i915_drv.h" +#include <linux/device/bus.h> +#include <linux/mei_aux.h> + /** * DOC: HuC * @@ -42,6 +45,172 @@ * HuC-specific commands. */ +/* + * MEI-GSC load is an async process. The probing of the exposed aux device + * (see intel_gsc.c) usually happens a few seconds after i915 probe, depending + * on when the kernel schedules it. Unless something goes terribly wrong, we're + * guaranteed for this to happen during boot, so the big timeout is a safety net + * that we never expect to need. + * MEI-PXP + HuC load usually takes ~300ms, but if the GSC needs to be resumed + * and/or reset, this can take longer. 
Note that the kernel might schedule + * other work between the i915 init/resume and the MEI one, which can add to + * the delay. + */ +#define GSC_INIT_TIMEOUT_MS 10000 +#define PXP_INIT_TIMEOUT_MS 5000 + +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, + enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + +static void __delayed_huc_load_complete(struct intel_huc *huc) +{ + if (!i915_sw_fence_done(&huc->delayed_load.fence)) + i915_sw_fence_complete(&huc->delayed_load.fence); +} + +static void delayed_huc_load_complete(struct intel_huc *huc) +{ + hrtimer_cancel(&huc->delayed_load.timer); + __delayed_huc_load_complete(huc); +} + +static void __gsc_init_error(struct intel_huc *huc) +{ + huc->delayed_load.status = INTEL_HUC_DELAYED_LOAD_ERROR; + __delayed_huc_load_complete(huc); +} + +static void gsc_init_error(struct intel_huc *huc) +{ + hrtimer_cancel(&huc->delayed_load.timer); + __gsc_init_error(huc); +} + +static void gsc_init_done(struct intel_huc *huc) +{ + hrtimer_cancel(&huc->delayed_load.timer); + + /* MEI-GSC init is done, now we wait for MEI-PXP to bind */ + huc->delayed_load.status = INTEL_HUC_WAITING_ON_PXP; + if (!i915_sw_fence_done(&huc->delayed_load.fence)) + hrtimer_start(&huc->delayed_load.timer, + ms_to_ktime(PXP_INIT_TIMEOUT_MS), + HRTIMER_MODE_REL); +} + +static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrtimer) +{ + struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer); + + if (!intel_huc_is_authenticated(huc)) { + if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_GSC) + drm_notice(&huc_to_gt(huc)->i915->drm, + "timed out waiting for MEI GSC init to load HuC\n"); + else if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_PXP) + drm_notice(&huc_to_gt(huc)->i915->drm, + "timed out waiting for MEI PXP init to load HuC\n"); + else + MISSING_CASE(huc->delayed_load.status); + + __gsc_init_error(huc); + } + + return HRTIMER_NORESTART; +} + +static void huc_delayed_load_start(struct intel_huc *huc) +{ + ktime_t delay; + + GEM_BUG_ON(intel_huc_is_authenticated(huc)); + + /* + * On resume we don't have to wait for MEI-GSC to be re-probed, but we + * do need to wait for MEI-PXP to reset & re-bind + */ + switch (huc->delayed_load.status) { + case INTEL_HUC_WAITING_ON_GSC: + delay = ms_to_ktime(GSC_INIT_TIMEOUT_MS); + break; + case INTEL_HUC_WAITING_ON_PXP: + delay = ms_to_ktime(PXP_INIT_TIMEOUT_MS); + break; + default: + gsc_init_error(huc); + return; + } + + /* + * This fence is always complete unless we're waiting for the + * GSC device to come up to load the HuC. We arm the fence here + * and complete it when we confirm that the HuC is loaded from + * the PXP bind callback. 
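In i915_sw_fence terms the sequence that follows is, roughly: intel_huc_init_early() leaves the fence initialised and committed, i.e. already signalled, which is the idle state; the fini()/reinit() pair below resets it, the extra i915_sw_fence_await() keeps it unsignalled past the commit(), and the matching i915_sw_fence_complete() in __delayed_huc_load_complete() is what finally signals it once HuC authentication is confirmed, times out or errors out.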
+ */ + GEM_BUG_ON(!i915_sw_fence_done(&huc->delayed_load.fence)); + i915_sw_fence_fini(&huc->delayed_load.fence); + i915_sw_fence_reinit(&huc->delayed_load.fence); + i915_sw_fence_await(&huc->delayed_load.fence); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_start(&huc->delayed_load.timer, delay, HRTIMER_MODE_REL); +} + +static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct device *dev = data; + struct intel_huc *huc = container_of(nb, struct intel_huc, delayed_load.nb); + struct intel_gsc_intf *intf = &huc_to_gt(huc)->gsc.intf[0]; + + if (!intf->adev || &intf->adev->aux_dev.dev != dev) + return 0; + + switch (action) { + case BUS_NOTIFY_BOUND_DRIVER: /* mei driver bound to aux device */ + gsc_init_done(huc); + break; + + case BUS_NOTIFY_DRIVER_NOT_BOUND: /* mei driver fails to be bound */ + case BUS_NOTIFY_UNBIND_DRIVER: /* mei driver about to be unbound */ + drm_info(&huc_to_gt(huc)->i915->drm, + "mei driver not bound, disabling HuC load\n"); + gsc_init_error(huc); + break; + } + + return 0; +} + +void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus) +{ + int ret; + + if (!intel_huc_is_loaded_by_gsc(huc)) + return; + + huc->delayed_load.nb.notifier_call = gsc_notifier; + ret = bus_register_notifier(bus, &huc->delayed_load.nb); + if (ret) { + drm_err(&huc_to_gt(huc)->i915->drm, + "failed to register GSC notifier\n"); + huc->delayed_load.nb.notifier_call = NULL; + gsc_init_error(huc); + } +} + +void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus) +{ + if (!huc->delayed_load.nb.notifier_call) + return; + + delayed_huc_load_complete(huc); + + bus_unregister_notifier(bus, &huc->delayed_load.nb); + huc->delayed_load.nb.notifier_call = NULL; +} + void intel_huc_init_early(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; @@ -57,6 +226,17 @@ void intel_huc_init_early(struct intel_huc *huc) huc->status.mask = HUC_FW_VERIFIED; huc->status.value = HUC_FW_VERIFIED; } + + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a delayed HuC reload in progress. + */ + i915_sw_fence_init(&huc->delayed_load.fence, + sw_fence_dummy_notify); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + huc->delayed_load.timer.function = huc_delayed_load_timer_callback; } #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") @@ -113,6 +293,7 @@ int intel_huc_init(struct intel_huc *huc) return 0; out: + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL); drm_info(&i915->drm, "HuC init failed with %d\n", err); return err; } @@ -122,9 +303,50 @@ void intel_huc_fini(struct intel_huc *huc) if (!intel_uc_fw_is_loadable(&huc->fw)) return; + delayed_huc_load_complete(huc); + + i915_sw_fence_fini(&huc->delayed_load.fence); intel_uc_fw_fini(&huc->fw); } +void intel_huc_suspend(struct intel_huc *huc) +{ + if (!intel_uc_fw_is_loadable(&huc->fw)) + return; + + /* + * in the unlikely case that we're suspending before the GSC has + * completed its loading sequence, just stop waiting. We'll restart + * on resume. 
+ */ + delayed_huc_load_complete(huc); +} + +int intel_huc_wait_for_auth_complete(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + int ret; + + ret = __intel_wait_for_register(gt->uncore, + huc->status.reg, + huc->status.mask, + huc->status.value, + 2, 50, NULL); + + /* mark the load process as complete even if the wait failed */ + delayed_huc_load_complete(huc); + + if (ret) { + drm_err(>->i915->drm, "HuC: Firmware not verified %d\n", ret); + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); + return ret; + } + + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + drm_info(>->i915->drm, "HuC authenticated\n"); + return 0; +} + /** * intel_huc_auth() - Authenticate HuC uCode * @huc: intel_huc structure @@ -161,27 +383,18 @@ int intel_huc_auth(struct intel_huc *huc) } /* Check authentication status, it should be done by now */ - ret = __intel_wait_for_register(gt->uncore, - huc->status.reg, - huc->status.mask, - huc->status.value, - 2, 50, NULL); - if (ret) { - DRM_ERROR("HuC: Firmware not verified %d\n", ret); + ret = intel_huc_wait_for_auth_complete(huc); + if (ret) goto fail; - } - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); - drm_info(>->i915->drm, "HuC authenticated\n"); return 0; fail: i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } -static bool huc_is_authenticated(struct intel_huc *huc) +bool intel_huc_is_authenticated(struct intel_huc *huc) { struct intel_gt *gt = huc_to_gt(huc); intel_wakeref_t wakeref; @@ -200,13 +413,8 @@ static bool huc_is_authenticated(struct intel_huc *huc) * This function reads status register to verify if HuC * firmware was successfully loaded. * - * Returns: - * * -ENODEV if HuC is not present on this platform, - * * -EOPNOTSUPP if HuC firmware is disabled, - * * -ENOPKG if HuC firmware was not installed, - * * -ENOEXEC if HuC firmware is invalid or mismatched, - * * 0 if HuC firmware is not running, - * * 1 if HuC firmware is authenticated and running. + * The return values match what is expected for the I915_PARAM_HUC_STATUS + * getparam. 
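For reference, that getparam is the long-standing way for media userspace to probe HuC before queuing HuC workloads. A minimal query, with error handling elided and the render node path assumed rather than discovered, could look like:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <xf86drm.h>     /* drmIoctl(), from libdrm */
#include <i915_drm.h>    /* I915_PARAM_HUC_STATUS, from libdrm's include path */

int main(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR); /* device node may differ */
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HUC_STATUS,
		.value = &value,
	};

	/* 1 = authenticated and running, 0 = not (yet) running;
	 * the negative codes come back as the ioctl's errno instead. */
	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		perror("I915_PARAM_HUC_STATUS");
	else
		printf("HuC status: %d\n", value);
	return 0;
}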
*/ int intel_huc_check_status(struct intel_huc *huc) { @@ -219,11 +427,21 @@ int intel_huc_check_status(struct intel_huc *huc) return -ENOPKG; case INTEL_UC_FIRMWARE_ERROR: return -ENOEXEC; + case INTEL_UC_FIRMWARE_INIT_FAIL: + return -ENOMEM; + case INTEL_UC_FIRMWARE_LOAD_FAIL: + return -EIO; default: break; } - return huc_is_authenticated(huc); + return intel_huc_is_authenticated(huc); +} + +static bool huc_has_delayed_load(struct intel_huc *huc) +{ + return intel_huc_is_loaded_by_gsc(huc) && + (huc->delayed_load.status != INTEL_HUC_DELAYED_LOAD_ERROR); } void intel_huc_update_auth_status(struct intel_huc *huc) @@ -231,9 +449,11 @@ void intel_huc_update_auth_status(struct intel_huc *huc) if (!intel_uc_fw_is_loadable(&huc->fw)) return; - if (huc_is_authenticated(huc)) + if (intel_huc_is_authenticated(huc)) intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + else if (huc_has_delayed_load(huc)) + huc_delayed_load_start(huc); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index d7e25b6e879e..52db03620c60 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -7,9 +7,21 @@ #define _INTEL_HUC_H_ #include "i915_reg_defs.h" +#include "i915_sw_fence.h" #include "intel_uc_fw.h" #include "intel_huc_fw.h" +#include <linux/notifier.h> +#include <linux/hrtimer.h> + +struct bus_type; + +enum intel_huc_delayed_load_status { + INTEL_HUC_WAITING_ON_GSC = 0, + INTEL_HUC_WAITING_ON_PXP, + INTEL_HUC_DELAYED_LOAD_ERROR, +}; + struct intel_huc { /* Generic uC firmware management */ struct intel_uc_fw fw; @@ -20,14 +32,27 @@ struct intel_huc { u32 mask; u32 value; } status; + + struct { + struct i915_sw_fence fence; + struct hrtimer timer; + struct notifier_block nb; + enum intel_huc_delayed_load_status status; + } delayed_load; }; void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); +void intel_huc_suspend(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); +int intel_huc_wait_for_auth_complete(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc); void intel_huc_update_auth_status(struct intel_huc *huc); +bool intel_huc_is_authenticated(struct intel_huc *huc); + +void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); +void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); static inline int intel_huc_sanitize(struct intel_huc *huc) { @@ -56,6 +81,12 @@ static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc) return huc->fw.loaded_via_gsc; } +static inline bool intel_huc_wait_required(struct intel_huc *huc) +{ + return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) && + !intel_huc_is_authenticated(huc); +} + void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index 9d6ab1e01639..4f246416db17 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -3,9 +3,43 @@ * Copyright © 2014-2019 Intel Corporation */ +#include "gt/intel_gsc.h" #include "gt/intel_gt.h" +#include "intel_huc.h" #include "intel_huc_fw.h" #include "i915_drv.h" +#include "pxp/intel_pxp_huc.h" + +int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc) +{ + int ret; + + if (!intel_huc_is_loaded_by_gsc(huc)) + return -ENODEV; + + if 
(!intel_uc_fw_is_loadable(&huc->fw)) + return -ENOEXEC; + + /* + * If we abort a suspend, HuC might still be loaded when the mei + * component gets re-bound and this function called again. If so, just + * mark the HuC as loaded. + */ + if (intel_huc_is_authenticated(huc)) { + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + return 0; + } + + GEM_WARN_ON(intel_uc_fw_is_loaded(&huc->fw)); + + ret = intel_pxp_huc_load_and_auth(&huc_to_gt(huc)->pxp); + if (ret) + return ret; + + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED); + + return intel_huc_wait_for_auth_complete(huc); +} /** * intel_huc_fw_upload() - load HuC uCode to device via DMA transfer diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h index 12f264ee3e0b..db42e238b45f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h @@ -8,6 +8,7 @@ struct intel_huc; +int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc); int intel_huc_fw_upload(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index b91ad4aede1f..de2843dc1307 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -93,7 +93,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(BROXTON, 0, guc_mmp(bxt, 70, 1, 1)) \ fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1)) -#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \ +#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \ + fw_def(DG2, 0, huc_gsc(dg2)) \ fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \ fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, huc_raw(tgl)) \ @@ -141,6 +142,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define MAKE_HUC_FW_PATH_BLANK(prefix_) \ __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc") +#define MAKE_HUC_FW_PATH_GSC(prefix_) \ + __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc") + #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ __MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_) @@ -153,7 +157,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, MODULE_FIRMWARE(uc_); INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP) -INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP) +INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC) /* * The next expansion of the table macros (in __uc_fw_auto_select below) provides @@ -168,6 +172,7 @@ struct __packed uc_fw_blob { u8 major; u8 minor; u8 patch; + bool loaded_via_gsc; }; #define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ @@ -176,16 +181,16 @@ struct __packed uc_fw_blob { .patch = patch_, \ .path = path_, -#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \ +#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \ { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ - .legacy = false } + .legacy = false, .loaded_via_gsc = gsc_ } #define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \ { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ .legacy = true } #define GUC_FW_BLOB(prefix_, major_, minor_) \ - UC_FW_BLOB_NEW(major_, minor_, 0, \ + UC_FW_BLOB_NEW(major_, minor_, 0, false, \ MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_)) #define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ @@ -193,12 +198,15 @@ struct __packed uc_fw_blob { MAKE_GUC_FW_PATH_MMP(prefix_, major_, 
minor_, patch_)) #define HUC_FW_BLOB(prefix_) \ - UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_)) + UC_FW_BLOB_NEW(0, 0, 0, false, MAKE_HUC_FW_PATH_BLANK(prefix_)) #define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ UC_FW_BLOB_OLD(major_, minor_, patch_, \ MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_)) +#define HUC_FW_BLOB_GSC(prefix_) \ + UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_)) + struct __packed uc_fw_platform_requirement { enum intel_platform p; u8 rev; /* first platform rev using this FW */ @@ -224,7 +232,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP) }; static const struct uc_fw_platform_requirement blobs_huc[] = { - INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP) + INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC) }; static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, @@ -272,6 +280,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) uc_fw->file_wanted.path = blob->path; uc_fw->file_wanted.major_ver = blob->major; uc_fw->file_wanted.minor_ver = blob->minor; + uc_fw->loaded_via_gsc = blob->loaded_via_gsc; found = true; break; } @@ -904,7 +913,6 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) out_unpin: i915_gem_object_unpin_pages(uc_fw->obj); out: - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL); return err; } diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index eef3bba8a41b..357c5b65e097 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -354,9 +354,9 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4); vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size = - pci_resource_len(pdev, GTTMMADR_BAR); + pci_resource_len(pdev, GEN4_GTTMMADR_BAR); vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size = - pci_resource_len(pdev, GTT_APERTURE_BAR); + pci_resource_len(pdev, GEN4_GMADR_BAR); memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index daac2050d77d..1cb388484bf0 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -734,7 +734,7 @@ static i915_reg_t force_nonpriv_white_list[] = { _MMIO(0x770c), _MMIO(0x83a8), _MMIO(0xb110), - GEN8_L3SQCREG4,//_MMIO(0xb118) + _MMIO(0xb118), _MMIO(0xe100), _MMIO(0xe18c), _MMIO(0xe48c), @@ -2257,7 +2257,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(HSW_HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 1c6e941c9666..200c1162daa3 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -106,15 +106,15 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS0, GEN8_CS_CHICKEN1, 
0xffff, true}, /* 0x2580 */ {RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ {RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ - {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ - {RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */ + {RCS0, _MMIO(0xb118), 0, false}, /* GEN8_L3SQCREG4 */ + {RCS0, _MMIO(0xb11c), 0, false}, /* GEN9_SCRATCH1 */ {RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */ {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ - {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ - {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ - {RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ - {RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ - {RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ + {RCS0, _MMIO(0xe180), 0xffff, true}, /* HALF_SLICE_CHICKEN2 */ + {RCS0, _MMIO(0xe184), 0xffff, true}, /* GEN8_HALF_SLICE_CHICKEN3 */ + {RCS0, _MMIO(0xe188), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN5 */ + {RCS0, _MMIO(0xe194), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN7 */ + {RCS0, _MMIO(0xe4f0), 0xffff, true}, /* GEN8_ROW_CHICKEN */ {RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */ {RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */ {RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 298ed36f078a..c3d43f9b1e45 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -81,6 +81,7 @@ #include "i915_drm_client.h" #include "i915_drv.h" #include "i915_getparam.h" +#include "i915_hwmon.h" #include "i915_ioc32.h" #include "i915_ioctl.h" #include "i915_irq.h" @@ -764,6 +765,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) for_each_gt(gt, dev_priv, i) intel_gt_driver_register(gt); + i915_hwmon_register(dev_priv); + intel_display_driver_register(dev_priv); intel_power_domains_enable(dev_priv); @@ -796,6 +799,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) for_each_gt(gt, dev_priv, i) intel_gt_driver_unregister(gt); + i915_hwmon_unregister(dev_priv); + i915_perf_unregister(dev_priv); i915_pmu_unregister(dev_priv); @@ -1656,7 +1661,8 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_enable_interrupts(dev_priv); - intel_gt_runtime_resume(to_gt(dev_priv)); + for_each_gt(gt, dev_priv, i) + intel_gt_runtime_resume(gt); enable_rpm_wakeref_asserts(rpm); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 349ff7d65deb..05b3300cc4ed 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,7 +40,6 @@ #include "display/intel_display_core.h" #include "gem/i915_gem_context_types.h" -#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_shrinker.h" #include "gem/i915_gem_stolen.h" @@ -350,6 +349,8 @@ struct drm_i915_private { struct i915_perf perf; + struct i915_hwmon *hwmon; + /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct intel_gt gt0; @@ -898,19 +899,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) +#define HAS_OA_BPC_REPORTING(dev_priv) \ + (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) +#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \ + (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits) + /* * Set this flag, when platform requires 64K GTT page sizes or larger for * device local memory access. 
*/ #define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages) -/* - * Set this flag when platform doesn't allow both 64k pages and 4k pages in - * the same PT. this flag means we need to support compact PT layout for the - * ppGTT when using the 64K GTT pages. - */ -#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt) - #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (RUNTIME_INFO(i915)->memory_regions & (i)) @@ -976,6 +975,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_ONE_EU_PER_FUSE_BIT(i915) (INTEL_INFO(i915)->has_one_eu_per_fuse_bit) +#define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \ + GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + /* intel_device_info.c */ static inline struct intel_device_info * mkwrite_device_info(struct drm_i915_private *dev_priv) @@ -983,16 +985,4 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } -static inline enum i915_map_type -i915_coherent_map_type(struct drm_i915_private *i915, - struct drm_i915_gem_object *obj, bool always_coherent) -{ - if (i915_gem_object_is_lmem(obj)) - return I915_MAP_WC; - if (HAS_LLC(i915) || always_coherent) - return I915_MAP_WB; - else - return I915_MAP_WC; -} - #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2bdddb61ebd7..299f94a9fb87 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -843,7 +843,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) __i915_gem_object_release_mmap_gtt(obj); list_for_each_entry_safe(obj, on, - &to_gt(i915)->lmem_userfault_list, userfault_link) + &i915->runtime_pm.lmem_userfault_list, userfault_link) i915_gem_object_runtime_pm_release_mmap_offset(obj); /* @@ -1128,6 +1128,8 @@ void i915_gem_drain_workqueue(struct drm_i915_private *i915) int i915_gem_init(struct drm_i915_private *dev_priv) { + struct intel_gt *gt; + unsigned int i; int ret; /* We need to fallback to 4K pages if host doesn't support huge gtt. */ @@ -1158,9 +1160,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); - ret = intel_gt_init(to_gt(dev_priv)); - if (ret) - goto err_unlock; + for_each_gt(gt, dev_priv, i) { + ret = intel_gt_init(gt); + if (ret) + goto err_unlock; + } return 0; @@ -1173,8 +1177,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_unlock: i915_gem_drain_workqueue(dev_priv); - if (ret != -EIO) - intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc); + if (ret != -EIO) { + for_each_gt(gt, dev_priv, i) { + intel_gt_driver_remove(gt); + intel_gt_driver_release(gt); + intel_uc_cleanup_firmwares(>->uc); + } + } if (ret == -EIO) { /* @@ -1182,10 +1191,12 @@ err_unlock: * as wedged. But we only want to do this when the GPU is angry, * for all other failure, such as an allocation failure, bail. 
*/ - if (!intel_gt_is_wedged(to_gt(dev_priv))) { - i915_probe_error(dev_priv, - "Failed to initialize GPU, declaring it wedged!\n"); - intel_gt_set_wedged(to_gt(dev_priv)); + for_each_gt(gt, dev_priv, i) { + if (!intel_gt_is_wedged(gt)) { + i915_probe_error(dev_priv, + "Failed to initialize GPU, declaring it wedged!\n"); + intel_gt_set_wedged(gt); + } } /* Minimal basic recovery for KMS */ @@ -1213,23 +1224,27 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915) void i915_gem_driver_remove(struct drm_i915_private *dev_priv) { - intel_wakeref_auto_fini(&to_gt(dev_priv)->userfault_wakeref); + struct intel_gt *gt; + unsigned int i; i915_gem_suspend_late(dev_priv); - intel_gt_driver_remove(to_gt(dev_priv)); + for_each_gt(gt, dev_priv, i) + intel_gt_driver_remove(gt); dev_priv->uabi_engines = RB_ROOT; /* Flush any outstanding unpin_work. */ i915_gem_drain_workqueue(dev_priv); - - i915_gem_drain_freed_objects(dev_priv); } void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - intel_gt_driver_release(to_gt(dev_priv)); + struct intel_gt *gt; + unsigned int i; - intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc); + for_each_gt(gt, dev_priv, i) { + intel_gt_driver_release(gt); + intel_uc_cleanup_firmwares(>->uc); + } /* Flush any outstanding work, including i915_gem_context.release_work. */ i915_gem_drain_workqueue(dev_priv); @@ -1259,7 +1274,7 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv) void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) { - i915_gem_drain_freed_objects(dev_priv); + i915_gem_drain_workqueue(dev_priv); GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count); diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 342c8ca6414e..3047e80e1163 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -175,6 +175,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_PERF_REVISION: value = i915_perf_ioctl_version(); break; + case I915_PARAM_OA_TIMESTAMP_FREQUENCY: + value = i915_perf_oa_timestamp_frequency(i915); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9ea2fe34e7d3..f2d53edcd2ee 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1221,7 +1221,10 @@ static void engine_record_registers(struct intel_engine_coredump *ee) if (GRAPHICS_VER(i915) >= 6) { ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL); - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + ee->fault_reg = intel_gt_mcr_read_any(engine->gt, + XEHP_RING_FAULT_REG); + else if (GRAPHICS_VER(i915) >= 12) ee->fault_reg = intel_uncore_read(engine->uncore, GEN12_RING_FAULT_REG); else if (GRAPHICS_VER(i915) >= 8) @@ -1820,7 +1823,12 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt) if (GRAPHICS_VER(i915) == 7) gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); - if (GRAPHICS_VER(i915) >= 12) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + gt->fault_data0 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt, + XEHP_FAULT_TLB_DATA0); + gt->fault_data1 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt, + XEHP_FAULT_TLB_DATA1); + } else if (GRAPHICS_VER(i915) >= 12) { gt->fault_data0 = intel_uncore_read(uncore, GEN12_FAULT_TLB_DATA0); 
gt->fault_data1 = intel_uncore_read(uncore, diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c new file mode 100644 index 000000000000..c588a17f97e9 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -0,0 +1,732 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> +#include <linux/types.h> + +#include "i915_drv.h" +#include "i915_hwmon.h" +#include "i915_reg.h" +#include "intel_mchbar_regs.h" +#include "intel_pcode.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_regs.h" + +/* + * SF_* - scale factors for particular quantities according to hwmon spec. + * - voltage - millivolts + * - power - microwatts + * - curr - milliamperes + * - energy - microjoules + * - time - milliseconds + */ +#define SF_VOLTAGE 1000 +#define SF_POWER 1000000 +#define SF_CURR 1000 +#define SF_ENERGY 1000000 +#define SF_TIME 1000 + +struct hwm_reg { + i915_reg_t gt_perf_status; + i915_reg_t pkg_power_sku_unit; + i915_reg_t pkg_power_sku; + i915_reg_t pkg_rapl_limit; + i915_reg_t energy_status_all; + i915_reg_t energy_status_tile; +}; + +struct hwm_energy_info { + u32 reg_val_prev; + long accum_energy; /* Accumulated energy for energy1_input */ +}; + +struct hwm_drvdata { + struct i915_hwmon *hwmon; + struct intel_uncore *uncore; + struct device *hwmon_dev; + struct hwm_energy_info ei; /* Energy info for energy1_input */ + char name[12]; + int gt_n; +}; + +struct i915_hwmon { + struct hwm_drvdata ddat; + struct hwm_drvdata ddat_gt[I915_MAX_GT]; + struct mutex hwmon_lock; /* counter overflow logic and rmw */ + struct hwm_reg rg; + int scl_shift_power; + int scl_shift_energy; + int scl_shift_time; +}; + +static void +hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat, + i915_reg_t reg, u32 clear, u32 set) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + struct intel_uncore *uncore = ddat->uncore; + intel_wakeref_t wakeref; + + mutex_lock(&hwmon->hwmon_lock); + + with_intel_runtime_pm(uncore->rpm, wakeref) + intel_uncore_rmw(uncore, reg, clear, set); + + mutex_unlock(&hwmon->hwmon_lock); +} + +/* + * This function's return type of u64 allows for the case where the scaling + * of the field taken from the 32-bit register value might cause a result to + * exceed 32 bits. + */ +static u64 +hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr, + u32 field_msk, int nshift, u32 scale_factor) +{ + struct intel_uncore *uncore = ddat->uncore; + intel_wakeref_t wakeref; + u32 reg_value; + + with_intel_runtime_pm(uncore->rpm, wakeref) + reg_value = intel_uncore_read(uncore, rgadr); + + reg_value = REG_FIELD_GET(field_msk, reg_value); + + return mul_u64_u32_shr(reg_value, scale_factor, nshift); +} + +static void +hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr, + int nshift, unsigned int scale_factor, long lval) +{ + u32 nval; + + /* Computation in 64-bits to avoid overflow. Round to nearest. */ + nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor); + + hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr, + PKG_PWR_LIM_1, + REG_FIELD_PREP(PKG_PWR_LIM_1, nval)); +} + +/* + * hwm_energy - Obtain energy value + * + * The underlying energy hardware register is 32-bits and is subject to + * overflow. How long before overflow? For example, with an example + * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and + * a power draw of 1000 watts, the 32-bit counter will overflow in + * approximately 4.36 minutes. 
+ * + * Examples: + * 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days + * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes + * + * The function significantly increases overflow duration (from 4.36 + * minutes) by accumulating the energy register into a 'long' as allowed by + * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), + * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and + * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before + * energy1_input overflows. This at 1000 W is an overflow duration of 278 years. + */ +static void +hwm_energy(struct hwm_drvdata *ddat, long *energy) +{ + struct intel_uncore *uncore = ddat->uncore; + struct i915_hwmon *hwmon = ddat->hwmon; + struct hwm_energy_info *ei = &ddat->ei; + intel_wakeref_t wakeref; + i915_reg_t rgaddr; + u32 reg_val; + + if (ddat->gt_n >= 0) + rgaddr = hwmon->rg.energy_status_tile; + else + rgaddr = hwmon->rg.energy_status_all; + + mutex_lock(&hwmon->hwmon_lock); + + with_intel_runtime_pm(uncore->rpm, wakeref) + reg_val = intel_uncore_read(uncore, rgaddr); + + if (reg_val >= ei->reg_val_prev) + ei->accum_energy += reg_val - ei->reg_val_prev; + else + ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + ei->reg_val_prev = reg_val; + + *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, + hwmon->scl_shift_energy); + mutex_unlock(&hwmon->hwmon_lock); +} + +static ssize_t +hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; + u32 r, x, y, x_w = 2; /* 2 bits */ + u64 tau4, out; + + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + + x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); + y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); + /* + * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) + * = (4 | x) << (y - 2) + * where (y - 2) ensures a 1.x fixed point representation of 1.x + * However because y can be < 2, we compute + * tau4 = (4 | x) << y + * but add 2 when doing the final right shift to account for units + */ + tau4 = ((1 << x_w) | x) << y; + /* val in hwmon interface units (millisec) */ + out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); + + return sysfs_emit(buf, "%llu\n", out); +} + +static ssize_t +hwm_power1_max_interval_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + struct i915_hwmon *hwmon = ddat->hwmon; + u32 x, y, rxy, x_w = 2; /* 2 bits */ + u64 tau4, r, max_win; + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + /* + * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12 + * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds + */ +#define PKG_MAX_WIN_DEFAULT 0x12ull + + /* + * val must be < max in hwmon interface units. 
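To make the 1.x * 2^y encoding concrete, assuming the default scl_shift_time of 0xa mentioned above: a requested power1_max_interval of 1000 ms converts to hardware units as (1000 << 10) / 1000 = 1024, giving y = ilog2(1024) = 10 and x = (1024 - (1 << 10)) << 2 >> 10 = 0; reading that back through the show() path, tau4 = ((1 << 2) | 0) << 10 = 4096 and 4096 * 1000 >> (10 + 2) = 1000 ms again. The PKG_MAX_WIN_DEFAULT case (x = 0, y = 0x12) gives tau4 = 4 << 18 = 1048576, i.e. 1048576 * 1000 >> 12 = 256000 ms, the 256 second ceiling noted above.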
The steps below are + * explained in i915_power1_max_interval_show() + */ + r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); + x = REG_FIELD_GET(PKG_MAX_WIN_X, r); + y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); + tau4 = ((1 << x_w) | x) << y; + max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); + + if (val > max_win) + return -EINVAL; + + /* val in hw units */ + val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); + /* Convert to 1.x * power(2,y) */ + if (!val) + return -EINVAL; + y = ilog2(val); + /* x = (val - (1 << y)) >> (y - 2); */ + x = (val - (1ul << y)) << x_w >> y; + + rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); + + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_TIME, rxy); + return count; +} + +static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, + hwm_power1_max_interval_show, + hwm_power1_max_interval_store, 0); + +static struct attribute *hwm_attributes[] = { + &sensor_dev_attr_power1_max_interval.dev_attr.attr, + NULL +}; + +static umode_t hwm_attributes_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + struct i915_hwmon *hwmon = ddat->hwmon; + + if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr) + return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0; + + return 0; +} + +static const struct attribute_group hwm_attrgroup = { + .attrs = hwm_attributes, + .is_visible = hwm_attributes_visible, +}; + +static const struct attribute_group *hwm_groups[] = { + &hwm_attrgroup, + NULL +}; + +static const struct hwmon_channel_info *hwm_info[] = { + HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), + HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT), + HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), + NULL +}; + +static const struct hwmon_channel_info *hwm_gt_info[] = { + HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT), + NULL +}; + +/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ +static int hwm_pcode_read_i1(struct drm_i915_private *i915, u32 *uval) +{ + return snb_pcode_read_p(&i915->uncore, PCODE_POWER_SETUP, + POWER_SETUP_SUBCOMMAND_READ_I1, 0, uval); +} + +static int hwm_pcode_write_i1(struct drm_i915_private *i915, u32 uval) +{ + return snb_pcode_write_p(&i915->uncore, PCODE_POWER_SETUP, + POWER_SETUP_SUBCOMMAND_WRITE_I1, 0, uval); +} + +static umode_t +hwm_in_is_visible(const struct hwm_drvdata *ddat, u32 attr) +{ + struct drm_i915_private *i915 = ddat->uncore->i915; + + switch (attr) { + case hwmon_in_input: + return IS_DG1(i915) || IS_DG2(i915) ? 
0444 : 0; + default: + return 0; + } +} + +static int +hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; + u32 reg_value; + + switch (attr) { + case hwmon_in_input: + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + reg_value = intel_uncore_read(ddat->uncore, hwmon->rg.gt_perf_status); + /* HW register value in units of 2.5 millivolt */ + *val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value) * 25, 10); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static umode_t +hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan) +{ + struct drm_i915_private *i915 = ddat->uncore->i915; + struct i915_hwmon *hwmon = ddat->hwmon; + u32 uval; + + switch (attr) { + case hwmon_power_max: + return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? 0664 : 0; + case hwmon_power_rated_max: + return i915_mmio_reg_valid(hwmon->rg.pkg_power_sku) ? 0444 : 0; + case hwmon_power_crit: + return (hwm_pcode_read_i1(i915, &uval) || + !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + default: + return 0; + } +} + +static int +hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + int ret; + u32 uval; + + switch (attr) { + case hwmon_power_max: + *val = hwm_field_read_and_scale(ddat, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1, + hwmon->scl_shift_power, + SF_POWER); + return 0; + case hwmon_power_rated_max: + *val = hwm_field_read_and_scale(ddat, + hwmon->rg.pkg_power_sku, + PKG_PKG_TDP, + hwmon->scl_shift_power, + SF_POWER); + return 0; + case hwmon_power_crit: + ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval); + if (ret) + return ret; + if (!(uval & POWER_SETUP_I1_WATTS)) + return -ENODEV; + *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), + SF_POWER, POWER_SETUP_I1_SHIFT); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + u32 uval; + + switch (attr) { + case hwmon_power_max: + hwm_field_scale_and_write(ddat, + hwmon->rg.pkg_rapl_limit, + hwmon->scl_shift_power, + SF_POWER, val); + return 0; + case hwmon_power_crit: + uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER); + return hwm_pcode_write_i1(ddat->uncore->i915, uval); + default: + return -EOPNOTSUPP; + } +} + +static umode_t +hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + i915_reg_t rgaddr; + + switch (attr) { + case hwmon_energy_input: + if (ddat->gt_n >= 0) + rgaddr = hwmon->rg.energy_status_tile; + else + rgaddr = hwmon->rg.energy_status_all; + return i915_mmio_reg_valid(rgaddr) ? 0444 : 0; + default: + return 0; + } +} + +static int +hwm_energy_read(struct hwm_drvdata *ddat, u32 attr, long *val) +{ + switch (attr) { + case hwmon_energy_input: + hwm_energy(ddat, val); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static umode_t +hwm_curr_is_visible(const struct hwm_drvdata *ddat, u32 attr) +{ + struct drm_i915_private *i915 = ddat->uncore->i915; + u32 uval; + + switch (attr) { + case hwmon_curr_crit: + return (hwm_pcode_read_i1(i915, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 
0 : 0644; + default: + return 0; + } +} + +static int +hwm_curr_read(struct hwm_drvdata *ddat, u32 attr, long *val) +{ + int ret; + u32 uval; + + switch (attr) { + case hwmon_curr_crit: + ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval); + if (ret) + return ret; + if (uval & POWER_SETUP_I1_WATTS) + return -ENODEV; + *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), + SF_CURR, POWER_SETUP_I1_SHIFT); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +hwm_curr_write(struct hwm_drvdata *ddat, u32 attr, long val) +{ + u32 uval; + + switch (attr) { + case hwmon_curr_crit: + uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR); + return hwm_pcode_write_i1(ddat->uncore->i915, uval); + default: + return -EOPNOTSUPP; + } +} + +static umode_t +hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata; + + switch (type) { + case hwmon_in: + return hwm_in_is_visible(ddat, attr); + case hwmon_power: + return hwm_power_is_visible(ddat, attr, channel); + case hwmon_energy: + return hwm_energy_is_visible(ddat, attr); + case hwmon_curr: + return hwm_curr_is_visible(ddat, attr); + default: + return 0; + } +} + +static int +hwm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + + switch (type) { + case hwmon_in: + return hwm_in_read(ddat, attr, val); + case hwmon_power: + return hwm_power_read(ddat, attr, channel, val); + case hwmon_energy: + return hwm_energy_read(ddat, attr, val); + case hwmon_curr: + return hwm_curr_read(ddat, attr, val); + default: + return -EOPNOTSUPP; + } +} + +static int +hwm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long val) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + + switch (type) { + case hwmon_power: + return hwm_power_write(ddat, attr, channel, val); + case hwmon_curr: + return hwm_curr_write(ddat, attr, val); + default: + return -EOPNOTSUPP; + } +} + +static const struct hwmon_ops hwm_ops = { + .is_visible = hwm_is_visible, + .read = hwm_read, + .write = hwm_write, +}; + +static const struct hwmon_chip_info hwm_chip_info = { + .ops = &hwm_ops, + .info = hwm_info, +}; + +static umode_t +hwm_gt_is_visible(const void *drvdata, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata; + + switch (type) { + case hwmon_energy: + return hwm_energy_is_visible(ddat, attr); + default: + return 0; + } +} + +static int +hwm_gt_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + + switch (type) { + case hwmon_energy: + return hwm_energy_read(ddat, attr, val); + default: + return -EOPNOTSUPP; + } +} + +static const struct hwmon_ops hwm_gt_ops = { + .is_visible = hwm_gt_is_visible, + .read = hwm_gt_read, +}; + +static const struct hwmon_chip_info hwm_gt_chip_info = { + .ops = &hwm_gt_ops, + .info = hwm_gt_info, +}; + +static void +hwm_get_preregistration_info(struct drm_i915_private *i915) +{ + struct i915_hwmon *hwmon = i915->hwmon; + struct intel_uncore *uncore = &i915->uncore; + struct hwm_drvdata *ddat = &hwmon->ddat; + intel_wakeref_t wakeref; + u32 val_sku_unit = 0; + struct intel_gt *gt; + long energy; + int i; + + /* Available for all Gen12+/dGfx */ + hwmon->rg.gt_perf_status = GEN12_RPSTAT1; + + if (IS_DG1(i915) || IS_DG2(i915)) { + 
hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT; + hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU; + hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT; + hwmon->rg.energy_status_all = PCU_PACKAGE_ENERGY_STATUS; + hwmon->rg.energy_status_tile = INVALID_MMIO_REG; + } else if (IS_XEHPSDV(i915)) { + hwmon->rg.pkg_power_sku_unit = GT0_PACKAGE_POWER_SKU_UNIT; + hwmon->rg.pkg_power_sku = INVALID_MMIO_REG; + hwmon->rg.pkg_rapl_limit = GT0_PACKAGE_RAPL_LIMIT; + hwmon->rg.energy_status_all = GT0_PLATFORM_ENERGY_STATUS; + hwmon->rg.energy_status_tile = GT0_PACKAGE_ENERGY_STATUS; + } else { + hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG; + hwmon->rg.pkg_power_sku = INVALID_MMIO_REG; + hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG; + hwmon->rg.energy_status_all = INVALID_MMIO_REG; + hwmon->rg.energy_status_tile = INVALID_MMIO_REG; + } + + with_intel_runtime_pm(uncore->rpm, wakeref) { + /* + * The contents of register hwmon->rg.pkg_power_sku_unit do not change, + * so read it once and store the shift values. + */ + if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit)) + val_sku_unit = intel_uncore_read(uncore, + hwmon->rg.pkg_power_sku_unit); + } + + hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); + hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); + hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); + + /* + * Initialize 'struct hwm_energy_info', i.e. set fields to the + * first value of the energy register read + */ + if (i915_mmio_reg_valid(hwmon->rg.energy_status_all)) + hwm_energy(ddat, &energy); + if (i915_mmio_reg_valid(hwmon->rg.energy_status_tile)) { + for_each_gt(gt, i915, i) + hwm_energy(&hwmon->ddat_gt[i], &energy); + } +} + +void i915_hwmon_register(struct drm_i915_private *i915) +{ + struct device *dev = i915->drm.dev; + struct i915_hwmon *hwmon; + struct device *hwmon_dev; + struct hwm_drvdata *ddat; + struct hwm_drvdata *ddat_gt; + struct intel_gt *gt; + int i; + + /* hwmon is available only for dGfx */ + if (!IS_DGFX(i915)) + return; + + hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + if (!hwmon) + return; + + i915->hwmon = hwmon; + mutex_init(&hwmon->hwmon_lock); + ddat = &hwmon->ddat; + + ddat->hwmon = hwmon; + ddat->uncore = &i915->uncore; + snprintf(ddat->name, sizeof(ddat->name), "i915"); + ddat->gt_n = -1; + + for_each_gt(gt, i915, i) { + ddat_gt = hwmon->ddat_gt + i; + + ddat_gt->hwmon = hwmon; + ddat_gt->uncore = gt->uncore; + snprintf(ddat_gt->name, sizeof(ddat_gt->name), "i915_gt%u", i); + ddat_gt->gt_n = i; + } + + hwm_get_preregistration_info(i915); + + /* hwmon_dev points to device hwmon<i> */ + hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, + ddat, + &hwm_chip_info, + hwm_groups); + if (IS_ERR(hwmon_dev)) { + i915->hwmon = NULL; + return; + } + + ddat->hwmon_dev = hwmon_dev; + + for_each_gt(gt, i915, i) { + ddat_gt = hwmon->ddat_gt + i; + /* + * Create per-gt directories only if a per-gt attribute is + * visible. 
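Once registered, the chips appear under /sys/class/hwmon with the name "i915" (plus "i915_gtN" entries where the per-tile energy register exists) and the standard hwmon attribute names used in this file: in0_input in millivolts, power1_max / power1_rated_max / power1_crit in microwatts, power1_max_interval in milliseconds, curr1_crit in milliamperes and energy1_input in microjoules. A small sketch that locates the chips by name and reads the accumulated energy, with paths and error handling kept simple:

#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	DIR *d = opendir("/sys/class/hwmon");
	struct dirent *de;
	char path[PATH_MAX], name[32];

	while (d && (de = readdir(d))) {
		FILE *f;

		snprintf(path, sizeof(path), "/sys/class/hwmon/%s/name", de->d_name);
		f = fopen(path, "r");
		if (!f || !fgets(name, sizeof(name), f)) {
			if (f)
				fclose(f);
			continue;
		}
		fclose(f);
		if (strncmp(name, "i915", 4))	/* matches "i915" and "i915_gtN" */
			continue;

		snprintf(path, sizeof(path), "/sys/class/hwmon/%s/energy1_input", de->d_name);
		f = fopen(path, "r");
		if (f) {
			long long uj;

			if (fscanf(f, "%lld", &uj) == 1)
				printf("%s: %lld microjoules\n", de->d_name, uj);
			fclose(f);
		}
	}
	if (d)
		closedir(d);
	return 0;
}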
Currently this is only energy + */ + if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) + continue; + + hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + &hwm_gt_chip_info, + NULL); + if (!IS_ERR(hwmon_dev)) + ddat_gt->hwmon_dev = hwmon_dev; + } +} + +void i915_hwmon_unregister(struct drm_i915_private *i915) +{ + fetch_and_zero(&i915->hwmon); +} diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h new file mode 100644 index 000000000000..7ca9cf2c34c9 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_hwmon.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ + +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __I915_HWMON_H__ +#define __I915_HWMON_H__ + +struct drm_i915_private; + +#if IS_REACHABLE(CONFIG_HWMON) +void i915_hwmon_register(struct drm_i915_private *i915); +void i915_hwmon_unregister(struct drm_i915_private *i915); +#else +static inline void i915_hwmon_register(struct drm_i915_private *i915) { }; +static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { }; +#endif + +#endif /* __I915_HWMON_H__ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 9486127a44f7..211913be40ce 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1023,6 +1023,8 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ .has_mslice_steering = 1, \ + .has_oa_bpc_reporting = 1, \ + .has_oa_slice_contrib_limits = 1, \ .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ @@ -1042,7 +1044,6 @@ static const struct intel_device_info xehpsdv_info = { PLATFORM(INTEL_XEHPSDV), NO_DISPLAY, .has_64k_pages = 1, - .needs_compact_pt = 1, .has_media_ratio_mode = 1, .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | @@ -1064,7 +1065,6 @@ static const struct intel_device_info xehpsdv_info = { .has_64k_pages = 1, \ .has_guc_deprivilege = 1, \ .has_heci_pxp = 1, \ - .needs_compact_pt = 1, \ .has_media_ratio_mode = 1, \ .display.has_cdclk_squash = 1, \ .__runtime.platform_engine_mask = \ @@ -1146,6 +1146,7 @@ static const struct intel_device_info mtl_info = { .extra_gt_list = xelpmp_extra_gt, .has_flat_ccs = 0, .has_gmd_id = 1, + .has_mslice_steering = 0, .has_snoop = 1, .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0), @@ -1298,9 +1299,7 @@ bool i915_pci_resource_valid(struct pci_dev *pdev, int bar) static bool intel_mmio_bar_valid(struct pci_dev *pdev, struct intel_device_info *intel_info) { - int gttmmaddr_bar = intel_info->__runtime.graphics.ip.ver == 2 ? 
GEN2_GTTMMADR_BAR : GTTMMADR_BAR; - - return i915_pci_resource_valid(pdev, gttmmaddr_bar); + return i915_pci_resource_valid(pdev, intel_mmio_bar(intel_info->__runtime.graphics.ip.ver)); } static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0defbb43ceea..0dd597a7a11f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -204,10 +204,12 @@ #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_clock_utils.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc.h" #include "gt/intel_lrc_reg.h" #include "gt/intel_ring.h" +#include "gt/uc/intel_guc_slpc.h" #include "i915_drv.h" #include "i915_file_private.h" @@ -286,6 +288,7 @@ static u32 i915_perf_stream_paranoid = true; #define OAREPORT_REASON_CTX_SWITCH (1<<3) #define OAREPORT_REASON_CLK_RATIO (1<<5) +#define HAS_MI_SET_PREDICATE(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate * @@ -320,6 +323,8 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_A12] = { 0, 64 }, [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, + [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, }; #define SAMPLE_OA_REPORT (1<<0) @@ -462,7 +467,7 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); - int report_size = stream->oa_buffer.format_size; + int report_size = stream->oa_buffer.format->size; unsigned long flags; bool pollin; u32 hw_tail; @@ -599,7 +604,7 @@ static int append_oa_sample(struct i915_perf_stream *stream, size_t *offset, const u8 *report) { - int report_size = stream->oa_buffer.format_size; + int report_size = stream->oa_buffer.format->size; struct drm_i915_perf_record_header header; header.type = DRM_I915_PERF_RECORD_SAMPLE; @@ -649,14 +654,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, size_t *offset) { struct intel_uncore *uncore = stream->uncore; - int report_size = stream->oa_buffer.format_size; + int report_size = stream->oa_buffer.format->size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; u32 head, tail; - u32 taken; int ret = 0; if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled)) @@ -692,7 +696,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, for (/* none */; - (taken = OA_TAKEN(tail, head)); + OA_TAKEN(tail, head); head = (head + report_size) & mask) { u8 *report = oa_buf_base + head; u32 *report32 = (void *)report; @@ -774,7 +778,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * switches since it's not-uncommon for periodic samples to * identify a switch before any 'context switch' report. */ - if (!stream->perf->exclusive_stream->ctx || + if (!stream->ctx || stream->specific_ctx_id == ctx_id || stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || reason & OAREPORT_REASON_CTX_SWITCH) { @@ -783,7 +787,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * While filtering for a single context we avoid * leaking the IDs of other contexts. 
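Stepping back to the gen8 append loop above: the OA buffer is consumed as a power-of-two ring, so the amount of data available and the head advance are both plain masked arithmetic (OA_TAKEN() is presumably modular subtraction over OA_BUFFER_SIZE), which is why dropping the unused 'taken' variable in this hunk changes nothing about when the loop stops. A minimal sketch of that arithmetic, with generic names:

/* assumes 'size' is a power of two, as OA_BUFFER_SIZE is */
static inline unsigned int ring_taken(unsigned int tail, unsigned int head,
				      unsigned int size)
{
	return (tail - head) & (size - 1);	/* bytes ready to be read */
}

static inline unsigned int ring_advance(unsigned int head,
					unsigned int report_size,
					unsigned int size)
{
	return (head + report_size) & (size - 1);	/* the loop's '& mask' step */
}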
*/ - if (stream->perf->exclusive_stream->ctx && + if (stream->ctx && stream->specific_ctx_id != ctx_id) { report32[2] = INVALID_CTX_ID; } @@ -943,14 +947,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, size_t *offset) { struct intel_uncore *uncore = stream->uncore; - int report_size = stream->oa_buffer.format_size; + int report_size = stream->oa_buffer.format->size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; u32 head, tail; - u32 taken; int ret = 0; if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled)) @@ -984,7 +987,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, for (/* none */; - (taken = OA_TAKEN(tail, head)); + OA_TAKEN(tail, head); head = (head + report_size) & mask) { u8 *report = oa_buf_base + head; u32 *report32 = (void *)report; @@ -1233,6 +1236,196 @@ retry: return stream->pinned_ctx; } +static int +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset) +{ + u32 *cs, cmd; + + cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (GRAPHICS_VER(rq->engine->i915) >= 8) + cmd++; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(reg); + *cs++ = ggtt_offset; + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset) +{ + struct i915_request *rq; + int err; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + + err = __store_reg_to_mem(rq, reg, ggtt_offset); + + i915_request_add(rq); + if (!err && i915_request_wait(rq, 0, HZ / 2) < 0) + err = -ETIME; + + i915_request_put(rq); + + return err; +} + +static int +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id) +{ + struct i915_vma *scratch; + u32 *val; + int err; + + scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + err = i915_vma_sync(scratch); + if (err) + goto err_scratch; + + err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base), + i915_ggtt_offset(scratch)); + if (err) + goto err_scratch; + + val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); + if (IS_ERR(val)) { + err = PTR_ERR(val); + goto err_scratch; + } + + *ctx_id = *val; + i915_gem_object_unpin_map(scratch->obj); + +err_scratch: + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +/* + * For execlist mode of submission, pick an unused context id + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts + * XXX_MAX_CONTEXT_HW_ID is used by idle context + * + * For GuC mode of submission read context id from the upper dword of the + * EXECLIST_STATUS register. Note that we read this value only once and expect + * that the value stays fixed for the entire OA use case. There are cases where + * GuC KMD implementation may deregister a context to reuse it's context id, but + * we prevent that from happening to the OA context by pinning it. 
+ */ +static int gen12_get_render_context_id(struct i915_perf_stream *stream) +{ + u32 ctx_id, mask; + int ret; + + if (intel_engine_uses_guc(stream->engine)) { + ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id); + if (ret) + return ret; + + mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) << + (GEN12_GUC_SW_CTX_ID_SHIFT - 32); + } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) { + ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + + mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + } else { + ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << + (GEN11_SW_CTX_ID_SHIFT - 32); + + mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << + (GEN11_SW_CTX_ID_SHIFT - 32); + } + stream->specific_ctx_id = ctx_id & mask; + stream->specific_ctx_id_mask = mask; + + return 0; +} + +static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) +{ + u32 idx = *offset; + u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); + bool found = false; + + idx++; + for (; idx < len; idx += 2) { + if (state[idx] == reg) { + found = true; + break; + } + } + + *offset = idx; + return found; +} + +static u32 oa_context_image_offset(struct intel_context *ce, u32 reg) +{ + u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4; + u32 *state = ce->lrc_reg_state; + + for (offset = 0; offset < len; ) { + if (IS_MI_LRI_CMD(state[offset])) { + /* + * We expect reg-value pairs in MI_LRI command, so + * MI_LRI_LEN() should be even, if not, issue a warning. + */ + drm_WARN_ON(&ce->engine->i915->drm, + MI_LRI_LEN(state[offset]) & 0x1); + + if (oa_find_reg_in_lri(state, reg, &offset, len)) + break; + } else { + offset++; + } + } + + return offset < len ? offset : U32_MAX; +} + +static int set_oa_ctx_ctrl_offset(struct intel_context *ce) +{ + i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base); + struct i915_perf *perf = &ce->engine->i915->perf; + u32 offset = perf->ctx_oactxctrl_offset; + + /* Do this only once. Failure is stored as offset of U32_MAX */ + if (offset) + goto exit; + + offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg)); + perf->ctx_oactxctrl_offset = offset; + + drm_dbg(&ce->engine->i915->drm, + "%s oa ctx control at 0x%08x dword offset\n", + ce->engine->name, offset); + +exit: + return offset && offset != U32_MAX ? 0 : -ENODEV; +} + +static bool engine_supports_mi_query(struct intel_engine_cs *engine) +{ + return engine->class == RENDER_CLASS; +} + /** * oa_get_render_ctx_id - determine and hold ctx hw id * @stream: An i915-perf stream opened for OA metrics @@ -1246,11 +1439,27 @@ retry: static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct intel_context *ce; + int ret = 0; ce = oa_pin_context(stream); if (IS_ERR(ce)) return PTR_ERR(ce); + if (engine_supports_mi_query(stream->engine)) { + /* + * We are enabling perf query here. If we don't find the context + * offset here, just return an error. 
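The specific_ctx_id / specific_ctx_id_mask values computed in gen12_get_render_context_id() above are just a bitfield placed in the upper dword of the 64-bit context descriptor. A standalone sketch of that arithmetic follows; the 11-bit field at bit 37 is an assumed example layout for illustration, the real widths and shifts come from the driver's lrc register definitions.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Build the mask for a context-ID field that lives in the upper dword
	 * of the context descriptor: "width" bits starting at absolute bit
	 * position "shift" (shift >= 32).
	 */
	static unsigned int ctx_id_mask(unsigned int width, unsigned int shift)
	{
		return ((1u << width) - 1) << (shift - 32);
	}

	int main(void)
	{
		/* Assumed layout: an 11-bit SW context ID at bit 37. */
		unsigned int mask = ctx_id_mask(11, 37);
		/* Reserve a value near the top of the field, as the code above
		 * does with (MAX_CONTEXT_HW_ID - 1). */
		unsigned int id = ((1u << 11) - 2) << (37 - 32);

		printf("mask = 0x%08x, id = 0x%08x\n", mask, id & mask);
		return 0;
	}

With these assumed numbers the mask comes out as 0xffe0 and the reserved ID as 0xffc0, i.e. both confined to the upper-dword field the OA code later compares report context IDs against.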
+ */ + ret = set_oa_ctx_ctrl_offset(ce); + if (ret) { + intel_context_unpin(ce); + drm_err(&stream->perf->i915->drm, + "Enabling perf query failed for %s\n", + stream->engine->name); + return ret; + } + } + switch (GRAPHICS_VER(ce->engine->i915)) { case 7: { /* @@ -1292,24 +1501,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 11: case 12: - if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) { - stream->specific_ctx_id_mask = - ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << - (XEHP_SW_CTX_ID_SHIFT - 32); - stream->specific_ctx_id = - (XEHP_MAX_CONTEXT_HW_ID - 1) << - (XEHP_SW_CTX_ID_SHIFT - 32); - } else { - stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - /* - * Pick an unused context id - * 0 - BITS_PER_LONG are used by other contexts - * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context - */ - stream->specific_ctx_id = - (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - } + ret = gen12_get_render_context_id(stream); break; default: @@ -1323,7 +1515,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) stream->specific_ctx_id, stream->specific_ctx_id_mask); - return 0; + return ret; } /** @@ -1375,8 +1567,9 @@ free_noa_wait(struct i915_perf_stream *stream) static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct i915_perf *perf = stream->perf; + struct intel_gt *gt = stream->engine->gt; - if (WARN_ON(stream != perf->exclusive_stream)) + if (WARN_ON(stream != gt->perf.exclusive_stream)) return; /* @@ -1385,11 +1578,20 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) * * See i915_oa_init_reg_state() and lrc_configure_all_contexts() */ - WRITE_ONCE(perf->exclusive_stream, NULL); + WRITE_ONCE(gt->perf.exclusive_stream, NULL); perf->ops.disable_metric_set(stream); free_oa_buffer(stream); + /* + * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6. + */ + if (intel_uc_uses_guc_rc(>->uc) && + (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) + drm_WARN_ON(>->i915->drm, + intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc)); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); intel_engine_pm_put(stream->engine); @@ -1563,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_private *i915 = stream->perf->i915; + struct intel_gt *gt = stream->engine->gt; struct drm_i915_gem_object *bo; struct i915_vma *vma; int ret; @@ -1582,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); /* PreHSW required 512K alignment, HSW requires 16M */ - vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); + vma = i915_vma_instance(bo, >->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unref; } + + /* + * PreHSW required 512K alignment. + * HSW and onwards, align to requested size of OA buffer. 
+ */ + ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH); + if (ret) { + drm_err(>->i915->drm, "Failed to pin OA buffer %d\n", ret); + goto err_unref; + } + stream->oa_buffer.vma = vma; stream->oa_buffer.vaddr = @@ -1636,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, static int alloc_noa_wait(struct i915_perf_stream *stream) { struct drm_i915_private *i915 = stream->perf->i915; + struct intel_gt *gt = stream->engine->gt; struct drm_i915_gem_object *bo; struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - @@ -1654,6 +1869,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) DELTA_TARGET, N_CS_GPR }; + i915_reg_t mi_predicate_result = HAS_MI_SET_PREDICATE(i915) ? + MI_PREDICATE_RESULT_2_ENGINE(base) : + MI_PREDICATE_RESULT_1(RENDER_RING_BASE); bo = i915_gem_object_create_internal(i915, 4096); if (IS_ERR(bo)) { @@ -1673,12 +1891,16 @@ retry: * multiple OA config BOs will have a jump to this address and it * needs to be fixed during the lifetime of the i915/perf stream. */ - vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH); + vma = i915_vma_instance(bo, >->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_ww; } + ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto out_ww; + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); if (IS_ERR(batch)) { ret = PTR_ERR(batch); @@ -1691,7 +1913,7 @@ retry: stream, cs, true /* save */, CS_GPR(i), INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); cs = save_restore_register( - stream, cs, true /* save */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE), + stream, cs, true /* save */, mi_predicate_result, INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); /* First timestamp snapshot location. */ @@ -1745,7 +1967,10 @@ retry: */ *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); - *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE)); + *cs++ = i915_mmio_reg_offset(mi_predicate_result); + + if (HAS_MI_SET_PREDICATE(i915)) + *cs++ = MI_SET_PREDICATE | 1; /* Restart from the beginning if we had timestamps roll over. */ *cs++ = (GRAPHICS_VER(i915) < 8 ? @@ -1755,6 +1980,9 @@ retry: *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; *cs++ = 0; + if (HAS_MI_SET_PREDICATE(i915)) + *cs++ = MI_SET_PREDICATE; + /* * Now add the diff between to previous timestamps and add it to : * (((1 * << 64) - 1) - delay_ns) @@ -1782,7 +2010,10 @@ retry: */ *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); - *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE)); + *cs++ = i915_mmio_reg_offset(mi_predicate_result); + + if (HAS_MI_SET_PREDICATE(i915)) + *cs++ = MI_SET_PREDICATE | 1; /* Predicate the jump. */ *cs++ = (GRAPHICS_VER(i915) < 8 ? @@ -1792,13 +2023,16 @@ retry: *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; *cs++ = 0; + if (HAS_MI_SET_PREDICATE(i915)) + *cs++ = MI_SET_PREDICATE; + /* Restore registers. */ for (i = 0; i < N_CS_GPR; i++) cs = save_restore_register( stream, cs, false /* restore */, CS_GPR(i), INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); cs = save_restore_register( - stream, cs, false /* restore */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE), + stream, cs, false /* restore */, mi_predicate_result, INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); /* And return to the ring. 
*/ @@ -2283,11 +2517,12 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, { int err; struct intel_context *ce = stream->pinned_ctx; - u32 format = stream->oa_buffer.format; + u32 format = stream->oa_buffer.format->format; + u32 offset = stream->perf->ctx_oactxctrl_offset; struct flex regs_context[] = { { GEN8_OACTXCONTROL, - stream->perf->ctx_oactxctrl_offset + 1, + offset + 1, active ? GEN8_OA_COUNTER_RESUME : 0, }, }; @@ -2312,12 +2547,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, }, }; - /* Modify the context image of pinned context with regs_context*/ + /* Modify the context image of pinned context with regs_context */ err = intel_context_lock_pinned(ce); if (err) return err; - err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); + err = gen8_modify_context(ce, regs_context, + ARRAY_SIZE(regs_context)); intel_context_unlock_pinned(ce); if (err) return err; @@ -2359,10 +2595,11 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct intel_engine_cs *engine; + struct intel_gt *gt = stream->engine->gt; struct i915_gem_context *ctx, *cn; int err; - lockdep_assert_held(&stream->perf->lock); + lockdep_assert_held(>->perf.lock); /* * The OA register config is setup through the context image. This image @@ -2442,6 +2679,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config, struct i915_active *active) { + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) @@ -2452,7 +2690,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream, }, { GEN8_OACTXCONTROL, - stream->perf->ctx_oactxctrl_offset + 1, + ctx_oactxctrl + 1, }, { EU_PERF_CNTL0, ctx_flexeuN(0) }, { EU_PERF_CNTL1, ctx_flexeuN(1) }, @@ -2540,12 +2778,26 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream, struct i915_active *active) { + struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; + u32 sqcnt1; int ret; + /* + * Wa_1508761755:xehpsdv, dg2 + * EU NOA signals behave incorrectly if EU clock gating is enabled. + * Disable thread stall DOP gating and EU DOP gating. + */ + if (IS_XEHPSDV(i915) || IS_DG2(i915)) { + intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + intel_uncore_write(uncore, GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING)); + } + intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, /* Disable clk ratio reports, like previous Gens. */ _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | @@ -2563,6 +2815,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, : 0); /* + * Initialize Super Queue Internal Cnt Register + * Set PMON Enable in order to collect valid metrics. + * Enable byets per clock reporting in OA for XEHPSDV onward. + */ + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); + + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); + + /* * Update all contexts prior writing the mux configurations as we need * to make sure all slices/subslices are ON before writing to NOA * registers. 
@@ -2611,6 +2873,19 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream) static void gen12_disable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; + struct drm_i915_private *i915 = stream->perf->i915; + u32 sqcnt1; + + /* + * Wa_1508761755:xehpsdv, dg2 + * Enable thread stall DOP gating and EU DOP gating. + */ + if (IS_XEHPSDV(i915) || IS_DG2(i915)) { + intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN, + _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); + intel_uncore_write(uncore, GEN7_ROW_CHICKEN2, + _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING)); + } /* Reset all contexts' slices/subslices configurations. */ gen12_configure_all_contexts(stream, NULL, NULL); @@ -2621,6 +2896,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); + + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); + + /* Reset PMON Enable to save power. */ + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0); } static void gen7_oa_enable(struct i915_perf_stream *stream) @@ -2630,7 +2911,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) u32 ctx_id = stream->specific_ctx_id; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; - u32 report_format = stream->oa_buffer.format; + u32 report_format = stream->oa_buffer.format->format; /* * Reset buf pointers so we don't forward reports from before now. @@ -2656,7 +2937,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) static void gen8_oa_enable(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; - u32 report_format = stream->oa_buffer.format; + u32 report_format = stream->oa_buffer.format->format; /* * Reset buf pointers so we don't forward reports from before now. @@ -2682,7 +2963,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) static void gen12_oa_enable(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; - u32 report_format = stream->oa_buffer.format; + u32 report_format = stream->oa_buffer.format->format; /* * If we don't want OA reports from the OA buffer, then we don't even @@ -2838,6 +3119,30 @@ get_sseu_config(struct intel_sseu *out_sseu, return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu); } +/* + * OA timestamp frequency = CS timestamp frequency in most platforms. On some + * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such + * cases, return the adjusted CS timestamp frequency to the user. 
+ */ +u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) +{ + /* Wa_18013179988:dg2 */ + if (IS_DG2(i915)) { + intel_wakeref_t wakeref; + u32 reg, shift; + + with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) + reg = intel_uncore_read(to_gt(i915)->uncore, RPM_CONFIG0); + + shift = REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, + reg); + + return to_gt(i915)->clock_frequency << (3 - shift); + } + + return to_gt(i915)->clock_frequency; +} + /** * i915_oa_stream_init - validate combined props for OA stream and init * @stream: An i915 perf stream @@ -2862,7 +3167,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct i915_perf *perf = stream->perf; - int format_size; + struct intel_gt *gt; int ret; if (!props->engine) { @@ -2870,6 +3175,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, "OA engine not specified\n"); return -EINVAL; } + gt = props->engine->gt; /* * If the sysfs metrics/ directory wasn't registered for some @@ -2900,7 +3206,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - if (perf->exclusive_stream) { + if (gt->perf.exclusive_stream) { drm_dbg(&stream->perf->i915->drm, "OA unit already in use\n"); return -EBUSY; @@ -2917,20 +3223,15 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->sample_size = sizeof(struct drm_i915_perf_record_header); - format_size = perf->oa_formats[props->oa_format].size; + stream->oa_buffer.format = &perf->oa_formats[props->oa_format]; + if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0)) + return -EINVAL; stream->sample_flags = props->sample_flags; - stream->sample_size += format_size; - - stream->oa_buffer.format_size = format_size; - if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0)) - return -EINVAL; + stream->sample_size += stream->oa_buffer.format->size; stream->hold_preemption = props->hold_preemption; - stream->oa_buffer.format = - perf->oa_formats[props->oa_format].format; - stream->periodic = props->oa_periodic; if (stream->periodic) stream->period_exponent = props->oa_period_exponent; @@ -2974,14 +3275,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, intel_engine_pm_get(stream->engine); intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); + /* + * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes + * OA to lose the configuration state. Prevent this by overriding GUCRC + * mode. 
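The i915_perf_oa_timestamp_frequency() helper above scales the CS timestamp frequency by 2^(3 - shift) when the DG2 workaround applies. A rough numeric sketch of that scaling; the 19.2 MHz base clock and the 0..3 shift range are illustrative assumptions, not values read from hardware.

	#include <stdio.h>

	int main(void)
	{
		/* Assumed CS timestamp frequency. */
		unsigned int cs_freq_hz = 19200000;
		unsigned int shift;

		/* Mirror the "clock_frequency << (3 - shift)" adjustment above. */
		for (shift = 0; shift <= 3; shift++)
			printf("ctc shift %u -> OA timestamp freq %u Hz\n",
			       shift, cs_freq_hz << (3 - shift));
		return 0;
	}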
+ */ + if (intel_uc_uses_guc_rc(>->uc) && + (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) { + ret = intel_guc_slpc_override_gucrc_mode(>->uc.guc.slpc, + SLPC_GUCRC_MODE_GUCRC_NO_RC6); + if (ret) { + drm_dbg(&stream->perf->i915->drm, + "Unable to override gucrc mode\n"); + goto err_config; + } + } + ret = alloc_oa_buffer(stream); if (ret) goto err_oa_buf_alloc; stream->ops = &i915_oa_stream_ops; - perf->sseu = props->sseu; - WRITE_ONCE(perf->exclusive_stream, stream); + stream->engine->gt->perf.sseu = props->sseu; + WRITE_ONCE(gt->perf.exclusive_stream, stream); ret = i915_perf_stream_enable_sync(stream); if (ret) { @@ -2999,11 +3317,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->poll_check_timer.function = oa_poll_check_timer_cb; init_waitqueue_head(&stream->poll_wq); spin_lock_init(&stream->oa_buffer.ptr_lock); + mutex_init(&stream->lock); return 0; err_enable: - WRITE_ONCE(perf->exclusive_stream, NULL); + WRITE_ONCE(gt->perf.exclusive_stream, NULL); perf->ops.disable_metric_set(stream); free_oa_buffer(stream); @@ -3033,7 +3352,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce, return; /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ - stream = READ_ONCE(engine->i915->perf.exclusive_stream); + stream = READ_ONCE(engine->gt->perf.exclusive_stream); if (stream && GRAPHICS_VER(stream->perf->i915) < 12) gen8_update_reg_state_unlocked(ce, stream); } @@ -3062,7 +3381,6 @@ static ssize_t i915_perf_read(struct file *file, loff_t *ppos) { struct i915_perf_stream *stream = file->private_data; - struct i915_perf *perf = stream->perf; size_t offset = 0; int ret; @@ -3086,14 +3404,14 @@ static ssize_t i915_perf_read(struct file *file, if (ret) return ret; - mutex_lock(&perf->lock); + mutex_lock(&stream->lock); ret = stream->ops->read(stream, buf, count, &offset); - mutex_unlock(&perf->lock); + mutex_unlock(&stream->lock); } while (!offset && !ret); } else { - mutex_lock(&perf->lock); + mutex_lock(&stream->lock); ret = stream->ops->read(stream, buf, count, &offset); - mutex_unlock(&perf->lock); + mutex_unlock(&stream->lock); } /* We allow the poll checking to sometimes report false positive EPOLLIN @@ -3140,9 +3458,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that * will be woken for new stream data. * - * Note: The &perf->lock mutex has been taken to serialize - * with any non-file-operation driver hooks. - * * Returns: any poll events that are ready without sleeping */ static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, @@ -3181,12 +3496,11 @@ static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, static __poll_t i915_perf_poll(struct file *file, poll_table *wait) { struct i915_perf_stream *stream = file->private_data; - struct i915_perf *perf = stream->perf; __poll_t ret; - mutex_lock(&perf->lock); + mutex_lock(&stream->lock); ret = i915_perf_poll_locked(stream, file, wait); - mutex_unlock(&perf->lock); + mutex_unlock(&stream->lock); return ret; } @@ -3285,9 +3599,6 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * @cmd: the ioctl request * @arg: the ioctl data * - * Note: The &perf->lock mutex has been taken to serialize - * with any non-file-operation driver hooks. - * * Returns: zero on success or a negative error code. Returns -EINVAL for * an unknown ioctl request. 
*/ @@ -3325,12 +3636,11 @@ static long i915_perf_ioctl(struct file *file, unsigned long arg) { struct i915_perf_stream *stream = file->private_data; - struct i915_perf *perf = stream->perf; long ret; - mutex_lock(&perf->lock); + mutex_lock(&stream->lock); ret = i915_perf_ioctl_locked(stream, cmd, arg); - mutex_unlock(&perf->lock); + mutex_unlock(&stream->lock); return ret; } @@ -3342,7 +3652,7 @@ static long i915_perf_ioctl(struct file *file, * Frees all resources associated with the given i915 perf @stream, disabling * any associated data capture in the process. * - * Note: The &perf->lock mutex has been taken to serialize + * Note: The >->perf.lock mutex has been taken to serialize * with any non-file-operation driver hooks. */ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) @@ -3374,10 +3684,16 @@ static int i915_perf_release(struct inode *inode, struct file *file) { struct i915_perf_stream *stream = file->private_data; struct i915_perf *perf = stream->perf; + struct intel_gt *gt = stream->engine->gt; - mutex_lock(&perf->lock); + /* + * Within this call, we know that the fd is being closed and we have no + * other user of stream->lock. Use the perf lock to destroy the stream + * here. + */ + mutex_lock(>->perf.lock); i915_perf_destroy_locked(stream); - mutex_unlock(&perf->lock); + mutex_unlock(>->perf.lock); /* Release the reference the perf stream kept on the driver. */ drm_dev_put(&perf->i915->drm); @@ -3410,7 +3726,7 @@ static const struct file_operations fops = { * See i915_perf_ioctl_open() for interface details. * * Implements further stream config validation and stream initialization on - * behalf of i915_perf_open_ioctl() with the &perf->lock mutex + * behalf of i915_perf_open_ioctl() with the >->perf.lock mutex * taken to serialize with any non-file-operation driver hooks. * * Note: at this point the @props have only been validated in isolation and @@ -3565,8 +3881,10 @@ err: static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { - return intel_gt_clock_interval_to_ns(to_gt(perf->i915), - 2ULL << exponent); + u64 nom = (2ULL << exponent) * NSEC_PER_SEC; + u32 den = i915_perf_oa_timestamp_frequency(perf->i915); + + return div_u64(nom + den - 1, den); } static __always_inline bool @@ -3794,7 +4112,7 @@ static int read_properties_unlocked(struct i915_perf *perf, * mutex to avoid an awkward lockdep with mmap_lock. * * Most of the implementation details are handled by - * i915_perf_open_ioctl_locked() after taking the &perf->lock + * i915_perf_open_ioctl_locked() after taking the >->perf.lock * mutex for serializing with any non-file-operation driver hooks. 
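The reworked oa_exponent_to_ns() in the hunk above is a ceiling division: (2 << exponent) OA timestamp ticks converted to nanoseconds using the OA timestamp frequency rather than the GT clock helper. A standalone sketch with an assumed 19.2 MHz OA clock:

	#include <stdint.h>
	#include <stdio.h>

	#define NSEC_PER_SEC 1000000000ULL

	/* Ceiling division, matching div_u64(nom + den - 1, den) in the patch. */
	static unsigned long long oa_exponent_to_ns(unsigned long long oa_freq_hz,
						    int exponent)
	{
		unsigned long long nom = (2ULL << exponent) * NSEC_PER_SEC;

		return (nom + oa_freq_hz - 1) / oa_freq_hz;
	}

	int main(void)
	{
		/* Assumed OA timestamp frequency; the driver derives the real
		 * value from i915_perf_oa_timestamp_frequency(). */
		unsigned long long oa_freq_hz = 19200000;

		/* exponent 5 -> 64 ticks -> ceil(64e9 / 19.2e6) = 3334 ns */
		printf("%llu ns\n", oa_exponent_to_ns(oa_freq_hz, 5));
		return 0;
	}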
* * Return: A newly opened i915 Perf stream file descriptor or negative @@ -3805,6 +4123,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, { struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_open_param *param = data; + struct intel_gt *gt; struct perf_open_properties props; u32 known_open_flags; int ret; @@ -3831,9 +4150,11 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - mutex_lock(&perf->lock); + gt = props.engine->gt; + + mutex_lock(>->perf.lock); ret = i915_perf_open_ioctl_locked(perf, param, &props, file); - mutex_unlock(&perf->lock); + mutex_unlock(>->perf.lock); return ret; } @@ -3849,6 +4170,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, void i915_perf_register(struct drm_i915_private *i915) { struct i915_perf *perf = &i915->perf; + struct intel_gt *gt = to_gt(i915); if (!perf->i915) return; @@ -3857,13 +4179,13 @@ void i915_perf_register(struct drm_i915_private *i915) * i915_perf_open_ioctl(); considering that we register after * being exposed to userspace. */ - mutex_lock(&perf->lock); + mutex_lock(>->perf.lock); perf->metrics_kobj = kobject_create_and_add("metrics", &i915->drm.primary->kdev->kobj); - mutex_unlock(&perf->lock); + mutex_unlock(>->perf.lock); } /** @@ -3939,6 +4261,11 @@ static const struct i915_range gen12_oa_b_counters[] = { {} }; +static const struct i915_range xehp_oa_b_counters[] = { + { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ + { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ +}; + static const struct i915_range gen7_oa_mux_regs[] = { { .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */ { .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */ @@ -4013,6 +4340,12 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) return reg_in_range_table(addr, gen12_oa_b_counters); } +static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) +{ + return reg_in_range_table(addr, xehp_oa_b_counters) || + reg_in_range_table(addr, gen12_oa_b_counters); +} + static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return reg_in_range_table(addr, gen12_oa_mux_regs); @@ -4411,11 +4744,47 @@ static void oa_init_supported_formats(struct i915_perf *perf) oa_format_add(perf, I915_OA_FORMAT_C4_B8); break; + case INTEL_DG2: + oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); + break; + default: MISSING_CASE(platform); } } +static void i915_perf_init_info(struct drm_i915_private *i915) +{ + struct i915_perf *perf = &i915->perf; + + switch (GRAPHICS_VER(i915)) { + case 8: + perf->ctx_oactxctrl_offset = 0x120; + perf->ctx_flexeu0_offset = 0x2ce; + perf->gen8_valid_ctx_bit = BIT(25); + break; + case 9: + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; + perf->gen8_valid_ctx_bit = BIT(16); + break; + case 11: + perf->ctx_oactxctrl_offset = 0x124; + perf->ctx_flexeu0_offset = 0x78e; + perf->gen8_valid_ctx_bit = BIT(16); + break; + case 12: + /* + * Calculate offset at runtime in oa_pin_context for gen12 and + * cache the value in perf->ctx_oactxctrl_offset. + */ + break; + default: + MISSING_CASE(GRAPHICS_VER(i915)); + } +} + /** * i915_perf_init - initialize i915-perf state on module bind * @i915: i915 device instance @@ -4429,12 +4798,6 @@ void i915_perf_init(struct drm_i915_private *i915) { struct i915_perf *perf = &i915->perf; - /* XXX const struct i915_perf_ops! 
*/ - - /* i915_perf is not enabled for DG2 yet */ - if (IS_DG2(i915)) - return; - perf->oa_formats = oa_formats; if (IS_HASWELL(i915)) { perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; @@ -4454,6 +4817,7 @@ void i915_perf_init(struct drm_i915_private *i915) * execlist mode by default. */ perf->ops.read = gen8_oa_read; + i915_perf_init_info(i915); if (IS_GRAPHICS_VER(i915, 8, 9)) { perf->ops.is_valid_b_counter_reg = @@ -4473,18 +4837,6 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen8_disable_metric_set; perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - - if (GRAPHICS_VER(i915) == 8) { - perf->ctx_oactxctrl_offset = 0x120; - perf->ctx_flexeu0_offset = 0x2ce; - - perf->gen8_valid_ctx_bit = BIT(25); - } else { - perf->ctx_oactxctrl_offset = 0x128; - perf->ctx_flexeu0_offset = 0x3de; - - perf->gen8_valid_ctx_bit = BIT(16); - } } else if (GRAPHICS_VER(i915) == 11) { perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; @@ -4498,13 +4850,10 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen11_disable_metric_set; perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - - perf->ctx_oactxctrl_offset = 0x124; - perf->ctx_flexeu0_offset = 0x78e; - - perf->gen8_valid_ctx_bit = BIT(16); } else if (GRAPHICS_VER(i915) == 12) { perf->ops.is_valid_b_counter_reg = + HAS_OA_SLICE_CONTRIB_LIMITS(i915) ? + xehp_is_valid_b_counter_addr : gen12_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = gen12_is_valid_mux_addr; @@ -4516,14 +4865,15 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.enable_metric_set = gen12_enable_metric_set; perf->ops.disable_metric_set = gen12_disable_metric_set; perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; - - perf->ctx_flexeu0_offset = 0; - perf->ctx_oactxctrl_offset = 0x144; } } if (perf->ops.enable_metric_set) { - mutex_init(&perf->lock); + struct intel_gt *gt; + int i; + + for_each_gt(gt, i915, i) + mutex_init(>->perf.lock); /* Choose a representative limit */ oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2; diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 1d1329e5af3a..f96e09a4af04 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -57,4 +57,6 @@ static inline void i915_oa_config_put(struct i915_oa_config *oa_config) kref_put(&oa_config->ref, i915_oa_config_release); } +u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915); + #endif /* __I915_PERF_H__ */ diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h index f31c9f13a9fc..381d94101610 100644 --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h @@ -97,7 +97,7 @@ #define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 #define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0) -#define GEN12_OACTXCONTROL _MMIO(0x2360) +#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360) #define GEN12_OAR_OASTATUS _MMIO(0x2968) /* Gen12 OAG unit */ @@ -134,4 +134,8 @@ #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 +#define GEN12_SQCNT1 _MMIO(0x8718) +#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) +#define GEN12_SQCNT1_OABPC REG_BIT(29) + #endif /* __INTEL_PERF_OA_REGS__ */ diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 05cb9a335a97..e0c96b44eda8 100644 --- 
a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -146,6 +146,11 @@ struct i915_perf_stream { */ struct intel_engine_cs *engine; + /* + * Lock associated with operations on stream + */ + struct mutex lock; + /** * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` * properties given when opening a stream, representing the contents @@ -245,11 +250,10 @@ struct i915_perf_stream { * @oa_buffer: State of the OA buffer. */ struct { + const struct i915_oa_format *format; struct i915_vma *vma; u8 *vaddr; u32 last_ctx_id; - int format; - int format_size; int size_exponent; /** @@ -380,6 +384,26 @@ struct i915_oa_ops { u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); }; +struct i915_perf_gt { + /* + * Lock associated with anything below within this structure. + */ + struct mutex lock; + + /** + * @sseu: sseu configuration selected to run while perf is active, + * applies to all contexts. + */ + struct intel_sseu sseu; + + /* + * @exclusive_stream: The stream currently using the OA unit. This is + * sometimes accessed outside a syscall associated to its file + * descriptor. + */ + struct i915_perf_stream *exclusive_stream; +}; + struct i915_perf { struct drm_i915_private *i915; @@ -397,25 +421,6 @@ struct i915_perf { */ struct idr metrics_idr; - /* - * Lock associated with anything below within this structure - * except exclusive_stream. - */ - struct mutex lock; - - /* - * The stream currently using the OA unit. If accessed - * outside a syscall associated to its file - * descriptor. - */ - struct i915_perf_stream *exclusive_stream; - - /** - * @sseu: sseu configuration selected to run while perf is active, - * applies to all contexts. - */ - struct intel_sseu sseu; - /** * For rate limiting any notifications of spurious * invalid OA reports diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2a887cdd7c1b..1c0da50c0dc7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1796,6 +1796,15 @@ #define XEHPSDV_RP_STATE_CAP _MMIO(0x250014) #define PVC_RP_STATE_CAP _MMIO(0x281014) +#define MTL_RP_STATE_CAP _MMIO(0x138000) +#define MTL_MEDIAP_STATE_CAP _MMIO(0x138020) +#define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) +#define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) + +#define MTL_GT_RPE_FREQUENCY _MMIO(0x13800c) +#define MTL_MPE_FREQUENCY _MMIO(0x13802c) +#define MTL_RPE_MASK REG_GENMASK(8, 0) + #define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8) #define GT0_PERF_LIMIT_REASONS_MASK 0xde3 #define PROCHOT_MASK REG_BIT(0) @@ -1806,6 +1815,8 @@ #define POWER_LIMIT_4_MASK REG_BIT(8) #define POWER_LIMIT_1_MASK REG_BIT(10) #define POWER_LIMIT_2_MASK REG_BIT(11) +#define GT0_PERF_LIMIT_REASONS_LOG_MASK REG_GENMASK(31, 16) +#define MTL_MEDIA_PERF_LIMIT_REASONS _MMIO(0x138030) #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) @@ -6652,6 +6663,12 @@ #define DG1_PCODE_STATUS 0x7E #define DG1_UNCORE_GET_INIT_STATUS 0x0 #define DG1_UNCORE_INIT_STATUS_COMPLETE 0x1 +#define PCODE_POWER_SETUP 0x7C +#define POWER_SETUP_SUBCOMMAND_READ_I1 0x4 +#define POWER_SETUP_SUBCOMMAND_WRITE_I1 0x5 +#define POWER_SETUP_I1_WATTS REG_BIT(31) +#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ +#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) #define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 #define XEHP_PCODE_FREQUENCY_CONFIG 0x6e /* xehpsdv, pvc */ /* XEHP_PCODE_FREQUENCY_CONFIG sub-commands (param1) */ @@ -7788,8 +7805,13 @@ enum skl_power_gate { _ICL_PIPE_DSS_CTL2_PB, \ 
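The POWER_SETUP_I1_SHIFT comment above marks the I1 value exchanged with pcode as a 10.6 fixed-point number: ten integer bits and six fractional bits inside the 16-bit data field. A small sketch of encoding and decoding such a value; the 5.25 A sample and the milli-unit interface are arbitrary assumptions for illustration.

	#include <stdio.h>

	#define I1_SHIFT	6		/* 10.6 fixed point */
	#define I1_DATA_MASK	0xffffu		/* 16-bit data field */

	/* Encode a value given in milli-units into 10.6 fixed point. */
	static unsigned int i1_encode_milli(unsigned int milli)
	{
		return ((milli << I1_SHIFT) / 1000) & I1_DATA_MASK;
	}

	/* Decode a 10.6 fixed-point value back into milli-units. */
	static unsigned int i1_decode_milli(unsigned int raw)
	{
		return ((raw & I1_DATA_MASK) * 1000) >> I1_SHIFT;
	}

	int main(void)
	{
		unsigned int raw = i1_encode_milli(5250);	/* assume 5.25 A */

		printf("raw = 0x%04x, decoded = %u mA\n", raw, i1_decode_milli(raw));
		return 0;
	}

With the assumed input, the encoded field is 0x150 (5.25 * 64) and decodes back to 5250 milli-units, which is the round trip the sysfs/hwmon plumbing relies on.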
_ICL_PIPE_DSS_CTL2_PC) +#define GGC _MMIO(0x108040) +#define GMS_MASK REG_GENMASK(15, 8) +#define GGMS_MASK REG_GENMASK(7, 6) + #define GEN12_GSMBASE _MMIO(0x108100) #define GEN12_DSMBASE _MMIO(0x1080C0) +#define GEN12_BDSM_MASK REG_GENMASK64(63, 20) #define XEHP_CLOCK_GATE_DIS _MMIO(0x101014) #define SGSI_SIDECLK_DIS REG_BIT(17) diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h index 8f486f77609f..f1859046a9c4 100644 --- a/drivers/gpu/drm/i915/i915_reg_defs.h +++ b/drivers/gpu/drm/i915/i915_reg_defs.h @@ -104,22 +104,21 @@ typedef struct { #define _MMIO(r) ((const i915_reg_t){ .reg = (r) }) -#define INVALID_MMIO_REG _MMIO(0) - -static __always_inline u32 i915_mmio_reg_offset(i915_reg_t reg) -{ - return reg.reg; -} +typedef struct { + u32 reg; +} i915_mcr_reg_t; -static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b) -{ - return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b); -} +#define INVALID_MMIO_REG _MMIO(0) -static inline bool i915_mmio_reg_valid(i915_reg_t reg) -{ - return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG); -} +/* + * These macros can be used on either i915_reg_t or i915_mcr_reg_t since they're + * simply operations on the register's offset and don't care about the MCR vs + * non-MCR nature of the register. + */ +#define i915_mmio_reg_offset(r) \ + _Generic((r), i915_reg_t: (r).reg, i915_mcr_reg_t: (r).reg) +#define i915_mmio_reg_equal(a, b) (i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b)) +#define i915_mmio_reg_valid(r) (!i915_mmio_reg_equal(r, INVALID_MMIO_REG)) #define VLV_DISPLAY_BASE 0x180000 diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 62fad16a55e8..f949a9495758 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1621,6 +1621,20 @@ i915_request_await_object(struct i915_request *to, return ret; } +static void i915_request_await_huc(struct i915_request *rq) +{ + struct intel_huc *huc = &rq->context->engine->gt->uc.huc; + + /* don't stall kernel submissions! */ + if (!rcu_access_pointer(rq->context->gem_context)) + return; + + if (intel_huc_wait_required(huc)) + i915_sw_fence_await_sw_fence(&rq->submit, + &huc->delayed_load.fence, + &rq->hucq); +} + static struct i915_request * __i915_request_ensure_parallel_ordering(struct i915_request *rq, struct intel_timeline *timeline) @@ -1703,6 +1717,16 @@ __i915_request_add_to_timeline(struct i915_request *rq) struct i915_request *prev; /* + * Media workloads may require HuC, so stall them until HuC loading is + * complete. Note that HuC not being loaded when a user submission + * arrives can only happen when HuC is loaded via GSC and in that case + * we still expect the window between us starting to accept submissions + * and HuC loading completion to be small (a few hundred ms). 
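The i915_reg_defs.h hunk above replaces the inline register helpers with a _Generic() selection so a single i915_mmio_reg_offset() macro accepts both i915_reg_t and the new i915_mcr_reg_t. A stripped-down, standalone analogue of that C11 pattern; the type and field names below are simplified stand-ins, not the driver's.

	#include <stdio.h>

	typedef struct { unsigned int reg; } plain_reg_t;
	typedef struct { unsigned int reg; } mcr_reg_t;

	/* Select the accessor based on the static type of the argument; only
	 * the matching association is evaluated. */
	#define reg_offset(r) \
		_Generic((r), plain_reg_t: (r).reg, mcr_reg_t: (r).reg)

	int main(void)
	{
		plain_reg_t a = { 0x2358 };
		mcr_reg_t b = { 0xe4f0 };

		printf("0x%x 0x%x\n", reg_offset(a), reg_offset(b));
		return 0;
	}

Because both struct types expose the same member, the two associations can share the expression, which is what lets the macro stay agnostic of the MCR vs non-MCR distinction.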
+ */ + if (rq->engine->class == VIDEO_DECODE_CLASS) + i915_request_await_huc(rq); + + /* * Dependency tracking and request ordering along the timeline * is special cased so that we can eliminate redundant ordering * operations while building the request (we know that the timeline diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 47041ec68df8..f5e1bb5e857a 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -348,6 +348,11 @@ struct i915_request { #define GUC_PRIO_FINI 0xfe u8 guc_prio; + /** + * @hucq: wait queue entry used to wait on the HuC load to complete + */ + wait_queue_entry_t hucq; + I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 9ddb3e743a3e..b0a1db44f895 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -9,7 +9,8 @@ #include <linux/pfn.h> #include <linux/scatterlist.h> -#include <linux/swiotlb.h> +#include <linux/dma-mapping.h> +#include <xen/xen.h> #include "i915_gem.h" @@ -127,19 +128,26 @@ static inline unsigned int i915_sg_dma_sizes(struct scatterlist *sg) return page_sizes; } -static inline unsigned int i915_sg_segment_size(void) +static inline unsigned int i915_sg_segment_size(struct device *dev) { - unsigned int size = swiotlb_max_segment(); - - if (size == 0) - size = UINT_MAX; - - size = rounddown(size, PAGE_SIZE); - /* swiotlb_max_segment_size can return 1 byte when it means one page. */ - if (size < PAGE_SIZE) - size = PAGE_SIZE; - - return size; + size_t max = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev)); + + /* + * For Xen PV guests pages aren't contiguous in DMA (machine) address + * space. The DMA API takes care of that both in dma_alloc_* (by + * calling into the hypervisor to make the pages contiguous) and in + * dma_map_* (by bounce buffering). But i915 abuses ignores the + * coherency aspects of the DMA API and thus can't cope with bounce + * buffering actually happening, so add a hack here to force small + * allocations and mappings when running in PV mode on Xen. + * + * Note this will still break if bounce buffering is required for other + * reasons, like confidential computing hypervisors or PCIe root ports + * with addressing limitations. 
+ */ + if (xen_pv_domain()) + max = PAGE_SIZE; + return round_down(max, PAGE_SIZE); } bool i915_sg_trim(struct sg_table *orig_st); diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index f54de0499be7..bdf3e22c0a34 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -92,12 +92,14 @@ int __i915_subtests(const char *caller, T, ARRAY_SIZE(T), data) #define i915_live_subtests(T, data) ({ \ typecheck(struct drm_i915_private *, data); \ + (data)->gt[0]->uc.guc.submission_state.sched_disable_delay_ms = 0; \ __i915_subtests(__func__, \ __i915_live_setup, __i915_live_teardown, \ T, ARRAY_SIZE(T), data); \ }) #define intel_gt_live_subtests(T, data) ({ \ typecheck(struct intel_gt *, data); \ + (data)->uc.guc.submission_state.sched_disable_delay_ms = 0; \ __i915_subtests(__func__, \ __intel_gt_live_setup, __intel_gt_live_teardown, \ T, ARRAY_SIZE(T), data); \ diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 37b5c9e9d260..c70a02517e02 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -671,21 +671,6 @@ TRACE_EVENT_CONDITION(i915_reg_rw, (u32)(__entry->val >> 32)) ); -TRACE_EVENT(intel_gpu_freq_change, - TP_PROTO(u32 freq), - TP_ARGS(freq), - - TP_STRUCT__entry( - __field(u32, freq) - ), - - TP_fast_assign( - __entry->freq = freq; - ), - - TP_printk("new_freq=%u", __entry->freq) -); - /** * DOC: i915_ppgtt_create and i915_ppgtt_release tracepoints * diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index f17c09ead7d7..c39488eb9eeb 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -776,12 +776,6 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj)); - /* - * for compact-pt we round up the reservation to prevent - * any smaller pages being used within the same PDE - */ - if (NEEDS_COMPACT_PT(vma->vm->i915)) - size = round_up(size, alignment); /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain @@ -820,7 +814,8 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * forseeable future. See also i915_ggtt_offset(). 
*/ if (upper_32_bits(end - 1) && - vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + vma->page_sizes.sg > I915_GTT_PAGE_SIZE && + !HAS_64K_PAGES(vma->vm->i915)) { /* * We can't mix 64K and 4K PTEs in the same page-table * (2M block), and so to avoid the ugliness and diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 7b5dd8e21d7a..d588e5fd2eea 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -146,7 +146,6 @@ enum intel_ppgtt_type { /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ func(has_64k_pages); \ - func(needs_compact_pt); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ func(has_3d_pipeline); \ @@ -165,6 +164,8 @@ enum intel_ppgtt_type { func(has_logical_ring_elsq); \ func(has_media_ratio_mode); \ func(has_mslice_steering); \ + func(has_oa_bpc_reporting); \ + func(has_oa_slice_contrib_limits); \ func(has_one_eu_per_fuse_bit); \ func(has_pxp); \ func(has_rc6); \ diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c index 8279dc580a3e..638b77d64bf4 100644 --- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c +++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c @@ -102,7 +102,7 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(_MMIO(0x2438)); MMIO_D(_MMIO(0x243c)); MMIO_D(_MMIO(0x7018)); - MMIO_D(HALF_SLICE_CHICKEN3); + MMIO_D(HSW_HALF_SLICE_CHICKEN3); MMIO_D(GEN7_HALF_SLICE_CHICKEN1); /* display */ MMIO_F(_MMIO(0x60220), 0x20); diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h b/drivers/gpu/drm/i915/intel_mchbar_regs.h index ffc702b79579..f93e9af43ac3 100644 --- a/drivers/gpu/drm/i915/intel_mchbar_regs.h +++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h @@ -189,6 +189,21 @@ #define DG1_QCLK_RATIO_MASK REG_GENMASK(9, 2) #define DG1_QCLK_REFERENCE REG_BIT(10) +/* + * *_PACKAGE_POWER_SKU - SKU power and timing parameters. 
+ */ +#define PCU_PACKAGE_POWER_SKU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5930) +#define PKG_PKG_TDP GENMASK_ULL(14, 0) +#define PKG_MAX_WIN GENMASK_ULL(54, 48) +#define PKG_MAX_WIN_X GENMASK_ULL(54, 53) +#define PKG_MAX_WIN_Y GENMASK_ULL(52, 48) + +#define PCU_PACKAGE_POWER_SKU_UNIT _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5938) +#define PKG_PWR_UNIT REG_GENMASK(3, 0) +#define PKG_ENERGY_UNIT REG_GENMASK(12, 8) +#define PKG_TIME_UNIT REG_GENMASK(19, 16) +#define PCU_PACKAGE_ENERGY_STATUS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x593c) + #define GEN6_GT_PERF_STATUS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5948) #define GEN6_RP_STATE_LIMITS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994) #define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) @@ -198,6 +213,12 @@ #define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) +#define PCU_PACKAGE_RAPL_LIMIT _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a0) +#define PKG_PWR_LIM_1 REG_GENMASK(14, 0) +#define PKG_PWR_LIM_1_EN REG_BIT(15) +#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17) +#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22) +#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17) /* snb MCH registers for priority tuning */ #define MCH_SSKPD _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10) diff --git a/drivers/gpu/drm/i915/intel_pci_config.h b/drivers/gpu/drm/i915/intel_pci_config.h index 4977a524ce6f..23b8e519f333 100644 --- a/drivers/gpu/drm/i915/intel_pci_config.h +++ b/drivers/gpu/drm/i915/intel_pci_config.h @@ -7,11 +7,29 @@ #define __INTEL_PCI_CONFIG_H__ /* PCI BARs */ -#define GTTMMADR_BAR 0 -#define GEN2_GTTMMADR_BAR 1 -#define GFXMEM_BAR 2 -#define GTT_APERTURE_BAR GFXMEM_BAR -#define GEN12_LMEM_BAR GFXMEM_BAR +#define GEN2_GMADR_BAR 0 +#define GEN2_MMADR_BAR 1 /* MMIO+GTT, despite the name */ +#define GEN2_IO_BAR 2 /* 85x/865 */ + +#define GEN3_MMADR_BAR 0 /* MMIO only */ +#define GEN3_IO_BAR 1 +#define GEN3_GMADR_BAR 2 +#define GEN3_GTTADR_BAR 3 /* GTT only */ + +#define GEN4_GTTMMADR_BAR 0 /* MMIO+GTT */ +#define GEN4_GMADR_BAR 2 +#define GEN4_IO_BAR 4 + +#define GEN12_LMEM_BAR 2 + +static inline int intel_mmio_bar(int graphics_ver) +{ + switch (graphics_ver) { + case 2: return GEN2_MMADR_BAR; + case 3: return GEN3_MMADR_BAR; + default: return GEN4_GTTMMADR_BAR; + } +} /* BSM in include/drm/i915_drm.h */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 19d4a88184d7..ee34e2785636 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -30,6 +30,8 @@ #include "display/skl_watermark.h" #include "gt/intel_engine_regs.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "i915_drv.h" @@ -58,25 +60,20 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) * Must match Sampler, Pixel Back End, and Media. See * WaCompressedResourceSamplerPbeMediaNewHashMode. 
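The PCU_PACKAGE_POWER_SKU_UNIT fields added above carry the scaling units for the power/energy registers that the new hwmon code reads. A sketch of decoding them, assuming the usual RAPL-style encoding where each field is a negative power-of-two exponent; the raw register value and counter value below are made-up examples.

	#include <stdint.h>
	#include <stdio.h>

	/* Field positions mirror PCU_PACKAGE_POWER_SKU_UNIT above. */
	#define PWR_UNIT(x)	((x) & 0xfu)		/* bits 3:0  */
	#define ENERGY_UNIT(x)	(((x) >> 8) & 0x1fu)	/* bits 12:8 */

	int main(void)
	{
		/* Assumed readout: power unit 2^-3 W, energy unit 2^-14 J. */
		unsigned int unit_reg = (14u << 8) | 3u;
		/* Assumed PACKAGE_ENERGY_STATUS counter value. */
		unsigned long long energy_count = 123456;

		/* One count is 1 / 2^ENERGY_UNIT joules under this encoding,
		 * so scale to microjoules before shifting down. */
		unsigned long long uj = (energy_count * 1000000ull)
					>> ENERGY_UNIT(unit_reg);

		printf("power unit shift %u, energy %llu uJ\n",
		       PWR_UNIT(unit_reg), uj);
		return 0;
	}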
*/ - intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1, - intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | - SKL_DE_COMPRESSED_HASH_MODE); + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, SKL_DE_COMPRESSED_HASH_MODE); } /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */ - intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1, - intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, SKL_EDP_PSR_FIX_RDWRAP); /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */ - intel_uncore_write(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, - intel_uncore_read(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); + intel_uncore_rmw(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, 0, MASK_WAKEMEM); /* * WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl * Display WA #0859: skl,bxt,kbl,glk,cfl */ - intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) | - DISP_FBC_MEMORY_WAKE); + intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_MEMORY_WAKE); } static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) @@ -84,15 +81,13 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) gen9_init_clock_gating(dev_priv); /* WaDisableSDEUnitClockGating:bxt */ - intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* * FIXME: * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only. */ - intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | - GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); + intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); /* * Wa: Backlight PWM may stop in the asserted state, causing backlight @@ -113,16 +108,13 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) * WaFbcTurnOffFbcWatermark:bxt * Display WA #0562: bxt */ - intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) | - DISP_FBC_WM_DIS); + intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS); /* * WaFbcHighMemBwCorruptionAvoidance:bxt * Display WA #0883: bxt */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), - intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) | - DPFC_DISABLE_DUMMY0); + intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), 0, DPFC_DISABLE_DUMMY0); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4053,9 +4045,9 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) */ static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) { - intel_uncore_write(&dev_priv->uncore, WM3_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM3_LP_ILK) & ~WM_LP_ENABLE); - intel_uncore_write(&dev_priv->uncore, WM2_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM2_LP_ILK) & ~WM_LP_ENABLE); - intel_uncore_write(&dev_priv->uncore, WM1_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM1_LP_ILK) & ~WM_LP_ENABLE); + intel_uncore_rmw(&dev_priv->uncore, WM3_LP_ILK, WM_LP_ENABLE, 0); + intel_uncore_rmw(&dev_priv->uncore, WM2_LP_ILK, WM_LP_ENABLE, 0); + intel_uncore_rmw(&dev_priv->uncore, WM1_LP_ILK, WM_LP_ENABLE, 0); /* * Don't touch WM_LP_SPRITE_ENABLE here. 
@@ -4109,9 +4101,7 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) enum pipe pipe; for_each_pipe(dev_priv, pipe) { - intel_uncore_write(&dev_priv->uncore, DSPCNTR(pipe), - intel_uncore_read(&dev_priv->uncore, DSPCNTR(pipe)) | - DISP_TRICKLE_FEED_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, DSPCNTR(pipe), 0, DISP_TRICKLE_FEED_DISABLE); intel_uncore_rmw(&dev_priv->uncore, DSPSURF(pipe), 0, 0); intel_uncore_posting_read(&dev_priv->uncore, DSPSURF(pipe)); @@ -4160,19 +4150,13 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) */ if (IS_IRONLAKE_M(dev_priv)) { /* WaFbcAsynchFlipDisableFbcQueue:ilk */ - intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, - intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS); - intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, - intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE); + intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, 0, ILK_FBCQ_DIS); + intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_DPARB_GATE); } intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, dspclk_gate); - intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, - intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); + intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_ELPIN_409_SELECT); g4x_disable_trickle_feed(dev_priv); @@ -4192,8 +4176,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | PCH_DPLUNIT_CLOCK_GATE_DISABLE | PCH_CPUNIT_CLOCK_GATE_DISABLE); - intel_uncore_write(&dev_priv->uncore, SOUTH_CHICKEN2, intel_uncore_read(&dev_priv->uncore, SOUTH_CHICKEN2) | - DPLS_EDP_PPS_FIX_DIS); + intel_uncore_rmw(&dev_priv->uncore, SOUTH_CHICKEN2, 0, DPLS_EDP_PPS_FIX_DIS); /* The below fixes the weird display corruption, a few pixels shifted * downward, on (only) LVDS of some HP laptops with IVY. */ @@ -4231,9 +4214,7 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, dspclk_gate); - intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, - intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); + intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_ELPIN_409_SELECT); intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | @@ -4293,14 +4274,12 @@ static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) * disabled when not needed anymore in order to save power. 
*/ if (HAS_PCH_LPT_LP(dev_priv)) - intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, - intel_uncore_read(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D) | - PCH_LP_PARTITION_LEVEL_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, + 0, PCH_LP_PARTITION_LEVEL_DISABLE); /* WADPOClockGatingDisable:hsw */ - intel_uncore_write(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A), - intel_uncore_read(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A)) | - TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A), + 0, TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); } static void lpt_suspend_hw(struct drm_i915_private *dev_priv) @@ -4321,22 +4300,22 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, u32 val; /* WaTempDisableDOPClkGating:bdw */ - misccpctl = intel_uncore_rmw(&dev_priv->uncore, GEN7_MISCCPCTL, - GEN7_DOP_CLOCK_GATE_ENABLE, 0); + misccpctl = intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL, + GEN8_DOP_CLOCK_GATE_ENABLE, 0); - val = intel_uncore_read(&dev_priv->uncore, GEN8_L3SQCREG1); + val = intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1); val &= ~L3_PRIO_CREDITS_MASK; val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits); val |= L3_HIGH_PRIO_CREDITS(high_prio_credits); - intel_uncore_write(&dev_priv->uncore, GEN8_L3SQCREG1, val); + intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_L3SQCREG1, val); /* * Wait at least 100 clocks before re-enabling clock gating. * See the definition of L3SQCREG1 in BSpec. */ - intel_uncore_posting_read(&dev_priv->uncore, GEN8_L3SQCREG1); + intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1); udelay(1); - intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, misccpctl); + intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_MISCCPCTL, misccpctl); } static void icl_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4359,8 +4338,7 @@ static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) /* Wa_1409825376:tgl (pre-prod)*/ if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) - intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | - TGL_VRH_GATING_DIS); + intel_uncore_rmw(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, 0, TGL_VRH_GATING_DIS); /* Wa_14013723622:tgl,rkl,dg1,adl-s */ if (DISPLAY_VER(dev_priv) == 12) @@ -4385,8 +4363,7 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) /* Wa_1409836686:dg1[a0] */ if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0)) - intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | - DPT_GATING_DIS); + intel_uncore_rmw(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, 0, DPT_GATING_DIS); } static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4428,8 +4405,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) return; /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */ - intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, intel_uncore_read(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D) | - CNP_PWM_CGE_GATING_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, 0, CNP_PWM_CGE_GATING_DISABLE); } static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4438,23 +4414,20 @@ static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) gen9_init_clock_gating(dev_priv); /* WAC6entrylatency:cfl */ - intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) | - 
FBC_LLC_FULLY_OPEN); + intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN); /* * WaFbcTurnOffFbcWatermark:cfl * Display WA #0562: cfl */ - intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) | - DISP_FBC_WM_DIS); + intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS); /* * WaFbcNukeOnHostModify:cfl * Display WA #0873: cfl */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), - intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) | - DPFC_NUKE_ON_ANY_MODIFICATION); + intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), + 0, DPFC_NUKE_ON_ANY_MODIFICATION); } static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4462,33 +4435,30 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) gen9_init_clock_gating(dev_priv); /* WAC6entrylatency:kbl */ - intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) | - FBC_LLC_FULLY_OPEN); + intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN); /* WaDisableSDEUnitClockGating:kbl */ if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0)) - intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, + 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0)) - intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | - GEN6_GAMUNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1, + 0, GEN6_GAMUNIT_CLOCK_GATE_DISABLE); /* * WaFbcTurnOffFbcWatermark:kbl * Display WA #0562: kbl */ - intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) | - DISP_FBC_WM_DIS); + intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS); /* * WaFbcNukeOnHostModify:kbl * Display WA #0873: kbl */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), - intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) | - DPFC_NUKE_ON_ANY_MODIFICATION); + intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), + 0, DPFC_NUKE_ON_ANY_MODIFICATION); } static void skl_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4496,35 +4466,30 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) gen9_init_clock_gating(dev_priv); /* WaDisableDopClockGating:skl */ - intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, intel_uncore_read(&dev_priv->uncore, GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE); + intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL, + GEN8_DOP_CLOCK_GATE_ENABLE, 0); /* WAC6entrylatency:skl */ - intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) | - FBC_LLC_FULLY_OPEN); + intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN); /* * WaFbcTurnOffFbcWatermark:skl * Display WA #0562: skl */ - intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) | - DISP_FBC_WM_DIS); + intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS); /* * WaFbcNukeOnHostModify:skl * Display WA #0873: skl */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), - intel_uncore_read(&dev_priv->uncore, 
ILK_DPFC_CHICKEN(INTEL_FBC_A)) | - DPFC_NUKE_ON_ANY_MODIFICATION); + intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), + 0, DPFC_NUKE_ON_ANY_MODIFICATION); /* * WaFbcHighMemBwCorruptionAvoidance:skl * Display WA #0883: skl */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), - intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) | - DPFC_DISABLE_DUMMY0); + intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), 0, DPFC_DISABLE_DUMMY0); } static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4532,43 +4497,37 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) enum pipe pipe; /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ - intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), - intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) | - HSW_FBCQ_DIS); + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), 0, HSW_FBCQ_DIS); /* WaSwitchSolVfFArbitrationPriority:bdw */ - intel_uncore_write(&dev_priv->uncore, GAM_ECOCHK, intel_uncore_read(&dev_priv->uncore, GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); + intel_uncore_rmw(&dev_priv->uncore, GAM_ECOCHK, 0, HSW_ECOCHK_ARB_PRIO_SOL); /* WaPsrDPAMaskVBlankInSRD:bdw */ - intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1, - intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, DPA_MASK_VBLANK_SRD); for_each_pipe(dev_priv, pipe) { /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ - intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), - intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe)) | - BDW_DPRS_MASK_VBLANK_SRD); + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), + 0, BDW_DPRS_MASK_VBLANK_SRD); } /* WaVSRefCountFullforceMissDisable:bdw */ /* WaDSRefCountFullforceMissDisable:bdw */ - intel_uncore_write(&dev_priv->uncore, GEN7_FF_THREAD_MODE, - intel_uncore_read(&dev_priv->uncore, GEN7_FF_THREAD_MODE) & - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); + intel_uncore_rmw(&dev_priv->uncore, GEN7_FF_THREAD_MODE, + GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME, 0); intel_uncore_write(&dev_priv->uncore, RING_PSMI_CTL(RENDER_RING_BASE), _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); /* WaDisableSDEUnitClockGating:bdw */ - intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaProgramL3SqcReg1Default:bdw */ gen8_set_l3sqc_credits(dev_priv, 30, 2); /* WaKVMNotificationOnConfigChange:bdw */ - intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR2_1, intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR2_1) - | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR2_1, + 0, KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); lpt_init_clock_gating(dev_priv); @@ -4577,24 +4536,20 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP * clock gating. 
*/ - intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, - intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1, 0, GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ - intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), - intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) | - HSW_FBCQ_DIS); + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), 0, HSW_FBCQ_DIS); /* This is required by WaCatErrorRejectionIssue:hsw */ - intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + 0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); /* WaSwitchSolVfFArbitrationPriority:hsw */ - intel_uncore_write(&dev_priv->uncore, GAM_ECOCHK, intel_uncore_read(&dev_priv->uncore, GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); + intel_uncore_rmw(&dev_priv->uncore, GAM_ECOCHK, 0, HSW_ECOCHK_ARB_PRIO_SOL); lpt_init_clock_gating(dev_priv); } @@ -4604,9 +4559,7 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaFbcAsynchFlipDisableFbcQueue:ivb */ - intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, - intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS); + intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, 0, ILK_FBCQ_DIS); /* WaDisableBackToBackFlipFix:ivb */ intel_uncore_write(&dev_priv->uncore, IVB_CHICKEN3, @@ -4632,9 +4585,8 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) GEN6_RCZUNIT_CLOCK_GATE_DISABLE); /* This is required by WaCatErrorRejectionIssue:ivb */ - intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + 0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); g4x_disable_trickle_feed(dev_priv); @@ -4659,9 +4611,8 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); /* This is required by WaCatErrorRejectionIssue:vlv */ - intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + 0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 
@@ -4673,8 +4624,7 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) /* WaDisableL3Bank2xClockGate:vlv * Disabling L3 clock gating- MMIO 940c[25] = 1 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ - intel_uncore_write(&dev_priv->uncore, GEN7_UCGCTL4, - intel_uncore_read(&dev_priv->uncore, GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN7_UCGCTL4, 0, GEN7_L3BANK2X_CLOCK_GATE_DISABLE); /* * WaDisableVLVClockGating_VBIIssue:vlv @@ -4688,21 +4638,18 @@ static void chv_init_clock_gating(struct drm_i915_private *dev_priv) { /* WaVSRefCountFullforceMissDisable:chv */ /* WaDSRefCountFullforceMissDisable:chv */ - intel_uncore_write(&dev_priv->uncore, GEN7_FF_THREAD_MODE, - intel_uncore_read(&dev_priv->uncore, GEN7_FF_THREAD_MODE) & - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); + intel_uncore_rmw(&dev_priv->uncore, GEN7_FF_THREAD_MODE, + GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME, 0); /* WaDisableSemaphoreAndSyncFlipWait:chv */ intel_uncore_write(&dev_priv->uncore, RING_PSMI_CTL(RENDER_RING_BASE), _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); /* WaDisableCSUnitClockGating:chv */ - intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); /* WaDisableSDEUnitClockGating:chv */ - intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* * WaProgramL3SqcReg1Default:chv diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 744cca507946..129746713d07 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -633,6 +633,8 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) runtime_pm); int count = atomic_read(&rpm->wakeref_count); + intel_wakeref_auto_fini(&rpm->userfault_wakeref); + drm_WARN(&i915->drm, count, "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n", intel_rpm_raw_wakeref_count(count), @@ -652,4 +654,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm) rpm->available = HAS_RUNTIME_PM(i915); init_intel_runtime_pm_wakeref(rpm); + INIT_LIST_HEAD(&rpm->lmem_userfault_list); + spin_lock_init(&rpm->lmem_userfault_lock); + intel_wakeref_auto_init(&rpm->userfault_wakeref, rpm); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h index d9160e3ff4af..98b8b28baaa1 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.h +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h @@ -53,6 +53,28 @@ struct intel_runtime_pm { bool irqs_enabled; bool no_wakeref_tracking; + /* + * Protects access to lmem usefault list. + * It is required, if we are outside of the runtime suspend path, + * access to @lmem_userfault_list requires always first grabbing the + * runtime pm, to ensure we can't race against runtime suspend. + * Once we have that we also need to grab @lmem_userfault_lock, + * at which point we have exclusive access. + * The runtime suspend path is special since it doesn't really hold any locks, + * but instead has exclusive access by virtue of all other accesses requiring + * holding the runtime pm wakeref. 
+ */ + spinlock_t lmem_userfault_lock; + + /* + * Keep list of userfaulted gem obj, which require to release their + * mmap mappings at runtime suspend path. + */ + struct list_head lmem_userfault_list; + + /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */ + struct intel_wakeref_auto userfault_wakeref; + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) /* * To aide detection of wakeref leaks and general misuse, we diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 5cd423c7b646..2a3e2869fe71 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -104,6 +104,7 @@ static const char * const forcewake_domain_names[] = { "vebox1", "vebox2", "vebox3", + "gsc", }; const char * @@ -888,10 +889,13 @@ void assert_forcewakes_active(struct intel_uncore *uncore, spin_unlock_irq(&uncore->lock); } -/* We give fast paths for the really cool registers */ +/* + * We give fast paths for the really cool registers. The second range includes + * media domains (and the GSC starting from Xe_LPM+) + */ #define NEEDS_FORCE_WAKE(reg) ({ \ u32 __reg = (reg); \ - __reg < 0x40000 || __reg >= GEN11_BSD_RING_BASE; \ + __reg < 0x40000 || __reg >= 0x116000; \ }) static int fw_range_cmp(u32 offset, const struct intel_forcewake_range *entry) @@ -1131,6 +1135,45 @@ static const struct i915_range pvc_shadowed_regs[] = { { .start = 0x1F8510, .end = 0x1F8550 }, }; +static const struct i915_range mtl_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2510, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0xA188, .end = 0xA188 }, + { .start = 0xA278, .end = 0xA278 }, + { .start = 0xA540, .end = 0xA56C }, + { .start = 0xC050, .end = 0xC050 }, + { .start = 0xC340, .end = 0xC340 }, + { .start = 0xC4C8, .end = 0xC4C8 }, + { .start = 0xC4E0, .end = 0xC4E0 }, + { .start = 0xC600, .end = 0xC600 }, + { .start = 0xC658, .end = 0xC658 }, + { .start = 0xCFD4, .end = 0xCFDC }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22510, .end = 0x22550 }, +}; + +static const struct i915_range xelpmp_shadowed_regs[] = { + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0510, .end = 0x1C0550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8510, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0510, .end = 0x1D0550 }, + { .start = 0x38A008, .end = 0x38A00C }, + { .start = 0x38A188, .end = 0x38A188 }, + { .start = 0x38A278, .end = 0x38A278 }, + { .start = 0x38A540, .end = 0x38A56C }, + { .start = 0x38A618, .end = 0x38A618 }, + { .start = 0x38C050, .end = 0x38C050 }, + { .start = 0x38C340, .end = 0x38C340 }, + { .start = 0x38C4C8, .end = 0x38C4C8 }, + { .start = 0x38C4E0, .end = 0x38C4E4 }, + { .start = 0x38C600, .end = 0x38C600 }, + { .start = 0x38C658, .end = 0x38C658 }, + { .start = 0x38CFD4, .end = 0x38CFDC }, +}; + static int mmio_range_cmp(u32 key, const struct i915_range *range) { if (key < range->start) @@ -1639,25 +1682,27 @@ static const struct intel_forcewake_range __pvc_fw_ranges[] = { GEN_FW_RANGE(0x12000, 0x12fff, 0), /* 0x12000 - 0x127ff: always on 0x12800 - 0x12fff: reserved */ - GEN_FW_RANGE(0x13000, 0x23fff, FORCEWAKE_GT), /* + GEN_FW_RANGE(0x13000, 0x19fff, FORCEWAKE_GT), /* 0x13000 - 0x135ff: gt 0x13600 - 0x147ff: reserved 0x14800 - 0x153ff: gt - 0x15400 - 0x19fff: reserved - 0x1a000 - 0x1ffff: gt - 0x20000 - 0x21fff: reserved - 0x22000 - 0x23fff: gt */ + 0x15400 - 0x19fff: reserved */ + GEN_FW_RANGE(0x1a000, 0x21fff, FORCEWAKE_RENDER), /* + 
0x1a000 - 0x1ffff: render + 0x20000 - 0x21fff: reserved */ + GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT), GEN_FW_RANGE(0x24000, 0x2417f, 0), /* 24000 - 0x2407f: always on 24080 - 0x2417f: reserved */ - GEN_FW_RANGE(0x24180, 0x3ffff, FORCEWAKE_GT), /* + GEN_FW_RANGE(0x24180, 0x25fff, FORCEWAKE_GT), /* 0x24180 - 0x241ff: gt 0x24200 - 0x251ff: reserved 0x25200 - 0x252ff: gt - 0x25300 - 0x25fff: reserved - 0x26000 - 0x27fff: gt - 0x28000 - 0x2ffff: reserved - 0x30000 - 0x3ffff: gt */ + 0x25300 - 0x25fff: reserved */ + GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /* + 0x26000 - 0x27fff: render + 0x28000 - 0x2ffff: reserved */ + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), GEN_FW_RANGE(0x40000, 0x1bffff, 0), GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /* 0x1c0000 - 0x1c2bff: VD0 @@ -1679,6 +1724,162 @@ static const struct intel_forcewake_range __pvc_fw_ranges[] = { GEN_FW_RANGE(0x3e0000, 0x3effff, FORCEWAKE_GT), }; +static const struct intel_forcewake_range __mtl_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, 0), + GEN_FW_RANGE(0xb00, 0xbff, FORCEWAKE_GT), + GEN_FW_RANGE(0xc00, 0xfff, 0), + GEN_FW_RANGE(0x1000, 0x1fff, FORCEWAKE_GT), + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_GT), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_GT), /* + 0x4000 - 0x48ff: render + 0x4900 - 0x51ff: reserved */ + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), /* + 0x5200 - 0x53ff: render + 0x5400 - 0x54ff: reserved + 0x5500 - 0x7fff: render */ + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT), + GEN_FW_RANGE(0x8140, 0x817f, FORCEWAKE_RENDER), /* + 0x8140 - 0x815f: render + 0x8160 - 0x817f: reserved */ + GEN_FW_RANGE(0x8180, 0x81ff, 0), + GEN_FW_RANGE(0x8200, 0x94cf, FORCEWAKE_GT), /* + 0x8200 - 0x87ff: gt + 0x8800 - 0x8dff: reserved + 0x8e00 - 0x8f7f: gt + 0x8f80 - 0x8fff: reserved + 0x9000 - 0x947f: gt + 0x9480 - 0x94cf: reserved */ + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x9560, 0x967f, 0), /* + 0x9560 - 0x95ff: always on + 0x9600 - 0x967f: reserved */ + GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /* + 0x9680 - 0x96ff: render + 0x9700 - 0x97ff: reserved */ + GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /* + 0x9800 - 0xb4ff: gt + 0xb500 - 0xbfff: reserved + 0xc000 - 0xcfff: gt */ + GEN_FW_RANGE(0xd000, 0xd7ff, 0), /* + 0xd000 - 0xd3ff: always on + 0xd400 - 0xd7ff: reserved */ + GEN_FW_RANGE(0xd800, 0xd87f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xd880, 0xdbff, FORCEWAKE_GT), + GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /* + 0xdd00 - 0xddff: gt + 0xde00 - 0xde7f: reserved */ + GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /* + 0xde80 - 0xdfff: render + 0xe000 - 0xe0ff: reserved + 0xe100 - 0xe8ff: render */ + GEN_FW_RANGE(0xe900, 0xe9ff, FORCEWAKE_GT), + GEN_FW_RANGE(0xea00, 0x147ff, 0), /* + 0xea00 - 0x11fff: reserved + 0x12000 - 0x127ff: always on + 0x12800 - 0x147ff: reserved */ + GEN_FW_RANGE(0x14800, 0x19fff, FORCEWAKE_GT), /* + 0x14800 - 0x153ff: gt + 0x15400 - 0x19fff: reserved */ + GEN_FW_RANGE(0x1a000, 0x21fff, FORCEWAKE_RENDER), /* + 0x1a000 - 0x1bfff: render + 0x1c000 - 0x21fff: reserved */ + GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT), + GEN_FW_RANGE(0x24000, 0x2ffff, 0), /* + 0x24000 - 0x2407f: always on + 0x24080 - 0x2ffff: reserved */ + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT) +}; + +/* + * Note that the register ranges here are the final offsets after + * translation of the GSI block to the 0x380000 offset. 
+ * + * NOTE: There are a couple MCR ranges near the bottom of this table + * that need to power up either VD0 or VD2 depending on which replicated + * instance of the register we're trying to access. Our forcewake logic + * at the moment doesn't have a good way to take steering into consideration, + * and the driver doesn't even access any registers in those ranges today, + * so for now we just mark those ranges as FORCEWAKE_ALL. That will ensure + * proper operation if we do start using the ranges in the future, and we + * can determine at that time whether it's worth adding extra complexity to + * the forcewake handling to take steering into consideration. + */ +static const struct intel_forcewake_range __xelpmp_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0x115fff, 0), /* render GT range */ + GEN_FW_RANGE(0x116000, 0x11ffff, FORCEWAKE_GSC), /* + 0x116000 - 0x117fff: gsc + 0x118000 - 0x119fff: reserved + 0x11a000 - 0x11efff: gsc + 0x11f000 - 0x11ffff: reserved */ + GEN_FW_RANGE(0x120000, 0x1bffff, 0), /* non-GT range */ + GEN_FW_RANGE(0x1c0000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX0), /* + 0x1c0000 - 0x1c3dff: VD0 + 0x1c3e00 - 0x1c3eff: reserved + 0x1c3f00 - 0x1c3fff: VD0 + 0x1c4000 - 0x1c7fff: reserved */ + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /* + 0x1c8000 - 0x1ca0ff: VE0 + 0x1ca100 - 0x1cbfff: reserved */ + GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX0), /* + 0x1cc000 - 0x1cdfff: VD0 + 0x1ce000 - 0x1cffff: reserved */ + GEN_FW_RANGE(0x1d0000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX2), /* + 0x1d0000 - 0x1d3dff: VD2 + 0x1d3e00 - 0x1d3eff: reserved + 0x1d4000 - 0x1d7fff: VD2 */ + GEN_FW_RANGE(0x1d8000, 0x1da0ff, FORCEWAKE_MEDIA_VEBOX1), + GEN_FW_RANGE(0x1da100, 0x380aff, 0), /* + 0x1da100 - 0x23ffff: reserved + 0x240000 - 0x37ffff: non-GT range + 0x380000 - 0x380aff: reserved */ + GEN_FW_RANGE(0x380b00, 0x380bff, FORCEWAKE_GT), + GEN_FW_RANGE(0x380c00, 0x380fff, 0), + GEN_FW_RANGE(0x381000, 0x38817f, FORCEWAKE_GT), /* + 0x381000 - 0x381fff: gt + 0x382000 - 0x383fff: reserved + 0x384000 - 0x384aff: gt + 0x384b00 - 0x3851ff: reserved + 0x385200 - 0x3871ff: gt + 0x387200 - 0x387fff: reserved + 0x388000 - 0x38813f: gt + 0x388140 - 0x38817f: reserved */ + GEN_FW_RANGE(0x388180, 0x3882ff, 0), /* + 0x388180 - 0x3881ff: always on + 0x388200 - 0x3882ff: reserved */ + GEN_FW_RANGE(0x388300, 0x38955f, FORCEWAKE_GT), /* + 0x388300 - 0x38887f: gt + 0x388880 - 0x388fff: reserved + 0x389000 - 0x38947f: gt + 0x389480 - 0x38955f: reserved */ + GEN_FW_RANGE(0x389560, 0x389fff, 0), /* + 0x389560 - 0x3895ff: always on + 0x389600 - 0x389fff: reserved */ + GEN_FW_RANGE(0x38a000, 0x38cfff, FORCEWAKE_GT), /* + 0x38a000 - 0x38afff: gt + 0x38b000 - 0x38bfff: reserved + 0x38c000 - 0x38cfff: gt */ + GEN_FW_RANGE(0x38d000, 0x38d11f, 0), + GEN_FW_RANGE(0x38d120, 0x391fff, FORCEWAKE_GT), /* + 0x38d120 - 0x38dfff: gt + 0x38e000 - 0x38efff: reserved + 0x38f000 - 0x38ffff: gt + 0x389000 - 0x391fff: reserved */ + GEN_FW_RANGE(0x392000, 0x392fff, 0), /* + 0x392000 - 0x3927ff: always on + 0x392800 - 0x292fff: reserved */ + GEN_FW_RANGE(0x393000, 0x3931ff, FORCEWAKE_GT), + GEN_FW_RANGE(0x393200, 0x39323f, FORCEWAKE_ALL), /* instance-based, see note above */ + GEN_FW_RANGE(0x393240, 0x3933ff, FORCEWAKE_GT), + GEN_FW_RANGE(0x393400, 0x3934ff, FORCEWAKE_ALL), /* instance-based, see note above */ + GEN_FW_RANGE(0x393500, 0x393c7f, 0), /* + 0x393500 - 0x393bff: reserved + 0x393c00 - 0x393c7f: always on */ + GEN_FW_RANGE(0x393c80, 0x393dff, FORCEWAKE_GT), +}; + static void ilk_dummy_write(struct intel_uncore *uncore) { @@ 
-2021,6 +2222,7 @@ static int __fw_domain_init(struct intel_uncore *uncore, BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX2)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX3)); + BUILD_BUG_ON(FORCEWAKE_GSC != (1 << FW_DOMAIN_ID_GSC)); d->mask = BIT(domain_id); @@ -2085,17 +2287,26 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore) (ret ?: (ret = __fw_domain_init((uncore__), (id__), (set__), (ack__)))) if (GRAPHICS_VER(i915) >= 11) { - /* we'll prune the domains of missing engines later */ - intel_engine_mask_t emask = RUNTIME_INFO(i915)->platform_engine_mask; + intel_engine_mask_t emask; int i; + /* we'll prune the domains of missing engines later */ + emask = uncore->gt->info.engine_mask; + uncore->fw_get_funcs = &uncore_get_fallback; - fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, - FORCEWAKE_RENDER_GEN9, - FORCEWAKE_ACK_RENDER_GEN9); - fw_domain_init(uncore, FW_DOMAIN_ID_GT, - FORCEWAKE_GT_GEN9, - FORCEWAKE_ACK_GT_GEN9); + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + fw_domain_init(uncore, FW_DOMAIN_ID_GT, + FORCEWAKE_GT_GEN9, + FORCEWAKE_ACK_GT_MTL); + else + fw_domain_init(uncore, FW_DOMAIN_ID_GT, + FORCEWAKE_GT_GEN9, + FORCEWAKE_ACK_GT_GEN9); + + if (RCS_MASK(uncore->gt) || CCS_MASK(uncore->gt)) + fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, + FORCEWAKE_RENDER_GEN9, + FORCEWAKE_ACK_RENDER_GEN9); for (i = 0; i < I915_MAX_VCS; i++) { if (!__HAS_ENGINE(emask, _VCS(i))) @@ -2113,6 +2324,10 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore) FORCEWAKE_MEDIA_VEBOX_GEN11(i), FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i)); } + + if (uncore->gt->type == GT_MEDIA) + fw_domain_init(uncore, FW_DOMAIN_ID_GSC, + FORCEWAKE_REQ_GSC, FORCEWAKE_ACK_GSC); } else if (IS_GRAPHICS_VER(i915, 9, 10)) { uncore->fw_get_funcs = &uncore_get_fallback; fw_domain_init(uncore, FW_DOMAIN_ID_RENDER, @@ -2300,6 +2515,22 @@ static void uncore_raw_init(struct intel_uncore *uncore) } } +static int uncore_media_forcewake_init(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + + if (MEDIA_VER(i915) >= 13) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __xelpmp_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, xelpmp_shadowed_regs); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + } else { + MISSING_CASE(MEDIA_VER(i915)); + return -ENODEV; + } + + return 0; +} + static int uncore_forcewake_init(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; @@ -2314,7 +2545,14 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) { + if (uncore->gt->type == GT_MEDIA) + return uncore_media_forcewake_init(uncore); + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __mtl_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, mtl_shadowed_regs); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __pvc_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, pvc_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index ddafa4a7ed71..5449146a0624 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -62,6 +62,7 @@ enum forcewake_domain_id { FW_DOMAIN_ID_MEDIA_VEBOX1, FW_DOMAIN_ID_MEDIA_VEBOX2, FW_DOMAIN_ID_MEDIA_VEBOX3, + 
FW_DOMAIN_ID_GSC, FW_DOMAIN_ID_COUNT }; @@ -82,6 +83,7 @@ enum forcewake_domains { FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1), FORCEWAKE_MEDIA_VEBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX2), FORCEWAKE_MEDIA_VEBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX3), + FORCEWAKE_GSC = BIT(FW_DOMAIN_ID_GSC), FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1, }; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 69cdaaddc4a9..5efe61f67546 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -103,19 +103,15 @@ static int create_vcs_context(struct intel_pxp *pxp) static void destroy_vcs_context(struct intel_pxp *pxp) { - intel_engine_destroy_pinned_context(fetch_and_zero(&pxp->ce)); + if (pxp->ce) + intel_engine_destroy_pinned_context(fetch_and_zero(&pxp->ce)); } -void intel_pxp_init(struct intel_pxp *pxp) +static void pxp_init_full(struct intel_pxp *pxp) { struct intel_gt *gt = pxp_to_gt(pxp); int ret; - if (!HAS_PXP(gt->i915)) - return; - - mutex_init(&pxp->tee_mutex); - /* * we'll use the completion to check if there is a termination pending, * so we start it as completed and we reinit it when a termination @@ -124,8 +120,7 @@ void intel_pxp_init(struct intel_pxp *pxp) init_completion(&pxp->termination); complete_all(&pxp->termination); - mutex_init(&pxp->arb_mutex); - INIT_WORK(&pxp->session_work, intel_pxp_session_work); + intel_pxp_session_management_init(pxp); ret = create_vcs_context(pxp); if (ret) @@ -143,11 +138,26 @@ out_context: destroy_vcs_context(pxp); } -void intel_pxp_fini(struct intel_pxp *pxp) +void intel_pxp_init(struct intel_pxp *pxp) { - if (!intel_pxp_is_enabled(pxp)) + struct intel_gt *gt = pxp_to_gt(pxp); + + /* we rely on the mei PXP module */ + if (!IS_ENABLED(CONFIG_INTEL_MEI_PXP)) return; + /* + * If HuC is loaded by GSC but PXP is disabled, we can skip the init of + * the full PXP session/object management and just init the tee channel. 
+ */ + if (HAS_PXP(gt->i915)) + pxp_init_full(pxp); + else if (intel_huc_is_loaded_by_gsc(&gt->uc.huc) && intel_uc_uses_huc(&gt->uc)) + intel_pxp_tee_component_init(pxp); +} + +void intel_pxp_fini(struct intel_pxp *pxp) +{ pxp->arb_is_valid = false; intel_pxp_tee_component_fini(pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 73847e535cab..2da309088c6d 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -12,7 +12,6 @@ struct intel_pxp; struct drm_i915_gem_object; -#ifdef CONFIG_DRM_I915_PXP struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp); bool intel_pxp_is_enabled(const struct intel_pxp *pxp); bool intel_pxp_is_active(const struct intel_pxp *pxp); @@ -32,36 +31,5 @@ int intel_pxp_key_check(struct intel_pxp *pxp, bool assign); void intel_pxp_invalidate(struct intel_pxp *pxp); -#else -static inline void intel_pxp_init(struct intel_pxp *pxp) -{ -} - -static inline void intel_pxp_fini(struct intel_pxp *pxp) -{ -} - -static inline int intel_pxp_start(struct intel_pxp *pxp) -{ - return -ENODEV; -} - -static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) -{ - return false; -} - -static inline bool intel_pxp_is_active(const struct intel_pxp *pxp) -{ - return false; -} - -static inline int intel_pxp_key_check(struct intel_pxp *pxp, - struct drm_i915_gem_object *obj, - bool assign) -{ - return -ENODEV; -} -#endif #endif /* __INTEL_PXP_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c new file mode 100644 index 000000000000..7ec36d94e758 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2021-2022, Intel Corporation. All rights reserved. + */ + +#include "drm/i915_drm.h" +#include "i915_drv.h" + +#include "gem/i915_gem_region.h" +#include "gt/intel_gt.h" + +#include "intel_pxp.h" +#include "intel_pxp_huc.h" +#include "intel_pxp_tee.h" +#include "intel_pxp_types.h" +#include "intel_pxp_tee_interface.h" + +int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp) +{ + struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_huc *huc = &gt->uc.huc; + struct pxp_tee_start_huc_auth_in huc_in = {0}; + struct pxp_tee_start_huc_auth_out huc_out = {0}; + dma_addr_t huc_phys_addr; + u8 client_id = 0; + u8 fence_id = 0; + int err; + + if (!pxp->pxp_component) + return -ENODEV; + + huc_phys_addr = i915_gem_object_get_dma_address(huc->fw.obj, 0); + + /* write the PXP message into the lmem (the sg list) */ + huc_in.header.api_version = PXP_TEE_43_APIVER; + huc_in.header.command_id = PXP_TEE_43_START_HUC_AUTH; + huc_in.header.status = 0; + huc_in.header.buffer_len = sizeof(huc_in.huc_base_address); + huc_in.huc_base_address = huc_phys_addr; + + err = intel_pxp_tee_stream_message(pxp, client_id, fence_id, + &huc_in, sizeof(huc_in), + &huc_out, sizeof(huc_out)); + if (err < 0) { + drm_err(&gt->i915->drm, + "Failed to send HuC load and auth command to GSC [%d]!\n", + err); + return err; + } + + /* + * HuC does sometimes survive suspend/resume (it depends on how "deep" + * a sleep state the device reaches) so we can end up here on resume + * with HuC already loaded, in which case the GSC will return + * PXP_STATUS_OP_NOT_PERMITTED. We can therefore consider the GuC + * correctly transferred in this scenario; if the same error is ever + * returned with HuC not loaded we'll still catch it when we check the + * authentication bit later.
+ */ + if (huc_out.header.status != PXP_STATUS_SUCCESS && + huc_out.header.status != PXP_STATUS_OP_NOT_PERMITTED) { + drm_err(&gt->i915->drm, + "HuC load failed with GSC error = 0x%x\n", + huc_out.header.status); + return -EPROTO; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h new file mode 100644 index 000000000000..e40847a91c39 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2021-2022, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_HUC_H__ +#define __INTEL_PXP_HUC_H__ + +struct intel_pxp; + +int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp); + +#endif /* __INTEL_PXP_HUC_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h index 8b5793654844..8c292dc86f68 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h @@ -27,6 +27,14 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir); static inline void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir) { } + +static inline void intel_pxp_irq_enable(struct intel_pxp *pxp) +{ +} + +static inline void intel_pxp_irq_disable(struct intel_pxp *pxp) +{ +} #endif #endif /* __INTEL_PXP_IRQ_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index c4f5c994ca51..85572360c71a 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -138,7 +138,7 @@ static void pxp_terminate_complete(struct intel_pxp *pxp) complete_all(&pxp->termination); } -void intel_pxp_session_work(struct work_struct *work) +static void pxp_session_work(struct work_struct *work) { struct intel_pxp *pxp = container_of(work, typeof(*pxp), session_work); struct intel_gt *gt = pxp_to_gt(pxp); @@ -173,3 +173,9 @@ void intel_pxp_session_work(struct work_struct *work) intel_runtime_pm_put(gt->uncore->rpm, wakeref); } + +void intel_pxp_session_management_init(struct intel_pxp *pxp) +{ + mutex_init(&pxp->arb_mutex); + INIT_WORK(&pxp->session_work, pxp_session_work); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h index ba4c9d2b94b7..903ac52cffa1 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h @@ -8,8 +8,13 @@ #include <linux/types.h> -struct work_struct; - -void intel_pxp_session_work(struct work_struct *work); +struct intel_pxp; +#ifdef CONFIG_DRM_I915_PXP +void intel_pxp_session_management_init(struct intel_pxp *pxp); +#else +static inline void intel_pxp_session_management_init(struct intel_pxp *pxp) +{ +} +#endif #endif /* __INTEL_PXP_SESSION_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index a90905039216..052fd2f9a583 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -8,11 +8,14 @@ #include <drm/i915_pxp_tee_interface.h> #include <drm/i915_component.h> +#include "gem/i915_gem_lmem.h" + #include "i915_drv.h" #include "intel_pxp.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" #include "intel_pxp_tee_interface.h" +#include "intel_pxp_huc.h" static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev) { @@ -69,6 +72,47 @@ unlock: return ret; } +int intel_pxp_tee_stream_message(struct intel_pxp *pxp, + u8 client_id, u32 fence_id, + void
*msg_in, size_t msg_in_len, + void *msg_out, size_t msg_out_len) +{ + /* TODO: for bigger objects we need to use a sg of 4k pages */ + const size_t max_msg_size = PAGE_SIZE; + struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct i915_pxp_component *pxp_component = pxp->pxp_component; + unsigned int offset = 0; + struct scatterlist *sg; + int ret; + + if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) + return -ENOSPC; + + mutex_lock(&pxp->tee_mutex); + + if (unlikely(!pxp_component || !pxp_component->ops->gsc_command)) { + ret = -ENODEV; + goto unlock; + } + + GEM_BUG_ON(!pxp->stream_cmd.obj); + + sg = i915_gem_object_get_sg_dma(pxp->stream_cmd.obj, 0, &offset); + + memcpy(pxp->stream_cmd.vaddr, msg_in, msg_in_len); + + ret = pxp_component->ops->gsc_command(pxp_component->tee_dev, client_id, + fence_id, sg, msg_in_len, sg); + if (ret < 0) + drm_err(&i915->drm, "Failed to send PXP TEE gsc command\n"); + else + memcpy(msg_out, pxp->stream_cmd.vaddr, msg_out_len); + +unlock: + mutex_unlock(&pxp->tee_mutex); + return ret; +} + /** * i915_pxp_tee_component_bind - bind function to pass the function pointers to pxp_tee * @i915_kdev: pointer to i915 kernel device @@ -84,24 +128,36 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, { struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + struct intel_uc *uc = &pxp_to_gt(pxp)->uc; intel_wakeref_t wakeref; + int ret = 0; mutex_lock(&pxp->tee_mutex); pxp->pxp_component = data; pxp->pxp_component->tee_dev = tee_kdev; mutex_unlock(&pxp->tee_mutex); + if (intel_uc_uses_huc(uc) && intel_huc_is_loaded_by_gsc(&uc->huc)) { + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* load huc via pxp */ + ret = intel_huc_fw_load_and_auth_via_gsc(&uc->huc); + if (ret < 0) + drm_err(&i915->drm, "failed to load huc via gsc %d\n", ret); + } + } + /* if we are suspended, the HW will be re-initialized on resume */ wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm); if (!wakeref) return 0; /* the component is required to fully start the PXP HW */ - intel_pxp_init_hw(pxp); + if (intel_pxp_is_enabled(pxp)) + intel_pxp_init_hw(pxp); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - return 0; + return ret; } static void i915_pxp_tee_component_unbind(struct device *i915_kdev, @@ -111,8 +167,9 @@ static void i915_pxp_tee_component_unbind(struct device *i915_kdev, struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); intel_wakeref_t wakeref; - with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref) - intel_pxp_fini_hw(pxp); + if (intel_pxp_is_enabled(pxp)) + with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref) + intel_pxp_fini_hw(pxp); mutex_lock(&pxp->tee_mutex); pxp->pxp_component = NULL; @@ -124,22 +181,92 @@ static const struct component_ops i915_pxp_tee_component_ops = { .unbind = i915_pxp_tee_component_unbind, }; +static int alloc_streaming_command(struct intel_pxp *pxp) +{ + struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct drm_i915_gem_object *obj = NULL; + void *cmd; + int err; + + pxp->stream_cmd.obj = NULL; + pxp->stream_cmd.vaddr = NULL; + + if (!IS_DGFX(i915)) + return 0; + + /* allocate lmem object of one page for PXP command memory and store it */ + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) { + drm_err(&i915->drm, "Failed to allocate pxp streaming command!\n"); + return PTR_ERR(obj); + } + + err = i915_gem_object_pin_pages_unlocked(obj); + if (err) { + drm_err(&i915->drm, "Failed to 
pin gsc message page!\n"); + goto out_put; + } + + /* map the lmem into the virtual memory pointer */ + cmd = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true)); + if (IS_ERR(cmd)) { + drm_err(&i915->drm, "Failed to map gsc message page!\n"); + err = PTR_ERR(cmd); + goto out_unpin; + } + + memset(cmd, 0, obj->base.size); + + pxp->stream_cmd.obj = obj; + pxp->stream_cmd.vaddr = cmd; + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + return err; +} + +static void free_streaming_command(struct intel_pxp *pxp) +{ + struct drm_i915_gem_object *obj = fetch_and_zero(&pxp->stream_cmd.obj); + + if (!obj) + return; + + i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); +} + int intel_pxp_tee_component_init(struct intel_pxp *pxp) { int ret; struct intel_gt *gt = pxp_to_gt(pxp); struct drm_i915_private *i915 = gt->i915; + mutex_init(&pxp->tee_mutex); + + ret = alloc_streaming_command(pxp); + if (ret) + return ret; + ret = component_add_typed(i915->drm.dev, &i915_pxp_tee_component_ops, I915_COMPONENT_PXP); if (ret < 0) { drm_err(&i915->drm, "Failed to add PXP component (%d)\n", ret); - return ret; + goto out_free; } pxp->pxp_component_added = true; return 0; + +out_free: + free_streaming_command(pxp); + return ret; } void intel_pxp_tee_component_fini(struct intel_pxp *pxp) @@ -151,6 +278,8 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp) component_del(i915->drm.dev, &i915_pxp_tee_component_ops); pxp->pxp_component_added = false; + + free_streaming_command(pxp); } int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h index c136053ce340..aeb3dfe7ce96 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h @@ -14,4 +14,9 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp); int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, int arb_session_id); +int intel_pxp_tee_stream_message(struct intel_pxp *pxp, + u8 client_id, u32 fence_id, + void *msg_in, size_t msg_in_len, + void *msg_out, size_t msg_out_len); + #endif /* __INTEL_PXP_TEE_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h index 36e9b0868f5c..7edc1760f142 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright(c) 2020, Intel Corporation. All rights reserved. + * Copyright(c) 2020-2022, Intel Corporation. All rights reserved. */ #ifndef __INTEL_PXP_TEE_INTERFACE_H__ @@ -9,8 +9,20 @@ #include <linux/types.h> #define PXP_TEE_APIVER 0x40002 +#define PXP_TEE_43_APIVER 0x00040003 #define PXP_TEE_ARB_CMDID 0x1e #define PXP_TEE_ARB_PROTECTION_MODE 0x2 +#define PXP_TEE_43_START_HUC_AUTH 0x0000003A + +/* + * there are a lot of status codes for PXP, but we only define the ones we + * actually can handle in the driver. other failure codes will be printed to + * error msg for debug. 
+ */ +enum pxp_status { + PXP_STATUS_SUCCESS = 0x0, + PXP_STATUS_OP_NOT_PERMITTED = 0x4013 +}; /* PXP TEE message header */ struct pxp_tee_cmd_header { @@ -33,4 +45,13 @@ struct pxp_tee_create_arb_out { struct pxp_tee_cmd_header header; } __packed; +struct pxp_tee_start_huc_auth_in { + struct pxp_tee_cmd_header header; + __le64 huc_base_address; +}; + +struct pxp_tee_start_huc_auth_out { + struct pxp_tee_cmd_header header; +}; + #endif /* __INTEL_PXP_TEE_INTERFACE_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 7ce5f37ee12e..f74b1e11a505 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -53,6 +53,12 @@ struct intel_pxp { /** @tee_mutex: protects the tee channel binding and messaging. */ struct mutex tee_mutex; + /** @stream_cmd: LMEM obj used to send stream PXP commands to the GSC */ + struct { + struct drm_i915_gem_object *obj; /* contains PXP command memory */ + void *vaddr; /* virtual memory for PXP command */ + } stream_cmd; + /** * @hw_state_invalidated: if the HW perceives an attack on the integrity * of the encryption it will invalidate the keys and expect SW to diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e050a2de5fd1..27c733b00976 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -27,6 +27,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/selftests/mock_context.h" #include "gt/intel_context.h" @@ -1113,15 +1114,8 @@ static int misaligned_case(struct i915_address_space *vm, struct intel_memory_re expected_node_size = expected_vma_size; if (HAS_64K_PAGES(vm->i915) && i915_gem_object_is_lmem(obj)) { - /* - * The compact-pt should expand lmem node to 2MB for the ppGTT, - * for all other cases we should only expect 64K. 
- */ expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K); - if (NEEDS_COMPACT_PT(vm->i915) && !i915_is_ggtt(vm)) - expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M); - else - expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K); + expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K); } if (vma->size != expected_vma_size || vma->node.size != expected_node_size) { diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index 429c6d73b159..24dde5531423 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -102,6 +102,12 @@ test_stream(struct i915_perf *perf) I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, }; struct i915_perf_stream *stream; + struct intel_gt *gt; + + if (!props.engine) + return NULL; + + gt = props.engine->gt; if (!oa_config) return NULL; @@ -116,12 +122,12 @@ test_stream(struct i915_perf *perf) stream->perf = perf; - mutex_lock(&perf->lock); + mutex_lock(&gt->perf.lock); if (i915_oa_stream_init(stream, &param, &props)) { kfree(stream); stream = NULL; } - mutex_unlock(&perf->lock); + mutex_unlock(&gt->perf.lock); i915_oa_config_put(oa_config); @@ -130,11 +136,11 @@ test_stream(struct i915_perf *perf) static void stream_destroy(struct i915_perf_stream *stream) { - struct i915_perf *perf = stream->perf; + struct intel_gt *gt = stream->engine->gt; - mutex_lock(&perf->lock); + mutex_lock(&gt->perf.lock); i915_perf_destroy_locked(stream); - mutex_unlock(&perf->lock); + mutex_unlock(&gt->perf.lock); } static int live_sanitycheck(void *arg) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 818a4909c1f3..a46350c37e9d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -299,9 +299,18 @@ __live_request_alloc(struct intel_context *ce) return intel_context_create_request(ce); } -static int __igt_breadcrumbs_smoketest(void *arg) +struct smoke_thread { + struct kthread_worker *worker; + struct kthread_work work; + struct smoketest *t; + bool stop; + int result; +}; + +static void __igt_breadcrumbs_smoketest(struct kthread_work *work) { - struct smoketest *t = arg; + struct smoke_thread *thread = container_of(work, typeof(*thread), work); + struct smoketest *t = thread->t; const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; const unsigned int total = 4 * t->ncontexts + 1; unsigned int num_waits = 0, num_fences = 0; @@ -320,8 +329,10 @@ static int __igt_breadcrumbs_smoketest(void *arg) */ requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); - if (!requests) - return -ENOMEM; + if (!requests) { + thread->result = -ENOMEM; + return; + } order = i915_random_order(total, &prng); if (!order) { @@ -329,7 +340,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) goto out_requests; } - while (!kthread_should_stop()) { + while (!READ_ONCE(thread->stop)) { struct i915_sw_fence *submit, *wait; unsigned int n, count; @@ -437,7 +448,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) kfree(order); out_requests: kfree(requests); - return err; + thread->result = err; } static int mock_breadcrumbs_smoketest(void *arg) @@ -450,7 +461,7 @@ static int mock_breadcrumbs_smoketest(void *arg) .request_alloc = __mock_request_alloc }; unsigned int ncpus = num_online_cpus(); - struct task_struct **threads; + struct smoke_thread *threads; unsigned int n; int ret = 0; @@ -479,28 +490,37 @@ static int mock_breadcrumbs_smoketest(void *arg) } for (n
= 0; n < ncpus; n++) { - threads[n] = kthread_run(__igt_breadcrumbs_smoketest, - &t, "igt/%d", n); - if (IS_ERR(threads[n])) { - ret = PTR_ERR(threads[n]); + struct kthread_worker *worker; + + worker = kthread_create_worker(0, "igt/%d", n); + if (IS_ERR(worker)) { + ret = PTR_ERR(worker); ncpus = n; break; } - get_task_struct(threads[n]); + threads[n].worker = worker; + threads[n].t = &t; + threads[n].stop = false; + threads[n].result = 0; + + kthread_init_work(&threads[n].work, + __igt_breadcrumbs_smoketest); + kthread_queue_work(worker, &threads[n].work); } - yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); for (n = 0; n < ncpus; n++) { int err; - err = kthread_stop(threads[n]); + WRITE_ONCE(threads[n].stop, true); + kthread_flush_work(&threads[n].work); + err = READ_ONCE(threads[n].result); if (err < 0 && !ret) ret = err; - put_task_struct(threads[n]); + kthread_destroy_worker(threads[n].worker); } pr_info("Completed %lu waits for %lu fence across %d cpus\n", atomic_long_read(&t.num_waits), @@ -1419,9 +1439,18 @@ out_free: return err; } -static int __live_parallel_engine1(void *arg) +struct parallel_thread { + struct kthread_worker *worker; + struct kthread_work work; + struct intel_engine_cs *engine; + int result; +}; + +static void __live_parallel_engine1(struct kthread_work *work) { - struct intel_engine_cs *engine = arg; + struct parallel_thread *thread = + container_of(work, typeof(*thread), work); + struct intel_engine_cs *engine = thread->engine; IGT_TIMEOUT(end_time); unsigned long count; int err = 0; @@ -1452,12 +1481,14 @@ static int __live_parallel_engine1(void *arg) intel_engine_pm_put(engine); pr_info("%s: %lu request + sync\n", engine->name, count); - return err; + thread->result = err; } -static int __live_parallel_engineN(void *arg) +static void __live_parallel_engineN(struct kthread_work *work) { - struct intel_engine_cs *engine = arg; + struct parallel_thread *thread = + container_of(work, typeof(*thread), work); + struct intel_engine_cs *engine = thread->engine; IGT_TIMEOUT(end_time); unsigned long count; int err = 0; @@ -1479,7 +1510,7 @@ static int __live_parallel_engineN(void *arg) intel_engine_pm_put(engine); pr_info("%s: %lu requests\n", engine->name, count); - return err; + thread->result = err; } static bool wake_all(struct drm_i915_private *i915) @@ -1505,9 +1536,11 @@ static int wait_for_all(struct drm_i915_private *i915) return -ETIME; } -static int __live_parallel_spin(void *arg) +static void __live_parallel_spin(struct kthread_work *work) { - struct intel_engine_cs *engine = arg; + struct parallel_thread *thread = + container_of(work, typeof(*thread), work); + struct intel_engine_cs *engine = thread->engine; struct igt_spinner spin; struct i915_request *rq; int err = 0; @@ -1520,7 +1553,8 @@ static int __live_parallel_spin(void *arg) if (igt_spinner_init(&spin, engine->gt)) { wake_all(engine->i915); - return -ENOMEM; + thread->result = -ENOMEM; + return; } intel_engine_pm_get(engine); @@ -1553,22 +1587,22 @@ static int __live_parallel_spin(void *arg) out_spin: igt_spinner_fini(&spin); - return err; + thread->result = err; } static int live_parallel_engines(void *arg) { struct drm_i915_private *i915 = arg; - static int (* const func[])(void *arg) = { + static void (* const func[])(struct kthread_work *) = { __live_parallel_engine1, __live_parallel_engineN, __live_parallel_spin, NULL, }; const unsigned int nengines = num_uabi_engines(i915); + struct parallel_thread *threads; struct intel_engine_cs 
*engine; - int (* const *fn)(void *arg); - struct task_struct **tsk; + void (* const *fn)(struct kthread_work *); int err = 0; /* @@ -1576,8 +1610,8 @@ static int live_parallel_engines(void *arg) * tests that we load up the system maximally. */ - tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); - if (!tsk) + threads = kcalloc(nengines, sizeof(*threads), GFP_KERNEL); + if (!threads) return -ENOMEM; for (fn = func; !err && *fn; fn++) { @@ -1594,37 +1628,44 @@ static int live_parallel_engines(void *arg) idx = 0; for_each_uabi_engine(engine, i915) { - tsk[idx] = kthread_run(*fn, engine, - "igt/parallel:%s", - engine->name); - if (IS_ERR(tsk[idx])) { - err = PTR_ERR(tsk[idx]); + struct kthread_worker *worker; + + worker = kthread_create_worker(0, "igt/parallel:%s", + engine->name); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); break; } - get_task_struct(tsk[idx++]); - } - yield(); /* start all threads before we kthread_stop() */ + threads[idx].worker = worker; + threads[idx].result = 0; + threads[idx].engine = engine; + + kthread_init_work(&threads[idx].work, *fn); + kthread_queue_work(worker, &threads[idx].work); + idx++; + } idx = 0; for_each_uabi_engine(engine, i915) { int status; - if (IS_ERR(tsk[idx])) + if (!threads[idx].worker) break; - status = kthread_stop(tsk[idx]); + kthread_flush_work(&threads[idx].work); + status = READ_ONCE(threads[idx].result); if (status && !err) err = status; - put_task_struct(tsk[idx++]); + kthread_destroy_worker(threads[idx++].worker); } if (igt_live_test_end(&t)) err = -EIO; } - kfree(tsk); + kfree(threads); return err; } @@ -1672,7 +1713,7 @@ static int live_breadcrumbs_smoketest(void *arg) const unsigned int ncpus = num_online_cpus(); unsigned long num_waits, num_fences; struct intel_engine_cs *engine; - struct task_struct **threads; + struct smoke_thread *threads; struct igt_live_test live; intel_wakeref_t wakeref; struct smoketest *smoke; @@ -1746,23 +1787,26 @@ static int live_breadcrumbs_smoketest(void *arg) smoke[idx].max_batch, engine->name); for (n = 0; n < ncpus; n++) { - struct task_struct *tsk; + unsigned int i = idx * ncpus + n; + struct kthread_worker *worker; - tsk = kthread_run(__igt_breadcrumbs_smoketest, - &smoke[idx], "igt/%d.%d", idx, n); - if (IS_ERR(tsk)) { - ret = PTR_ERR(tsk); + worker = kthread_create_worker(0, "igt/%d.%d", idx, n); + if (IS_ERR(worker)) { + ret = PTR_ERR(worker); goto out_flush; } - get_task_struct(tsk); - threads[idx * ncpus + n] = tsk; + threads[i].worker = worker; + threads[i].t = &smoke[idx]; + + kthread_init_work(&threads[i].work, + __igt_breadcrumbs_smoketest); + kthread_queue_work(worker, &threads[i].work); } idx++; } - yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); out_flush: @@ -1771,17 +1815,19 @@ out_flush: num_fences = 0; for_each_uabi_engine(engine, i915) { for (n = 0; n < ncpus; n++) { - struct task_struct *tsk = threads[idx * ncpus + n]; + unsigned int i = idx * ncpus + n; int err; - if (!tsk) + if (!threads[i].worker) continue; - err = kthread_stop(tsk); + WRITE_ONCE(threads[i].stop, true); + kthread_flush_work(&threads[i].work); + err = READ_ONCE(threads[i].result); if (err < 0 && !ret) ret = err; - put_task_struct(tsk); + kthread_destroy_worker(threads[i].worker); } num_waits += atomic_long_read(&smoke[idx].num_waits); @@ -2891,9 +2937,18 @@ out: return err; } -static int p_sync0(void *arg) +struct p_thread { + struct perf_stats p; + struct kthread_worker *worker; + struct kthread_work work; + struct intel_engine_cs *engine; + int 
result; +}; + +static void p_sync0(struct kthread_work *work) { - struct perf_stats *p = arg; + struct p_thread *thread = container_of(work, typeof(*thread), work); + struct perf_stats *p = &thread->p; struct intel_engine_cs *engine = p->engine; struct intel_context *ce; IGT_TIMEOUT(end_time); @@ -2902,13 +2957,16 @@ static int p_sync0(void *arg) int err = 0; ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); + if (IS_ERR(ce)) { + thread->result = PTR_ERR(ce); + return; + } err = intel_context_pin(ce); if (err) { intel_context_put(ce); - return err; + thread->result = err; + return; } if (intel_engine_supports_stats(engine)) { @@ -2958,12 +3016,13 @@ static int p_sync0(void *arg) intel_context_unpin(ce); intel_context_put(ce); - return err; + thread->result = err; } -static int p_sync1(void *arg) +static void p_sync1(struct kthread_work *work) { - struct perf_stats *p = arg; + struct p_thread *thread = container_of(work, typeof(*thread), work); + struct perf_stats *p = &thread->p; struct intel_engine_cs *engine = p->engine; struct i915_request *prev = NULL; struct intel_context *ce; @@ -2973,13 +3032,16 @@ static int p_sync1(void *arg) int err = 0; ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); + if (IS_ERR(ce)) { + thread->result = PTR_ERR(ce); + return; + } err = intel_context_pin(ce); if (err) { intel_context_put(ce); - return err; + thread->result = err; + return; } if (intel_engine_supports_stats(engine)) { @@ -3031,12 +3093,13 @@ static int p_sync1(void *arg) intel_context_unpin(ce); intel_context_put(ce); - return err; + thread->result = err; } -static int p_many(void *arg) +static void p_many(struct kthread_work *work) { - struct perf_stats *p = arg; + struct p_thread *thread = container_of(work, typeof(*thread), work); + struct perf_stats *p = &thread->p; struct intel_engine_cs *engine = p->engine; struct intel_context *ce; IGT_TIMEOUT(end_time); @@ -3045,13 +3108,16 @@ static int p_many(void *arg) bool busy; ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); + if (IS_ERR(ce)) { + thread->result = PTR_ERR(ce); + return; + } err = intel_context_pin(ce); if (err) { intel_context_put(ce); - return err; + thread->result = err; + return; } if (intel_engine_supports_stats(engine)) { @@ -3092,26 +3158,23 @@ static int p_many(void *arg) intel_context_unpin(ce); intel_context_put(ce); - return err; + thread->result = err; } static int perf_parallel_engines(void *arg) { struct drm_i915_private *i915 = arg; - static int (* const func[])(void *arg) = { + static void (* const func[])(struct kthread_work *) = { p_sync0, p_sync1, p_many, NULL, }; const unsigned int nengines = num_uabi_engines(i915); + void (* const *fn)(struct kthread_work *); struct intel_engine_cs *engine; - int (* const *fn)(void *arg); struct pm_qos_request qos; - struct { - struct perf_stats p; - struct task_struct *tsk; - } *engines; + struct p_thread *engines; int err = 0; engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL); @@ -3134,36 +3197,45 @@ static int perf_parallel_engines(void *arg) idx = 0; for_each_uabi_engine(engine, i915) { + struct kthread_worker *worker; + intel_engine_pm_get(engine); memset(&engines[idx].p, 0, sizeof(engines[idx].p)); - engines[idx].p.engine = engine; - engines[idx].tsk = kthread_run(*fn, &engines[idx].p, - "igt:%s", engine->name); - if (IS_ERR(engines[idx].tsk)) { - err = PTR_ERR(engines[idx].tsk); + worker = kthread_create_worker(0, "igt:%s", + engine->name); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); 
intel_engine_pm_put(engine); break; } - get_task_struct(engines[idx++].tsk); - } + engines[idx].worker = worker; + engines[idx].result = 0; + engines[idx].p.engine = engine; + engines[idx].engine = engine; - yield(); /* start all threads before we kthread_stop() */ + kthread_init_work(&engines[idx].work, *fn); + kthread_queue_work(worker, &engines[idx].work); + idx++; + } idx = 0; for_each_uabi_engine(engine, i915) { int status; - if (IS_ERR(engines[idx].tsk)) + if (!engines[idx].worker) break; - status = kthread_stop(engines[idx].tsk); + kthread_flush_work(&engines[idx].work); + status = READ_ONCE(engines[idx].result); if (status && !err) err = status; intel_engine_pm_put(engine); - put_task_struct(engines[idx++].tsk); + + kthread_destroy_worker(engines[idx].worker); + idx++; } if (igt_live_test_end(&t)) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index fda9bb79c049..e4281508d580 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -70,6 +70,8 @@ static int intel_shadow_table_check(void) { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, { dg2_shadowed_regs, ARRAY_SIZE(dg2_shadowed_regs) }, { pvc_shadowed_regs, ARRAY_SIZE(pvc_shadowed_regs) }, + { mtl_shadowed_regs, ARRAY_SIZE(mtl_shadowed_regs) }, + { xelpmp_shadowed_regs, ARRAY_SIZE(xelpmp_shadowed_regs) }, }; const struct i915_range *range; unsigned int i, j; @@ -117,6 +119,8 @@ int intel_uncore_mock_selftests(void) { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true }, { __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true }, { __pvc_fw_ranges, ARRAY_SIZE(__pvc_fw_ranges), true }, + { __mtl_fw_ranges, ARRAY_SIZE(__mtl_fw_ranges), true }, + { __xelpmp_fw_ranges, ARRAY_SIZE(__xelpmp_fw_ranges), true }, }; int err, i; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index fff11c90f1fa..f6a7c0bd2955 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -67,7 +67,6 @@ static void mock_device_release(struct drm_device *dev) intel_gt_driver_remove(to_gt(i915)); i915_gem_drain_workqueue(i915); - i915_gem_drain_freed_objects(i915); mock_fini_ggtt(to_gt(i915)->ggtt); destroy_workqueue(i915->wq); diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 46aa3554e97b..1fbe127ff633 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/interrupt.h> +#include <linux/scatterlist.h> #include <linux/mei_cl_bus.h> #include "mei_dev.h" @@ -100,9 +101,18 @@ ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, cb->internal = !!(mode & MEI_CL_IO_TX_INTERNAL); cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING); memcpy(cb->buf.data, buf, length); + /* hack we point data to header */ + if (mode & MEI_CL_IO_SGL) { + cb->ext_hdr = (struct mei_ext_hdr *)cb->buf.data; + cb->buf.data = NULL; + cb->buf.size = 0; + } rets = mei_cl_write(cl, cb); + if (mode & MEI_CL_IO_SGL && rets == 0) + rets = length; + out: mutex_unlock(&bus->device_lock); @@ -205,9 +215,16 @@ copy: goto free; } - r_length = min_t(size_t, length, cb->buf_idx); - memcpy(buf, cb->buf.data, r_length); + /* for the GSC type - copy the extended header to the buffer */ + if (cb->ext_hdr && cb->ext_hdr->type == MEI_EXT_HDR_GSC) { + r_length = min_t(size_t, length, cb->ext_hdr->length * sizeof(u32)); + memcpy(buf, 
cb->ext_hdr, r_length); + } else { + r_length = min_t(size_t, length, cb->buf_idx); + memcpy(buf, cb->buf.data, r_length); + } rets = r_length; + if (vtag) *vtag = cb->vtag; @@ -823,6 +840,131 @@ out: EXPORT_SYMBOL_GPL(mei_cldev_disable); /** + * mei_cldev_send_gsc_command - sends a gsc command, by sending + * a gsl mei message to gsc and receiving reply from gsc + * + * @cldev: me client device + * @client_id: client id to send the command to + * @fence_id: fence id to send the command to + * @sg_in: scatter gather list containing addresses for rx message buffer + * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffers sizes + * @sg_out: scatter gather list containing addresses for tx message buffer + * + * Return: + * * written size in bytes + * * < 0 on error + */ +ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev, + u8 client_id, u32 fence_id, + struct scatterlist *sg_in, + size_t total_in_len, + struct scatterlist *sg_out) +{ + struct mei_cl *cl; + struct mei_device *bus; + ssize_t ret = 0; + + struct mei_ext_hdr_gsc_h2f *ext_hdr; + size_t buf_sz = sizeof(struct mei_ext_hdr_gsc_h2f); + int sg_out_nents, sg_in_nents; + int i; + struct scatterlist *sg; + struct mei_ext_hdr_gsc_f2h rx_msg; + unsigned int sg_len; + + if (!cldev || !sg_in || !sg_out) + return -EINVAL; + + cl = cldev->cl; + bus = cldev->bus; + + dev_dbg(bus->dev, "client_id %u, fence_id %u\n", client_id, fence_id); + + if (!bus->hbm_f_gsc_supported) + return -EOPNOTSUPP; + + sg_out_nents = sg_nents(sg_out); + sg_in_nents = sg_nents(sg_in); + /* at least one entry in tx and rx sgls must be present */ + if (sg_out_nents <= 0 || sg_in_nents <= 0) + return -EINVAL; + + buf_sz += (sg_out_nents + sg_in_nents) * sizeof(struct mei_gsc_sgl); + ext_hdr = kzalloc(buf_sz, GFP_KERNEL); + if (!ext_hdr) + return -ENOMEM; + + /* construct the GSC message */ + ext_hdr->hdr.type = MEI_EXT_HDR_GSC; + ext_hdr->hdr.length = buf_sz / sizeof(u32); /* length is in dw */ + + ext_hdr->client_id = client_id; + ext_hdr->addr_type = GSC_ADDRESS_TYPE_PHYSICAL_SGL; + ext_hdr->fence_id = fence_id; + ext_hdr->input_address_count = sg_in_nents; + ext_hdr->output_address_count = sg_out_nents; + ext_hdr->reserved[0] = 0; + ext_hdr->reserved[1] = 0; + + /* copy in-sgl to the message */ + for (i = 0, sg = sg_in; i < sg_in_nents; i++, sg++) { + ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg)); + ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg)); + sg_len = min_t(unsigned int, sg_dma_len(sg), PAGE_SIZE); + ext_hdr->sgl[i].length = (sg_len <= total_in_len) ? 
sg_len : total_in_len; + total_in_len -= ext_hdr->sgl[i].length; + } + + /* copy out-sgl to the message */ + for (i = sg_in_nents, sg = sg_out; i < sg_in_nents + sg_out_nents; i++, sg++) { + ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg)); + ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg)); + sg_len = min_t(unsigned int, sg_dma_len(sg), PAGE_SIZE); + ext_hdr->sgl[i].length = sg_len; + } + + /* send the message to GSC */ + ret = __mei_cl_send(cl, (u8 *)ext_hdr, buf_sz, 0, MEI_CL_IO_SGL); + if (ret < 0) { + dev_err(bus->dev, "__mei_cl_send failed, returned %zd\n", ret); + goto end; + } + if (ret != buf_sz) { + dev_err(bus->dev, "__mei_cl_send returned %zd instead of expected %zd\n", + ret, buf_sz); + ret = -EIO; + goto end; + } + + /* receive the reply from GSC, note that at this point sg_in should contain the reply */ + ret = __mei_cl_recv(cl, (u8 *)&rx_msg, sizeof(rx_msg), NULL, MEI_CL_IO_SGL, 0); + + if (ret != sizeof(rx_msg)) { + dev_err(bus->dev, "__mei_cl_recv returned %zd instead of expected %zd\n", + ret, sizeof(rx_msg)); + if (ret >= 0) + ret = -EIO; + goto end; + } + + /* check rx_msg.client_id and rx_msg.fence_id match the ones we send */ + if (rx_msg.client_id != client_id || rx_msg.fence_id != fence_id) { + dev_err(bus->dev, "received client_id/fence_id %u/%u instead of %u/%u sent\n", + rx_msg.client_id, rx_msg.fence_id, client_id, fence_id); + ret = -EFAULT; + goto end; + } + + dev_dbg(bus->dev, "gsc command: successfully written %u bytes\n", rx_msg.written); + ret = rx_msg.written; + +end: + kfree(ext_hdr); + return ret; +} +EXPORT_SYMBOL_GPL(mei_cldev_send_gsc_command); + +/** * mei_cl_device_find - find matching entry in the driver id table * * @cldev: me client device diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 0b2fbe1335a7..6c8b71ae32c8 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -322,6 +322,7 @@ void mei_io_cb_free(struct mei_cl_cb *cb) list_del(&cb->list); kfree(cb->buf.data); + kfree(cb->ext_hdr); kfree(cb); } @@ -401,6 +402,7 @@ static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, cb->buf_idx = 0; cb->fop_type = type; cb->vtag = 0; + cb->ext_hdr = NULL; return cb; } @@ -1740,6 +1742,17 @@ static inline u8 mei_ext_hdr_set_vtag(void *ext, u8 vtag) return vtag_hdr->hdr.length; } +static inline bool mei_ext_hdr_is_gsc(struct mei_ext_hdr *ext) +{ + return ext && ext->type == MEI_EXT_HDR_GSC; +} + +static inline u8 mei_ext_hdr_set_gsc(struct mei_ext_hdr *ext, struct mei_ext_hdr *gsc_hdr) +{ + memcpy(ext, gsc_hdr, mei_ext_hdr_len(gsc_hdr)); + return ext->length; +} + /** * mei_msg_hdr_init - allocate and initialize mei message header * @@ -1752,14 +1765,17 @@ static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb) size_t hdr_len; struct mei_ext_meta_hdr *meta; struct mei_msg_hdr *mei_hdr; - bool is_ext, is_vtag; + bool is_ext, is_hbm, is_gsc, is_vtag; + struct mei_ext_hdr *next_ext; if (!cb) return ERR_PTR(-EINVAL); /* Extended header for vtag is attached only on the first fragment */ is_vtag = (cb->vtag && cb->buf_idx == 0); - is_ext = is_vtag; + is_hbm = cb->cl->me_cl->client_id == 0; + is_gsc = ((!is_hbm) && cb->cl->dev->hbm_f_gsc_supported && mei_ext_hdr_is_gsc(cb->ext_hdr)); + is_ext = is_vtag || is_gsc; /* Compute extended header size */ hdr_len = sizeof(*mei_hdr); @@ -1771,6 +1787,9 @@ static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb) if (is_vtag) hdr_len += sizeof(struct mei_ext_hdr_vtag); + if (is_gsc) + hdr_len += mei_ext_hdr_len(cb->ext_hdr); + 
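Because mei_ext_hdr.length is carried in dwords, the GSC host-to-firmware extension assembled above has to remain a multiple of four bytes, and mei_msg_hdr_init() then folds that same dword count into the meta header. A rough size sketch, assuming the packed layouts added to hw.h further down (illustrative only, not code from this series):

/*
 * Fixed 12-byte mei_ext_hdr_gsc_h2f part plus one 12-byte mei_gsc_sgl
 * per input/output buffer (sizes assume the __packed structs in hw.h).
 */
static inline size_t demo_gsc_h2f_bytes(int in_nents, int out_nents)
{
        return sizeof(struct mei_ext_hdr_gsc_h2f) +
               (in_nents + out_nents) * sizeof(struct mei_gsc_sgl);
}

/*
 * Example: 1 input + 1 output entry gives 12 + 2 * 12 = 36 bytes, so
 * ext_hdr->hdr.length = 36 / sizeof(u32) = 9 dwords, and meta->size in
 * mei_msg_hdr_init() grows by those same 9 dwords.
 */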
setup_hdr: mei_hdr = kzalloc(hdr_len, GFP_KERNEL); if (!mei_hdr) @@ -1785,10 +1804,20 @@ setup_hdr: goto out; meta = (struct mei_ext_meta_hdr *)mei_hdr->extension; + meta->size = 0; + next_ext = (struct mei_ext_hdr *)meta->hdrs; if (is_vtag) { meta->count++; - meta->size += mei_ext_hdr_set_vtag(meta->hdrs, cb->vtag); + meta->size += mei_ext_hdr_set_vtag(next_ext, cb->vtag); + next_ext = mei_ext_next(next_ext); + } + + if (is_gsc) { + meta->count++; + meta->size += mei_ext_hdr_set_gsc(next_ext, cb->ext_hdr); + next_ext = mei_ext_next(next_ext); } + out: mei_hdr->length = hdr_len - sizeof(*mei_hdr); return mei_hdr; @@ -1812,14 +1841,14 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, struct mei_msg_hdr *mei_hdr = NULL; size_t hdr_len; size_t hbuf_len, dr_len; - size_t buf_len; + size_t buf_len = 0; size_t data_len; int hbuf_slots; u32 dr_slots; u32 dma_len; int rets; bool first_chunk; - const void *data; + const void *data = NULL; if (WARN_ON(!cl || !cl->dev)) return -ENODEV; @@ -1839,8 +1868,10 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, return 0; } - buf_len = buf->size - cb->buf_idx; - data = buf->data + cb->buf_idx; + if (buf->data) { + buf_len = buf->size - cb->buf_idx; + data = buf->data + cb->buf_idx; + } hbuf_slots = mei_hbuf_empty_slots(dev); if (hbuf_slots < 0) { rets = -EOVERFLOW; @@ -1858,9 +1889,6 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, goto err; } - cl_dbg(dev, cl, "Extended Header %d vtag = %d\n", - mei_hdr->extended, cb->vtag); - hdr_len = sizeof(*mei_hdr) + mei_hdr->length; /** @@ -1889,7 +1917,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, } mei_hdr->length += data_len; - if (mei_hdr->dma_ring) + if (mei_hdr->dma_ring && buf->data) mei_dma_ring_write(dev, buf->data + cb->buf_idx, buf_len); rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len); @@ -1983,9 +2011,6 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb) goto err; } - cl_dbg(dev, cl, "Extended Header %d vtag = %d\n", - mei_hdr->extended, cb->vtag); - hdr_len = sizeof(*mei_hdr) + mei_hdr->length; if (rets == 0) { @@ -2030,7 +2055,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb) mei_hdr->length += data_len; - if (mei_hdr->dma_ring) + if (mei_hdr->dma_ring && buf->data) mei_dma_ring_write(dev, buf->data, buf_len); rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len); diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index de712cbf5d07..12a62a911e42 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -340,9 +340,13 @@ static int mei_hbm_capabilities_req(struct mei_device *dev) req.hbm_cmd = MEI_HBM_CAPABILITIES_REQ_CMD; if (dev->hbm_f_vt_supported) req.capability_requested[0] |= HBM_CAP_VT; + if (dev->hbm_f_cd_supported) req.capability_requested[0] |= HBM_CAP_CD; + if (dev->hbm_f_gsc_supported) + req.capability_requested[0] |= HBM_CAP_GSC; + ret = mei_hbm_write_message(dev, &mei_hdr, &req); if (ret) { dev_err(dev->dev, @@ -1200,6 +1204,12 @@ static void mei_hbm_config_features(struct mei_device *dev) dev->version.minor_version >= HBM_MINOR_VERSION_VT)) dev->hbm_f_vt_supported = 1; + /* GSC support */ + if (dev->version.major_version > HBM_MAJOR_VERSION_GSC || + (dev->version.major_version == HBM_MAJOR_VERSION_GSC && + dev->version.minor_version >= HBM_MINOR_VERSION_GSC)) + dev->hbm_f_gsc_supported = 1; + /* Capability message Support */ dev->hbm_f_cap_supported = 0; if (dev->version.major_version > HBM_MAJOR_VERSION_CAP || @@ -1367,6 +1377,9 @@ int 
mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) if (!(capability_res->capability_granted[0] & HBM_CAP_CD)) dev->hbm_f_cd_supported = 0; + if (!(capability_res->capability_granted[0] & HBM_CAP_GSC)) + dev->hbm_f_gsc_supported = 0; + if (dev->hbm_f_dr_supported) { if (mei_dmam_ring_alloc(dev)) dev_info(dev->dev, "running w/o dma ring\n"); diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 9e2f781c6ed5..da4ef0b51954 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -590,9 +590,14 @@ static int mei_me_hbuf_write(struct mei_device *dev, u32 dw_cnt; int empty_slots; - if (WARN_ON(!hdr || !data || hdr_len & 0x3)) + if (WARN_ON(!hdr || hdr_len & 0x3)) return -EINVAL; + if (!data && data_len) { + dev_err(dev->dev, "wrong parameters null data with data_len = %zu\n", data_len); + return -EINVAL; + } + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr)); empty_slots = mei_hbuf_empty_slots(dev); diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h index e7e020dba6b1..319418ddf4fb 100644 --- a/drivers/misc/mei/hw.h +++ b/drivers/misc/mei/hw.h @@ -93,6 +93,12 @@ #define HBM_MAJOR_VERSION_VT 2 /* + * MEI version with GSC support + */ +#define HBM_MINOR_VERSION_GSC 2 +#define HBM_MAJOR_VERSION_GSC 2 + +/* * MEI version with capabilities message support */ #define HBM_MINOR_VERSION_CAP 2 @@ -229,18 +235,19 @@ enum mei_cl_disconnect_status { * * @MEI_EXT_HDR_NONE: sentinel * @MEI_EXT_HDR_VTAG: vtag header + * @MEI_EXT_HDR_GSC: gsc header */ enum mei_ext_hdr_type { MEI_EXT_HDR_NONE = 0, MEI_EXT_HDR_VTAG = 1, + MEI_EXT_HDR_GSC = 2, }; /** * struct mei_ext_hdr - extend header descriptor (TLV) * @type: enum mei_ext_hdr_type * @length: length excluding descriptor - * @ext_payload: payload of the specific extended header - * @hdr: place holder for actual header + * @data: the extended header payload */ struct mei_ext_hdr { u8 type; @@ -279,12 +286,11 @@ struct mei_ext_hdr_vtag { * Extended header iterator functions */ /** - * mei_ext_hdr - extended header iterator begin + * mei_ext_begin - extended header iterator begin * * @meta: meta header of the extended header list * - * Return: - * The first extended header + * Return: The first extended header */ static inline struct mei_ext_hdr *mei_ext_begin(struct mei_ext_meta_hdr *meta) { @@ -305,6 +311,60 @@ static inline bool mei_ext_last(struct mei_ext_meta_hdr *meta, return (u8 *)ext >= (u8 *)meta + sizeof(*meta) + (meta->size * 4); } +struct mei_gsc_sgl { + u32 low; + u32 high; + u32 length; +} __packed; + +#define GSC_HECI_MSG_KERNEL 0 +#define GSC_HECI_MSG_USER 1 + +#define GSC_ADDRESS_TYPE_GTT 0 +#define GSC_ADDRESS_TYPE_PPGTT 1 +#define GSC_ADDRESS_TYPE_PHYSICAL_CONTINUOUS 2 /* max of 64K */ +#define GSC_ADDRESS_TYPE_PHYSICAL_SGL 3 + +/** + * struct mei_ext_hdr_gsc_h2f - extended header: gsc host to firmware interface + * + * @hdr: extended header + * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER + * @addr_type: GSC_ADDRESS_TYPE_{GTT, PPGTT, PHYSICAL_CONTINUOUS, PHYSICAL_SGL} + * @fence_id: synchronization marker + * @input_address_count: number of input sgl buffers + * @output_address_count: number of output sgl buffers + * @reserved: reserved + * @sgl: sg list + */ +struct mei_ext_hdr_gsc_h2f { + struct mei_ext_hdr hdr; + u8 client_id; + u8 addr_type; + u32 fence_id; + u8 input_address_count; + u8 output_address_count; + u8 reserved[2]; + struct mei_gsc_sgl sgl[]; +} __packed; + +/** + * struct mei_ext_hdr_gsc_f2h - gsc firmware to host interface + * + * @hdr: extended 
header + * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER + * @reserved: reserved + * @fence_id: synchronization marker + * @written: number of bytes written to firmware + */ +struct mei_ext_hdr_gsc_f2h { + struct mei_ext_hdr hdr; + u8 client_id; + u8 reserved; + u32 fence_id; + u32 written; +} __packed; + /** * mei_ext_next - following extended header on the TLV list * @@ -321,6 +381,21 @@ static inline struct mei_ext_hdr *mei_ext_next(struct mei_ext_hdr *ext) } /** + * mei_ext_hdr_len - get ext header length in bytes + * + * @ext: extend header + * + * Return: extend header length in bytes + */ +static inline u32 mei_ext_hdr_len(const struct mei_ext_hdr *ext) +{ + if (!ext) + return 0; + + return ext->length * sizeof(u32); +} + +/** * struct mei_msg_hdr - MEI BUS Interface Section * * @me_addr: device address @@ -682,6 +757,10 @@ struct hbm_dma_ring_ctrl { /* virtual tag supported */ #define HBM_CAP_VT BIT(0) + +/* gsc extended header support */ +#define HBM_CAP_GSC BIT(1) + /* client dma supported */ #define HBM_CAP_CD BIT(2) diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index 0706322154cb..0a0e984e5673 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -98,9 +98,12 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl, struct mei_device *dev = cl->dev; struct mei_cl_cb *cb; + struct mei_ext_hdr_vtag *vtag_hdr = NULL; + struct mei_ext_hdr_gsc_f2h *gsc_f2h = NULL; + size_t buf_sz; u32 length; - int ext_len; + u32 ext_len; length = mei_hdr->length; ext_len = 0; @@ -122,18 +125,24 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl, } if (mei_hdr->extended) { - struct mei_ext_hdr *ext; - struct mei_ext_hdr_vtag *vtag_hdr = NULL; - - ext = mei_ext_begin(meta); + struct mei_ext_hdr *ext = mei_ext_begin(meta); do { switch (ext->type) { case MEI_EXT_HDR_VTAG: vtag_hdr = (struct mei_ext_hdr_vtag *)ext; break; + case MEI_EXT_HDR_GSC: + gsc_f2h = (struct mei_ext_hdr_gsc_f2h *)ext; + cb->ext_hdr = kzalloc(sizeof(*gsc_f2h), GFP_KERNEL); + if (!cb->ext_hdr) { + cb->status = -ENOMEM; + goto discard; + } + break; case MEI_EXT_HDR_NONE: fallthrough; default: + cl_err(dev, cl, "unknown extended header\n"); cb->status = -EPROTO; break; } @@ -141,12 +150,14 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl, ext = mei_ext_next(ext); } while (!mei_ext_last(meta, ext)); - if (!vtag_hdr) { - cl_dbg(dev, cl, "vtag not found in extended header.\n"); + if (!vtag_hdr && !gsc_f2h) { + cl_dbg(dev, cl, "no vtag or gsc found in extended header.\n"); cb->status = -EPROTO; goto discard; } + } + if (vtag_hdr) { cl_dbg(dev, cl, "vtag: %d\n", vtag_hdr->vtag); if (cb->vtag && cb->vtag != vtag_hdr->vtag) { cl_err(dev, cl, "mismatched tag: %d != %d\n", @@ -157,6 +168,28 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl, cb->vtag = vtag_hdr->vtag; } + if (gsc_f2h) { + u32 ext_hdr_len = mei_ext_hdr_len(&gsc_f2h->hdr); + + if (!dev->hbm_f_gsc_supported) { + cl_err(dev, cl, "gsc extended header is not supported\n"); + cb->status = -EPROTO; + goto discard; + } + + if (length) { + cl_err(dev, cl, "no data allowed in cb with gsc\n"); + cb->status = -EPROTO; + goto discard; + } + if (ext_hdr_len > sizeof(*gsc_f2h)) { + cl_err(dev, cl, "gsc extended header is too big %u\n", ext_hdr_len); + cb->status = -EPROTO; + goto discard; + } + memcpy(cb->ext_hdr, gsc_f2h, ext_hdr_len); + } + if (!mei_cl_is_connected(cl)) { cl_dbg(dev, cl, "not connected\n"); cb->status = -ENODEV; diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 6bb3e1ba9ded..8d8018428d9d 
100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -116,12 +116,16 @@ enum mei_cb_file_ops { * @MEI_CL_IO_TX_INTERNAL: internal communication between driver and FW * * @MEI_CL_IO_RX_NONBLOCK: recv is non-blocking + * + * @MEI_CL_IO_SGL: send command with sgl list. */ enum mei_cl_io_mode { MEI_CL_IO_TX_BLOCKING = BIT(0), MEI_CL_IO_TX_INTERNAL = BIT(1), MEI_CL_IO_RX_NONBLOCK = BIT(2), + + MEI_CL_IO_SGL = BIT(3), }; /* @@ -206,6 +210,7 @@ struct mei_cl; * @status: io status of the cb * @internal: communication between driver and FW flag * @blocking: transmission blocking mode + * @ext_hdr: extended header */ struct mei_cl_cb { struct list_head list; @@ -218,6 +223,7 @@ struct mei_cl_cb { int status; u32 internal:1; u32 blocking:1; + struct mei_ext_hdr *ext_hdr; }; /** @@ -494,6 +500,7 @@ struct mei_dev_timeouts { * @hbm_f_vt_supported : hbm feature vtag supported * @hbm_f_cap_supported : hbm feature capabilities message supported * @hbm_f_cd_supported : hbm feature client dma supported + * @hbm_f_gsc_supported : hbm feature gsc supported * * @fw_ver : FW versions * @@ -585,6 +592,7 @@ struct mei_device { unsigned int hbm_f_vt_supported:1; unsigned int hbm_f_cap_supported:1; unsigned int hbm_f_cd_supported:1; + unsigned int hbm_f_gsc_supported:1; struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS]; diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c index 5c39457e3f53..8dd09b1722eb 100644 --- a/drivers/misc/mei/pxp/mei_pxp.c +++ b/drivers/misc/mei/pxp/mei_pxp.c @@ -77,10 +77,35 @@ mei_pxp_receive_message(struct device *dev, void *buffer, size_t size) return byte; } +/** + * mei_pxp_gsc_command() - sends a gsc command, by sending + * a sgl mei message to gsc and receiving reply from gsc + * + * @dev: device corresponding to the mei_cl_device + * @client_id: client id to send the command to + * @fence_id: fence id to send the command to + * @sg_in: scatter gather list containing addresses for rx message buffer + * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffers sizes + * @sg_out: scatter gather list containing addresses for tx message buffer + * + * Return: bytes sent on Success, <0 on Failure + */ +static ssize_t mei_pxp_gsc_command(struct device *dev, u8 client_id, u32 fence_id, + struct scatterlist *sg_in, size_t total_in_len, + struct scatterlist *sg_out) +{ + struct mei_cl_device *cldev; + + cldev = to_mei_cl_device(dev); + + return mei_cldev_send_gsc_command(cldev, client_id, fence_id, sg_in, total_in_len, sg_out); +} + static const struct i915_pxp_component_ops mei_pxp_ops = { .owner = THIS_MODULE, .send = mei_pxp_send_message, .recv = mei_pxp_receive_message, + .gsc_command = mei_pxp_gsc_command, }; static int mei_component_master_bind(struct device *dev) @@ -131,17 +156,24 @@ static int mei_pxp_component_match(struct device *dev, int subcomponent, { struct device *base = data; + if (!dev) + return 0; + if (!dev->driver || strcmp(dev->driver->name, "i915") || subcomponent != I915_COMPONENT_PXP) return 0; base = base->parent; - if (!base) + if (!base) /* mei device */ return 0; - base = base->parent; - dev = dev->parent; + base = base->parent; /* pci device */ + /* for dgfx */ + if (base && dev == base) + return 1; + /* for pch */ + dev = dev->parent; return (base && dev && dev == base); } |
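On the graphics side the new path is exposed to i915 through mei_pxp_ops.gsc_command; any user ultimately lands in the mei_cldev_send_gsc_command() helper exported above. A hedged caller sketch follows; the demo_* names are hypothetical, the real PXP/HuC user is not part of these hunks, and the declaration is assumed to be visible via linux/mei_cl_bus.h:

#include <linux/mei_cl_bus.h>
#include <linux/scatterlist.h>

/*
 * Hypothetical round trip through the exported helper. Both scatterlists
 * must already be DMA-mapped for the device; client_id 0 corresponds to
 * the kernel HECI client (GSC_HECI_MSG_KERNEL earlier in this diff).
 */
static int demo_gsc_roundtrip(struct mei_cl_device *cldev, u32 fence_id,
                              struct sg_table *in, size_t in_len,
                              struct sg_table *out)
{
        ssize_t written;

        written = mei_cldev_send_gsc_command(cldev, 0, fence_id,
                                             in->sgl, in_len, out->sgl);
        if (written < 0)
                return written;

        /* on success the return value is the byte count the GSC reported */
        return 0;
}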