author     Dave Airlie <airlied@redhat.com>    2021-12-10 06:35:20 +0100
committer  Dave Airlie <airlied@redhat.com>    2021-12-10 06:35:20 +0100
commit     211b4dbc070090b4183d6f9db7dd3bd4e6170447
tree       a1751e7ee3411672af2efadac635984c719e3042 /drivers/gpu/drm
parent     Merge tag 'drm-misc-next-2021-12-09' of git://anongit.freedesktop.org/drm/drm...
parent     drm/i915/gt: Use hw_engine_masks as reset_domains
Merge tag 'drm-intel-gt-next-2021-12-09' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Core Changes:
- Fix PENDING_ERROR leak in dma_fence_array_signaled() (Thomas Hellström)
Driver Changes:
- Fix runtime PM handling during PXP suspend (Tejas Upadhyay)
- Improve eviction performance on discrete by implementing async TTM moves (Thomas Hellström, Maarten Lankhorst); see the usage sketch after this list
- Improve robustness of error capture under memory pressure (Thomas Hellström)
- Fix GuC PMU versus GPU reset handling (Umesh Nerlige Ramappa)
- Use per device iommu check (Tvrtko Ursulin)
- Make error capture work with async migration (Thomas Hellström)
- Revert incorrect implementation of Wa_1508744258 causing hangs (José Roberto de Souza)
- Add workaround to disable coarse power gating on some DG2 steppings (Matt Roper)
- Add IC cache invalidation workaround on DG2 (Ramalingam C)
- Move two Icelake workarounds to the right place (Raviteja Goud Talla)
- Fix error pointer dereference in i915_gem_do_execbuffer() (Dan Carpenter)
- Fixup a couple of generic and DG2 specific issues in migration code (Matthew Auld)
- Fix kernel-doc warnings in i915_gem_object.c (Randy Dunlap)
- Drop stealing of bits from i915_sw_fence function pointer (Matthew Brost)
- Introduce new macros for i915 PTE (Michael Cheng)
- Prep work for engine reset by reset domain lookup (Tejas Upadhyay)
- Fixup drm-intel-gt-next build failure (Matthew Auld)
- Fix live_engine_busy_stats selftests in GuC mode (Umesh Nerlige Ramappa)
- Remove dma_resv_prune (Maarten Lankhorst)
- Preserve huge pages enablement after driver reload (Matthew Auld)
- Fix a NULL pointer dereference in igt_request_rewind() (selftests) (Zhou Qingyang)
- Add workaround numbers to GEN7_COMMON_SLICE_CHICKEN1 whitelisting (José Roberto de Souza)
- Increase timeouts in i915_gem_contexts selftests to handle GuC being slower (Bruce Chang)
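The async TTM move work above attaches a "moving fence" to an object while its backing store is migrated or evicted in the background; that fence must have signaled before any CPU or GPU PTEs may be set up for the object's pages (see i915_gem_object_get_moving_fence() / i915_gem_object_wait_moving_fence() in the diff below). A minimal usage sketch follows; example_cpu_prepare() is a hypothetical caller, only the i915_gem_object_* helpers are existing or newly added driver API:

#include "gem/i915_gem_object.h"

/*
 * Sketch only: take the object lock, wait for any pending async move,
 * then pin the pages for CPU use. Mirrors how i915_gem_object_pin_map()
 * uses the new helper in this series.
 */
static int example_cpu_prepare(struct drm_i915_gem_object *obj)
{
	int err;

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		return err;

	/*
	 * A non-signaled moving fence means an async operation (such as
	 * an async TTM move) is still pending on the object.
	 */
	err = i915_gem_object_wait_moving_fence(obj, true);
	if (err)
		goto out_unlock;

	err = i915_gem_object_pin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);
	return err;
}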
Signed-off-by: Dave Airlie <airlied@redhat.com>
# Conflicts:
# drivers/gpu/drm/i915/display/intel_fbc.c
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YbIBOeqhn+nPzaYD@tursulin-mobl2
Diffstat (limited to 'drivers/gpu/drm')
110 files changed, 4603 insertions, 1354 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 11cfa443a33b..3b5857da4123 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -153,6 +153,7 @@ gem-y += \ gem/i915_gem_throttle.o \ gem/i915_gem_tiling.o \ gem/i915_gem_ttm.o \ + gem/i915_gem_ttm_move.o \ gem/i915_gem_ttm_pm.o \ gem/i915_gem_userptr.o \ gem/i915_gem_wait.o \ @@ -172,6 +173,7 @@ i915-y += \ i915_trace_points.o \ i915_ttm_buddy_manager.o \ i915_vma.o \ + i915_vma_snapshot.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index abec394f6869..2da4aacc956b 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -634,7 +634,7 @@ static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv, for_each_pipe(dev_priv, pipe) data_rate += bw_state->data_rate[pipe]; - if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active()) + if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv)) data_rate = data_rate * 105 / 100; return data_rate; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b2d51cd79d6c..badf035efaeb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1293,7 +1293,7 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - return crtc_state->uapi.async_flip && intel_vtd_active() && + return crtc_state->uapi.async_flip && intel_vtd_active(i915) && (DISPLAY_VER(i915) == 9 || IS_BROADWELL(i915) || IS_HASWELL(i915)); } @@ -8815,7 +8815,7 @@ static void intel_atomic_commit_work(struct work_struct *work) intel_atomic_commit_tail(state); } -static int __i915_sw_fence_call +static int intel_atomic_commit_ready(struct i915_sw_fence *fence, enum i915_sw_fence_notify notify) { diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index d0c34bc3af6c..614e8697c068 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1677,7 +1677,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *i915) static bool need_fbc_vtd_wa(struct drm_i915_private *i915) { /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ - if (intel_vtd_active() && + if (intel_vtd_active(i915) && (IS_SKYLAKE(i915) || IS_BROXTON(i915))) { drm_info(&i915->drm, "Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index f0435c6feb68..8a248003dfae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -69,10 +69,16 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct clflush *clflush; assert_object_held(obj); + if (IS_DGFX(i915)) { + WARN_ON_ONCE(obj->cache_dirty); + return false; + } + /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. 
@@ -105,16 +111,24 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, obj->base.resv, NULL, true, - i915_fence_timeout(to_i915(obj->base.dev)), + i915_fence_timeout(i915), I915_FENCE_GFP); dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base); + /* + * We must have successfully populated the pages(since we are + * holding a pin on the pages as per the flush worker) to reach + * this point, which must mean we have already done the required + * flush-on-acquire, hence resetting cache_dirty here should be + * safe. + */ + obj->cache_dirty = false; } else if (obj->mm.pages) { __do_clflush(obj); + obj->cache_dirty = false; } else { GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); } - obj->cache_dirty = false; return true; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ebd775cb1661..347dab952e90 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1001,7 +1001,7 @@ static void free_engines_rcu(struct rcu_head *rcu) free_engines(engines); } -static int __i915_sw_fence_call +static int engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_gem_engines *engines = diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index e8a58c997170..1b526039a60d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -248,8 +248,19 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - /* XXX: consider doing a vmap flush or something */ - if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj)) + /* + * DG1 is special here since it still snoops transactions even with + * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We + * might need to revisit this as we add new discrete platforms. + * + * XXX: Consider doing a vmap flush or something, where possible. + * Currently we just do a heavy handed wbinvd_on_all_cpus() here since + * the underlying sg_table might not even point to struct pages, so we + * can't just call drm_clflush_sg or similar, like we do elsewhere in + * the driver. + */ + if (i915_gem_object_can_bypass_llc(obj) || + (!HAS_LLC(i915) && !IS_DG1(i915))) wbinvd_on_all_cpus(); sg_page_sizes = i915_sg_dma_sizes(pages->sgl); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index b684a62bf3b0..26532c07d467 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -18,10 +18,32 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (IS_DGFX(i915)) + return false; + return !(obj->cache_level == I915_CACHE_NONE || obj->cache_level == I915_CACHE_WT); } +bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (obj->cache_dirty) + return false; + + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + return true; + + if (IS_DGFX(i915)) + return false; + + /* Currently in use by HW (display engine)? Keep flushed. 
*/ + return i915_gem_object_is_framebuffer(obj); +} + static void flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4d7da07442f2..60ee60f7bb09 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -29,6 +29,7 @@ #include "i915_gem_ioctls.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_vma_snapshot.h" struct eb_vma { struct i915_vma *vma; @@ -307,11 +308,15 @@ struct i915_execbuffer { struct eb_fence *fences; unsigned long num_fences; +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1]; +#endif }; static int eb_parse(struct i915_execbuffer *eb); static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); static void eb_unpin_engine(struct i915_execbuffer *eb); +static void eb_capture_release(struct i915_execbuffer *eb); static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { @@ -990,7 +995,7 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } if (!(ev->flags & EXEC_OBJECT_WRITE)) { - err = dma_resv_reserve_shared(vma->resv, 1); + err = dma_resv_reserve_shared(vma->obj->base.resv, 1); if (err) return err; } @@ -1043,6 +1048,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final) i915_vma_put(vma); } + eb_capture_release(eb); eb_unpin_engine(eb); } @@ -1880,36 +1886,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb) return NULL; } -static int eb_move_to_gpu(struct i915_execbuffer *eb) +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */ +static void eb_capture_stage(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; - unsigned int i = count; - int err = 0, j; + unsigned int i = count, j; + struct i915_vma_snapshot *vsnap; while (i--) { struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; unsigned int flags = ev->flags; - struct drm_i915_gem_object *obj = vma->obj; - assert_vma_held(vma); + if (!(flags & EXEC_OBJECT_CAPTURE)) + continue; - if (flags & EXEC_OBJECT_CAPTURE) { + vsnap = i915_vma_snapshot_alloc(GFP_KERNEL); + if (!vsnap) + continue; + + i915_vma_snapshot_init(vsnap, vma, "user"); + for_each_batch_create_order(eb, j) { struct i915_capture_list *capture; - for_each_batch_create_order(eb, j) { - if (!eb->requests[j]) - break; + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (!capture) + continue; - capture = kmalloc(sizeof(*capture), GFP_KERNEL); - if (capture) { - capture->next = - eb->requests[j]->capture_list; - capture->vma = vma; - eb->requests[j]->capture_list = capture; - } - } + capture->next = eb->capture_lists[j]; + capture->vma_snapshot = i915_vma_snapshot_get(vsnap); + eb->capture_lists[j] = capture; + } + i915_vma_snapshot_put(vsnap); + } +} + +/* Commit once we're in the critical path */ +static void eb_capture_commit(struct i915_execbuffer *eb) +{ + unsigned int j; + + for_each_batch_create_order(eb, j) { + struct i915_request *rq = eb->requests[j]; + + if (!rq) + break; + + rq->capture_list = eb->capture_lists[j]; + eb->capture_lists[j] = NULL; + } +} + +/* + * Release anything that didn't get committed due to errors. + * The capture_list will otherwise be freed at request retire. 
+ */ +static void eb_capture_release(struct i915_execbuffer *eb) +{ + unsigned int j; + + for_each_batch_create_order(eb, j) { + if (eb->capture_lists[j]) { + i915_request_free_capture_list(eb->capture_lists[j]); + eb->capture_lists[j] = NULL; } + } +} + +static void eb_capture_list_clear(struct i915_execbuffer *eb) +{ + memset(eb->capture_lists, 0, sizeof(eb->capture_lists)); +} + +#else + +static void eb_capture_stage(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_commit(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_release(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_list_clear(struct i915_execbuffer *eb) +{ +} + +#endif + +static int eb_move_to_gpu(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i = count; + int err = 0, j; + + while (i--) { + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + unsigned int flags = ev->flags; + struct drm_i915_gem_object *obj = vma->obj; + + assert_vma_held(vma); /* * If the GPU is not _reading_ through the CPU cache, we need @@ -1990,6 +2073,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) /* Unconditionally flush any chipset caches (for streaming writes). */ intel_gt_chipset_flush(eb->gt); + eb_capture_commit(eb); + return 0; err_skip: @@ -2164,7 +2249,7 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = dma_resv_reserve_shared(shadow->resv, 1); + err = dma_resv_reserve_shared(shadow->obj->base.resv, 1); if (err) goto err_trampoline; @@ -3114,7 +3199,7 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, /* Allocate a request for this batch buffer nice and early. */ eb->requests[i] = i915_request_create(eb_find_context(eb, i)); if (IS_ERR(eb->requests[i])) { - out_fence = ERR_PTR(PTR_ERR(eb->requests[i])); + out_fence = ERR_CAST(eb->requests[i]); eb->requests[i] = NULL; return out_fence; } @@ -3132,13 +3217,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, } /* - * Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when - * this request is retired will the batch_obj be moved onto - * the inactive_list and lose its active reference. Hence we do - * not need to explicitly hold another reference here. + * Not really on stack, but we don't want to call + * kfree on the batch_snapshot when we put it, so use the + * _onstack interface. 
*/ - eb->requests[i]->batch = eb->batches[i]->vma; + if (eb->batches[i]->vma) + i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot, + eb->batches[i]->vma, + "batch"); if (eb->batch_pool) { GEM_BUG_ON(intel_context_is_parallel(eb->context)); intel_gt_buffer_pool_mark_active(eb->batch_pool, @@ -3187,6 +3273,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.fences = NULL; eb.num_fences = 0; + eb_capture_list_clear(&eb); + memset(eb.requests, 0, sizeof(struct i915_request *) * ARRAY_SIZE(eb.requests)); eb.composite_fence = NULL; @@ -3273,10 +3361,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, } ww_acquire_done(&eb.ww.ctx); + eb_capture_stage(&eb); out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); if (IS_ERR(out_fence)) { err = PTR_ERR(out_fence); + out_fence = NULL; if (eb.requests[0]) goto err_request; else diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index a57a6b7013c2..c5150a1ee3d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { .put_pages = i915_gem_object_put_pages_internal, }; -/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) +__i915_gem_object_create_internal(struct drm_i915_private *i915, + const struct drm_i915_gem_object_ops *ops, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; @@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + i915_gem_object_init(obj, ops, &lock_class, 0); obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* @@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return obj; } + +/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. 
This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) +{ + return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 1e426a42a36c..5fac9b560b73 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -31,6 +31,7 @@ #include "i915_gem_context.h" #include "i915_gem_mman.h" #include "i915_gem_object.h" +#include "i915_gem_ttm.h" #include "i915_memcpy.h" #include "i915_trace.h" @@ -91,7 +92,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, } /** - * i915_gem_object_fini - Clean up a GEM object initialization + * __i915_gem_object_fini - Clean up a GEM object initialization * @obj: The gem object to cleanup * * This function cleans up gem object fields that are set up by @@ -107,25 +108,29 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj) } /** - * Mark up the object's coherency levels for a given cache_level + * i915_gem_object_set_cache_coherency - Mark up the object's coherency levels + * for a given cache_level * @obj: #drm_i915_gem_object * @cache_level: cache level */ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + obj->cache_level = cache_level; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | I915_BO_CACHE_COHERENT_FOR_WRITE); - else if (HAS_LLC(to_i915(obj->base.dev))) + else if (HAS_LLC(i915)) obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ; else obj->cache_coherent = 0; obj->cache_dirty = - !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) && + !IS_DGFX(i915); } bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) @@ -364,15 +369,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) atomic_inc(&i915->mm.free_count); /* - * This serializes freeing with the shrinker. Since the free - * is delayed, first by RCU then by the workqueue, we want the - * shrinker to be able to free pages of unreferenced objects, - * or else we may oom whilst there are plenty of deferred - * freed objects. - */ - i915_gem_object_make_unshrinkable(obj); - - /* * Since we require blocking on struct_mutex to unbind the freed * object from the GPU before releasing resources back to the * system, we can not do that directly from the RCU callback (which may @@ -456,7 +452,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset * from can't cross a page boundary. The caller must ensure that @obj pages * are pinned and that @obj is synced wrt. any related writes. * - * Returns 0 on success or -ENODEV if the type of @obj's backing store is + * Return: %0 on success or -ENODEV if the type of @obj's backing store is * unsupported. 
*/ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) @@ -732,6 +728,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { .export = i915_gem_prime_export, }; +/** + * i915_gem_object_get_moving_fence - Get the object's moving fence if any + * @obj: The object whose moving fence to get. + * + * A non-signaled moving fence means that there is an async operation + * pending on the object that needs to be waited on before setting up + * any GPU- or CPU PTEs to the object's pages. + * + * Return: A refcounted pointer to the object's moving fence if any, + * NULL otherwise. + */ +struct dma_fence * +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +{ + return dma_fence_get(i915_gem_to_ttm(obj)->moving); +} + +/** + * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any + * @obj: The object whose moving fence to wait for. + * @intr: Whether to wait interruptible. + * + * If the moving fence signaled without an error, it is detached from the + * object and put. + * + * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted, + * negative error code if the async operation represented by the + * moving fence failed. + */ +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, + bool intr) +{ + struct dma_fence *fence = i915_gem_to_ttm(obj)->moving; + int ret; + + assert_object_held(obj); + if (!fence) + return 0; + + ret = dma_fence_wait(fence, intr); + if (ret) + return ret; + + if (fence->error) + return fence->error; + + i915_gem_to_ttm(obj)->moving = NULL; + dma_fence_put(fence); + return 0; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 59201801cec5..66f20b803b01 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915); struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj); -void i915_gem_object_truncate(struct drm_i915_gem_object *obj); /** * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle @@ -296,6 +295,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST); +} + +static inline bool i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) { return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); @@ -449,7 +454,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) } int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); -void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +int i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); /** @@ -512,11 +517,18 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } +struct dma_fence * +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); + +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, + bool intr); + void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); +bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); @@ -533,25 +545,15 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); -static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) -{ - if (obj->cache_dirty) - return false; - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - return true; - - /* Currently in use by HW (display engine)? Keep flushed. */ - return i915_gem_object_is_framebuffer(obj); -} - static inline void __start_cpu_write(struct drm_i915_gem_object *obj) { obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; - if (cpu_write_needs_clflush(obj)) + if (i915_gem_cpu_write_needs_clflush(obj)) obj->cache_dirty = true; } @@ -613,6 +615,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, enum intel_memory_type type); +int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment); +void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup); +void __shmem_writeback(size_t size, struct address_space *mapping); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index da85169006d4..f9f7e44099fe 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -34,9 +34,11 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) -#define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_NO_MMAP BIT(3) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +/* Skip the shrinker management in set_pages/unset_pages */ +#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2) +#define I915_GEM_OBJECT_IS_PROXY BIT(3) +#define I915_GEM_OBJECT_NO_MMAP BIT(4) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -54,8 +56,11 @@ struct drm_i915_gem_object_ops { int (*get_pages)(struct drm_i915_gem_object *obj); void (*put_pages)(struct drm_i915_gem_object *obj, struct sg_table *pages); - void (*truncate)(struct drm_i915_gem_object *obj); + int (*truncate)(struct drm_i915_gem_object *obj); void (*writeback)(struct drm_i915_gem_object *obj); + int (*shrinker_release_pages)(struct drm_i915_gem_object *obj, + bool no_gpu_wait, + bool should_writeback); int (*pread)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pread *arg); @@ -486,9 +491,37 @@ struct drm_i915_gem_object { * instead go through the pin/unpin interfaces. 
*/ atomic_t pages_pin_count; + + /** + * @shrink_pin: Prevents the pages from being made visible to + * the shrinker, while the shrink_pin is non-zero. Most users + * should pretty much never have to care about this, outside of + * some special use cases. + * + * By default most objects will start out as visible to the + * shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the + * backing pages are attached to the object, like in + * __i915_gem_object_set_pages(). They will then be removed the + * shrinker list once the pages are released. + * + * The @shrink_pin is incremented by calling + * i915_gem_object_make_unshrinkable(), which will also remove + * the object from the shrinker list, if the pin count was zero. + * + * Callers will then typically call + * i915_gem_object_make_shrinkable() or + * i915_gem_object_make_purgeable() to decrement the pin count, + * and make the pages visible again. + */ atomic_t shrink_pin; /** + * @ttm_shrinkable: True when the object is using shmem pages + * underneath. Protected by the object lock. + */ + bool ttm_shrinkable; + + /** * Priority list of potential placements for this object. */ struct intel_memory_region **placements; @@ -512,6 +545,7 @@ struct drm_i915_gem_object { */ struct list_head region_link; + struct i915_refct_sgt *rsgt; struct sg_table *pages; void *mapping; @@ -547,7 +581,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_dma_page; /** - * Element within i915->mm.unbound_list or i915->mm.bound_list, + * Element within i915->mm.shrink_list or i915->mm.purge_list, * locked by i915->mm.obj_lock. */ struct list_head link; @@ -565,7 +599,7 @@ struct drm_i915_gem_object { } mm; struct { - struct sg_table *cached_io_st; + struct i915_refct_sgt *cached_io_rsgt; struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8eb1c3a6fc9c..49c6e55c68ce 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -26,6 +26,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, /* Make the pages coherent with the GPU (flushing any swapin). 
*/ if (obj->cache_dirty) { + WARN_ON_ONCE(IS_DGFX(i915)); obj->write_domain = 0; if (i915_gem_object_has_struct_page(obj)) drm_clflush_sg(pages); @@ -68,7 +69,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, shrinkable = false; } - if (shrinkable) { + if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) { struct list_head *list; unsigned long flags; @@ -158,11 +159,13 @@ retry: } /* Immediately discard the backing storage */ -void i915_gem_object_truncate(struct drm_i915_gem_object *obj) +int i915_gem_object_truncate(struct drm_i915_gem_object *obj) { drm_gem_free_mmap_offset(&obj->base); if (obj->ops->truncate) - obj->ops->truncate(obj); + return obj->ops->truncate(obj); + + return 0; } /* Try to discard unwanted pages */ @@ -208,7 +211,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) if (i915_gem_object_is_volatile(obj)) obj->mm.madv = I915_MADV_WILLNEED; - i915_gem_object_make_unshrinkable(obj); + if (!i915_gem_object_has_self_managed_shrink_list(obj)) + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { unmap_object(obj, page_mask_bits(obj->mm.mapping)); @@ -414,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } if (!ptr) { + err = i915_gem_object_wait_moving_fence(obj, true); + if (err) { + ptr = ERR_PTR(err); + goto err_unpin; + } + if (GEM_WARN_ON(type == I915_MAP_WC && !static_cpu_has(X86_FEATURE_PAT))) ptr = ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index a016ccec36f3..a4350227e9ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -11,7 +11,7 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, struct intel_memory_region *mem) { - obj->mm.region = intel_memory_region_get(mem); + obj->mm.region = mem; mutex_lock(&mem->objects.lock); list_add(&obj->mm.region_link, &mem->objects.list); @@ -25,8 +25,6 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) mutex_lock(&mem->objects.lock); list_del(&obj->mm.region_link); mutex_unlock(&mem->objects.lock); - - intel_memory_region_put(mem); } struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index d77da59fae04..cc9fe258fba7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -25,62 +25,67 @@ static void check_release_pagevec(struct pagevec *pvec) cond_resched(); } -static int shmem_get_pages(struct drm_i915_gem_object *obj) +void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mem = obj->mm.region; - const unsigned long page_count = obj->base.size / PAGE_SIZE; + struct sgt_iter sgt_iter; + struct pagevec pvec; + struct page *page; + + mapping_clear_unevictable(mapping); + + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (dirty) + set_page_dirty(page); + + if (backup) + mark_page_accessed(page); + + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); + + sg_free_table(st); +} + +int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment) +{ + const unsigned long page_count = size / 
PAGE_SIZE; unsigned long i; - struct address_space *mapping; - struct sg_table *st; struct scatterlist *sg; - struct sgt_iter sgt_iter; struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ - unsigned int max_segment = i915_sg_segment_size(); - unsigned int sg_page_sizes; gfp_t noreclaim; int ret; /* - * Assert that the object is not currently in any GPU domain. As it - * wasn't in the GTT, there shouldn't be any way it could have been in - * a GPU cache - */ - GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); - GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); - - /* * If there's no chance of allocating enough pages for the whole * object, bail early. */ - if (obj->base.size > resource_size(&mem->region)) + if (size > resource_size(&mr->region)) return -ENOMEM; - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) + if (sg_alloc_table(st, page_count, GFP_KERNEL)) return -ENOMEM; -rebuild_st: - if (sg_alloc_table(st, page_count, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - /* * Get the list of pages out of our struct file. They'll be pinned * at this point until we release them. * * Fail silently without starting the shrinker */ - mapping = obj->base.filp->f_mapping; mapping_set_unevictable(mapping); noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; st->nents = 0; - sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, @@ -135,10 +140,9 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) { - sg_page_sizes |= sg->length; + if (i) sg = sg_next(sg); - } + st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -149,14 +153,67 @@ rebuild_st: /* Check that the i965g/gm workaround works. */ GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL); } - if (sg) { /* loop terminated early; short sg table */ - sg_page_sizes |= sg->length; + if (sg) /* loop terminated early; short sg table */ sg_mark_end(sg); - } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); + return 0; +err_sg: + sg_mark_end(sg); + if (sg != st->sgl) { + shmem_sg_free_table(st, mapping, false, false); + } else { + mapping_clear_unevictable(mapping); + sg_free_table(st); + } + + /* + * shmemfs first checks if there is enough memory to allocate the page + * and reports ENOSPC should there be insufficient, along with the usual + * ENOMEM for a genuine allocation failure. + * + * We use ENOSPC in our driver to mean that we have run out of aperture + * space and so want to translate the error from shmemfs back to our + * usual understanding of ENOMEM. + */ + if (ret == -ENOSPC) + ret = -ENOMEM; + + return ret; +} + +static int shmem_get_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mem = obj->mm.region; + struct address_space *mapping = obj->base.filp->f_mapping; + const unsigned long page_count = obj->base.size / PAGE_SIZE; + unsigned int max_segment = i915_sg_segment_size(); + struct sg_table *st; + struct sgt_iter sgt_iter; + struct page *page; + int ret; + + /* + * Assert that the object is not currently in any GPU domain. 
As it + * wasn't in the GTT, there shouldn't be any way it could have been in + * a GPU cache + */ + GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); + GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); + +rebuild_st: + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping, + max_segment); + if (ret) + goto err_st; + ret = i915_gem_gtt_prepare_pages(obj, st); if (ret) { /* @@ -168,6 +225,7 @@ rebuild_st: for_each_sgt_page(page, sgt_iter, st) put_page(page); sg_free_table(st); + kfree(st); max_segment = PAGE_SIZE; goto rebuild_st; @@ -185,28 +243,12 @@ rebuild_st: if (i915_gem_object_can_bypass_llc(obj)) obj->cache_dirty = true; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); return 0; -err_sg: - sg_mark_end(sg); err_pages: - mapping_clear_unevictable(mapping); - if (sg != st->sgl) { - struct pagevec pvec; - - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, st) { - if (!pagevec_add(&pvec, page)) - check_release_pagevec(&pvec); - } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); - } - sg_free_table(st); - kfree(st); - + shmem_sg_free_table(st, mapping, false, false); /* * shmemfs first checks if there is enough memory to allocate the page * and reports ENOSPC should there be insufficient, along with the usual @@ -216,13 +258,16 @@ err_pages: * space and so want to translate the error from shmemfs back to our * usual understanding of ENOMEM. */ +err_st: if (ret == -ENOSPC) ret = -ENOMEM; + kfree(st); + return ret; } -static void +static int shmem_truncate(struct drm_i915_gem_object *obj) { /* @@ -234,12 +279,12 @@ shmem_truncate(struct drm_i915_gem_object *obj) shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); obj->mm.madv = __I915_MADV_PURGED; obj->mm.pages = ERR_PTR(-EFAULT); + + return 0; } -static void -shmem_writeback(struct drm_i915_gem_object *obj) +void __shmem_writeback(size_t size, struct address_space *mapping) { - struct address_space *mapping; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = SWAP_CLUSTER_MAX, @@ -255,10 +300,9 @@ shmem_writeback(struct drm_i915_gem_object *obj) * instead of invoking writeback so they are aged and paged out * as normal. 
*/ - mapping = obj->base.filp->f_mapping; /* Begin writeback on each dirty page */ - for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { + for (i = 0; i < size >> PAGE_SHIFT; i++) { struct page *page; page = find_lock_page(mapping, i); @@ -281,6 +325,12 @@ put: } } +static void +shmem_writeback(struct drm_i915_gem_object *obj) +{ + __shmem_writeback(obj->base.size, obj->base.filp->f_mapping); +} + void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -313,11 +363,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) { - struct sgt_iter sgt_iter; - struct pagevec pvec; - struct page *page; - - GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -325,25 +370,10 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj, pages); - mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); - - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) - set_page_dirty(page); - - if (obj->mm.madv == I915_MADV_WILLNEED) - mark_page_accessed(page); - - if (!pagevec_add(&pvec, page)) - check_release_pagevec(&pvec); - } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); - obj->mm.dirty = false; - - sg_free_table(pages); + shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping, + obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED); kfree(pages); + obj->mm.dirty = false; } static void @@ -634,9 +664,10 @@ static int init_shmem(struct intel_memory_region *mem) return 0; /* Don't error, we can simply fallback to the kernel mnt */ } -static void release_shmem(struct intel_memory_region *mem) +static int release_shmem(struct intel_memory_region *mem) { i915_gemfs_fini(mem->i915); + return 0; } static const struct intel_memory_region_ops shmem_region_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index af3eb7fd951d..157a9765f483 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -55,19 +55,25 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, return false; } -static void try_to_writeback(struct drm_i915_gem_object *obj, - unsigned int flags) +static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags) { + if (obj->ops->shrinker_release_pages) + return obj->ops->shrinker_release_pages(obj, + !(flags & I915_SHRINK_ACTIVE), + flags & I915_SHRINK_WRITEBACK); + switch (obj->mm.madv) { case I915_MADV_DONTNEED: i915_gem_object_truncate(obj); - return; + return 0; case __I915_MADV_PURGED: - return; + return 0; } if (flags & I915_SHRINK_WRITEBACK) i915_gem_object_writeback(obj); + + return 0; } /** @@ -221,8 +227,8 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, } if (!__i915_gem_object_put_pages(obj)) { - try_to_writeback(obj, shrink); - count += obj->base.size >> PAGE_SHIFT; + if (!try_to_writeback(obj, shrink)) + count += obj->base.size >> PAGE_SHIFT; } if (!ww) i915_gem_object_unlock(obj); @@ -455,6 +461,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, #define obj_to_i915(obj__) to_i915((obj__)->base.dev) +/** + * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. 
By + * default all object types that support shrinking(see IS_SHRINKABLE), will also + * make the object visible to the shrinker after allocating the system memory + * pages. + * @obj: The GEM object. + * + * This is typically used for special kernel internal objects that can't be + * easily processed by the shrinker, like if they are perma-pinned. + */ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = obj_to_i915(obj); @@ -479,13 +495,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } -static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, - struct list_head *head) +static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, + struct list_head *head) { struct drm_i915_private *i915 = obj_to_i915(obj); unsigned long flags; - GEM_BUG_ON(!i915_gem_object_has_pages(obj)); if (!i915_gem_object_is_shrinkable(obj)) return; @@ -505,14 +520,67 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } +/** + * __i915_gem_object_make_shrinkable - Move the object to the tail of the + * shrinkable list. Objects on this list might be swapped out. Used with + * WILLNEED objects. + * @obj: The GEM object. + * + * DO NOT USE. This is intended to be called on very special objects that don't + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages + * underneath. + */ +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) +{ + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.shrink_list); +} + +/** + * __i915_gem_object_make_purgeable - Move the object to the tail of the + * purgeable list. Objects on this list might be swapped out. Used with + * DONTNEED objects. + * @obj: The GEM object. + * + * DO NOT USE. This is intended to be called on very special objects that don't + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages + * underneath. + */ +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) +{ + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.purge_list); +} + +/** + * i915_gem_object_make_shrinkable - Move the object to the tail of the + * shrinkable list. Objects on this list might be swapped out. Used with + * WILLNEED objects. + * @obj: The GEM object. + * + * MUST only be called on objects which have backing pages. + * + * MUST be balanced with previous call to i915_gem_object_make_unshrinkable(). + */ void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.shrink_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + __i915_gem_object_make_shrinkable(obj); } +/** + * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable + * list. Used with DONTNEED objects. Unlike with shrinkable objects, the + * shrinker will attempt to discard the backing pages, instead of trying to swap + * them out. + * @obj: The GEM object. + * + * MUST only be called on objects which have backing pages. + * + * MUST be balanced with previous call to i915_gem_object_make_unshrinkable(). 
+ */ void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.purge_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + __i915_gem_object_make_purgeable(obj); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index ddd37ccb1362..bce03d74a0b4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -399,7 +399,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) return 0; } - if (intel_vtd_active() && GRAPHICS_VER(i915) < 8) { + if (intel_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { drm_notice(&i915->drm, "%s, disabling use of stolen memory\n", "DMAR active"); @@ -720,9 +720,10 @@ static int init_stolen_smem(struct intel_memory_region *mem) return i915_gem_init_stolen(mem); } -static void release_stolen_smem(struct intel_memory_region *mem) +static int release_stolen_smem(struct intel_memory_region *mem) { i915_gem_cleanup_stolen(mem->i915); + return 0; } static const struct intel_memory_region_ops i915_region_stolen_smem_ops = { @@ -759,10 +760,11 @@ err_fini: return err; } -static void release_stolen_lmem(struct intel_memory_region *mem) +static int release_stolen_lmem(struct intel_memory_region *mem) { io_mapping_fini(&mem->iomap); i915_gem_cleanup_stolen(mem->i915); + return 0; } static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 74a1ffd0d7dd..218a9b3037c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -14,13 +14,9 @@ #include "gem/i915_gem_object.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" #include "gem/i915_gem_ttm_pm.h" - -#include "gt/intel_engine_pm.h" -#include "gt/intel_gt.h" -#include "gt/intel_migrate.h" - #define I915_TTM_PRIO_PURGE 0 #define I915_TTM_PRIO_NO_PAGES 1 #define I915_TTM_PRIO_HAS_PAGES 2 @@ -34,7 +30,9 @@ * struct i915_ttm_tt - TTM page vector with additional private information * @ttm: The base TTM page vector. * @dev: The struct device used for dma mapping and unmapping. - * @cached_st: The cached scatter-gather table. + * @cached_rsgt: The cached scatter-gather table. + * @is_shmem: Set if using shmem. + * @filp: The shmem file, if using shmem backend. * * Note that DMA may be going on right up to the point where the page- * vector is unpopulated in delayed destroy. Hence keep the @@ -45,7 +43,10 @@ struct i915_ttm_tt { struct ttm_tt ttm; struct device *dev; - struct sg_table *cached_st; + struct i915_refct_sgt cached_rsgt; + + bool is_shmem; + struct file *filp; }; static const struct ttm_place sys_placement_flags = { @@ -103,37 +104,15 @@ static int i915_ttm_err_to_gem(int err) return err; } -static bool gpu_binds_iomem(struct ttm_resource *mem) -{ - return mem->mem_type != TTM_PL_SYSTEM; -} - -static bool cpu_maps_iomem(struct ttm_resource *mem) -{ - /* Once / if we support GGTT, this is also false for cached ttm_tts */ - return mem->mem_type != TTM_PL_SYSTEM; -} - -static enum i915_cache_level -i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, - struct ttm_tt *ttm) -{ - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && - ttm->caching == ttm_cached) ? 
I915_CACHE_LLC : - I915_CACHE_NONE; -} - -static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); - static enum ttm_caching i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) { /* - * Objects only allowed in system get cached cpu-mappings. - * Other objects get WC mapping for now. Even if in system. + * Objects only allowed in system get cached cpu-mappings, or when + * evicting lmem-only buffers to system for swapping. Other objects get + * WC mapping for now. Even if in system. */ - if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && - obj->mm.n_placements <= 1) + if (obj->mm.n_placements <= 1) return ttm_cached; return ttm_write_combined; @@ -179,15 +158,103 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, placement->busy_placement = busy; } +static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + const unsigned int max_segment = i915_sg_segment_size(); + const size_t size = ttm->num_pages << PAGE_SHIFT; + struct file *filp = i915_tt->filp; + struct sgt_iter sgt_iter; + struct sg_table *st; + struct page *page; + unsigned long i; + int err; + + if (!filp) { + struct address_space *mapping; + gfp_t mask; + + filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; + + mapping = filp->f_mapping; + mapping_set_gfp_mask(mapping, mask); + GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); + + i915_tt->filp = filp; + } + + st = &i915_tt->cached_rsgt.table; + err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping, + max_segment); + if (err) + return err; + + err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (err) + goto err_free_st; + + i = 0; + for_each_sgt_page(page, sgt_iter, st) + ttm->pages[i++] = page; + + if (ttm->page_flags & TTM_TT_FLAG_SWAPPED) + ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED; + + return 0; + +err_free_st: + shmem_sg_free_table(st, filp->f_mapping, false, false); + + return err; +} + +static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED; + struct sg_table *st = &i915_tt->cached_rsgt.table; + + shmem_sg_free_table(st, file_inode(i915_tt->filp)->i_mapping, + backup, backup); +} + +static void i915_ttm_tt_release(struct kref *ref) +{ + struct i915_ttm_tt *i915_tt = + container_of(ref, typeof(*i915_tt), cached_rsgt.kref); + struct sg_table *st = &i915_tt->cached_rsgt.table; + + GEM_WARN_ON(st->sgl); + + kfree(i915_tt); +} + +static const struct i915_refct_sgt_ops tt_rsgt_ops = { + .release = i915_ttm_tt_release +}; + static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, bo->resource->mem_type); struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + enum ttm_caching caching; struct i915_ttm_tt *i915_tt; int ret; + if (!obj) + return NULL; + i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); if (!i915_tt) return NULL; @@ -196,38 +263,66 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, man->use_tt) page_flags |= TTM_TT_FLAG_ZERO_ALLOC; - ret = 
ttm_tt_init(&i915_tt->ttm, bo, page_flags, - i915_ttm_select_tt_caching(obj)); - if (ret) { - kfree(i915_tt); - return NULL; + caching = i915_ttm_select_tt_caching(obj); + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + page_flags |= TTM_TT_FLAG_EXTERNAL | + TTM_TT_FLAG_EXTERNAL_MAPPABLE; + i915_tt->is_shmem = true; } + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching); + if (ret) + goto err_free; + + __i915_refct_sgt_init(&i915_tt->cached_rsgt, bo->base.size, + &tt_rsgt_ops); + i915_tt->dev = obj->base.dev->dev; return &i915_tt->ttm; + +err_free: + kfree(i915_tt); + return NULL; +} + +static int i915_ttm_tt_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + if (i915_tt->is_shmem) + return i915_ttm_tt_shmem_populate(bdev, ttm, ctx); + + return ttm_pool_alloc(&bdev->pool, ttm, ctx); } static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + struct sg_table *st = &i915_tt->cached_rsgt.table; - if (i915_tt->cached_st) { - dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st, - DMA_BIDIRECTIONAL, 0); - sg_free_table(i915_tt->cached_st); - kfree(i915_tt->cached_st); - i915_tt->cached_st = NULL; + if (st->sgl) + dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); + + if (i915_tt->is_shmem) { + i915_ttm_tt_shmem_unpopulate(ttm); + } else { + sg_free_table(st); + ttm_pool_free(&bdev->pool, ttm); } - ttm_pool_free(&bdev->pool, ttm); } static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + if (i915_tt->filp) + fput(i915_tt->filp); + ttm_tt_fini(ttm); - kfree(i915_tt); + i915_refct_sgt_put(&i915_tt->cached_rsgt); } static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, @@ -235,6 +330,17 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + if (!obj) + return false; + + /* + * EXTERNAL objects should never be swapped out by TTM, instead we need + * to handle that ourselves. TTM will already skip such objects for us, + * but we would like to avoid grabbing locks for no good reason. + */ + if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) + return false; + /* Will do for now. Our pinned objects are still on TTM's LRU lists */ return i915_gem_object_evictable(obj); } @@ -245,28 +351,19 @@ static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, *placement = i915_sys_placement; } -static int i915_ttm_move_notify(struct ttm_buffer_object *bo) -{ - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - int ret; - - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); - if (ret) - return ret; - - ret = __i915_gem_object_put_pages(obj); - if (ret) - return ret; - - return 0; -} - -static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) +/** + * i915_ttm_free_cached_io_rsgt - Free object cached LMEM information + * @obj: The GEM object + * This function frees any LMEM-related information that is cached on + * the object. 
For example the radix tree for fast page lookup and the + * cached refcounted sg-table + */ +void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj) { struct radix_tree_iter iter; void __rcu **slot; - if (!obj->ttm.cached_io_st) + if (!obj->ttm.cached_io_rsgt) return; rcu_read_lock(); @@ -274,93 +371,106 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index); rcu_read_unlock(); - sg_free_table(obj->ttm.cached_io_st); - kfree(obj->ttm.cached_io_st); - obj->ttm.cached_io_st = NULL; + i915_refct_sgt_put(obj->ttm.cached_io_rsgt); + obj->ttm.cached_io_rsgt = NULL; } -static void -i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +/** + * i915_ttm_purge - Clear an object of its memory + * @obj: The object + * + * This function is called to clear an object of it's memory when it is + * marked as not needed anymore. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_purge(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement place = {}; + int ret; - if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { - obj->write_domain = I915_GEM_DOMAIN_WC; - obj->read_domains = I915_GEM_DOMAIN_WC; - } else { - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - } -} + if (obj->mm.madv == __I915_MADV_PURGED) + return 0; -static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) -{ - struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - unsigned int cache_level; - unsigned int i; + ret = ttm_bo_validate(bo, &place, &ctx); + if (ret) + return ret; - /* - * If object was moved to an allowable region, update the object - * region to consider it migrated. Note that if it's currently not - * in an allowable region, it's evicted and we don't update the - * object region. - */ - if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { - for (i = 0; i < obj->mm.n_placements; ++i) { - struct intel_memory_region *mr = obj->mm.placements[i]; - - if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && - mr != obj->mm.region) { - i915_gem_object_release_memory_region(obj); - i915_gem_object_init_memory_region(obj, mr); - break; - } - } + if (bo->ttm && i915_tt->filp) { + /* + * The below fput(which eventually calls shmem_truncate) might + * be delayed by worker, so when directly called to purge the + * pages(like by the shrinker) we should try to be more + * aggressive and release the pages immediately. + */ + shmem_truncate_range(file_inode(i915_tt->filp), + 0, (loff_t)-1); + fput(fetch_and_zero(&i915_tt->filp)); } - obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - - obj->mem_flags |= cpu_maps_iomem(bo->resource) ? 
I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); + i915_ttm_free_cached_io_rsgt(obj); + obj->mm.madv = __I915_MADV_PURGED; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + return 0; } -static void i915_ttm_purge(struct drm_i915_gem_object *obj) +static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj, + bool no_wait_gpu, + bool should_writeback) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); struct ttm_operation_ctx ctx = { .interruptible = true, - .no_wait_gpu = false, + .no_wait_gpu = no_wait_gpu, }; struct ttm_placement place = {}; int ret; - if (obj->mm.madv == __I915_MADV_PURGED) - return; + if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM) + return 0; + + GEM_BUG_ON(!i915_tt->is_shmem); + + if (!i915_tt->filp) + return 0; + + ret = ttm_bo_wait_ctx(bo, &ctx); + if (ret) + return ret; - /* TTM's purge interface. Note that we might be reentering. */ + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + return i915_ttm_purge(obj); + case __I915_MADV_PURGED: + return 0; + } + + if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) + return 0; + + bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED; ret = ttm_bo_validate(bo, &place, &ctx); - if (!ret) { - obj->write_domain = 0; - obj->read_domains = 0; - i915_ttm_adjust_gem_after_move(obj); - i915_ttm_free_cached_io_st(obj); - obj->mm.madv = __I915_MADV_PURGED; + if (ret) { + bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED; + return ret; } -} -static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) -{ - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - int ret = i915_ttm_move_notify(bo); + if (should_writeback) + __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping); - GEM_WARN_ON(ret); - GEM_WARN_ON(obj->ttm.cached_io_st); - if (!ret && obj->mm.madv != I915_MADV_WILLNEED) - i915_ttm_purge(obj); + return 0; } static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) @@ -369,232 +479,101 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) if (likely(obj)) { __i915_gem_object_pages_fini(obj); - i915_ttm_free_cached_io_st(obj); + i915_ttm_free_cached_io_rsgt(obj); } } -static struct intel_memory_region * -i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) -{ - struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); - - /* There's some room for optimization here... 
*/ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); - if (ttm_mem_type == I915_PL_SYSTEM) - return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, - 0); - - return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, - ttm_mem_type - I915_PL_LMEM0); -} - -static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm) +static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); struct sg_table *st; int ret; - if (i915_tt->cached_st) - return i915_tt->cached_st; - - st = kzalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); + if (i915_tt->cached_rsgt.table.sgl) + return i915_refct_sgt_get(&i915_tt->cached_rsgt); + st = &i915_tt->cached_rsgt.table; ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, i915_sg_segment_size(), GFP_KERNEL); if (ret) { - kfree(st); + st->sgl = NULL; return ERR_PTR(ret); } ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); if (ret) { sg_free_table(st); - kfree(st); return ERR_PTR(ret); } - i915_tt->cached_st = st; - return st; + return i915_refct_sgt_get(&i915_tt->cached_rsgt); } -static struct sg_table * +/** + * i915_ttm_resource_get_st - Get a refcounted sg-table pointing to the + * resource memory + * @obj: The GEM object used for sg-table caching + * @res: The struct ttm_resource for which an sg-table is requested. + * + * This function returns a refcounted sg-table representing the memory + * pointed to by @res. If @res is the object's current resource it may also + * cache the sg_table on the object or attempt to access an already cached + * sg-table. The refcounted sg-table needs to be put when no-longer in use. + * + * Return: A valid pointer to a struct i915_refct_sgt or error pointer on + * failure. + */ +struct i915_refct_sgt * i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - if (!gpu_binds_iomem(res)) + if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. 
*/ - GEM_WARN_ON(!cpu_maps_iomem(res)); - return intel_region_ttm_resource_to_st(obj->mm.region, res); -} - -static int i915_ttm_accel_move(struct ttm_buffer_object *bo, - bool clear, - struct ttm_resource *dst_mem, - struct ttm_tt *dst_ttm, - struct sg_table *dst_st) -{ - struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), - bdev); - struct ttm_resource_manager *src_man = - ttm_manager_type(bo->bdev, bo->resource->mem_type); - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct sg_table *src_st; - struct i915_request *rq; - struct ttm_tt *src_ttm = bo->ttm; - enum i915_cache_level src_level, dst_level; - int ret; - - if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt)) - return -EINVAL; + GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(res)); + if (bo->resource == res) { + if (!obj->ttm.cached_io_rsgt) { + struct i915_refct_sgt *rsgt; - dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); - if (clear) { - if (bo->type == ttm_bo_type_kernel) - return -EINVAL; + rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, + res); + if (IS_ERR(rsgt)) + return rsgt; - intel_engine_pm_get(i915->gt.migrate.context->engine); - ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, - dst_st->sgl, dst_level, - gpu_binds_iomem(dst_mem), - 0, &rq); - - if (!ret && rq) { - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); - } - intel_engine_pm_put(i915->gt.migrate.context->engine); - } else { - src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) : - obj->ttm.cached_io_st; - - src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); - intel_engine_pm_get(i915->gt.migrate.context->engine); - ret = intel_context_migrate_copy(i915->gt.migrate.context, - NULL, src_st->sgl, src_level, - gpu_binds_iomem(bo->resource), - dst_st->sgl, dst_level, - gpu_binds_iomem(dst_mem), - &rq); - if (!ret && rq) { - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + obj->ttm.cached_io_rsgt = rsgt; } - intel_engine_pm_put(i915->gt.migrate.context->engine); + return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return ret; -} - -static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, - struct ttm_resource *dst_mem, - struct ttm_tt *dst_ttm, - struct sg_table *dst_st, - bool allow_accel) -{ - int ret = -EINVAL; - - if (allow_accel) - ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st); - if (ret) { - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct intel_memory_region *dst_reg, *src_reg; - union { - struct ttm_kmap_iter_tt tt; - struct ttm_kmap_iter_iomap io; - } _dst_iter, _src_iter; - struct ttm_kmap_iter *dst_iter, *src_iter; - - dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); - src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); - GEM_BUG_ON(!dst_reg || !src_reg); - - dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); - - src_iter = !cpu_maps_iomem(bo->resource) ? 
- ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); - - ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); - } + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); } -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, - struct ttm_operation_ctx *ctx, - struct ttm_resource *dst_mem, - struct ttm_place *hop) +static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct ttm_resource_manager *dst_man = - ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct ttm_tt *ttm = bo->ttm; - struct sg_table *dst_st; - bool clear; int ret; - /* Sync for now. We could do the actual copy async. */ - ret = ttm_bo_wait_ctx(bo, ctx); - if (ret) - return ret; + if (!obj) + return; ret = i915_ttm_move_notify(bo); - if (ret) - return ret; - - if (obj->mm.madv != I915_MADV_WILLNEED) { + GEM_WARN_ON(ret); + GEM_WARN_ON(obj->ttm.cached_io_rsgt); + if (!ret && obj->mm.madv != I915_MADV_WILLNEED) i915_ttm_purge(obj); - ttm_resource_free(bo, &dst_mem); - return 0; - } - - /* Populate ttm with pages if needed. Typically system memory. */ - if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, ttm, ctx); - if (ret) - return ret; - } - - dst_st = i915_ttm_resource_get_st(obj, dst_mem); - if (IS_ERR(dst_st)) - return PTR_ERR(dst_st); - - clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) - __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true); - - ttm_bo_move_sync_cleanup(bo, dst_mem); - i915_ttm_adjust_domains_after_move(obj); - i915_ttm_free_cached_io_st(obj); - - if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { - obj->ttm.cached_io_st = dst_st; - obj->ttm.get_io_page.sg_pos = dst_st->sgl; - obj->ttm.get_io_page.sg_idx = 0; - } - - i915_ttm_adjust_gem_after_move(obj); - return 0; } static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { - if (!cpu_maps_iomem(mem)) + if (!i915_ttm_cpu_maps_iomem(mem)) return 0; mem->bus.caching = ttm_write_combined; @@ -607,19 +586,26 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start; struct scatterlist *sg; + unsigned long base; unsigned int ofs; + GEM_BUG_ON(!obj); GEM_WARN_ON(bo->ttm); + base = obj->mm.region->iomap.base - obj->mm.region->region.start; sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; } +/* + * All callbacks need to take care not to downcast a struct ttm_buffer_object + * without checking its subclass, since it might be a TTM ghost object. 
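A minimal sketch of the guard this comment asks for, assuming a hypothetical callback name; i915_ttm_to_gem() returning NULL for ghost objects is what this patch introduces:

static void example_ttm_callback(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

	/*
	 * A TTM ghost object has a different destroy callback, so
	 * i915_ttm_to_gem() returns NULL for it and we bail out early
	 * instead of downcasting blindly.
	 */
	if (!obj)
		return;

	/* ... the object can be treated as a GEM object from here on ... */
}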
+ */ static struct ttm_device_funcs i915_ttm_bo_driver = { .ttm_tt_create = i915_ttm_tt_create, + .ttm_tt_populate = i915_ttm_tt_populate, .ttm_tt_unpopulate = i915_ttm_tt_unpopulate, .ttm_tt_destroy = i915_ttm_tt_destroy, .eviction_valuable = i915_ttm_eviction_valuable, @@ -649,7 +635,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - struct sg_table *st; int real_num_busy; int ret; @@ -676,7 +661,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, return i915_ttm_err_to_gem(ret); } - i915_ttm_adjust_lru(obj); if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); if (ret) @@ -687,14 +671,19 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, } if (!i915_gem_object_has_pages(obj)) { - /* Object either has a page vector or is an iomem object */ - st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; - if (IS_ERR(st)) - return PTR_ERR(st); + struct i915_refct_sgt *rsgt = + i915_ttm_resource_get_st(obj, bo->resource); + + if (IS_ERR(rsgt)) + return PTR_ERR(rsgt); - __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + GEM_BUG_ON(obj->mm.rsgt); + obj->mm.rsgt = rsgt; + __i915_gem_object_set_pages(obj, &rsgt->table, + i915_sg_dma_sizes(rsgt->table.sgl)); } + i915_ttm_adjust_lru(obj); return ret; } @@ -766,12 +755,21 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, * and shrinkers will move it out if needed. */ - i915_ttm_adjust_lru(obj); + if (obj->mm.rsgt) + i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt)); } -static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) +/** + * i915_ttm_adjust_lru - Adjust an object's position on relevant LRU lists. + * @obj: The object + */ +void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); + bool shrinkable = + bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm); /* * Don't manipulate the TTM LRUs while in TTM bo destruction. @@ -781,10 +779,53 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) return; /* + * We skip managing the shrinker LRU in set_pages() and just manage + * everything here. This does at least solve the issue with having + * temporary shmem mappings(like with evicted lmem) not being visible to + * the shrinker. Only our shmem objects are shrinkable, everything else + * we keep as unshrinkable. + * + * To make sure everything plays nice we keep an extra shrink pin in TTM + * if the underlying pages are not currently shrinkable. Once we release + * our pin, like when the pages are moved to shmem, the pages will then + * be added to the shrinker LRU, assuming the caller isn't also holding + * a pin. + * + * TODO: consider maybe also bumping the shrinker list here when we have + * already unpinned it, which should give us something more like an LRU. + * + * TODO: There is a small window of opportunity for this function to + * get called from eviction after we've dropped the last GEM refcount, + * but before the TTM deleted flag is set on the object. Avoid + * adjusting the shrinker list in such cases, since the object is + * not available to the shrinker anyway due to its zero refcount. + * To fix this properly we should move to a TTM shrinker LRU list for + * these objects. 
+ */ + if (kref_get_unless_zero(&obj->base.refcount)) { + if (shrinkable != obj->mm.ttm_shrinkable) { + if (shrinkable) { + if (obj->mm.madv == I915_MADV_WILLNEED) + __i915_gem_object_make_shrinkable(obj); + else + __i915_gem_object_make_purgeable(obj); + } else { + i915_gem_object_make_unshrinkable(obj); + } + + obj->mm.ttm_shrinkable = shrinkable; + } + i915_gem_object_put(obj); + } + + /* * Put on the correct LRU list depending on the MADV status */ spin_lock(&bo->bdev->lru_lock); - if (obj->mm.madv != I915_MADV_WILLNEED) { + if (shrinkable) { + /* Try to keep shmem_tt from being considered for shrinking. */ + bo->priority = TTM_MAX_BO_PRIORITY - 1; + } else if (obj->mm.madv != I915_MADV_WILLNEED) { bo->priority = I915_TTM_PRIO_PURGE; } else if (!i915_gem_object_has_pages(obj)) { if (bo->priority < I915_TTM_PRIO_HAS_PAGES) @@ -823,15 +864,39 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) { struct vm_area_struct *area = vmf->vma; - struct drm_i915_gem_object *obj = - i915_ttm_to_gem(area->vm_private_data); + struct ttm_buffer_object *bo = area->vm_private_data; + struct drm_device *dev = bo->base.dev; + struct drm_i915_gem_object *obj; + vm_fault_t ret; + int idx; + + obj = i915_ttm_to_gem(bo); + if (!obj) + return VM_FAULT_SIGBUS; /* Sanity check that we allow writing into this object */ if (unlikely(i915_gem_object_is_readonly(obj) && area->vm_flags & VM_WRITE)) return VM_FAULT_SIGBUS; - return ttm_bo_vm_fault(vmf); + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + if (drm_dev_enter(dev, &idx)) { + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + drm_dev_exit(idx); + } else { + ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + i915_ttm_adjust_lru(obj); + + dma_resv_unlock(bo->base.resv); + return ret; } static int @@ -882,13 +947,18 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST, .get_pages = i915_ttm_get_pages, .put_pages = i915_ttm_put_pages, .truncate = i915_ttm_purge, + .shrinker_release_pages = i915_ttm_shrinker_release_pages, + .adjust_lru = i915_ttm_adjust_lru, .delayed_free = i915_ttm_delayed_free, .migrate = i915_ttm_migrate, + .mmap_offset = i915_ttm_mmap_offset, .mmap_ops = &vm_ops_ttm, }; @@ -901,6 +971,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) mutex_destroy(&obj->ttm.get_io_page.lock); if (obj->ttm.created) { + /* + * We freely manage the shrinker LRU outide of the mm.pages life + * cycle. As a result when destroying the object we should be + * extra paranoid and ensure we remove it from the LRU, before + * we free the object. + * + * Touching the ttm_shrinkable outside of the object lock here + * should be safe now that the last GEM object ref was dropped. + */ + if (obj->mm.ttm_shrinkable) + i915_gem_object_make_unshrinkable(obj); + i915_ttm_backup_free(obj); /* This releases all gem object bindings to the backend. */ @@ -940,10 +1022,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); /* Don't put on a region list until we're either locked or fully initialized. 
*/ - obj->mm.region = intel_memory_region_get(mem); + obj->mm.region = mem; INIT_LIST_HEAD(&obj->mm.region_link); - i915_gem_object_make_unshrinkable(obj); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : @@ -955,6 +1036,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, GEM_BUG_ON(page_size && obj->mm.n_placements); /* + * Keep an extra shrink pin to prevent the object from being made + * shrinkable too early. If the ttm_tt is ever allocated in shmem, we + * drop the pin. The TTM backend manages the shrinker LRU itself, + * outside of the normal mm.pages life cycle. + */ + i915_gem_object_make_unshrinkable(obj); + + /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the * destructor until obj->ttm.created is true. @@ -980,6 +1069,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, static const struct intel_memory_region_ops ttm_system_region_ops = { .init_object = __i915_gem_ttm_object_init, + .release = intel_region_ttm_fini, }; struct intel_memory_region * @@ -999,50 +1089,3 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915, intel_memory_region_set_name(mr, "system-ttm"); return mr; } - -/** - * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to - * another - * @dst: The destination object - * @src: The source object - * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used. - * @intr: Whether to perform waits interruptible: - * - * Note: The caller is responsible for assuring that the underlying - * TTM objects are populated if needed and locked. - * - * Return: Zero on success. Negative error code on error. If @intr == true, - * then it may return -ERESTARTSYS or -EINTR. - */ -int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, - struct drm_i915_gem_object *src, - bool allow_accel, bool intr) -{ - struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst); - struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src); - struct ttm_operation_ctx ctx = { - .interruptible = intr, - }; - struct sg_table *dst_st; - int ret; - - assert_object_held(dst); - assert_object_held(src); - - /* - * Sync for now. This will change with async moves. - */ - ret = ttm_bo_wait_ctx(dst_bo, &ctx); - if (!ret) - ret = ttm_bo_wait_ctx(src_bo, &ctx); - if (ret) - return ret; - - dst_st = gpu_binds_iomem(dst_bo->resource) ? 
- dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm); - - __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm, - dst_st, allow_accel); - - return 0; -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 0b7291dd897c..9d698ad00853 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -5,6 +5,8 @@ #ifndef _I915_GEM_TTM_H_ #define _I915_GEM_TTM_H_ +#include <drm/ttm/ttm_placement.h> + #include "gem/i915_gem_object_types.h" /** @@ -35,7 +37,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy)) + if (bo->destroy != i915_ttm_bo_destroy) return NULL; return container_of(bo, struct drm_i915_gem_object, __do_not_access); @@ -47,10 +49,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, resource_size_t page_size, unsigned int flags); -int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, - struct drm_i915_gem_object *src, - bool allow_accel, bool intr); - /* Internal I915 TTM declarations and definitions below. */ #define I915_PL_LMEM0 TTM_PL_PRIV @@ -60,4 +58,37 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, struct ttm_placement *i915_ttm_sys_placement(void); +void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj); + +struct i915_refct_sgt * +i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, + struct ttm_resource *res); + +void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); + +int i915_ttm_purge(struct drm_i915_gem_object *obj); + +/** + * i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT? + * @mem: struct ttm_resource representing the memory. + * + * Return: true if memory should be viewed as LMEM for GTT binding purposes, + * false otherwise. + */ +static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem) +{ + return mem->mem_type != I915_PL_SYSTEM; +} + +/** + * i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU? + * @mem: struct ttm_resource representing the memory. + * + * Return: true if memory should be viewed as IOMEM for CPU mapping purposes. + */ +static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != I915_PL_SYSTEM; +} #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c new file mode 100644 index 000000000000..80df9f592407 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -0,0 +1,874 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/dma-fence-array.h> + +#include <drm/ttm/ttm_bo_driver.h> + +#include "i915_drv.h" +#include "intel_memory_region.h" +#include "intel_region_ttm.h" + +#include "gem/i915_gem_object.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" + +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_migrate.h" + +/** + * DOC: Selftest failure modes for failsafe migration: + * + * For fail_gpu_migration, the gpu blit scheduled is always a clear blit + * rather than a copy blit, and then we force the failure paths as if + * the blit fence returned an error. + * + * For fail_work_allocation we fail the kmalloc of the async worker, we + * sync the gpu blit. 
If it then fails, or fail_gpu_migration is set to + * true, then a memcpy operation is performed synchronously. + */ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +static bool fail_gpu_migration; +static bool fail_work_allocation; + +void i915_ttm_migrate_set_failure_modes(bool gpu_migration, + bool work_allocation) +{ + fail_gpu_migration = gpu_migration; + fail_work_allocation = work_allocation; +} +#endif + +/** + * DOC: Set of utilities to dynamically collect dependencies and + * eventually coalesce them into a single fence which is fed into + * the GT migration code, since it only accepts a single dependency + * fence. + * The single fence returned from these utilities is, in the case of + * dependencies from multiple fence contexts, a struct dma_fence_array, + * since the i915 request code can break that up and await the individual + * fences. + * + * Once we can do async unbinding, this is also needed to coalesce + * the migration fence with the unbind fences. + * + * While collecting the individual dependencies, we store the refcounted + * struct dma_fence pointers in a realloc-managed pointer array, since + * that can be easily fed into a dma_fence_array. Other options are + * available, like for example an xarray for similarity with drm/sched. + * Can be changed easily if needed. + * + * A struct i915_deps needs to be initialized using i915_deps_init(). + * If i915_deps_add_dependency() or i915_deps_add_resv() returns an + * error code, it will internally call i915_deps_fini(), which frees + * all internal references and allocations. After a call to + * i915_deps_to_fence(), or i915_deps_sync(), the struct should similarly + * be viewed as uninitialized. + * + * We might want to break this out into a separate file as a utility. + */ + +#define I915_DEPS_MIN_ALLOC_CHUNK 8U + +/** + * struct i915_deps - Collect dependencies into a single dma-fence + * @single: Storage for pointer if the collection is a single fence. + * @fences: Allocated array of fence pointers if more than a single fence; + * otherwise points to the address of @single. + * @num_deps: Current number of dependency fences. + * @fences_size: Size of the @fences array in number of pointers. + * @gfp: Allocation mode. 
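As a usage sketch of the interface documented above (collect_deps() is a made-up name; the body mirrors the prev_fence() helper added further down in this file):

static struct dma_fence *collect_deps(struct ttm_buffer_object *bo,
				      struct ttm_operation_ctx *ctx)
{
	struct i915_deps deps;
	int err;

	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

	/* An explicit dependency, here the pending move fence, if any... */
	err = i915_deps_add_dependency(&deps, bo->moving, ctx);
	/* ...plus all fences tracked in the reservation object. */
	if (!err)
		err = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx);
	if (err)
		return ERR_PTR(err);	/* deps has already been torn down */

	/* NULL, the single dependency, or a struct dma_fence_array. */
	return i915_deps_to_fence(&deps, ctx);
}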
+ */ +struct i915_deps { + struct dma_fence *single; + struct dma_fence **fences; + unsigned int num_deps; + unsigned int fences_size; + gfp_t gfp; +}; + +static void i915_deps_reset_fences(struct i915_deps *deps) +{ + if (deps->fences != &deps->single) + kfree(deps->fences); + deps->num_deps = 0; + deps->fences_size = 1; + deps->fences = &deps->single; +} + +static void i915_deps_init(struct i915_deps *deps, gfp_t gfp) +{ + deps->fences = NULL; + deps->gfp = gfp; + i915_deps_reset_fences(deps); +} + +static void i915_deps_fini(struct i915_deps *deps) +{ + unsigned int i; + + for (i = 0; i < deps->num_deps; ++i) + dma_fence_put(deps->fences[i]); + + if (deps->fences != &deps->single) + kfree(deps->fences); +} + +static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence, + const struct ttm_operation_ctx *ctx) +{ + int ret; + + if (deps->num_deps >= deps->fences_size) { + unsigned int new_size = 2 * deps->fences_size; + struct dma_fence **new_fences; + + new_size = max(new_size, I915_DEPS_MIN_ALLOC_CHUNK); + new_fences = kmalloc_array(new_size, sizeof(*new_fences), deps->gfp); + if (!new_fences) + goto sync; + + memcpy(new_fences, deps->fences, + deps->fences_size * sizeof(*new_fences)); + swap(new_fences, deps->fences); + if (new_fences != &deps->single) + kfree(new_fences); + deps->fences_size = new_size; + } + deps->fences[deps->num_deps++] = dma_fence_get(fence); + return 0; + +sync: + if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence)) { + ret = -EBUSY; + goto unref; + } + + ret = dma_fence_wait(fence, ctx->interruptible); + if (ret) + goto unref; + + ret = fence->error; + if (ret) + goto unref; + + return 0; + +unref: + i915_deps_fini(deps); + return ret; +} + +static int i915_deps_sync(struct i915_deps *deps, + const struct ttm_operation_ctx *ctx) +{ + struct dma_fence **fences = deps->fences; + unsigned int i; + int ret = 0; + + for (i = 0; i < deps->num_deps; ++i, ++fences) { + if (ctx->no_wait_gpu && !dma_fence_is_signaled(*fences)) { + ret = -EBUSY; + break; + } + + ret = dma_fence_wait(*fences, ctx->interruptible); + if (!ret) + ret = (*fences)->error; + if (ret) + break; + } + + i915_deps_fini(deps); + return ret; +} + +static int i915_deps_add_dependency(struct i915_deps *deps, + struct dma_fence *fence, + const struct ttm_operation_ctx *ctx) +{ + unsigned int i; + int ret; + + if (!fence) + return 0; + + if (dma_fence_is_signaled(fence)) { + ret = fence->error; + if (ret) + i915_deps_fini(deps); + return ret; + } + + for (i = 0; i < deps->num_deps; ++i) { + struct dma_fence *entry = deps->fences[i]; + + if (!entry->context || entry->context != fence->context) + continue; + + if (dma_fence_is_later(fence, entry)) { + dma_fence_put(entry); + deps->fences[i] = dma_fence_get(fence); + } + + return 0; + } + + return i915_deps_grow(deps, fence, ctx); +} + +static struct dma_fence *i915_deps_to_fence(struct i915_deps *deps, + const struct ttm_operation_ctx *ctx) +{ + struct dma_fence_array *array; + + if (deps->num_deps == 0) + return NULL; + + if (deps->num_deps == 1) { + deps->num_deps = 0; + return deps->fences[0]; + } + + /* + * TODO: Alter the allocation mode here to not try too hard to + * make things async. 
+ */ + array = dma_fence_array_create(deps->num_deps, deps->fences, 0, 0, + false); + if (!array) + return ERR_PTR(i915_deps_sync(deps, ctx)); + + deps->fences = NULL; + i915_deps_reset_fences(deps); + + return &array->base; +} + +static int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv, + bool all, const bool no_excl, + const struct ttm_operation_ctx *ctx) +{ + struct dma_resv_iter iter; + struct dma_fence *fence; + + dma_resv_assert_held(resv); + dma_resv_for_each_fence(&iter, resv, all, fence) { + int ret; + + if (no_excl && dma_resv_iter_is_exclusive(&iter)) + continue; + + ret = i915_deps_add_dependency(deps, fence, ctx); + if (ret) + return ret; + } + + return 0; +} + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + !i915_ttm_gtt_binds_lmem(res) && + ttm->caching == ttm_cached) ? I915_CACHE_LLC : + I915_CACHE_NONE; +} + +static struct intel_memory_region * +i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + + /* There's some room for optimization here... */ + GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && + ttm_mem_type < I915_PL_LMEM0); + if (ttm_mem_type == I915_PL_SYSTEM) + return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, + 0); + + return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, + ttm_mem_type - I915_PL_LMEM0); +} + +/** + * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a + * TTM move + * @obj: The gem object + */ +void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +/** + * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move + * @obj: The gem object + * + * Adjusts the GEM object's region, mem_flags and cache coherency after a + * TTM move. + */ +void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + +/** + * i915_ttm_move_notify - Prepare an object for move + * @bo: The ttm buffer object. 
+ * + * This function prepares an object for move by removing all GPU bindings, + * removing all CPU mapings and finally releasing the pages sg-table. + * + * Return: 0 if successful, negative error code on error. + */ +int i915_ttm_move_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret; + + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (ret) + return ret; + + ret = __i915_gem_object_put_pages(obj); + if (ret) + return ret; + + return 0; +} + +static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, + struct ttm_resource *dst_mem, + struct ttm_tt *dst_ttm, + struct sg_table *dst_st, + struct dma_fence *dep) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct i915_request *rq; + struct ttm_tt *src_ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; + int ret; + + if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt)) + return ERR_PTR(-EINVAL); + + /* With fail_gpu_migration, we always perform a GPU clear. */ + if (I915_SELFTEST_ONLY(fail_gpu_migration)) + clear = true; + + dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); + if (clear) { + if (bo->type == ttm_bo_type_kernel && + !I915_SELFTEST_ONLY(fail_gpu_migration)) + return ERR_PTR(-EINVAL); + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_clear(i915->gt.migrate.context, dep, + dst_st->sgl, dst_level, + i915_ttm_gtt_binds_lmem(dst_mem), + 0, &rq); + } else { + struct i915_refct_sgt *src_rsgt = + i915_ttm_resource_get_st(obj, bo->resource); + + if (IS_ERR(src_rsgt)) + return ERR_CAST(src_rsgt); + + src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_copy(i915->gt.migrate.context, + dep, src_rsgt->table.sgl, + src_level, + i915_ttm_gtt_binds_lmem(bo->resource), + dst_st->sgl, dst_level, + i915_ttm_gtt_binds_lmem(dst_mem), + &rq); + + i915_refct_sgt_put(src_rsgt); + } + + intel_engine_pm_put(i915->gt.migrate.context->engine); + + if (ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + + return ret ? ERR_PTR(ret) : &rq->fence; +} + +/** + * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality. + * @_dst_iter: Storage space for the destination kmap iterator. + * @_src_iter: Storage space for the source kmap iterator. + * @dst_iter: Pointer to the destination kmap iterator. + * @src_iter: Pointer to the source kmap iterator. + * @clear: Whether to clear instead of copy. + * @src_rsgt: Refcounted scatter-gather list of source memory. + * @dst_rsgt: Refcounted scatter-gather list of destination memory. + */ +struct i915_ttm_memcpy_arg { + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, + _src_iter; + struct ttm_kmap_iter *dst_iter; + struct ttm_kmap_iter *src_iter; + unsigned long num_pages; + bool clear; + struct i915_refct_sgt *src_rsgt; + struct i915_refct_sgt *dst_rsgt; +}; + +/** + * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence. + * @fence: The dma-fence. + * @work: The work struct use for the memcpy work. + * @lock: The fence lock. Not used to protect anything else ATM. + * @irq_work: Low latency worker to signal the fence since it can't be done + * from the callback for lockdep reasons. + * @cb: Callback for the accelerated migration fence. 
+ * @arg: The argument for the memcpy functionality. + */ +struct i915_ttm_memcpy_work { + struct dma_fence fence; + struct work_struct work; + /* The fence lock */ + spinlock_t lock; + struct irq_work irq_work; + struct dma_fence_cb cb; + struct i915_ttm_memcpy_arg arg; +}; + +static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg) +{ + ttm_move_memcpy(arg->clear, arg->num_pages, + arg->dst_iter, arg->src_iter); +} + +static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg, + struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct ttm_tt *dst_ttm, + struct i915_refct_sgt *dst_rsgt) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct intel_memory_region *dst_reg, *src_reg; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) : + ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap, + &dst_rsgt->table, dst_reg->region.start); + + arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap, + &obj->ttm.cached_io_rsgt->table, + src_reg->region.start); + arg->clear = clear; + arg->num_pages = bo->base.size >> PAGE_SHIFT; + + arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt); + arg->src_rsgt = clear ? NULL : + i915_ttm_resource_get_st(obj, bo->resource); +} + +static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg) +{ + i915_refct_sgt_put(arg->src_rsgt); + i915_refct_sgt_put(arg->dst_rsgt); +} + +static void __memcpy_work(struct work_struct *work) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(work, typeof(*copy_work), work); + struct i915_ttm_memcpy_arg *arg = ©_work->arg; + bool cookie = dma_fence_begin_signalling(); + + i915_ttm_move_memcpy(arg); + dma_fence_end_signalling(cookie); + + dma_fence_signal(©_work->fence); + + i915_ttm_memcpy_release(arg); + dma_fence_put(©_work->fence); +} + +static void __memcpy_irq_work(struct irq_work *irq_work) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(irq_work, typeof(*copy_work), irq_work); + struct i915_ttm_memcpy_arg *arg = ©_work->arg; + + dma_fence_signal(©_work->fence); + i915_ttm_memcpy_release(arg); + dma_fence_put(©_work->fence); +} + +static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(cb, typeof(*copy_work), cb); + + if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) { + INIT_WORK(©_work->work, __memcpy_work); + queue_work(system_unbound_wq, ©_work->work); + } else { + init_irq_work(©_work->irq_work, __memcpy_irq_work); + irq_work_queue(©_work->irq_work); + } +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "i915_ttm_memcpy_work"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static const struct dma_fence_ops dma_fence_memcpy_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, +}; + +static struct dma_fence * +i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work, + struct dma_fence *dep) +{ + int ret; + + spin_lock_init(&work->lock); + dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0); + dma_fence_get(&work->fence); + ret = dma_fence_add_callback(dep, &work->cb, 
__memcpy_cb); + if (ret) { + if (ret != -ENOENT) + dma_fence_wait(dep, false); + + return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL : + dep->error); + } + + return &work->fence; +} + +static struct dma_fence * +__i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm, + struct i915_refct_sgt *dst_rsgt, bool allow_accel, + struct dma_fence *move_dep) +{ + struct i915_ttm_memcpy_work *copy_work = NULL; + struct i915_ttm_memcpy_arg _arg, *arg = &_arg; + struct dma_fence *fence = ERR_PTR(-EINVAL); + + if (allow_accel) { + fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, + &dst_rsgt->table, move_dep); + + /* + * We only need to intercept the error when moving to lmem. + * When moving to system, TTM or shmem will provide us with + * cleared pages. + */ + if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) && + !I915_SELFTEST_ONLY(fail_gpu_migration || + fail_work_allocation)) + goto out; + } + + /* If we've scheduled gpu migration. Try to arm error intercept. */ + if (!IS_ERR(fence)) { + struct dma_fence *dep = fence; + + if (!I915_SELFTEST_ONLY(fail_work_allocation)) + copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL); + + if (copy_work) { + arg = ©_work->arg; + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, + dst_rsgt); + fence = i915_ttm_memcpy_work_arm(copy_work, dep); + } else { + dma_fence_wait(dep, false); + fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? + -EINVAL : fence->error); + } + dma_fence_put(dep); + + if (!IS_ERR(fence)) + goto out; + } else if (move_dep) { + int err = dma_fence_wait(move_dep, true); + + if (err) + return ERR_PTR(err); + } + + /* Error intercept failed or no accelerated migration to start with */ + if (!copy_work) + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, + dst_rsgt); + i915_ttm_move_memcpy(arg); + i915_ttm_memcpy_release(arg); + kfree(copy_work); + + return NULL; +out: + if (!fence && copy_work) { + i915_ttm_memcpy_release(arg); + kfree(copy_work); + } + + return fence; +} + +static struct dma_fence *prev_fence(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx) +{ + struct i915_deps deps; + int ret; + + /* + * Instead of trying hard with GFP_KERNEL to allocate memory, + * the dependency collection will just sync if it doesn't + * succeed. + */ + i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + ret = i915_deps_add_dependency(&deps, bo->moving, ctx); + if (!ret) + /* + * TODO: Only await excl fence here, and shared fences before + * signaling the migration fence. + */ + ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx); + if (ret) + return ERR_PTR(ret); + + return i915_deps_to_fence(&deps, ctx); +} + +/** + * i915_ttm_move - The TTM move callback used by i915. + * @bo: The buffer object. + * @evict: Whether this is an eviction. + * @dst_mem: The destination ttm resource. + * @hop: If we need multihop, what temporary memory type to move to. + * + * Return: 0 if successful, negative error code otherwise. 
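For reference, the failsafe paths described in the DOC comment at the top of this file are exercised by flipping the failure modes around an ordinary migration run, roughly as the igt_lmem_pages_failsafe_migrate() selftest added later in this patch does (sketch only; arg is the selftest argument as in the real test, error handling trimmed):

	int fail_gpu, fail_alloc, ret;

	for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
		for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
			i915_ttm_migrate_set_failure_modes(fail_gpu, fail_alloc);
			ret = igt_lmem_pages_migrate(arg);
			if (ret)
				break;
		}
	}

	/* Always restore normal operation when done. */
	i915_ttm_migrate_set_failure_modes(false, false);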
+ */ +int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct dma_fence *migration_fence = NULL; + struct ttm_tt *ttm = bo->ttm; + struct i915_refct_sgt *dst_rsgt; + bool clear; + int ret; + + if (GEM_WARN_ON(!obj)) { + ttm_bo_move_null(bo, dst_mem); + return 0; + } + + ret = i915_ttm_move_notify(bo); + if (ret) + return ret; + + if (obj->mm.madv != I915_MADV_WILLNEED) { + i915_ttm_purge(obj); + ttm_resource_free(bo, &dst_mem); + return 0; + } + + /* Populate ttm with pages if needed. Typically system memory. */ + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); + if (ret) + return ret; + } + + dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem); + if (IS_ERR(dst_rsgt)) + return PTR_ERR(dst_rsgt); + + clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + struct dma_fence *dep = prev_fence(bo, ctx); + + if (IS_ERR(dep)) { + i915_refct_sgt_put(dst_rsgt); + return PTR_ERR(dep); + } + + migration_fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm, + dst_rsgt, true, dep); + dma_fence_put(dep); + } + + /* We can possibly get an -ERESTARTSYS here */ + if (IS_ERR(migration_fence)) { + i915_refct_sgt_put(dst_rsgt); + return PTR_ERR(migration_fence); + } + + if (migration_fence) { + ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict, + true, dst_mem); + if (ret) { + dma_fence_wait(migration_fence, false); + ttm_bo_move_sync_cleanup(bo, dst_mem); + } + dma_fence_put(migration_fence); + } else { + ttm_bo_move_sync_cleanup(bo, dst_mem); + } + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_free_cached_io_rsgt(obj); + + if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) { + obj->ttm.cached_io_rsgt = dst_rsgt; + obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl; + obj->ttm.get_io_page.sg_idx = 0; + } else { + i915_refct_sgt_put(dst_rsgt); + } + + i915_ttm_adjust_lru(obj); + i915_ttm_adjust_gem_after_move(obj); + return 0; +} + +/** + * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to + * another + * @dst: The destination object + * @src: The source object + * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used. + * @intr: Whether to perform waits interruptible: + * + * Note: The caller is responsible for assuring that the underlying + * TTM objects are populated if needed and locked. + * + * Return: Zero on success. Negative error code on error. If @intr == true, + * then it may return -ERESTARTSYS or -EINTR. + */ +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr) +{ + struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst); + struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src); + struct ttm_operation_ctx ctx = { + .interruptible = intr, + }; + struct i915_refct_sgt *dst_rsgt; + struct dma_fence *copy_fence, *dep_fence; + struct i915_deps deps; + int ret, shared_err; + + assert_object_held(dst); + assert_object_held(src); + i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + + /* + * We plan to add a shared fence only for the source. 
If that + * fails, we await all source fences before commencing + * the copy instead of only the exclusive. + */ + shared_err = dma_resv_reserve_shared(src_bo->base.resv, 1); + ret = i915_deps_add_resv(&deps, dst_bo->base.resv, true, false, &ctx); + if (!ret) + ret = i915_deps_add_resv(&deps, src_bo->base.resv, + !!shared_err, false, &ctx); + if (ret) + return ret; + + dep_fence = i915_deps_to_fence(&deps, &ctx); + if (IS_ERR(dep_fence)) + return PTR_ERR(dep_fence); + + dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource); + copy_fence = __i915_ttm_move(src_bo, false, dst_bo->resource, + dst_bo->ttm, dst_rsgt, allow_accel, + dep_fence); + + i915_refct_sgt_put(dst_rsgt); + if (IS_ERR_OR_NULL(copy_fence)) + return PTR_ERR_OR_ZERO(copy_fence); + + dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence); + + /* If we failed to reserve a shared slot, add an exclusive fence */ + if (shared_err) + dma_resv_add_excl_fence(src_bo->base.resv, copy_fence); + else + dma_resv_add_shared_fence(src_bo->base.resv, copy_fence); + + dma_fence_put(copy_fence); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h new file mode 100644 index 000000000000..d2e7f149e05c --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_GEM_TTM_MOVE_H_ +#define _I915_GEM_TTM_MOVE_H_ + +#include <linux/types.h> + +#include "i915_selftest.h" + +struct ttm_buffer_object; +struct ttm_operation_ctx; +struct ttm_place; +struct ttm_resource; +struct ttm_tt; + +struct drm_i915_gem_object; +struct i915_refct_sgt; + +int i915_ttm_move_notify(struct ttm_buffer_object *bo); + +I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration, + bool work_allocation)); + +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr); + +/* Internal I915 TTM declarations and definitions below. 
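A short usage sketch for i915_gem_obj_copy_ttm(), following the suspend/resume backup hunk in this same patch (backup, obj, backup_bo and pm_apply are the variables used there; both objects must already be locked and populated by the caller):

	struct ttm_operation_ctx ctx = {};
	int err;

	err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false);
	GEM_WARN_ON(err);

	/* The copy may now complete asynchronously; wait for it here. */
	ttm_bo_wait_ctx(backup_bo, &ctx);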
*/ + +int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop); + +void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj); + +void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 3b6d14b5c604..9aad84059d56 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -12,6 +12,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" #include "gem/i915_gem_ttm_pm.h" /** @@ -79,6 +80,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false); GEM_WARN_ON(err); + ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = backup; return 0; @@ -169,6 +171,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply, err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu, false); GEM_WARN_ON(err); + ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = NULL; err = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index cd149aa99364..dab3d30c09a0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -254,6 +254,6 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, unsigned int flags) { might_sleep(); - /* NOP for now. */ - return 0; + + return i915_gem_object_wait_moving_fence(obj, !!(flags & I915_WAIT_INTERRUPTIBLE)); } diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index dbdbdc344d87..7271fbf813fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -12,6 +12,7 @@ int i915_gemfs_init(struct drm_i915_private *i915) { + char huge_opt[] = "huge=within_size"; /* r/w */ struct file_system_type *type; struct vfsmount *gemfs; char *opts; @@ -31,10 +32,8 @@ int i915_gemfs_init(struct drm_i915_private *i915) */ opts = NULL; - if (intel_vtd_active()) { + if (intel_vtd_active(i915)) { if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - static char huge_opt[] = "huge=within_size"; /* r/w */ - opts = huge_opt; drm_info(&i915->drm, "Transparent Hugepage mode '%s'\n", diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b2003133deaf..c69c7d45aabc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -22,6 +22,22 @@ #include "selftests/mock_region.h" #include "selftests/i915_random.h" +static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915, + struct file *file) +{ + struct i915_gem_context *ctx = live_context(i915, file); + struct i915_address_space *vm; + + if (IS_ERR(ctx)) + return ctx; + + vm = ctx->vm; + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + + return ctx; +} + static const unsigned int page_sizes[] = { I915_GTT_PAGE_SIZE_2M, I915_GTT_PAGE_SIZE_64K, @@ -552,7 +568,7 @@ out_unpin: out_put: i915_gem_object_put(obj); out_region: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return err; } @@ -959,6 +975,8 @@ static int igt_mock_ppgtt_64K(void *arg) __i915_gem_object_put_pages(obj); i915_gem_object_unlock(obj); i915_gem_object_put(obj); + + i915_gem_drain_freed_objects(i915); } } @@ -1080,10 +1098,6 @@ static int 
__igt_write_huge(struct intel_context *ce, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_unbind(vma); - if (err) - return err; - err = i915_vma_pin(vma, size, 0, flags | offset); if (err) { /* @@ -1117,7 +1131,7 @@ out_vma_unpin: return err; } -static int igt_write_huge(struct i915_gem_context *ctx, +static int igt_write_huge(struct drm_i915_private *i915, struct drm_i915_gem_object *obj) { struct i915_gem_engines *engines; @@ -1127,6 +1141,8 @@ static int igt_write_huge(struct i915_gem_context *ctx, IGT_TIMEOUT(end_time); unsigned int max_page_size; unsigned int count; + struct i915_gem_context *ctx; + struct file *file; u64 max; u64 num; u64 size; @@ -1134,6 +1150,16 @@ static int igt_write_huge(struct i915_gem_context *ctx, int i, n; int err = 0; + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); size = obj->base.size; @@ -1153,7 +1179,7 @@ static int igt_write_huge(struct i915_gem_context *ctx, } i915_gem_context_unlock_engines(ctx); if (!n) - return 0; + goto out; /* * To keep things interesting when alternating between engines in our @@ -1215,6 +1241,8 @@ static int igt_write_huge(struct i915_gem_context *ctx, kfree(order); +out: + fput(file); return err; } @@ -1277,8 +1305,7 @@ static u32 igt_random_size(struct rnd_state *prng, static int igt_ppgtt_smoke_huge(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; I915_RND_STATE(prng); struct { @@ -1302,6 +1329,7 @@ static int igt_ppgtt_smoke_huge(void *arg) u32 min = backends[i].min; u32 max = backends[i].max; u32 size = max; + try_again: size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); @@ -1336,7 +1364,7 @@ try_again: goto out_unpin; } - err = igt_write_huge(ctx, obj); + err = igt_write_huge(i915, obj); if (err) { pr_err("%s write-huge failed with size=%u, i=%d\n", __func__, size, i); @@ -1363,8 +1391,7 @@ out_put: static int igt_ppgtt_sanity_check(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct { igt_create_fn fn; @@ -1431,7 +1458,7 @@ static int igt_ppgtt_sanity_check(void *arg) if (pages) obj->mm.page_sizes.sg = pages; - err = igt_write_huge(ctx, obj); + err = igt_write_huge(i915, obj); i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); @@ -1458,15 +1485,27 @@ out: static int igt_tmpfs_fallback(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm; + struct i915_gem_context *ctx; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; + struct file *file; u32 *vaddr; int err = 0; + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + /* * Make sure that we don't burst into a ball of flames upon falling back * to tmpfs, which we rely on if on the off-chance we encouter a failure @@ -1510,33 +1549,47 @@ out_restore: i915->mm.gemfs = gemfs; i915_vm_put(vm); +out: + fput(file); return err; } static int 
igt_shrink_thp(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm; + struct i915_gem_context *ctx; struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; struct i915_vma *vma; + struct file *file; unsigned int flags = PIN_USER; unsigned int n; bool should_swap; - int err = 0; + int err; + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); /* * Sanity check shrinking huge-paged object -- make sure nothing blows * up. */ - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - goto out_vm; - } - obj = i915_gem_object_create_shmem(i915, SZ_2M); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -1626,7 +1679,8 @@ out_put: i915_gem_object_put(obj); out_vm: i915_vm_put(vm); - +out: + fput(file); return err; } @@ -1687,10 +1741,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), }; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct file *file; - int err; if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); @@ -1700,23 +1750,5 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - - vm = ctx->vm; - if (vm) - WRITE_ONCE(vm->scrub_64K, true); - - err = i915_subtests(tests, ctx); - -out_file: - fput(file); - return err; + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index b32f7fed2d9c..21b71568cd5f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -88,7 +88,7 @@ static int live_nop_switch(void *arg) rq = i915_request_get(this); i915_request_add(this); } - if (i915_request_wait(rq, 0, HZ) < 0) { + if (i915_request_wait(rq, 0, 10 * HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); i915_request_put(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 4a6bb64c3a35..3cc74b0fed06 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -102,7 +102,7 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg) obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", - PTR_ERR(dmabuf)); + PTR_ERR(obj)); err = PTR_ERR(obj); goto out_ret; } @@ -158,7 +158,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, regions, num_regions); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", - PTR_ERR(dmabuf)); + PTR_ERR(obj)); err = PTR_ERR(obj); goto out_ret; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 28a700f08b49..4b8e6b098659 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,6 +4,7 @@ */ #include "gt/intel_migrate.h" +#include "gem/i915_gem_ttm_move.h" static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) @@ -227,13 +228,34 @@ out_put: return err; } +static int igt_lmem_pages_failsafe_migrate(void *arg) +{ + int fail_gpu, fail_alloc, ret; + + for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { + for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) { + pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", + fail_gpu, fail_alloc); + i915_ttm_migrate_set_failure_modes(fail_gpu, + fail_alloc); + ret = igt_lmem_pages_migrate(arg); + if (ret) + goto out_err; + } + } + +out_err: + i915_ttm_migrate_set_failure_modes(false, false); + return ret; +} + int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_smem_create_migrate), SUBTEST(igt_lmem_create_migrate), SUBTEST(igt_same_create_migrate), - SUBTEST(igt_lmem_pages_migrate), + SUBTEST(igt_lmem_pages_failsafe_migrate), }; if (!HAS_LMEM(i915)) diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 890191f286e3..4a166d25fe60 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -185,7 +185,6 @@ static void gen6_alloc_va_range(struct i915_address_space *vm, pt = stash->pt[0]; __i915_gem_object_pin_pages(pt->base); - i915_gem_object_make_unshrinkable(pt->base); fill32_px(pt, vm->scratch[0]->encode); @@ -262,13 +261,10 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - __i915_vma_put(ppgtt->vma); - gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); mutex_destroy(&ppgtt->flush); - mutex_destroy(&ppgtt->pin_mutex); free_pd(&ppgtt->base.vm, ppgtt->base.pd); } @@ -331,37 +327,6 @@ static const struct i915_vma_ops pd_vma_ops = { .unbind_vma = pd_vma_unbind, }; -static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) -{ - struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; - struct i915_vma *vma; - - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(size > ggtt->vm.total); - - vma = i915_vma_alloc(); - if (!vma) - return ERR_PTR(-ENOMEM); - - i915_active_init(&vma->active, NULL, NULL, 0); - - kref_init(&vma->ref); - mutex_init(&vma->pages_mutex); - vma->vm = i915_vm_get(&ggtt->vm); - vma->ops = &pd_vma_ops; - vma->private = ppgtt; - - vma->size = size; - vma->fence_size = size; - atomic_set(&vma->flags, I915_VMA_GGTT); - vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ - - INIT_LIST_HEAD(&vma->obj_link); - INIT_LIST_HEAD(&vma->closed_link); - - return vma; -} - int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); @@ -378,42 +343,92 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) return 0; - if (mutex_lock_interruptible(&ppgtt->pin_mutex)) - return -EINTR; + /* grab the ppgtt resv to pin the object */ + err = i915_vm_lock_objects(&ppgtt->base.vm, ww); + if (err) + return err; /* * PPGTT PDEs reside in the GGTT and consists of 512 entries. The * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. 
*/ - err = 0; - if (!atomic_read(&ppgtt->pin_count)) + if (!atomic_read(&ppgtt->pin_count)) { err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); + + GEM_BUG_ON(ppgtt->vma->fence); + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma)); + } if (!err) atomic_inc(&ppgtt->pin_count); - mutex_unlock(&ppgtt->pin_mutex); return err; } -void gen6_ppgtt_unpin(struct i915_ppgtt *base) +static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj) { - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + obj->mm.pages = ZERO_SIZE_PTR; + return 0; +} - GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); - if (atomic_dec_and_test(&ppgtt->pin_count)) - i915_vma_unpin(ppgtt->vma); +static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ } -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) +static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { + .name = "pd_dummy_obj", + .get_pages = pd_dummy_obj_get_pages, + .put_pages = pd_dummy_obj_put_pages, +}; + +static struct i915_page_directory * +gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt; + struct i915_page_directory *pd; + int err; - if (!atomic_read(&ppgtt->pin_count)) - return; + pd = __alloc_pd(I915_PDES); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); - i915_vma_unpin(ppgtt->vma); - atomic_set(&ppgtt->pin_count, 0); + pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, + &pd_dummy_obj_ops, + I915_PDES * SZ_4K); + if (IS_ERR(pd->pt.base)) { + err = PTR_ERR(pd->pt.base); + pd->pt.base = NULL; + goto err_pd; + } + + pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm); + pd->pt.base->shares_resv_from = &ppgtt->base.vm; + + ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL); + if (IS_ERR(ppgtt->vma)) { + err = PTR_ERR(ppgtt->vma); + ppgtt->vma = NULL; + goto err_pd; + } + + /* The dummy object we create is special, override ops.. 
*/ + ppgtt->vma->ops = &pd_vma_ops; + ppgtt->vma->private = ppgtt; + return pd; + +err_pd: + free_pd(&ppgtt->base.vm, pd); + return ERR_PTR(err); +} + +void gen6_ppgtt_unpin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); } struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) @@ -427,7 +442,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) return ERR_PTR(-ENOMEM); mutex_init(&ppgtt->flush); - mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, gt, 0); ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); @@ -442,19 +456,13 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->base.pd = __alloc_pd(I915_PDES); - if (!ppgtt->base.pd) { - err = -ENOMEM; - goto err_free; - } - err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_pd; + goto err_free; - ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); - if (IS_ERR(ppgtt->vma)) { - err = PTR_ERR(ppgtt->vma); + ppgtt->base.pd = gen6_alloc_top_pd(ppgtt); + if (IS_ERR(ppgtt->base.pd)) { + err = PTR_ERR(ppgtt->base.pd); goto err_scratch; } @@ -462,10 +470,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) err_scratch: free_scratch(&ppgtt->base.vm); -err_pd: - free_pd(&ppgtt->base.vm, ppgtt->base.pd); err_free: - mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h index 6a61a5c3a85a..5e5cf2ec3309 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -19,7 +19,6 @@ struct gen6_ppgtt { u32 pp_dir; atomic_t pin_count; - struct mutex pin_mutex; bool scan_for_unused_pt; }; @@ -71,7 +70,6 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww); void gen6_ppgtt_unpin(struct i915_ppgtt *base); -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); void gen6_ppgtt_enable(struct intel_gt *gt); void gen7_ppgtt_enable(struct intel_gt *gt); struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 461844dffd7e..e320610dd0b8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0)) dc_flush_wa = true; } diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 037a9a6e4889..95c02096a61b 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -18,7 +18,7 @@ static u64 gen8_pde_encode(const dma_addr_t addr, const enum i915_cache_level level) { - u64 pde = addr | _PAGE_PRESENT | _PAGE_RW; + u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (level != I915_CACHE_NONE) pde |= PPAT_CACHED_PDE; @@ -32,10 +32,10 @@ static u64 gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) { - gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~_PAGE_RW; + pte &= 
~GEN8_PAGE_RW; if (flags & PTE_LM) pte |= GEN12_PPGTT_PTE_LM; @@ -301,7 +301,6 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, pt = stash->pt[!!lvl]; __i915_gem_object_pin_pages(pt->base); - i915_gem_object_make_unshrinkable(pt->base); fill_px(pt, vm->scratch[lvl]->encode); @@ -652,7 +651,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) vm->scratch[0]->encode = gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_LLC, pte_flags); + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -668,7 +667,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) } fill_px(obj, vm->scratch[i - 1]->encode); - obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC); + obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE); vm->scratch[i] = obj; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..ba083d800a08 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -219,7 +219,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce, */ err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww); - if (!err && ce->ring->vma->obj) + if (!err) err = i915_gem_object_lock(ce->ring->vma->obj, ww); if (!err && ce->state) err = i915_gem_object_lock(ce->state->obj, ww); @@ -228,17 +228,17 @@ int __intel_context_do_pin_ww(struct intel_context *ce, if (err) return err; - err = i915_active_acquire(&ce->active); + err = ce->ops->pre_pin(ce, ww, &vaddr); if (err) goto err_ctx_unpin; - err = ce->ops->pre_pin(ce, ww, &vaddr); + err = i915_active_acquire(&ce->active); if (err) - goto err_release; + goto err_post_unpin; err = mutex_lock_interruptible(&ce->pin_mutex); if (err) - goto err_post_unpin; + goto err_release; intel_engine_pm_might_get(ce->engine); @@ -273,11 +273,11 @@ int __intel_context_do_pin_ww(struct intel_context *ce, err_unlock: mutex_unlock(&ce->pin_mutex); +err_release: + i915_active_release(&ce->active); err_post_unpin: if (!handoff) ce->ops->post_unpin(ce); -err_release: - i915_active_release(&ce->active); err_ctx_unpin: intel_context_post_unpin(ce); @@ -364,7 +364,7 @@ static int __intel_context_active(struct i915_active *active) return 0; } -static int __i915_sw_fence_call +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, enum i915_sw_fence_notify state) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ff6753ccb129..352254e001b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -325,6 +325,38 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); + if (GRAPHICS_VER(gt->i915) >= 11) { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] = GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + engine->reset_domain = engine_reset_domains[id]; + } else { + static const u32 engine_reset_domains[] = { + [RCS0] = 
GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + engine->reset_domain = engine_reset_domains[id]; + } engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; @@ -363,7 +395,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, DRIVER_CAPS(i915)->has_logical_contexts = true; ewma__engine_latency_init(&engine->latency); - seqcount_init(&engine->stats.lock); + seqcount_init(&engine->stats.execlists.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1676,14 +1708,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, static void print_request_ring(struct drm_printer *m, struct i915_request *rq) { + struct i915_vma_snapshot *vsnap = &rq->batch_snapshot; void *ring; int size; + if (!i915_vma_snapshot_present(vsnap)) + vsnap = NULL; + drm_printf(m, "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u, + vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u); size = rq->tail - rq->head; if (rq->tail < rq->head) @@ -1915,22 +1951,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, - ktime_t *now) -{ - ktime_t total = engine->stats.total; - - /* - * If the engine is executing something at the moment - * add it to the total. - */ - *now = ktime_get(); - if (READ_ONCE(engine->stats.active)) - total = ktime_add(total, ktime_sub(*now, engine->stats.start)); - - return total; -} - /** * intel_engine_get_busy_time() - Return current accumulated engine busyness * @engine: engine to report on @@ -1940,15 +1960,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) { - unsigned int seq; - ktime_t total; - - do { - seq = read_seqcount_begin(&engine->stats.lock); - total = __intel_engine_get_busy_time(engine, now); - } while (read_seqcount_retry(&engine->stats.lock, seq)); - - return total; + return engine->busyness(engine, now); } struct intel_context * diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h index 24fbdd94351a..8e762d683e50 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -15,45 +15,46 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) { + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; unsigned long flags; - if (engine->stats.active) { - engine->stats.active++; + if (stats->active) { + stats->active++; return; } /* The writer is serialised; but the pmu reader may be from hardirq */ local_irq_save(flags); - write_seqcount_begin(&engine->stats.lock); + write_seqcount_begin(&stats->lock); - engine->stats.start = ktime_get(); - engine->stats.active++; + stats->start = ktime_get(); + stats->active++; - write_seqcount_end(&engine->stats.lock); + write_seqcount_end(&stats->lock); local_irq_restore(flags); - GEM_BUG_ON(!engine->stats.active); + GEM_BUG_ON(!stats->active); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) { + 
struct intel_engine_execlists_stats *stats = &engine->stats.execlists; unsigned long flags; - GEM_BUG_ON(!engine->stats.active); - if (engine->stats.active > 1) { - engine->stats.active--; + GEM_BUG_ON(!stats->active); + if (stats->active > 1) { + stats->active--; return; } local_irq_save(flags); - write_seqcount_begin(&engine->stats.lock); + write_seqcount_begin(&stats->lock); - engine->stats.active--; - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); + stats->active--; + stats->total = ktime_add(stats->total, + ktime_sub(ktime_get(), stats->start)); - write_seqcount_end(&engine->stats.lock); + write_seqcount_end(&stats->lock); local_irq_restore(flags); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e0f773585c29..36365bdbe1ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -257,6 +257,55 @@ struct intel_engine_execlists { #define INTEL_ENGINE_CS_MAX_NAME 8 +struct intel_engine_execlists_stats { + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + + /** + * @lock: Lock protecting the below fields. + */ + seqcount_t lock; + + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases where + * engine is currently busy (active > 0). + */ + ktime_t total; + + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; +}; + +struct intel_engine_guc_stats { + /** + * @running: Active state of the engine when busyness was last sampled. + */ + bool running; + + /** + * @prev_total: Previous value of total runtime clock cycles. + */ + u32 prev_total; + + /** + * @total_gt_clks: Total gt clock cycles this engine was busy. + */ + u64 total_gt_clks; + + /** + * @start_gt_clk: GT clock time of last idle to active transition. + */ + u64 start_gt_clk; +}; + struct intel_engine_cs { struct drm_i915_private *i915; struct intel_gt *gt; @@ -269,6 +318,7 @@ struct intel_engine_cs { unsigned int guc_id; intel_engine_mask_t mask; + u32 reset_domain; /** * @logical_mask: logical mask of engine, reported to user space via * query IOCTL and used to communicate with the GuC in logical space. @@ -439,6 +489,12 @@ struct intel_engine_cs { void (*add_active_request)(struct i915_request *rq); void (*remove_active_request)(struct i915_request *rq); + /* + * Get engine busyness and the time at which the busyness was sampled. + */ + ktime_t (*busyness)(struct intel_engine_cs *engine, + ktime_t *now); + struct intel_engine_execlists execlists; /* @@ -488,30 +544,10 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); struct { - /** - * @active: Number of contexts currently scheduled in. - */ - unsigned int active; - - /** - * @lock: Lock protecting the below fields. - */ - seqcount_t lock; - - /** - * @total: Total time this engine was busy. - * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). - */ - ktime_t total; - - /** - * @start: Timestamp of the last idle to active transition. - * - * Idle is defined as active == 0, active is active > 0. - */ - ktime_t start; + union { + struct intel_engine_execlists_stats execlists; + struct intel_engine_guc_stats guc; + }; /** * @rps: Utilisation at last RPS sampling. 
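The hunks above move the execlists busyness counters into their own struct intel_engine_execlists_stats and keep sampling them through a seqcount, so the PMU can read a consistent total from hard-IRQ context without taking a lock while the submission path remains the only writer. A minimal standalone sketch of that reader/writer pattern follows; the struct and function names here are illustrative, not the driver's.

/*
 * Minimal sketch of the seqcount-protected busyness sampling used above.
 * Illustrative names only -- this is not the driver's code.
 */
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/ktime.h>
#include <linux/seqlock.h>

struct busy_stats {
	unsigned int active;	/* contexts currently scheduled in */
	seqcount_t lock;	/* protects start/total against the reader */
	ktime_t start;		/* timestamp of the last idle -> active edge */
	ktime_t total;		/* accumulated busy time, excluding any in-progress interval */
};

static void busy_stats_init(struct busy_stats *s)
{
	s->active = 0;
	s->start = 0;
	s->total = 0;
	seqcount_init(&s->lock);
}

/* Writer: serialised by the submission path; IRQs are disabled because the
 * PMU reader may run from hardirq on the same CPU. */
static void busy_stats_context_in(struct busy_stats *s)
{
	unsigned long flags;

	if (s->active) {
		s->active++;
		return;
	}

	local_irq_save(flags);
	write_seqcount_begin(&s->lock);
	s->start = ktime_get();
	s->active++;
	write_seqcount_end(&s->lock);
	local_irq_restore(flags);
}

/* Reader: lockless; retry until a stable snapshot of start/total is seen,
 * then add the in-progress interval if the engine is currently busy. */
static ktime_t busy_stats_sample(struct busy_stats *s, ktime_t *now)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqcount_begin(&s->lock);
		*now = ktime_get();
		total = s->total;
		if (READ_ONCE(s->active))
			total = ktime_add(total, ktime_sub(*now, s->start));
	} while (read_seqcount_retry(&s->lock, seq));

	return total;
}

The execlists read side in intel_execlists_submission.c below follows the same retry loop, while the GuC flavour drops it and instead accumulates GT clock cycles in struct intel_engine_guc_stats.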
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..a69df5e9e77a 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2186,7 +2186,8 @@ struct execlists_capture { static void execlists_capture_work(struct work_struct *work) { struct execlists_capture *cap = container_of(work, typeof(*cap), work); - const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | + __GFP_NOWARN; struct intel_engine_cs *engine = cap->rq->engine; struct intel_gt_coredump *gt = cap->error->gt; struct intel_engine_capture_vma *vma; @@ -3293,6 +3294,38 @@ static void execlists_release(struct intel_engine_cs *engine) lrc_fini_wa_ctx(engine); } +static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine, + ktime_t *now) +{ + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; + ktime_t total = stats->total; + + /* + * If the engine is executing something at the moment + * add it to the total. + */ + *now = ktime_get(); + if (READ_ONCE(stats->active)) + total = ktime_add(total, ktime_sub(*now, stats->start)); + + return total; +} + +static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine, + ktime_t *now) +{ + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; + unsigned int seq; + ktime_t total; + + do { + seq = read_seqcount_begin(&stats->lock); + total = __execlists_engine_busyness(engine, now); + } while (read_seqcount_retry(&stats->lock, seq)); + + return total; +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { @@ -3349,6 +3382,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_bb_start = gen8_emit_bb_start; else engine->emit_bb_start = gen8_emit_bb_start_noarb; + + engine->busyness = execlists_engine_busyness; } static void logical_ring_default_irqs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 555111c3bee5..cbc6d2b1fd9e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -106,7 +106,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915) * Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. 
*/ - if (!intel_vtd_active()) + if (!intel_vtd_active(i915)) return false; if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) @@ -209,7 +209,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) { - gen8_pte_t pte = addr | _PAGE_PRESENT; + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; if (flags & PTE_LM) pte |= GEN12_GGTT_PTE_LM; @@ -1233,7 +1233,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) if (ret) return ret; - if (intel_vtd_active()) + if (intel_vtd_active(i915)) drm_info(&i915->drm, "VT-d active for gfx access\n"); return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 524eaf678790..c0fa41e4c803 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -86,6 +86,7 @@ static int __gt_unpark(struct intel_wakeref *wf) intel_rc6_unpark(>->rc6); intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); + intel_guc_busyness_unpark(gt); intel_gt_unpark_requests(gt); runtime_begin(gt); @@ -104,6 +105,7 @@ static int __gt_park(struct intel_wakeref *wf) runtime_end(gt); intel_gt_park_requests(gt); + intel_guc_busyness_park(gt); i915_vma_parked(gt); i915_pmu_gt_parked(i915); intel_rps_park(>->rps); @@ -301,7 +303,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) user_forcewake(gt, true); wait_for_suspend(gt); - intel_pxp_suspend(>->pxp, false); + intel_pxp_suspend_prepare(>->pxp); } static suspend_state_t pm_suspend_target(void) @@ -326,6 +328,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) GEM_BUG_ON(gt->awake); intel_uc_suspend(>->uc); + intel_pxp_suspend(>->pxp); /* * On disabling the device, we want to turn off HW access to memory @@ -353,7 +356,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) void intel_gt_runtime_suspend(struct intel_gt *gt) { - intel_pxp_suspend(>->pxp, true); + intel_pxp_runtime_suspend(>->pxp); intel_uc_runtime_suspend(>->uc); GT_TRACE(gt, "\n"); @@ -371,7 +374,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt) if (ret) return ret; - intel_pxp_resume(>->pxp); + intel_pxp_runtime_resume(>->pxp); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 67d14afa6623..0dd254cb1f69 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -7,6 +7,8 @@ #include <linux/fault-inject.h> +#include <drm/drm_cache.h> + #include "gem/i915_gem_lmem.h" #include "i915_trace.h" #include "intel_gt.h" @@ -273,6 +275,7 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch) val = POISON_FREE; memset(vaddr, val, scratch->base.size); + drm_clflush_virt_range(vaddr, scratch->base.size); } int setup_scratch_page(struct i915_address_space *vm) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index dfeaef680aac..51afe66d00f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -135,6 +135,9 @@ typedef u64 gen8_pte_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN8_PAGE_PRESENT BIT_ULL(0) +#define GEN8_PAGE_RW BIT_ULL(1) + #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 56156cf18c41..b3489599e4de 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1167,6 +1167,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_cmd_buf_wa(ce, cs); cs = 
gen12_emit_restore_scratch(ce, cs); + /* Wa_16013000631:dg2 */ + if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(ce->engine->i915)) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index afb1cce9a352..19a01878fee3 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -13,7 +13,6 @@ struct insert_pte_data { u64 offset; - bool is_lmem; }; #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ @@ -40,7 +39,7 @@ static void insert_pte(struct i915_address_space *vm, struct insert_pte_data *d = data; vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, - d->is_lmem ? PTE_LM : 0); + i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0); d->offset += PAGE_SIZE; } @@ -134,8 +133,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) goto err_vm; /* Now allow the GPU to rewrite the PTE via its own ppGTT */ - d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]); - vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d); + vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d); } return &vm->vm; @@ -281,10 +279,10 @@ static int emit_pte(struct i915_request *rq, GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); /* Compute the page directory offset for the target address range */ - offset += (u64)rq->engine->instance << 32; offset >>= 12; offset *= sizeof(u64); offset += 2 * CHUNK_SZ; + offset += (u64)rq->engine->instance << 32; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 15f9ada28a7a..9c253ba593c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -424,7 +424,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->unused_entries_index = I915_MOCS_PTE; if (IS_DG2(i915)) { - if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); table->table = dg2_mocs_table_g10_ax; } else { diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 43093dd2d0c9..c3155ee58689 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,10 +117,17 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + /* Wa_16011777198 - Render powergating must remain disabled */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + pg_enable = + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; + else + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index afb35d2e5c73..9ea49e0a27c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -66,12 +66,16 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem) DMA_ATTR_FORCE_CONTIGUOUS); } -static void +static int region_lmem_release(struct intel_memory_region *mem) { - 
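Stepping back to the emit_pte() hunk in intel_migrate.c above: the old code added the engine-instance term ((u64)instance << 32) before the offset was turned into a byte offset into the PTE window, so that term was scaled by the >> 12 and * sizeof(u64) steps along with the address; the fix applies it after the transform, leaving it as the plain high-bits per-engine base (as I read migrate_vm()). A purely illustrative before/after, reusing the driver's CHUNK_SZ definition from the same file:

/* Illustration only -- not driver code. */
#include <linux/sizes.h>
#include <linux/types.h>

#define CHUNK_SZ SZ_8M	/* mirrors the definition at the top of intel_migrate.c */

static u64 pte_offset_old(u64 addr, u64 inst)
{
	/* old order: the instance term gets scaled by the >>12 / *8 transform */
	addr += inst << 32;
	addr >>= 12;
	addr *= sizeof(u64);
	return addr + 2 * CHUNK_SZ;
}

static u64 pte_offset_new(u64 addr, u64 inst)
{
	/* new order: the instance term survives as a plain high-bits offset */
	addr >>= 12;
	addr *= sizeof(u64);
	addr += 2 * CHUNK_SZ;
	return addr + (inst << 32);
}

With the old ordering the instance term collapsed to instance << 23 after the shift and multiply; the new ordering keeps it at instance << 32.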
intel_region_ttm_fini(mem); + int ret; + + ret = intel_region_ttm_fini(mem); io_mapping_fini(&mem->iomap); release_fake_lmem_bar(mem); + + return ret; } static int @@ -158,7 +162,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { - if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0)) + if (!IS_DG1_GRAPHICS_STEP(uncore->i915, STEP_A0, STEP_C0)) return false; *start = 0; @@ -231,7 +235,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) return mem; err_region_put: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 91200c43951f..63199f0550e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -297,13 +297,6 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - static const u32 hw_engine_mask[] = { - [RCS0] = GEN6_GRDOM_RENDER, - [BCS0] = GEN6_GRDOM_BLT, - [VCS0] = GEN6_GRDOM_MEDIA, - [VCS1] = GEN8_GRDOM_MEDIA2, - [VECS0] = GEN6_GRDOM_VECS, - }; struct intel_engine_cs *engine; u32 hw_mask; @@ -314,8 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt, hw_mask = 0; for_each_engine_masked(engine, gt, engine_mask, tmp) { - GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); - hw_mask |= hw_engine_mask[engine->id]; + hw_mask |= engine->reset_domain; } } @@ -492,22 +484,6 @@ static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - static const u32 hw_engine_mask[] = { - [RCS0] = GEN11_GRDOM_RENDER, - [BCS0] = GEN11_GRDOM_BLT, - [VCS0] = GEN11_GRDOM_MEDIA, - [VCS1] = GEN11_GRDOM_MEDIA2, - [VCS2] = GEN11_GRDOM_MEDIA3, - [VCS3] = GEN11_GRDOM_MEDIA4, - [VCS4] = GEN11_GRDOM_MEDIA5, - [VCS5] = GEN11_GRDOM_MEDIA6, - [VCS6] = GEN11_GRDOM_MEDIA7, - [VCS7] = GEN11_GRDOM_MEDIA8, - [VECS0] = GEN11_GRDOM_VECS, - [VECS1] = GEN11_GRDOM_VECS2, - [VECS2] = GEN11_GRDOM_VECS3, - [VECS3] = GEN11_GRDOM_VECS4, - }; struct intel_engine_cs *engine; intel_engine_mask_t tmp; u32 reset_mask, unlock_mask = 0; @@ -518,8 +494,7 @@ static int gen11_reset_engines(struct intel_gt *gt, } else { reset_mask = 0; for_each_engine_masked(engine, gt, engine_mask, tmp) { - GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); - reset_mask |= hw_engine_mask[engine->id]; + reset_mask |= engine->reset_domain; ret = gen11_lock_sfc(engine, &reset_mask, &unlock_mask); if (ret) goto sfc_unlock; @@ -1367,20 +1342,27 @@ void intel_gt_handle_error(struct intel_gt *gt, /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */ synchronize_rcu_expedited(); - /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, gt, tmp) { - while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - >->reset.flags)) - wait_on_bit(>->reset.flags, - I915_RESET_ENGINE + engine->id, - TASK_UNINTERRUPTIBLE); + /* + * Prevent any other reset-engine attempt. We don't do this for GuC + * submission the GuC owns the per-engine reset, not the i915. 
+ */ + if (!intel_uc_uses_guc_submission(>->uc)) { + for_each_engine(engine, gt, tmp) { + while (test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) + wait_on_bit(>->reset.flags, + I915_RESET_ENGINE + engine->id, + TASK_UNINTERRUPTIBLE); + } } intel_gt_reset_global(gt, engine_mask, msg); - for_each_engine(engine, gt, tmp) - clear_bit_unlock(I915_RESET_ENGINE + engine->id, - >->reset.flags); + if (!intel_uc_uses_guc_submission(>->uc)) { + for_each_engine(engine, gt, tmp) + clear_bit_unlock(I915_RESET_ENGINE + engine->id, + >->reset.flags); + } clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); smp_mb__after_atomic(); wake_up_all(>->reset.queue); @@ -1441,6 +1423,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES > I915_WEDGED_ON_INIT); intel_gt_set_wedged(gt); + i915_disable_error_state(gt->i915, -ENODEV); set_bit(I915_WEDGED_ON_INIT, >->reset.flags); /* Wedged on init is non-recoverable */ @@ -1450,6 +1433,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) void intel_gt_set_wedged_on_fini(struct intel_gt *gt) { intel_gt_set_wedged(gt); + i915_disable_error_state(gt->i915, -ENODEV); set_bit(I915_WEDGED_ON_FINI, >->reset.flags); intel_gt_retire_requests(gt); /* cleanup any wedged requests */ } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 586dca1731ce..3e6fac0340ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1357,7 +1357,7 @@ retry: err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww); if (!err && gen7_wa_vma) err = i915_gem_object_lock(gen7_wa_vma->obj, &ww); - if (!err && engine->legacy.ring->vma->obj) + if (!err) err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww); if (!err) err = intel_timeline_pin(timeline, &ww); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 5e275f8dda8c..07ff7ba7b2b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -936,8 +936,70 @@ void intel_rps_park(struct intel_rps *rps) GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); } +u32 intel_rps_get_boost_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc; + + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + return slpc->boost_freq; + } else { + return intel_gpu_freq(rps, rps->boost_freq); + } +} + +static int rps_set_boost_freq(struct intel_rps *rps, u32 val) +{ + bool boost = false; + + /* Validate against (static) hardware limits */ + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || val > rps->max_freq) + return -EINVAL; + + mutex_lock(&rps->lock); + if (val != rps->boost_freq) { + rps->boost_freq = val; + boost = atomic_read(&rps->num_waiters); + } + mutex_unlock(&rps->lock); + if (boost) + schedule_work(&rps->work); + + return 0; +} + +int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq) +{ + struct intel_guc_slpc *slpc; + + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + return intel_guc_slpc_set_boost_freq(slpc, freq); + } else { + return rps_set_boost_freq(rps, freq); + } +} + +void intel_rps_dec_waiters(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc; + + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + intel_guc_slpc_dec_waiters(slpc); + } else { + atomic_dec(&rps->num_waiters); + } +} + void intel_rps_boost(struct i915_request *rq) { + struct intel_guc_slpc *slpc; + if (i915_request_signaled(rq) || 
i915_request_has_waitboost(rq)) return; @@ -945,6 +1007,16 @@ void intel_rps_boost(struct i915_request *rq) if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + /* Return if old value is non zero */ + if (!atomic_fetch_inc(&slpc->num_waiters)) + schedule_work(&slpc->boost_work); + + return; + } + if (atomic_fetch_inc(&rps->num_waiters)) return; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 11960d64ca82..aee12f37d38a 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -23,6 +23,9 @@ void intel_rps_disable(struct intel_rps *rps); void intel_rps_park(struct intel_rps *rps); void intel_rps_unpark(struct intel_rps *rps); void intel_rps_boost(struct i915_request *rq); +void intel_rps_dec_waiters(struct intel_rps *rps); +u32 intel_rps_get_boost_frequency(struct intel_rps *rps); +int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq); int intel_rps_set(struct intel_rps *rps, u8 val); void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e1f362530889..3113266c286e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -482,7 +482,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER)) + if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER)) wa_masked_en(wal, COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -560,6 +560,22 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* * These settings aren't actually workarounds, but general tuning settings that + * need to be programmed on dg2 platform. + */ +static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); + wa_add(wal, + FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, + 0, false); +} + +/* + * These settings aren't actually workarounds, but general tuning settings that * need to be programmed on several platforms. 
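One pattern worth pulling out of the intel_rps.c hunks above: both the legacy and SLPC waitboost paths gate the boost on a waiter count, so only the 0 -> 1 transition queues the boost worker, and intel_rps_dec_waiters() undoes it once the wait completes. A generic sketch of that gate, with assumed names rather than the driver's:

/* Illustration only -- not driver code. boost_work is assumed to be set up
 * elsewhere with INIT_WORK() and to raise the frequency when it runs. */
#include <linux/atomic.h>
#include <linux/workqueue.h>

struct boost_ctl {
	atomic_t num_waiters;
	struct work_struct boost_work;
};

/* Called for every new waiter: only the first one queues the work. */
static void boost_get(struct boost_ctl *b)
{
	if (!atomic_fetch_inc(&b->num_waiters))
		schedule_work(&b->boost_work);
}

/* Called when a waiter is done; the worker can de-boost once this hits 0. */
static void boost_put(struct boost_ctl *b)
{
	atomic_dec(&b->num_waiters);
}

atomic_fetch_inc() returns the old value, so the work is scheduled exactly once per busy period no matter how many waiters pile up.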
*/ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, @@ -621,13 +637,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0, false); - - /* - * Wa_14012131227:dg1 - * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p - */ - wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -644,6 +653,42 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); } +static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + dg2_ctx_gt_tuning_init(engine, wal); + + /* Wa_16011186671:dg2_g11 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); + wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_14010469329:dg2_g10 */ + wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); + + /* + * Wa_22010465075:dg2_g10 + * Wa_22010613112:dg2_g10 + * Wa_14010698770:dg2_g10 + */ + wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + } + + /* Wa_16013271637:dg2 */ + wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + + /* Wa_22012532006:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) + wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); +} + static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -730,7 +775,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_DG1(i915)) + if (IS_DG2(i915)) + dg2_ctx_workarounds_init(engine, wal); + else if (IS_XEHPSDV(i915)) + ; /* noop; none at this time */ + else if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 12) gen12_ctx_workarounds_init(engine, wal); @@ -878,10 +927,51 @@ hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) } static void +gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + const struct sseu_dev_info *sseu = &i915->gt.info.sseu; + unsigned int slice, subslice; + u32 mcr, mcr_mask; + + GEM_BUG_ON(GRAPHICS_VER(i915) != 9); + + /* + * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml + * Before any MMIO read into slice/subslice specific registers, MCR + * packet control register needs to be programmed to point to any + * enabled s/ss pair. Otherwise, incorrect values will be returned. + * This means each subsequent MMIO read will be forwarded to an + * specific s/ss combination, but this is OK since these registers + * are consistent across s/ss in almost all cases. In the rare + * occasions, such as INSTDONE, where this value is dependent + * on s/ss combo, the read should be done with read_subslice_reg. 
+ */ + slice = ffs(sseu->slice_mask) - 1; + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); + subslice = ffs(intel_sseu_get_subslices(sseu, slice)); + GEM_BUG_ON(!subslice); + subslice--; + + /* + * We use GEN8_MCR..() macros to calculate the |mcr| value for + * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads + */ + mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + + drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr); + + wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); +} + +static void gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; + /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */ + gen9_wa_init_mcr(i915, wal); + /* WaDisableKillLogic:bxt,skl,kbl */ if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) wa_write_or(wal, @@ -916,7 +1006,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0)) + if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); @@ -928,7 +1018,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) gen9_gt_workarounds_init(gt, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -1134,9 +1224,18 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + /* Wa_1407352427:icl,ehl */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, + PSDUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl,ehl */ + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); + /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0)) + IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1190,19 +1289,19 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) gen12_gt_workarounds_init(gt, wal); /* Wa_1409420604:tgl */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1408615072:tgl[a0] */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL); } @@ -1215,7 +1314,7 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) gen12_gt_workarounds_init(gt, wal); /* Wa_1607087056:dg1 */ - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1236,7 +1335,179 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = gt->i915; 
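A reading note for the many IS_*_GRAPHICS_STEP(..., STEP_X, STEP_Y) guards in these workaround hunks: as I read the i915 stepping macros, the range is half-open, so a bound like (STEP_A0, STEP_B0) covers every A-stepping and stops applying at B0, while STEP_FOREVER leaves the upper end open. A hypothetical equivalent of the comparison, not the driver's macro:

/* Illustrative only. Assumes steppings are ordered integers
 * (A0 < A1 < ... < B0 < ...) with STEP_FOREVER as a sentinel larger than
 * any real stepping. */
static bool graphics_step_in_range(int step, int since, int until)
{
	return step >= since && step < until;	/* [since, until) */
}

Guards bounded by STEP_FOREVER, as in several of the xehpsdv hunks just below, therefore apply from the named stepping onwards.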
+ + xehp_init_mcr(gt, wal); + + /* Wa_1409757795:xehpsdv */ + wa_write_or(wal, SCCGCTL94DC, CG3DDISURB); + + /* Wa_18011725039:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { + wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); + wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); + } + + /* Wa_16011155590:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + TSGUNIT_CLKGATE_DIS); + + /* Wa_14011780169:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) { + wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | + GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | + GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | + GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | + GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | + GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS); + wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | + GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | + GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | + GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | + GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | + GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | + GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | + GAMTLBVEBOX0_CLKGATE_DIS); + } + + /* Wa_14012362059:xehpsdv */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_16012725990:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER)) + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS); + + /* Wa_14011060649:xehpsdv */ + wa_14011060649(gt, wal); + + /* Wa_14014368820:xehpsdv */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); +} + +static void +dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct intel_engine_cs *engine; + int id; + xehp_init_mcr(gt, wal); + + /* Wa_14011060649:dg2 */ + wa_14011060649(gt, wal); + + /* + * Although there are per-engine instances of these registers, + * they technically exist outside the engine itself and are not + * impacted by engine resets. Furthermore, they're part of the + * GuC blacklist so trying to treat them as engine workarounds + * will result in GuC initialization failure and a wedged GPU. 
+ */ + for_each_engine(engine, gt, id) { + if (engine->class != VIDEO_DECODE_CLASS) + continue; + + /* Wa_16010515920:dg2_g10 */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) + wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), + ALNUNIT_CLKGATE_DIS); + } + + if (IS_DG2_G10(gt->i915)) { + /* Wa_22010523718:dg2 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + CG3DDISCFEG_CLKGATE_DIS); + + /* Wa_14011006942:dg2 */ + wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE, + DSS_ROUTER_CLKGATE_DIS); + } + + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_14010680813:dg2_g10 */ + wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS | + EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS); + + /* Wa_14010948348:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); + + /* Wa_14011037102:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); + + /* Wa_14011371254:dg2_g10 */ + wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); + + /* Wa_14011431319:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | + GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | + GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | + GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | + GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | + GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS); + wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | + GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | + GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | + GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | + GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | + GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | + GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | + GAMTLBVEBOX0_CLKGATE_DIS); + + /* Wa_14010569222:dg2_g10 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + GAMEDIA_CLKGATE_DIS); + + /* Wa_14011028019:dg2_g10 */ + wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); + } + + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14012362059:dg2 */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + } + + /* Wa_1509235366:dg2 */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); + + /* Wa_14014830051:dg2 */ + wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + + /* + * The following are not actually "workarounds" but rather + * recommended tuning settings documented in the bspec's + * performance guide section. 
+ */ + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); } static void @@ -1244,7 +1515,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_XEHPSDV(i915)) + if (IS_DG2(i915)) + dg2_gt_workarounds_init(gt, wal); + else if (IS_XEHPSDV(i915)) xehpsdv_gt_workarounds_init(gt, wal); else if (IS_DG1(i915)) dg1_gt_workarounds_init(gt, wal); @@ -1518,7 +1791,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_RANGE_4); } -static void cml_whitelist_build(struct intel_engine_cs *engine) +static void allow_read_ctx_timestamp(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1526,6 +1799,11 @@ static void cml_whitelist_build(struct intel_engine_cs *engine) whitelist_reg_ext(w, RING_CTX_TIMESTAMP(engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); +} + +static void cml_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); cfl_whitelist_build(engine); } @@ -1534,6 +1812,8 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; + allow_read_ctx_timestamp(engine); + switch (engine->class) { case RENDER_CLASS: /* WaAllowUMDToModifyHalfSliceChicken7:icl */ @@ -1569,15 +1849,9 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* hucStatus2RegOffset */ whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; default: - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1586,6 +1860,8 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; + allow_read_ctx_timestamp(engine); + switch (engine->class) { case RENDER_CLASS: /* @@ -1602,16 +1878,17 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4); - /* Wa_1808121037:tgl */ + /* + * Wa_1808121037:tgl + * Wa_14012131227:dg1 + * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p + */ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); /* Wa_1806527549:tgl */ whitelist_reg(w, HIZ_CHICKEN); break; default: - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1623,13 +1900,46 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine) tgl_whitelist_build(engine); /* GEN:BUG:1409280441:dg1 */ - if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) && + if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) && (engine->class == RENDER_CLASS || engine->class == COPY_ENGINE_CLASS)) whitelist_reg_ext(w, RING_ID(engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); } +static void xehpsdv_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); +} + +static void dg2_whitelist_build(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + allow_read_ctx_timestamp(engine); + + switch (engine->class) { + case RENDER_CLASS: + /* + * Wa_1507100340:dg2_g10 + * + * This covers 4 registers which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + 
RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); + + break; + default: + break; + } +} + void intel_engine_init_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1637,7 +1947,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, "whitelist", engine->name); - if (IS_DG1(i915)) + if (IS_DG2(i915)) + dg2_whitelist_build(engine); + else if (IS_XEHPSDV(i915)) + xehpsdv_whitelist_build(engine); + else if (IS_DG1(i915)) dg1_whitelist_build(engine); else if (GRAPHICS_VER(i915) == 12) tgl_whitelist_build(engine); @@ -1711,13 +2025,119 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); } } + +static bool needs_wa_1308578152(struct intel_engine_cs *engine) +{ + u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0); + + return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0; +} + static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || - IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14013392000:dg2_g11 */ + wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); + + /* Wa_16011620976:dg2_g11 */ + wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14012419201:dg2 */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(engine->i915)) { + /* + * Wa_22012826095:dg2 + * Wa_22013059131:dg2 + */ + wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); + + /* Wa_22013059131:dg2 */ + wa_write_or(wal, LSC_CHICKEN_BIT_0, + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); + } + + /* Wa_1308578152:dg2_g10 when first gslice is fused off */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) && + needs_wa_1308578152(engine)) { + wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, + GEN12_REPLAY_MODE_GRANULARITY); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(engine->i915)) { + /* Wa_22013037850:dg2 */ + wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); + + /* Wa_22012856258:dg2 */ + wa_masked_en(wal, GEN7_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); + + /* + * Wa_22010960976:dg2 + * Wa_14013347512:dg2 + */ + wa_masked_dis(wal, GEN12_HDC_CHICKEN0, + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* + * Wa_1608949956:dg2_g10 + * Wa_14010198302:dg2_g10 + */ + wa_masked_en(wal, GEN8_ROW_CHICKEN, + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); + + /* + * Wa_14010918519:dg2_g10 + * + * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, + * so ignoring verification. 
+ */ + wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, + FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, + 0, false); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_22010430635:dg2 */ + wa_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); + + /* Wa_14010648519:dg2 */ + wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_G11(engine->i915)) { + /* Wa_22012654132:dg2 */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } + + /* Wa_14013202645:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) + wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); + + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || + IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1607138336:tgl[a0],dg1[a0] * Wa_1607063988:tgl[a0],dg1[a0] @@ -1727,7 +2147,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); } - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) @@ -1762,7 +2182,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, @@ -1775,8 +2195,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } - - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1607030317:tgl @@ -1859,15 +2278,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); - /* Wa_1407352427:icl,ehl */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, - PSDUNIT_CLKGATE_DIS); - - /* Wa_1406680159:icl,ehl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); - /* * Wa_1408767742:icl[a2..forever],ehl[all] * Wa_1605460711:icl[a0..c0] @@ -2138,7 +2548,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) { + if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 8b89215afe46..bb99fc03f503 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -35,9 +35,31 @@ static void mock_timeline_unpin(struct intel_timeline *tl) atomic_dec(&tl->pin_count); } +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +{ + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if 
(IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { - const unsigned long sz = PAGE_SIZE / 2; + const unsigned long sz = PAGE_SIZE; struct intel_ring *ring; ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); @@ -50,15 +72,11 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->vaddr = (void *)(ring + 1); atomic_set(&ring->pin_count, 1); - ring->vma = i915_vma_alloc(); - if (!ring->vma) { + ring->vma = create_ring_vma(engine->gt->ggtt, PAGE_SIZE); + if (IS_ERR(ring->vma)) { kfree(ring); return NULL; } - i915_active_init(&ring->vma->active, NULL, NULL, 0); - __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma)); - __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags); - ring->vma->node.size = sz; intel_ring_update_space(ring); @@ -67,8 +85,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) static void mock_ring_free(struct intel_ring *ring) { - i915_active_fini(&ring->vma->active); - i915_vma_free(ring->vma); + i915_vma_put(ring->vma); kfree(ring); } @@ -125,6 +142,7 @@ static void mock_context_unpin(struct intel_context *ce) static void mock_context_post_unpin(struct intel_context *ce) { + i915_vma_unpin(ce->ring->vma); } static void mock_context_destroy(struct kref *ref) @@ -169,7 +187,7 @@ static int mock_context_alloc(struct intel_context *ce) static int mock_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **unused) { - return 0; + return i915_vma_pin_ww(ce->ring->vma, ww, 0, 0, PIN_GLOBAL | PIN_HIGH); } static int mock_context_pin(struct intel_context *ce, void *unused) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 75569666105d..75f6efc9882f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -214,6 +214,31 @@ static int live_engine_timestamps(void *arg) return 0; } +static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness) +{ + ktime_t start, unused, dt; + + if (!intel_engine_uses_guc(engine)) + return 0; + + /* + * In GuC mode of submission, the busyness stats may get updated after + * the batch starts running. Poll for a change in busyness and timeout + * after 500 us. 
+ */ + start = ktime_get(); + while (intel_engine_get_busy_time(engine, &unused) == busyness) { + dt = ktime_get() - start; + if (dt > 500000) { + pr_err("active wait timed out %lld\n", dt); + ENGINE_TRACE(engine, "active wait time out %lld\n", dt); + return -ETIME; + } + } + + return 0; +} + static int live_engine_busy_stats(void *arg) { struct intel_gt *gt = arg; @@ -232,6 +257,7 @@ static int live_engine_busy_stats(void *arg) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); for_each_engine(engine, gt, id) { struct i915_request *rq; + ktime_t busyness, dummy; ktime_t de, dt; ktime_t t[2]; @@ -274,16 +300,23 @@ static int live_engine_busy_stats(void *arg) } i915_request_add(rq); + busyness = intel_engine_get_busy_time(engine, &dummy); if (!igt_wait_for_spinner(&spin, rq)) { intel_gt_set_wedged(engine->gt); err = -ETIME; goto end; } + err = __spin_until_busier(engine, busyness); + if (err) { + GEM_TRACE_DUMP(); + goto end; + } + ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); de = intel_engine_get_busy_time(engine, &t[0]); - udelay(100); + mdelay(10); de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); dt = ktime_sub(t[1], t[0]); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7e2d99dd012d..e5ad4d5a91c0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -471,7 +471,8 @@ static int igt_reset_nop_engine(void *arg) count = 0; st_engine_heartbeat_disable(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); do { int i; @@ -528,7 +529,7 @@ static int igt_reset_nop_engine(void *arg) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -582,7 +583,8 @@ static int igt_reset_fail_engine(void *arg) } st_engine_heartbeat_disable(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); force_reset_timeout(engine); err = intel_engine_reset(engine, NULL); @@ -679,7 +681,7 @@ static int igt_reset_fail_engine(void *arg) out: pr_info("%s(%s): %d resets\n", __func__, engine->name, count); skip: - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); intel_context_put(ce); @@ -734,7 +736,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_engine_count = i915_reset_engine_count(global, engine); st_engine_heartbeat_disable(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); count = 0; do { struct i915_request *rq = NULL; @@ -824,7 +827,7 @@ restore: if (err) break; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); pr_info("%s: Completed %lu %s resets\n", engine->name, count, active ? 
"active" : "idle"); @@ -1042,7 +1045,8 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ st_engine_heartbeat_disable_no_pm(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); do { struct i915_request *rq = NULL; struct intel_selftest_saved_policy saved; @@ -1165,7 +1169,7 @@ restore: if (err) break; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable_no_pm(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 12ef2837c89b..e21787301bbd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -49,6 +49,7 @@ static int copy(struct intel_migrate *migrate, if (IS_ERR(src)) return 0; + sz = src->base.size; dst = i915_gem_object_create_internal(i915, sz); if (IS_ERR(dst)) goto err_free_src; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index ba10bd374cee..fe5d7d261797 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -144,6 +144,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, + INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 31cf9fb48c7e..1cb46098030d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -138,6 +138,8 @@ struct intel_guc { u32 ads_regset_size; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_engine_usage_size: size of engine usage in the ADS */ + u32 ads_engine_usage_size; /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ struct i915_vma *lrc_desc_pool; @@ -172,6 +174,34 @@ struct intel_guc { /** @send_mutex: used to serialize the intel_guc_send actions */ struct mutex send_mutex; + + /** + * @timestamp: GT timestamp object that stores a copy of the timestamp + * and adjusts it for overflow using a worker. + */ + struct { + /** + * @lock: Lock protecting the below fields and the engine stats. + */ + spinlock_t lock; + + /** + * @gt_stamp: 64 bit extended value of the GT timestamp. + */ + u64 gt_stamp; + + /** + * @ping_delay: Period for polling the GT timestamp for + * overflow. + */ + unsigned long ping_delay; + + /** + * @work: Periodic work to adjust GT timestamp, engine and + * context usage for overflows. 
+ */ + struct delayed_work work; + } timestamp; }; static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 621c893a009f..1a1edae67e4e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -26,6 +26,8 @@ * | guc_policies | * +---------------------------------------+ * | guc_gt_system_info | + * +---------------------------------------+ + * | guc_engine_usage | * +---------------------------------------+ <== static * | guc_mmio_reg[countA] (engine 0.0) | * | guc_mmio_reg[countB] (engine 0.1) | @@ -47,6 +49,7 @@ struct __guc_ads_blob { struct guc_ads ads; struct guc_policies policies; struct guc_gt_system_info system_info; + struct guc_engine_usage engine_usage; /* From here on, location is dynamic! Refer to above diagram. */ struct guc_mmio_reg regset[0]; } __packed; @@ -628,3 +631,21 @@ void intel_guc_ads_reset(struct intel_guc *guc) guc_ads_private_data_reset(guc); } + +u32 intel_guc_engine_usage_offset(struct intel_guc *guc) +{ + struct __guc_ads_blob *blob = guc->ads_blob; + u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma); + u32 offset = base + ptr_offset(blob, engine_usage); + + return offset; +} + +struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct __guc_ads_blob *blob = guc->ads_blob; + u8 guc_class = engine_class_to_guc_class(engine->class); + + return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)]; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h index 3d85051d57e4..e74c110facff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -6,8 +6,11 @@ #ifndef _INTEL_GUC_ADS_H_ #define _INTEL_GUC_ADS_H_ +#include <linux/types.h> + struct intel_guc; struct drm_printer; +struct intel_engine_cs; int intel_guc_ads_create(struct intel_guc *guc); void intel_guc_ads_destroy(struct intel_guc *guc); @@ -15,5 +18,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc); void intel_guc_ads_reset(struct intel_guc *guc); void intel_guc_ads_print_policy_info(struct intel_guc *guc, struct drm_printer *p); +struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine); +u32 intel_guc_engine_usage_offset(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 722933e26347..7072e30e99f4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -294,6 +294,19 @@ struct guc_ads { u32 reserved[15]; } __packed; +/* Engine usage stats */ +struct guc_engine_usage_record { + u32 current_context_index; + u32 last_switch_in_stamp; + u32 reserved0; + u32 total_runtime; + u32 reserved1[4]; +} __packed; + +struct guc_engine_usage { + struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +} __packed; + /* GuC logging structures */ enum guc_log_buffer_type { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 65a3e7fdb2b2..22c1c12369f2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -79,29 +79,6 @@ static void slpc_mem_set_disabled(struct slpc_shared_data *data, slpc_mem_set_param(data, enable_id, 0); } -int 
intel_guc_slpc_init(struct intel_guc_slpc *slpc) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - struct drm_i915_private *i915 = slpc_to_i915(slpc); - u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); - int err; - - GEM_BUG_ON(slpc->vma); - - err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); - if (unlikely(err)) { - drm_err(&i915->drm, - "Failed to allocate SLPC struct (err=%pe)\n", - ERR_PTR(err)); - return err; - } - - slpc->max_freq_softlimit = 0; - slpc->min_freq_softlimit = 0; - - return err; -} - static u32 slpc_get_state(struct intel_guc_slpc *slpc) { struct slpc_shared_data *data; @@ -203,6 +180,86 @@ static int slpc_unset_param(struct intel_guc_slpc *slpc, return guc_action_slpc_unset_param(guc, id); } +static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct intel_guc *guc = slpc_to_guc(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + lockdep_assert_held(&slpc->lock); + + if (!intel_guc_is_ready(guc)) + return -ENODEV; + + /* + * This function is a little different as compared to + * intel_guc_slpc_set_min_freq(). Softlimit will not be updated + * here since this is used to temporarily change min freq, + * for example, during a waitboost. Caller is responsible for + * checking bounds. + */ + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); + if (ret) + drm_err(&i915->drm, "Unable to force min freq to %u: %d", + freq, ret); + } + + return ret; +} + +static void slpc_boost_work(struct work_struct *work) +{ + struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + + /* + * Raise min freq to boost. It's possible that + * this is greater than current max. But it will + * certainly be limited by RP0. An error setting + * the min param is not fatal. 
+ */ + mutex_lock(&slpc->lock); + if (atomic_read(&slpc->num_waiters)) { + slpc_force_min_freq(slpc, slpc->boost_freq); + slpc->num_boosts++; + } + mutex_unlock(&slpc->lock); +} + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + GEM_BUG_ON(slpc->vma); + + err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); + if (unlikely(err)) { + drm_err(&i915->drm, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); + return err; + } + + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + + slpc->boost_freq = 0; + atomic_set(&slpc->num_waiters, 0); + slpc->num_boosts = 0; + + mutex_init(&slpc->lock); + INIT_WORK(&slpc->boost_work, slpc_boost_work); + + return err; +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { switch (state) { @@ -393,7 +450,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) val > slpc->max_freq_softlimit) return -EINVAL; + /* Need a lock now since waitboost can be modifying min as well */ + mutex_lock(&slpc->lock); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, val); @@ -406,6 +467,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) if (!ret) slpc->min_freq_softlimit = val; + mutex_unlock(&slpc->lock); + return ret; } @@ -522,6 +585,9 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER; slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) * GT_FREQUENCY_MULTIPLIER; + + if (!slpc->boost_freq) + slpc->boost_freq = slpc->rp0_freq; } /* @@ -588,6 +654,47 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return 0; } +int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val) +{ + int ret = 0; + + if (val < slpc->min_freq || val > slpc->rp0_freq) + return -EINVAL; + + mutex_lock(&slpc->lock); + + if (slpc->boost_freq != val) { + /* Apply only if there are active waiters */ + if (atomic_read(&slpc->num_waiters)) { + ret = slpc_force_min_freq(slpc, val); + if (ret) { + ret = -EIO; + goto done; + } + } + + slpc->boost_freq = val; + } + +done: + mutex_unlock(&slpc->lock); + return ret; +} + +void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) +{ + /* + * Return min back to the softlimit. + * This is called during request retire, + * so we don't need to fail that if the + * set_param fails. 
+ */ + mutex_lock(&slpc->lock); + if (atomic_dec_and_test(&slpc->num_waiters)) + slpc_force_min_freq(slpc, slpc->min_freq_softlimit); + mutex_unlock(&slpc->lock); +} + int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -611,6 +718,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p slpc_decode_max_freq(slpc)); drm_printf(p, "\tMin freq: %u MHz\n", slpc_decode_min_freq(slpc)); + drm_printf(p, "\twaitboosts: %u\n", + slpc->num_boosts); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index e45054d5b9b4..0caa8fee3c04 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -34,9 +34,12 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); +void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); +void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 41d13527666f..bf5b9a563c09 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -6,6 +6,9 @@ #ifndef _INTEL_GUC_SLPC_TYPES_H_ #define _INTEL_GUC_SLPC_TYPES_H_ +#include <linux/atomic.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> #include <linux/types.h> #define SLPC_RESET_TIMEOUT_MS 5 @@ -20,10 +23,20 @@ struct intel_guc_slpc { u32 min_freq; u32 rp0_freq; u32 rp1_freq; + u32 boost_freq; /* frequency softlimits */ u32 min_freq_softlimit; u32 max_freq_softlimit; + + /* Protects set/reset of boost freq + * and value of num_waiters + */ + struct mutex lock; + + struct work_struct boost_work; + atomic_t num_waiters; + u32 num_boosts; }; #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c48557dfa04c..1f9d4fde421f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -13,6 +13,7 @@ #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" @@ -21,6 +22,7 @@ #include "gt/intel_mocs.h" #include "gt/intel_ring.h" +#include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -1077,6 +1079,271 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) xa_unlock_irqrestore(&guc->context_lookup, flags); } +/* + * GuC stores busyness stats for each engine at context in/out boundaries. A + * context 'in' logs execution start time, 'out' adds in -> out delta to total. + * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with + * GuC. + * + * __i915_pmu_event_read samples engine busyness. 
When sampling, if context id + * is valid (!= ~0) and start is non-zero, the engine is considered to be + * active. For an active engine total busyness = total + (now - start), where + * 'now' is the time at which the busyness is sampled. For inactive engine, + * total busyness = total. + * + * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. + * + * The start and total values provided by GuC are 32 bits and wrap around in a + * few minutes. Since perf pmu provides busyness as 64 bit monotonically + * increasing ns values, there is a need for this implementation to account for + * overflows and extend the GuC provided values to 64 bits before returning + * busyness to the user. In order to do that, a worker runs periodically at + * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in + * 27 seconds for a gt clock frequency of 19.2 MHz). + */ + +#define WRAP_TIME_CLKS U32_MAX +#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3) + +static void +__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) +{ + u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); + u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp); + + if (new_start == lower_32_bits(*prev_start)) + return; + + if (new_start < gt_stamp_last && + (new_start - gt_stamp_last) <= POLL_TIME_CLKS) + gt_stamp_hi++; + + if (new_start > gt_stamp_last && + (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi) + gt_stamp_hi--; + + *prev_start = ((u64)gt_stamp_hi << 32) | new_start; +} + +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +{ + struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + struct intel_engine_guc_stats *stats = &engine->stats.guc; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 last_switch = rec->last_switch_in_stamp; + u32 ctx_id = rec->current_context_index; + u32 total = rec->total_runtime; + + lockdep_assert_held(&guc->timestamp.lock); + + stats->running = ctx_id != ~0U && last_switch; + if (stats->running) + __extend_last_switch(guc, &stats->start_gt_clk, last_switch); + + /* + * Instead of adjusting the total for overflow, just add the + * difference from previous sample stats->total_gt_clks + */ + if (total && total != ~0U) { + stats->total_gt_clks += (u32)(total - stats->prev_total); + stats->prev_total = total; + } +} + +static void guc_update_pm_timestamp(struct intel_guc *guc, + struct intel_engine_cs *engine, + ktime_t *now) +{ + u32 gt_stamp_now, gt_stamp_hi; + + lockdep_assert_held(&guc->timestamp.lock); + + gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); + gt_stamp_now = intel_uncore_read(engine->uncore, + RING_TIMESTAMP(engine->mmio_base)); + *now = ktime_get(); + + if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp)) + gt_stamp_hi++; + + guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now; +} + +/* + * Unlike the execlist mode of submission total and active times are in terms of + * gt clocks. The *now parameter is retained to return the cpu time at which the + * busyness was sampled. 
+ */ +static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) +{ + struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; + struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; + struct intel_gt *gt = engine->gt; + struct intel_guc *guc = >->uc.guc; + u64 total, gt_stamp_saved; + unsigned long flags; + u32 reset_count; + bool in_reset; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + + /* + * If a reset happened, we risk reading partially updated engine + * busyness from GuC, so we just use the driver stored copy of busyness. + * Synchronize with gt reset using reset_count and the + * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count + * after I915_RESET_BACKOFF flag, so ensure that the reset_count is + * usable by checking the flag afterwards. + */ + reset_count = i915_reset_count(gpu_error); + in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); + + *now = ktime_get(); + + /* + * The active busyness depends on start_gt_clk and gt_stamp. + * gt_stamp is updated by i915 only when gt is awake and the + * start_gt_clk is derived from GuC state. To get a consistent + * view of activity, we query the GuC state only if gt is awake. + */ + if (intel_gt_pm_get_if_awake(gt) && !in_reset) { + stats_saved = *stats; + gt_stamp_saved = guc->timestamp.gt_stamp; + guc_update_engine_gt_clks(engine); + guc_update_pm_timestamp(guc, engine, now); + intel_gt_pm_put_async(gt); + if (i915_reset_count(gpu_error) != reset_count) { + *stats = stats_saved; + guc->timestamp.gt_stamp = gt_stamp_saved; + } + } + + total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); + if (stats->running) { + u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; + + total += intel_gt_clock_interval_to_ns(gt, clk); + } + + spin_unlock_irqrestore(&guc->timestamp.lock, flags); + + return ns_to_ktime(total); +} + +static void __reset_guc_busyness_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + ktime_t unused; + + cancel_delayed_work_sync(&guc->timestamp.work); + + spin_lock_irqsave(&guc->timestamp.lock, flags); + + for_each_engine(engine, gt, id) { + guc_update_pm_timestamp(guc, engine, &unused); + guc_update_engine_gt_clks(engine); + engine->stats.guc.prev_total = 0; + } + + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + +static void __update_guc_busyness_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + ktime_t unused; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + for_each_engine(engine, gt, id) { + guc_update_pm_timestamp(guc, engine, &unused); + guc_update_engine_gt_clks(engine); + } + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + +static void guc_timestamp_ping(struct work_struct *wrk) +{ + struct intel_guc *guc = container_of(wrk, typeof(*guc), + timestamp.work.work); + struct intel_uc *uc = container_of(guc, typeof(*uc), guc); + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int srcu, ret; + + /* + * Synchronize with gt reset to make sure the worker does not + * corrupt the engine/guc stats. 
+ */ + ret = intel_gt_reset_trylock(gt, &srcu); + if (ret) + return; + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + __update_guc_busyness_stats(guc); + + intel_gt_reset_unlock(gt, srcu); + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); +} + +static int guc_action_enable_usage_stats(struct intel_guc *guc) +{ + u32 offset = intel_guc_engine_usage_offset(guc); + u32 action[] = { + INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, + offset, + 0, + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static void guc_init_engine_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { + int ret = guc_action_enable_usage_stats(guc); + + if (ret) + drm_err(>->i915->drm, + "Failed to enable usage stats: %d!\n", ret); + } +} + +void intel_guc_busyness_park(struct intel_gt *gt) +{ + struct intel_guc *guc = >->uc.guc; + + if (!guc_submission_initialized(guc)) + return; + + cancel_delayed_work(&guc->timestamp.work); + __update_guc_busyness_stats(guc); +} + +void intel_guc_busyness_unpark(struct intel_gt *gt) +{ + struct intel_guc *guc = >->uc.guc; + + if (!guc_submission_initialized(guc)) + return; + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); +} + static inline bool submission_disabled(struct intel_guc *guc) { @@ -1138,6 +1405,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) intel_gt_park_heartbeats(guc_to_gt(guc)); disable_submission(guc); guc->interrupts.disable(guc); + __reset_guc_busyness_stats(guc); /* Flush IRQ handler */ spin_lock_irq(&guc_to_gt(guc)->irq_lock); @@ -1484,6 +1752,7 @@ static void destroyed_worker_func(struct work_struct *w); */ int intel_guc_submission_init(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); int ret; if (guc->lrc_desc_pool) @@ -1512,6 +1781,10 @@ int intel_guc_submission_init(struct intel_guc *guc) if (!guc->submission_state.guc_ids_bitmap) return -ENOMEM; + spin_lock_init(&guc->timestamp.lock); + INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); + guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; + return 0; } @@ -3369,7 +3642,9 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen12_emit_flush_xcs; } engine->set_default_submission = guc_set_default_submission; + engine->busyness = guc_engine_busyness; + engine->flags |= I915_ENGINE_SUPPORTS_STATS; engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->flags |= I915_ENGINE_HAS_TIMESLICES; @@ -3468,6 +3743,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { guc_init_lrc_mapping(guc); + guc_init_engine_stats(guc); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -3695,6 +3971,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { struct intel_context *ce; + unsigned long flags; int desc_idx; if (unlikely(len != 1)) { @@ -3703,11 +3980,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, } desc_idx = msg[0]; + + /* + * The context lookup uses the xarray but lookups only require an RCU lock + * not the full spinlock. So take the lock explicitly and keep it until the + * context has been reference count locked to ensure it can't be destroyed + * asynchronously until the reset is done. 
+ */ + xa_lock_irqsave(&guc->context_lookup, flags); ce = g2h_context_lookup(guc, desc_idx); + if (ce) + intel_context_get(ce); + xa_unlock_irqrestore(&guc->context_lookup, flags); + if (unlikely(!ce)) return -EPROTO; guc_handle_context_reset(guc, ce); + intel_context_put(ce); return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index c7ef44fa0c36..5a95a9f0a8e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -28,6 +28,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, void intel_guc_dump_active_requests(struct intel_engine_cs *engine, struct i915_request *hung_rq, struct drm_printer *m); +void intel_guc_busyness_park(struct intel_gt *gt); +void intel_guc_busyness_unpark(struct intel_gt *gt); bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 53d0cb327539..99d1781fa5f0 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -446,17 +446,17 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) return (e->val64 != 0); else - return (e->val64 & _PAGE_PRESENT); + return (e->val64 & GEN8_PAGE_PRESENT); } static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) { - e->val64 &= ~_PAGE_PRESENT; + e->val64 &= ~GEN8_PAGE_PRESENT; } static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) { - e->val64 |= _PAGE_PRESENT; + e->val64 |= GEN8_PAGE_PRESENT; } static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) @@ -2439,7 +2439,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, /* The entry parameters like present/writeable/cache type * set to the same as i915's scratch page tree. 
*/ - se.val64 |= _PAGE_PRESENT | _PAGE_RW; + se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (type == GTT_TYPE_PPGTT_PDE_PT) se.val64 |= PPAT_CACHED; @@ -2896,7 +2896,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt) offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; for (idx = 0; idx < num_low; idx++) { pte = mm->ggtt_mm.host_ggtt_aperture[idx]; - if (pte & _PAGE_PRESENT) + if (pte & GEN8_PAGE_PRESENT) write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); } @@ -2904,7 +2904,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt) offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; for (idx = 0; idx < num_hi; idx++) { pte = mm->ggtt_mm.host_ggtt_hidden[idx]; - if (pte & _PAGE_PRESENT) + if (pte & GEN8_PAGE_PRESENT) write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); } } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fe638b5da7c0..390d541f64ea 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -65,6 +65,7 @@ static int i915_capabilities(struct seq_file *m, void *data) intel_device_info_print_static(INTEL_INFO(i915), &p); intel_device_info_print_runtime(RUNTIME_INFO(i915), &p); + i915_print_iommu_status(i915, &p); intel_gt_info_print(&i915->gt.info, &p); intel_driver_caps_print(&i915->caps, &p); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index b394b8702f1c..bbc99fc5888f 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -418,10 +418,14 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) if (ret < 0) return ret; - ret = intel_uncore_init_mmio(&dev_priv->uncore); + ret = intel_uncore_setup_mmio(&dev_priv->uncore); if (ret < 0) goto err_bridge; + ret = intel_uncore_init_mmio(&dev_priv->uncore); + if (ret) + goto err_mmio; + /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev_priv); intel_device_info_runtime_init(dev_priv); @@ -438,6 +442,8 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) err_uncore: intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); +err_mmio: + intel_uncore_cleanup_mmio(&dev_priv->uncore); err_bridge: pci_dev_put(dev_priv->bridge_dev); @@ -452,6 +458,7 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); + intel_uncore_cleanup_mmio(&dev_priv->uncore); pci_dev_put(dev_priv->bridge_dev); } @@ -734,6 +741,12 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_gem_driver_unregister(dev_priv); } +void +i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p) +{ + drm_printf(p, "iommu: %s\n", enableddisabled(intel_vtd_active(i915))); +} + static void i915_welcome_messages(struct drm_i915_private *dev_priv) { if (drm_debug_enabled(DRM_UT_DRIVER)) { @@ -749,6 +762,7 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv) intel_device_info_print_static(INTEL_INFO(dev_priv), &p); intel_device_info_print_runtime(RUNTIME_INFO(dev_priv), &p); + i915_print_iommu_status(dev_priv, &p); intel_gt_info_print(&dev_priv->gt.info, &p); } @@ -810,7 +824,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return PTR_ERR(i915); /* Disable nuclear pageflip by default on pre-ILK */ - if (!i915->params.nuclear_pageflip && match_info->graphics_ver < 5) + if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5) 
i915->drm.driver_features &= ~DRIVER_ATOMIC; /* diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4f58e1456505..d99e020773ac 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -190,8 +190,6 @@ struct i915_hotplug { I915_GEM_DOMAIN_VERTEX) struct drm_i915_private; -struct i915_mm_struct; -struct i915_mmu_object; struct drm_i915_file_private { struct drm_i915_private *dev_priv; @@ -1324,15 +1322,15 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define IP_VER(ver, rel) ((ver) << 8 | (rel)) -#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) -#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics_ver, \ - INTEL_INFO(i915)->graphics_rel) +#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics.ver) +#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics.ver, \ + INTEL_INFO(i915)->graphics.rel) #define IS_GRAPHICS_VER(i915, from, until) \ (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) -#define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver) -#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media_ver, \ - INTEL_INFO(i915)->media_rel) +#define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver) +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.arch, \ + INTEL_INFO(i915)->media.rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) @@ -1345,15 +1343,20 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) -#define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step) +#define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) +#define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step) #define IS_DISPLAY_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) -#define IS_GT_STEP(__i915, since, until) \ - (drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \ - INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until)) +#define IS_GRAPHICS_STEP(__i915, since, until) \ + (drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \ + INTEL_GRAPHICS_STEP(__i915) >= (since) && INTEL_GRAPHICS_STEP(__i915) < (until)) + +#define IS_MEDIA_STEP(__i915, since, until) \ + (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \ + INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until)) static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, @@ -1526,15 +1529,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_TGL_Y(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) -#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until)) +#define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until)) -#define IS_KBL_GT_STEP(dev_priv, since, until) \ - (IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until)) +#define IS_KBL_GRAPHICS_STEP(dev_priv, since, until) \ + (IS_KABYLAKE(dev_priv) && IS_GRAPHICS_STEP(dev_priv, since, until)) #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until)) -#define IS_JSL_EHL_GT_STEP(p, 
since, until) \ - (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until)) +#define IS_JSL_EHL_GRAPHICS_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_GRAPHICS_STEP(p, since, until)) #define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \ (IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until)) @@ -1542,19 +1545,19 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_TIGERLAKE(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -#define IS_TGL_UY_GT_STEP(__i915, since, until) \ +#define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \ ((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_TGL_GT_STEP(__i915, since, until) \ +#define IS_TGL_GRAPHICS_STEP(__i915, since, until) \ (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) #define IS_RKL_DISPLAY_STEP(p, since, until) \ (IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until)) -#define IS_DG1_GT_STEP(p, since, until) \ - (IS_DG1(p) && IS_GT_STEP(p, since, until)) +#define IS_DG1_GRAPHICS_STEP(p, since, until) \ + (IS_DG1(p) && IS_GRAPHICS_STEP(p, since, until)) #define IS_DG1_DISPLAY_STEP(p, since, until) \ (IS_DG1(p) && IS_DISPLAY_STEP(p, since, until)) @@ -1562,20 +1565,20 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_ALDERLAKE_S(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -#define IS_ADLS_GT_STEP(__i915, since, until) \ +#define IS_ADLS_GRAPHICS_STEP(__i915, since, until) \ (IS_ALDERLAKE_S(__i915) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) #define IS_ADLP_DISPLAY_STEP(__i915, since, until) \ (IS_ALDERLAKE_P(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -#define IS_ADLP_GT_STEP(__i915, since, until) \ +#define IS_ADLP_GRAPHICS_STEP(__i915, since, until) \ (IS_ALDERLAKE_P(__i915) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_XEHPSDV_GT_STEP(__i915, since, until) \ - (IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until)) +#define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ + (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) /* * DG2 hardware steppings are a bit unusual. The hardware design was forked @@ -1591,9 +1594,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, * and stepping-specific logic will be applied with a general DG2-wide stepping * number. 
*/ -#define IS_DG2_GT_STEP(__i915, variant, since, until) \ +#define IS_DG2_GRAPHICS_STEP(__i915, variant, since, until) \ (IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) #define IS_DG2_DISP_STEP(__i915, since, until) \ (IS_DG2(__i915) && \ @@ -1757,26 +1760,27 @@ static inline bool run_as_guest(void) #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -static inline bool intel_vtd_active(void) +static inline bool intel_vtd_active(struct drm_i915_private *i915) { -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (device_iommu_mapped(i915->drm.dev)) return true; -#endif /* Running as a guest, we assume the host is enforcing VT'd */ return run_as_guest(); } +void +i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p); + static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) { - return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active(); + return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active(dev_priv); } static inline bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915) { - return IS_BROXTON(i915) && intel_vtd_active(); + return IS_BROXTON(i915) && intel_vtd_active(i915); } static inline bool @@ -1806,6 +1810,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) */ while (atomic_read(&i915->mm.free_count)) { flush_work(&i915->mm.free_work); + flush_delayed_work(&i915->bdev.wq); rcu_barrier(); } } @@ -1920,6 +1925,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm); struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, phys_addr_t size); +struct drm_i915_gem_object * +__i915_gem_object_create_internal(struct drm_i915_private *dev_priv, + const struct drm_i915_gem_object_ops *ops, + phys_addr_t size); /* i915_gem_tiling.c */ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 981e383d1a5d..527228d4da7e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -764,7 +764,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * perspective, requiring manual detiling by the client. */ if (!i915_gem_object_has_struct_page(obj) || - cpu_write_needs_clflush(obj)) + i915_gem_cpu_write_needs_clflush(obj)) /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between * textures). Fallback to the shmem path in that case. 
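Illustrative aside, not part of the merged patch: the renamed stepping macros above keep the same half-open [since, until) semantics as the old IS_*_GT_STEP variants, with STEP_FOREVER used as an open-ended upper bound. A hypothetical DG2 workaround would therefore be gated roughly as in the sketch below; FOO_REG and FOO_DIS are placeholder names, not real i915 symbols.

/* Hypothetical sketch only: FOO_REG and FOO_DIS are placeholders, not real i915 symbols. */
static void example_dg2_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	/* True for DG2-G10 steppings >= A0 and < C0; DG2-G11 parts are unaffected. */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0))
		wa_write_or(wal, FOO_REG, FOO_DIS);

	/* STEP_FOREVER leaves the upper bound open: B0 and every later G10 stepping. */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER))
		wa_masked_en(wal, FOO_REG, FOO_DIS);
}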
@@ -1005,7 +1005,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->ops->adjust_lru(obj); } - if (i915_gem_object_has_pages(obj)) { + if (i915_gem_object_has_pages(obj) || + i915_gem_object_has_self_managed_shrink_list(obj)) { unsigned long flags; spin_lock_irqsave(&i915->mm.obj_lock, flags); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2a2d7643b551..96d2d99f5b98 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -48,8 +48,9 @@ #include "i915_gpu_error.h" #include "i915_memcpy.h" #include "i915_scatterlist.h" +#include "i915_vma_snapshot.h" -#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) +#define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) #define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN) static void __sg_set_buf(struct scatterlist *sg, @@ -275,16 +276,16 @@ static bool compress_start(struct i915_vma_compress *c) static void *compress_next_page(struct i915_vma_compress *c, struct i915_vma_coredump *dst) { - void *page; + void *page_addr; + struct page *page; - if (dst->page_count >= dst->num_pages) - return ERR_PTR(-ENOSPC); - - page = pool_alloc(&c->pool, ALLOW_FAIL); - if (!page) + page_addr = pool_alloc(&c->pool, ALLOW_FAIL); + if (!page_addr) return ERR_PTR(-ENOMEM); - return dst->pages[dst->page_count++] = page; + page = virt_to_page(page_addr); + list_add_tail(&page->lru, &dst->page_list); + return page_addr; } static int compress_page(struct i915_vma_compress *c, @@ -397,7 +398,7 @@ static int compress_page(struct i915_vma_compress *c, if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) memcpy(ptr, src, PAGE_SIZE); - dst->pages[dst->page_count++] = ptr; + list_add_tail(&virt_to_page(ptr)->lru, &dst->page_list); cond_resched(); return 0; @@ -614,7 +615,7 @@ static void print_error_vma(struct drm_i915_error_state_buf *m, const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; - int page; + struct page *page; if (!vma) return; @@ -628,16 +629,17 @@ static void print_error_vma(struct drm_i915_error_state_buf *m, err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes); err_compression_marker(m); - for (page = 0; page < vma->page_count; page++) { + list_for_each_entry(page, &vma->page_list, lru) { int i, len; + const u32 *addr = page_address(page); len = PAGE_SIZE; - if (page == vma->page_count - 1) + if (page == list_last_entry(&vma->page_list, typeof(*page), lru)) len -= vma->unused; len = ascii85_encode_len(len); for (i = 0; i < len; i++) - err_puts(m, ascii85_encode(vma->pages[page][i], out)); + err_puts(m, ascii85_encode(addr[i], out)); } err_puts(m, "\n"); } @@ -946,10 +948,12 @@ static void i915_vma_coredump_free(struct i915_vma_coredump *vma) { while (vma) { struct i915_vma_coredump *next = vma->next; - int page; + struct page *page, *n; - for (page = 0; page < vma->page_count; page++) - free_page((unsigned long)vma->pages[page]); + list_for_each_entry_safe(page, n, &vma->page_list, lru) { + list_del_init(&page->lru); + __free_page(page); + } kfree(vma); vma = next; @@ -1009,25 +1013,21 @@ void __i915_gpu_coredump_free(struct kref *error_ref) static struct i915_vma_coredump * i915_vma_coredump_create(const struct intel_gt *gt, - const struct i915_vma *vma, - const char *name, + const struct i915_vma_snapshot *vsnap, struct i915_vma_compress *compress) { struct i915_ggtt *ggtt = gt->ggtt; const u64 slot = ggtt->error_capture.start; struct i915_vma_coredump *dst; - unsigned long num_pages; struct 
sgt_iter iter; int ret; might_sleep(); - if (!vma || !vma->pages || !compress) + if (!vsnap || !vsnap->pages || !compress) return NULL; - num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; - num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */ - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL); + dst = kmalloc(sizeof(*dst), ALLOW_FAIL); if (!dst) return NULL; @@ -1036,14 +1036,13 @@ i915_vma_coredump_create(const struct intel_gt *gt, return NULL; } - strcpy(dst->name, name); + INIT_LIST_HEAD(&dst->page_list); + strcpy(dst->name, vsnap->name); dst->next = NULL; - dst->gtt_offset = vma->node.start; - dst->gtt_size = vma->node.size; - dst->gtt_page_sizes = vma->page_sizes.gtt; - dst->num_pages = num_pages; - dst->page_count = 0; + dst->gtt_offset = vsnap->gtt_offset; + dst->gtt_size = vsnap->gtt_size; + dst->gtt_page_sizes = vsnap->page_sizes; dst->unused = 0; ret = -EINVAL; @@ -1051,7 +1050,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, void __iomem *s; dma_addr_t dma; - for_each_sgt_daddr(dma, iter, vma->pages) { + for_each_sgt_daddr(dma, iter, vsnap->pages) { mutex_lock(&ggtt->error_mutex); ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); @@ -1069,11 +1068,11 @@ i915_vma_coredump_create(const struct intel_gt *gt, if (ret) break; } - } else if (__i915_gem_object_is_lmem(vma->obj)) { - struct intel_memory_region *mem = vma->obj->mm.region; + } else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) { + struct intel_memory_region *mem = vsnap->mr; dma_addr_t dma; - for_each_sgt_daddr(dma, iter, vma->pages) { + for_each_sgt_daddr(dma, iter, vsnap->pages) { void __iomem *s; s = io_mapping_map_wc(&mem->iomap, @@ -1089,7 +1088,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, } else { struct page *page; - for_each_sgt_page(page, iter, vma->pages) { + for_each_sgt_page(page, iter, vsnap->pages) { void *s; drm_clflush_pages(&page, 1); @@ -1106,8 +1105,13 @@ i915_vma_coredump_create(const struct intel_gt *gt, } if (ret || compress_flush(compress, dst)) { - while (dst->page_count--) - pool_free(&compress->pool, dst->pages[dst->page_count]); + struct page *page, *n; + + list_for_each_entry_safe_reverse(page, n, &dst->page_list, lru) { + list_del_init(&page->lru); + pool_free(&compress->pool, page_address(page)); + } + kfree(dst); dst = NULL; } @@ -1320,38 +1324,72 @@ static bool record_context(struct i915_gem_context_coredump *e, struct intel_engine_capture_vma { struct intel_engine_capture_vma *next; - struct i915_vma *vma; + struct i915_vma_snapshot *vsnap; char name[16]; + bool lockdep_cookie; }; static struct intel_engine_capture_vma * -capture_vma(struct intel_engine_capture_vma *next, - struct i915_vma *vma, - const char *name, - gfp_t gfp) +capture_vma_snapshot(struct intel_engine_capture_vma *next, + struct i915_vma_snapshot *vsnap, + gfp_t gfp) { struct intel_engine_capture_vma *c; - if (!vma) + if (!i915_vma_snapshot_present(vsnap)) return next; c = kmalloc(sizeof(*c), gfp); if (!c) return next; - if (!i915_active_acquire_if_busy(&vma->active)) { + if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) { kfree(c); return next; } - strcpy(c->name, name); - c->vma = vma; /* reference held while active */ + strcpy(c->name, vsnap->name); + c->vsnap = vsnap; + i915_vma_snapshot_get(vsnap); c->next = next; return c; } static struct intel_engine_capture_vma * +capture_vma(struct intel_engine_capture_vma *next, + struct i915_vma *vma, + const char *name, + gfp_t gfp) +{ + struct i915_vma_snapshot 
*vsnap; + + if (!vma) + return next; + + /* + * If the vma isn't pinned, then the vma should be snapshotted + * to a struct i915_vma_snapshot at command submission time. + * Not here. + */ + GEM_WARN_ON(!i915_vma_is_pinned(vma)); + if (!i915_vma_is_pinned(vma)) + return next; + + vsnap = i915_vma_snapshot_alloc(gfp); + if (!vsnap) + return next; + + i915_vma_snapshot_init(vsnap, vma, name); + next = capture_vma_snapshot(next, vsnap, gfp); + + /* FIXME: Replace on async unbind. */ + i915_vma_snapshot_put(vsnap); + + return next; +} + +static struct intel_engine_capture_vma * capture_user(struct intel_engine_capture_vma *capture, const struct i915_request *rq, gfp_t gfp) @@ -1359,7 +1397,7 @@ capture_user(struct intel_engine_capture_vma *capture, struct i915_capture_list *c; for (c = rq->capture_list; c; c = c->next) - capture = capture_vma(capture, c->vma, "user", gfp); + capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp); return capture; } @@ -1373,6 +1411,36 @@ static void add_vma(struct intel_engine_coredump *ee, } } +static struct i915_vma_coredump * +create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma, + const char *name, struct i915_vma_compress *compress) +{ + struct i915_vma_coredump *ret = NULL; + struct i915_vma_snapshot tmp; + bool lockdep_cookie; + + if (!vma) + return NULL; + + i915_vma_snapshot_init_onstack(&tmp, vma, name); + if (i915_vma_snapshot_resource_pin(&tmp, &lockdep_cookie)) { + ret = i915_vma_coredump_create(gt, &tmp, compress); + i915_vma_snapshot_resource_unpin(&tmp, lockdep_cookie); + } + i915_vma_snapshot_put_onstack(&tmp); + + return ret; +} + +static void add_vma_coredump(struct intel_engine_coredump *ee, + const struct intel_gt *gt, + struct i915_vma *vma, + const char *name, + struct i915_vma_compress *compress) +{ + add_vma(ee, create_vma_coredump(gt, vma, name, compress)); +} + struct intel_engine_coredump * intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) { @@ -1406,7 +1474,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee, * as the simplest method to avoid being overwritten * by userspace. 
*/ - vma = capture_vma(vma, rq->batch, "batch", gfp); + vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp); vma = capture_user(vma, rq, gfp); vma = capture_vma(vma, rq->ring->vma, "ring", gfp); vma = capture_vma(vma, rq->context->state, "HW context", gfp); @@ -1427,30 +1495,24 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, while (capture) { struct intel_engine_capture_vma *this = capture; - struct i915_vma *vma = this->vma; + struct i915_vma_snapshot *vsnap = this->vsnap; add_vma(ee, i915_vma_coredump_create(engine->gt, - vma, this->name, - compress)); + vsnap, compress)); - i915_active_release(&vma->active); + i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie); + i915_vma_snapshot_put(vsnap); capture = this->next; kfree(this); } - add_vma(ee, - i915_vma_coredump_create(engine->gt, - engine->status_page.vma, - "HW Status", - compress)); + add_vma_coredump(ee, engine->gt, engine->status_page.vma, + "HW Status", compress); - add_vma(ee, - i915_vma_coredump_create(engine->gt, - engine->wa_ctx.vma, - "WA context", - compress)); + add_vma_coredump(ee, engine->gt, engine->wa_ctx.vma, + "WA context", compress); } static struct intel_engine_coredump * @@ -1486,17 +1548,25 @@ capture_engine(struct intel_engine_cs *engine, } } if (rq) - capture = intel_engine_coredump_add_request(ee, rq, - ATOMIC_MAYFAIL); + rq = i915_request_get_rcu(rq); + + if (!rq) + goto no_request_capture; + + capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); if (!capture) { -no_request_capture: - kfree(ee); - return NULL; + i915_request_put(rq); + goto no_request_capture; } intel_engine_coredump_add_vma(ee, capture, compress); + i915_request_put(rq); return ee; + +no_request_capture: + kfree(ee); + return NULL; } static void @@ -1550,10 +1620,8 @@ gt_record_uc(struct intel_gt_coredump *gt, */ error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); - error_uc->guc_log = - i915_vma_coredump_create(gt->_gt, - uc->guc.log.vma, "GuC log buffer", - compress); + error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, + "GuC log buffer", compress); return error_uc; } @@ -1750,10 +1818,7 @@ static void capture_gen(struct i915_gpu_coredump *error) error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); error->suspended = i915->runtime_pm.suspended; - error->iommu = -1; -#ifdef CONFIG_INTEL_IOMMU - error->iommu = intel_iommu_gfx_mapped; -#endif + error->iommu = intel_vtd_active(i915); error->reset_count = i915_reset_count(&i915->gpu_error); error->suspend_count = i915->suspend_count; @@ -1839,8 +1904,8 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt, kfree(compress); } -struct i915_gpu_coredump * -i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) +static struct i915_gpu_coredump * +__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) { struct drm_i915_private *i915 = gt->i915; struct i915_gpu_coredump *error; @@ -1881,6 +1946,22 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) return error; } +struct i915_gpu_coredump * +i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) +{ + static DEFINE_MUTEX(capture_mutex); + int ret = mutex_lock_interruptible(&capture_mutex); + struct i915_gpu_coredump *dump; + + if (ret) + return ERR_PTR(ret); + + dump = __i915_gpu_coredump(gt, engine_mask); + mutex_unlock(&capture_mutex); + + return dump; +} + void i915_error_state_store(struct i915_gpu_coredump *error) { 
struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index b98d8cdbe4f2..5aedf5129814 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -39,10 +39,8 @@ struct i915_vma_coredump { u64 gtt_size; u32 gtt_page_sizes; - int num_pages; - int page_count; int unused; - u32 *pages[]; + struct list_head page_list; }; struct i915_request_coredump { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a4b0a0c45b13..6aea159abc50 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2772,7 +2772,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) { struct drm_i915_private * const i915 = arg; struct intel_gt *gt = &i915->gt; - void __iomem * const regs = i915->uncore.regs; + void __iomem * const regs = gt->uncore->regs; u32 master_tile_ctl, master_ctl; u32 gu_misc_iir; @@ -3173,11 +3173,12 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) static void gen11_irq_reset(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = &dev_priv->gt; + struct intel_uncore *uncore = gt->uncore; gen11_master_intr_disable(dev_priv->uncore.regs); - gen11_gt_irq_reset(&dev_priv->gt); + gen11_gt_irq_reset(gt); gen11_display_irq_reset(dev_priv); GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); @@ -3186,11 +3187,12 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv) static void dg1_irq_reset(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = &dev_priv->gt; + struct intel_uncore *uncore = gt->uncore; dg1_master_intr_disable(dev_priv->uncore.regs); - gen11_gt_irq_reset(&dev_priv->gt); + gen11_gt_irq_reset(gt); gen11_display_irq_reset(dev_priv); GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); @@ -3869,13 +3871,14 @@ static void gen11_de_irq_postinstall(struct drm_i915_private *dev_priv) static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = &dev_priv->gt; + struct intel_uncore *uncore = gt->uncore; u32 gu_misc_masked = GEN11_GU_MISC_GSE; if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) icp_irq_postinstall(dev_priv); - gen11_gt_irq_postinstall(&dev_priv->gt); + gen11_gt_irq_postinstall(gt); gen11_de_irq_postinstall(dev_priv); GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); @@ -3886,10 +3889,11 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) static void dg1_irq_postinstall(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = &dev_priv->gt; + struct intel_uncore *uncore = gt->uncore; u32 gu_misc_masked = GEN11_GU_MISC_GSE; - gen11_gt_irq_postinstall(&dev_priv->gt); + gen11_gt_irq_postinstall(gt); GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); @@ -3900,8 +3904,8 @@ static void dg1_irq_postinstall(struct drm_i915_private *dev_priv) GEN11_DISPLAY_IRQ_ENABLE); } - dg1_master_intr_enable(dev_priv->uncore.regs); - intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_TILE_INTR); + dg1_master_intr_enable(uncore->regs); + intel_uncore_posting_read(uncore, DG1_MSTR_TILE_INTR); } static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 52749b8f4f6b..f01cba4ec283 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ 
b/drivers/gpu/drm/i915/i915_pci.c @@ -33,8 +33,8 @@ #define PLATFORM(x) .platform = (x) #define GEN(x) \ - .graphics_ver = (x), \ - .media_ver = (x), \ + .graphics.ver = (x), \ + .media.ver = (x), \ .display.ver = (x) #define I845_PIPE_OFFSETS \ @@ -906,7 +906,7 @@ static const struct intel_device_info rkl_info = { static const struct intel_device_info dg1_info = { GEN12_FEATURES, DGFX_FEATURES, - .graphics_rel = 10, + .graphics.rel = 10, PLATFORM(INTEL_DG1), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .require_force_probe = 1, @@ -998,8 +998,8 @@ static const struct intel_device_info adl_p_info = { I915_GTT_PAGE_SIZE_2M #define XE_HP_FEATURES \ - .graphics_ver = 12, \ - .graphics_rel = 50, \ + .graphics.ver = 12, \ + .graphics.rel = 50, \ XE_HP_PAGE_SIZES, \ .dma_mask_size = 46, \ .has_64bit_reloc = 1, \ @@ -1017,8 +1017,8 @@ static const struct intel_device_info adl_p_info = { .ppgtt_type = INTEL_PPGTT_FULL #define XE_HPM_FEATURES \ - .media_ver = 12, \ - .media_rel = 50 + .media.ver = 12, \ + .media.rel = 50 __maybe_unused static const struct intel_device_info xehpsdv_info = { @@ -1042,8 +1042,8 @@ static const struct intel_device_info dg2_info = { XE_HPM_FEATURES, XE_LPD_FEATURES, DGFX_FEATURES, - .graphics_rel = 55, - .media_rel = 55, + .graphics.rel = 55, + .media.rel = 55, PLATFORM(INTEL_DG2), .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 47fcd104868f..3450818802c2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -501,6 +501,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOBITS_PPGTT_CACHE64B (3 << 8) #define ECOBITS_PPGTT_CACHE4B (0 << 8) +#define GEN12_GAMCNTRL_CTRL _MMIO(0xcf54) +#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) +#define GLOBAL_INVALIDATION_MODE REG_BIT(2) + +#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c) +#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12) +#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) +#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) + +#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28) +#define FORCE_MISS_FTLB REG_BIT(3) + #define GAB_CTL _MMIO(0x24000) #define GAB_CTL_CONT_AFTER_PAGEFAULT (1 << 8) @@ -722,6 +734,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_OA_TLB_INV_CR _MMIO(0xceec) +#define GEN12_SQCM _MMIO(0x8724) +#define EN_32B_ACCESS REG_BIT(30) + /* Gen12 OAR unit */ #define GEN12_OAR_OACONTROL _MMIO(0x2960) #define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 @@ -773,6 +788,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define EU_PERF_CNTL5 _MMIO(0xe55c) #define EU_PERF_CNTL6 _MMIO(0xe65c) +#define RT_CTRL _MMIO(0xe530) +#define DIS_NULL_QUERY REG_BIT(10) + /* * OA Boolean state */ @@ -2665,6 +2683,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ +#define GUCPMTIMESTAMP _MMIO(0xC3E8) + /* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ #define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) #define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4) @@ -2775,6 +2795,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VDBOX_CGCTL3F10(base) _MMIO((base) + 0x3f10) #define IECPUNIT_CLKGATE_DIS REG_BIT(22) +#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18) +#define ALNUNIT_CLKGATE_DIS REG_BIT(13) + #define ERROR_GEN6 _MMIO(0x40a0) #define GEN7_ERR_INT _MMIO(0x44040) #define 
ERR_INT_POISON (1 << 31) @@ -2873,6 +2896,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2) #define GEN11_ENABLE_32_PLANE_MODE (1 << 7) +#define SCCGCTL94DC _MMIO(0x94dc) +#define CG3DDISURB REG_BIT(14) + +#define MLTICTXCTL _MMIO(0xb170) +#define TDONRENDER REG_BIT(2) + +#define L3SQCREG1_CCS0 _MMIO(0xb200) +#define FLUSHALLNONCOH REG_BIT(5) + /* WaClearTdlStateAckDirtyBits */ #define GEN8_STATE_ACK _MMIO(0x20F0) #define GEN9_STATE_ACK_SLICE1 _MMIO(0x20F8) @@ -3109,7 +3141,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) #define GEN10_CACHE_MODE_SS _MMIO(0xe420) -#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) +#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) +#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) /* Fuse readout registers for GT */ #define HSW_PAVP_FUSE1 _MMIO(0x911C) @@ -4288,21 +4321,62 @@ enum { /* * GEN10 clock gating regs */ + +#define UNSLCGCTL9440 _MMIO(0x9440) +#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28) +#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27) +#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26) +#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24) +#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23) +#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22) +#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21) +#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17) +#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16) +#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15) +#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14) +#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6) + +#define UNSLCGCTL9444 _MMIO(0x9444) +#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30) +#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29) +#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28) +#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27) +#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26) +#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25) +#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24) +#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23) +#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22) +#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21) +#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20) +#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19) +#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18) +#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17) +#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) +#define LTCDD_CLKGATE_DIS REG_BIT(10) + #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) +#define NODEDSS_CLKGATE_DIS REG_BIT(12) #define L3_CLKGATE_DIS REG_BIT(16) #define L3_CR2X_CLKGATE_DIS REG_BIT(17) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) -#define GWUNIT_CLKGATE_DIS (1 << 16) +#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) +#define GWUNIT_CLKGATE_DIS REG_BIT(16) #define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528) #define CPSSUNIT_CLKGATE_DIS REG_BIT(9) +#define SSMCGCTL9530 _MMIO(0x9530) +#define RTFUNIT_CLKGATE_DIS REG_BIT(18) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS REG_BIT(20) +#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */ +#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */ +#define GAMEDIA_CLKGATE_DIS REG_BIT(11) #define HSUNIT_CLKGATE_DIS REG_BIT(8) #define VSUNIT_CLKGATE_DIS REG_BIT(3) @@ -8367,6 +8441,9 @@ enum { #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11) +#define GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON _MMIO(0x20EC) +#define GEN12_REPLAY_MODE_GRANULARITY REG_BIT(0) + #define GEN8_CS_CHICKEN1 _MMIO(0x2580) #define 
GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) @@ -8390,9 +8467,10 @@ enum { #define GEN8_ERRDETBCTRL (1 << 9) #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) - #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) - #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) - #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) +#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) +#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) +#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) +#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) #define HIZ_CHICKEN _MMIO(0x7018) # define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15) @@ -8446,6 +8524,12 @@ enum { #define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) #define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22) +#define GEN11_L3SQCREG5 _MMIO(0xb158) +#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) + +#define XEHP_L3SCQREG7 _MMIO(0xb188) +#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) + /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) #define ICL_HDC_MODE _MMIO(0xE5F4) @@ -8456,6 +8540,12 @@ enum { #define HDC_FORCE_NON_COHERENT (1 << 4) #define HDC_BARRIER_PERFORMANCE_DISABLE (1 << 10) +#define GEN12_HDC_CHICKEN0 _MMIO(0xE5F0) +#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) + +#define SARB_CHICKEN1 _MMIO(0xe90c) +#define COMP_CKN_IN REG_GENMASK(30, 29) + #define GEN8_HDC_CHICKEN1 _MMIO(0x7304) /* GEN9 chicken */ @@ -8486,6 +8576,10 @@ enum { #define PIXEL_ROUNDING_TRUNC_FB_PASSTHRU (1 << 15) #define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) +#define VFLSKPD _MMIO(0x62a8) +#define DIS_OVER_FETCH_CACHE REG_BIT(1) +#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) + #define FF_MODE2 _MMIO(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) @@ -9309,6 +9403,9 @@ enum { #define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1 << 14) #define GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1 << 28) +#define UNSLCGCTL9430 _MMIO(0x9430) +#define MSQDUNIT_CLKGATE_DIS REG_BIT(3) + #define GEN6_GFXPAUSE _MMIO(0xA000) #define GEN6_RPNSWREQ _MMIO(0xA008) #define GEN6_TURBO_DISABLE (1 << 31) @@ -9624,24 +9721,39 @@ enum { #define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3) #define GEN8_ROW_CHICKEN _MMIO(0xe4f0) -#define FLOW_CONTROL_ENABLE (1 << 15) -#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1 << 8) -#define STALL_DOP_GATING_DISABLE (1 << 5) -#define THROTTLE_12_5 (7 << 2) -#define DISABLE_EARLY_EOT (1 << 1) +#define FLOW_CONTROL_ENABLE REG_BIT(15) +#define UGM_BACKUP_MODE REG_BIT(13) +#define MDQ_ARBITRATION_MODE REG_BIT(12) +#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8) +#define STALL_DOP_GATING_DISABLE REG_BIT(5) +#define THROTTLE_12_5 REG_GENMASK(4, 2) +#define DISABLE_EARLY_EOT REG_BIT(1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) +#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) +#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) #define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8) +#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) +#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4) +#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) +#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) +#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) +#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) + #define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4) #define 
DOP_CLOCK_GATING_DISABLE (1 << 0) #define PUSH_CONSTANT_DEREF_DISABLE (1 << 8) #define GEN11_TDL_CLOCK_GATING_FIX_DISABLE (1 << 1) -#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c) -#define GEN12_DISABLE_TDL_PUSH REG_BIT(9) -#define GEN11_DIS_PICK_2ND_EU REG_BIT(7) +#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c) +#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) +#define GEN12_DISABLE_TDL_PUSH REG_BIT(9) +#define GEN11_DIS_PICK_2ND_EU REG_BIT(7) +#define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) @@ -9656,9 +9768,10 @@ enum { #define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) -#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR (1 << 8) -#define GEN9_ENABLE_YV12_BUGFIX (1 << 4) -#define GEN9_ENABLE_GPGPU_PREEMPTION (1 << 2) +#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) +#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8) +#define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4) +#define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2) /* Audio */ #define G4X_AUD_VID_DID _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x62020) @@ -12484,11 +12597,19 @@ enum skl_power_gate { #define PMFLUSH_GAPL3UNBLOCK (1 << 21) #define PMFLUSHDONE_LNEBLK (1 << 22) +#define XEHP_L3NODEARBCFG _MMIO(0xb0b4) +#define XEHP_LNESPARE REG_BIT(19) + #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define GEN12_GSMBASE _MMIO(0x108100) #define GEN12_DSMBASE _MMIO(0x1080C0) +#define XEHP_CLOCK_GATE_DIS _MMIO(0x101014) +#define SGSI_SIDECLK_DIS REG_BIT(17) +#define SGGI_DIS REG_BIT(15) +#define SGR_DIS REG_BIT(13) + /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) #define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ @@ -12865,4 +12986,7 @@ enum skl_power_gate { #define CLKGATE_DIS_MISC _MMIO(0x46534) #define CLKGATE_DIS_MISC_DMASC_GATING_DIS REG_BIT(21) +#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731C) +#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 42cd17357771..471cde0e9883 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -113,6 +113,10 @@ static void i915_fence_release(struct dma_fence *fence) GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && rq->guc_prio != GUC_PRIO_FINI); + i915_request_free_capture_list(fetch_and_zero(&rq->capture_list)); + if (i915_vma_snapshot_present(&rq->batch_snapshot)) + i915_vma_snapshot_put_onstack(&rq->batch_snapshot); + /* * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. 
@@ -186,19 +190,6 @@ void i915_request_notify_execute_cb_imm(struct i915_request *rq) __notify_execute_cb(rq, irq_work_imm); } -static void free_capture_list(struct i915_request *request) -{ - struct i915_capture_list *capture; - - capture = fetch_and_zero(&request->capture_list); - while (capture) { - struct i915_capture_list *next = capture->next; - - kfree(capture); - capture = next; - } -} - static void __i915_request_fill(struct i915_request *rq, u8 val) { void *vaddr = rq->ring->vaddr; @@ -303,6 +294,37 @@ static void __rq_cancel_watchdog(struct i915_request *rq) i915_request_put(rq); } +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +/** + * i915_request_free_capture_list - Free a capture list + * @capture: Pointer to the first list item or NULL + * + */ +void i915_request_free_capture_list(struct i915_capture_list *capture) +{ + while (capture) { + struct i915_capture_list *next = capture->next; + + i915_vma_snapshot_put(capture->vma_snapshot); + capture = next; + } +} + +#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list) + +#define clear_capture_list(_rq) ((_rq)->capture_list = NULL) + +#else + +#define i915_request_free_capture_list(_a) do {} while (0) + +#define assert_capture_list_is_null(_a) do {} while (0) + +#define clear_capture_list(_rq) do {} while (0) + +#endif + bool i915_request_retire(struct i915_request *rq) { if (!__i915_request_is_complete(rq)) @@ -339,7 +361,7 @@ bool i915_request_retire(struct i915_request *rq) } if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) - atomic_dec(&rq->engine->gt->rps.num_waiters); + intel_rps_dec_waiters(&rq->engine->gt->rps); /* * We only loosely track inflight requests across preemption, @@ -359,7 +381,6 @@ bool i915_request_retire(struct i915_request *rq) intel_context_exit(rq->context); intel_context_unpin(rq->context); - free_capture_list(rq); i915_sched_node_fini(&rq->sched); i915_request_put(rq); @@ -719,7 +740,7 @@ void i915_request_cancel(struct i915_request *rq, int error) intel_context_cancel_request(rq->context, rq); } -static int __i915_sw_fence_call +static int submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_request *request = @@ -755,7 +776,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static int __i915_sw_fence_call +static int semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); @@ -829,11 +850,18 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->submit, submit_notify); i915_sw_fence_init(&rq->semaphore, semaphore_notify); - rq->capture_list = NULL; + clear_capture_list(rq); + rq->batch_snapshot.present = false; init_llist_head(&rq->execute_cb); } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#define clear_batch_ptr(_rq) ((_rq)->batch = NULL) +#else +#define clear_batch_ptr(_a) do {} while (0) +#endif + struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { @@ -925,10 +953,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) i915_sched_node_reinit(&rq->sched); /* No zalloc, everything must be cleared after use */ - rq->batch = NULL; + clear_batch_ptr(rq); __rq_init_watchdog(rq); - GEM_BUG_ON(rq->capture_list); + assert_capture_list_is_null(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); + GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot)); /* * Reserve space in the ring buffer for all the commands required to diff --git 
a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 3c6e8acd1457..0ed01979491b 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -40,6 +40,7 @@ #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_sw_fence.h" +#include "i915_vma_snapshot.h" #include <uapi/drm/i915_drm.h> @@ -48,11 +49,17 @@ struct drm_i915_gem_object; struct drm_printer; struct i915_request; +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) struct i915_capture_list { + struct i915_vma_snapshot *vma_snapshot; struct i915_capture_list *next; - struct i915_vma *vma; }; +void i915_request_free_capture_list(struct i915_capture_list *capture); +#else +#define i915_request_free_capture_list(_a) do {} while (0) +#endif + #define RQ_TRACE(rq, fmt, ...) do { \ const struct i915_request *rq__ = (rq); \ ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \ @@ -289,10 +296,12 @@ struct i915_request { /** Preallocate space in the ring for the emitting the request */ u32 reserved_space; - /** Batch buffer related to this request if any (used for - * error state dump only). - */ - struct i915_vma *batch; + /** Batch buffer pointer for selftest internal use. */ + I915_SELFTEST_DECLARE(struct i915_vma *batch); + + struct i915_vma_snapshot batch_snapshot; + +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) /** * Additional buffers requested by userspace to be captured upon * a GPU hang. The vma/obj on this list are protected by their @@ -300,6 +309,7 @@ struct i915_request { * on the active_list (of their final request). */ struct i915_capture_list *capture_list; +#endif /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 4a6712dca838..41f2adb6a583 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st) return true; } +static void i915_refct_sgt_release(struct kref *ref) +{ + struct i915_refct_sgt *rsgt = + container_of(ref, typeof(*rsgt), kref); + + sg_free_table(&rsgt->table); + kfree(rsgt); +} + +static const struct i915_refct_sgt_ops rsgt_ops = { + .release = i915_refct_sgt_release +}; + +/** + * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops + * @rsgt: The struct i915_refct_sgt to initialize. + * size: The size of the underlying memory buffer. + */ +void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) +{ + __i915_refct_sgt_init(rsgt, size, &rsgt_ops); +} + /** - * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node + * i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct + * drm_mm_node * @node: The drm_mm_node. * @region_start: An offset to add to the dma addresses of the sg list. * @@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st) * taking a maximum segment length into account, splitting into segments * if necessary. * - * Return: A pointer to a kmalloced struct sg_table on success, negative + * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative * error code cast to an error pointer on failure. */ -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, - u64 region_start) +struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, + u64 region_start) { const u64 max_segment = SZ_1G; /* Do we have a limit on this? 
*/ u64 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; + struct i915_refct_sgt *rsgt; struct sg_table *st; struct scatterlist *sg; - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); + if (!rsgt) return ERR_PTR(-ENOMEM); + i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); + st = &rsgt->table; if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), GFP_KERNEL)) { - kfree(st); + i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); } @@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, sg_mark_end(sg); i915_sg_trim(st); - return st; + return rsgt; } /** - * i915_sg_from_buddy_resource - Create an sg_table from a struct + * i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct * i915_buddy_block list * @res: The struct i915_ttm_buddy_resource. * @region_start: An offset to add to the dma addresses of the sg list. @@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, * taking a maximum segment length into account, splitting into segments * if necessary. * - * Return: A pointer to a kmalloced struct sg_table on success, negative + * Return: A pointer to a kmalloced struct i915_refct_sgts on success, negative * error code cast to an error pointer on failure. */ -struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, - u64 region_start) +struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, + u64 region_start) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; @@ -129,18 +156,21 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, struct i915_buddy_mm *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct i915_buddy_block *block; + struct i915_refct_sgt *rsgt; struct scatterlist *sg; struct sg_table *st; resource_size_t prev_end; GEM_BUG_ON(list_empty(blocks)); - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); + if (!rsgt) return ERR_PTR(-ENOMEM); + i915_refct_sgt_init(rsgt, size); + st = &rsgt->table; if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) { - kfree(st); + i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); } @@ -181,7 +211,7 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, sg_mark_end(sg); i915_sg_trim(st); - return st; + return rsgt; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index b8bd5925b03f..12c6a1684081 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -144,10 +144,78 @@ static inline unsigned int i915_sg_segment_size(void) bool i915_sg_trim(struct sg_table *orig_st); -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, - u64 region_start); +/** + * struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt + */ +struct i915_refct_sgt_ops { + /** + * release() - Free the memory of the struct i915_refct_sgt + * @ref: struct kref that is embedded in the struct i915_refct_sgt + */ + void (*release)(struct kref *ref); +}; + +/** + * struct i915_refct_sgt - A refcounted scatter-gather table + * @kref: struct kref for refcounting + * @table: struct sg_table holding the scatter-gather table itself. Note that + * @table->sgl = NULL can be used to determine whether a scatter-gather table + * is present or not. 
+ * @size: The size in bytes of the underlying memory buffer + * @ops: The operations structure. + */ +struct i915_refct_sgt { + struct kref kref; + struct sg_table table; + size_t size; + const struct i915_refct_sgt_ops *ops; +}; + +/** + * i915_refct_sgt_put - Put a refcounted sg-table + * @rsgt the struct i915_refct_sgt to put. + */ +static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt) +{ + if (rsgt) + kref_put(&rsgt->kref, rsgt->ops->release); +} + +/** + * i915_refct_sgt_get - Get a refcounted sg-table + * @rsgt the struct i915_refct_sgt to get. + */ +static inline struct i915_refct_sgt * +i915_refct_sgt_get(struct i915_refct_sgt *rsgt) +{ + kref_get(&rsgt->kref); + return rsgt; +} + +/** + * __i915_refct_sgt_init - Initialize a refcounted sg-list with a custom + * operations structure + * @rsgt The struct i915_refct_sgt to initialize. + * @size: Size in bytes of the underlying memory buffer. + * @ops: A customized operations structure in case the refcounted sg-list + * is embedded into another structure. + */ +static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt, + size_t size, + const struct i915_refct_sgt_ops *ops) +{ + kref_init(&rsgt->kref); + rsgt->table.sgl = NULL; + rsgt->size = size; + rsgt->ops = ops; +} + +void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); + +struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, + u64 region_start); -struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, - u64 region_start); +struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, + u64 region_start); #endif diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 7ea0dbf81530..2a74a9a1cafe 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -18,7 +18,9 @@ #define I915_SW_FENCE_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG static DEFINE_SPINLOCK(i915_sw_fence_lock); +#endif #define WQ_FLAG_BITS \ BITS_PER_TYPE(typeof_member(struct wait_queue_entry, flags)) @@ -34,7 +36,7 @@ enum { static void *i915_sw_fence_debug_hint(void *addr) { - return (void *)(((struct i915_sw_fence *)addr)->flags & I915_SW_FENCE_MASK); + return (void *)(((struct i915_sw_fence *)addr)->fn); } #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS @@ -126,10 +128,7 @@ static inline void debug_fence_assert(struct i915_sw_fence *fence) static int __i915_sw_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { - i915_sw_fence_notify_t fn; - - fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK); - return fn(fence, state); + return fence->fn(fence, state); } #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS @@ -242,10 +241,13 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence, const char *name, struct lock_class_key *key) { - BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK); + BUG_ON(!fn); __init_waitqueue_head(&fence->wait, name, key); - fence->flags = (unsigned long)fn; + fence->fn = fn; +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG + fence->flags = 0; +#endif i915_sw_fence_reinit(fence); } @@ -257,7 +259,6 @@ void i915_sw_fence_reinit(struct i915_sw_fence *fence) atomic_set(&fence->pending, 1); fence->error = 0; - I915_SW_FENCE_BUG_ON(!fence->flags); I915_SW_FENCE_BUG_ON(!list_empty(&fence->wait.head)); } @@ -279,6 +280,7 @@ static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, return 0; } +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG 
static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence, const struct i915_sw_fence * const signaler) { @@ -322,9 +324,6 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, unsigned long flags; bool err; - if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG)) - return false; - spin_lock_irqsave(&i915_sw_fence_lock, flags); err = __i915_sw_fence_check_if_after(fence, signaler); __i915_sw_fence_clear_checked_bit(fence); @@ -332,6 +331,13 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, return err; } +#else +static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, + const struct i915_sw_fence * const signaler) +{ + return false; +} +#endif static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *signaler, diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 30a863353ee6..a7c603bc1b01 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -17,26 +17,27 @@ struct completion; struct dma_resv; +struct i915_sw_fence; + +enum i915_sw_fence_notify { + FENCE_COMPLETE, + FENCE_FREE +}; + +typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *, + enum i915_sw_fence_notify state); struct i915_sw_fence { wait_queue_head_t wait; + i915_sw_fence_notify_t fn; +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG unsigned long flags; +#endif atomic_t pending; int error; }; #define I915_SW_FENCE_CHECKED_BIT 0 /* used internally for DAG checking */ -#define I915_SW_FENCE_PRIVATE_BIT 1 /* available for use by owner */ -#define I915_SW_FENCE_MASK (~3) - -enum i915_sw_fence_notify { - FENCE_COMPLETE, - FENCE_FREE -}; - -typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *, - enum i915_sw_fence_notify state); -#define __i915_sw_fence_call __aligned(4) void __i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn, diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c index 5b33ef23d54c..d2e56b387993 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.c +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c @@ -23,7 +23,7 @@ static void fence_work(struct work_struct *work) dma_fence_put(&f->dma); } -static int __i915_sw_fence_call +static int fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct dma_fence_work *f = container_of(fence, typeof(*f), chain); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 1804f4142740..59d441cedc75 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -279,7 +279,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct intel_rps *rps = &i915->gt.rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->boost_freq)); + return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -288,7 +288,6 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt.rps; - bool boost = false; ssize_t ret; u32 val; @@ -296,21 +295,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, if (ret) return ret; - /* Validate against (static) hardware limits */ - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || val > rps->max_freq) - return -EINVAL; - - 
mutex_lock(&rps->lock); - if (val != rps->boost_freq) { - rps->boost_freq = val; - boost = atomic_read(&rps->num_waiters); - } - mutex_unlock(&rps->lock); - if (boost) - schedule_work(&rps->work); + ret = intel_rps_set_boost_frequency(rps, val); - return count; + return ret ?: count; } static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bef795e265a6..927f0d4f8e11 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -40,12 +40,12 @@ static struct kmem_cache *slab_vmas; -struct i915_vma *i915_vma_alloc(void) +static struct i915_vma *i915_vma_alloc(void) { return kmem_cache_zalloc(slab_vmas, GFP_KERNEL); } -void i915_vma_free(struct i915_vma *vma) +static void i915_vma_free(struct i915_vma *vma) { return kmem_cache_free(slab_vmas, vma); } @@ -113,7 +113,6 @@ vma_create(struct drm_i915_gem_object *obj, vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; vma->obj = obj; - vma->resv = obj->base.resv; vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; @@ -346,7 +345,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma) fence = dma_fence_get_rcu_safe(&vma->active.excl.fence); rcu_read_unlock(); if (fence) { - err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT); + err = dma_fence_wait(fence, true); dma_fence_put(fence); } } @@ -354,6 +353,32 @@ int i915_vma_wait_for_bind(struct i915_vma *vma) return err; } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +static int i915_vma_verify_bind_complete(struct i915_vma *vma) +{ + int err = 0; + + if (i915_active_has_exclusive(&vma->active)) { + struct dma_fence *fence = + i915_active_fence_get(&vma->active.excl); + + if (!fence) + return 0; + + if (dma_fence_is_signaled(fence)) + err = fence->error; + else + err = -EBUSY; + + dma_fence_put(fence); + } + + return err; +} +#else +#define i915_vma_verify_bind_complete(_vma) 0 +#endif + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map @@ -423,11 +448,16 @@ int i915_vma_bind(struct i915_vma *vma, work->base.dma.error = 0; /* enable the queue_work() */ + __i915_gem_object_pin_pages(vma->obj); + work->pinned = i915_gem_object_get(vma->obj); + } else { if (vma->obj) { - __i915_gem_object_pin_pages(vma->obj); - work->pinned = i915_gem_object_get(vma->obj); + int ret; + + ret = i915_gem_object_wait_moving_fence(vma->obj, true); + if (ret) + return ret; } - } else { vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } @@ -449,6 +479,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); + GEM_BUG_ON(i915_vma_verify_bind_complete(vma)); ptr = READ_ONCE(vma->iomap); if (ptr == NULL) { @@ -667,7 +698,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } color = 0; - if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) + if (i915_vm_has_cache_coloring(vma->vm)) color = vma->obj->cache_level; if (flags & PIN_OFFSET_FIXED) { @@ -792,17 +823,14 @@ unpinned: static int vma_get_pages(struct i915_vma *vma) { int err = 0; - bool pinned_pages = false; + bool pinned_pages = true; if (atomic_add_unless(&vma->pages_count, 1, 0)) return 0; - if (vma->obj) { - err = i915_gem_object_pin_pages(vma->obj); - if (err) - return err; - pinned_pages = true; - } + err = i915_gem_object_pin_pages(vma->obj); + if (err) + return err; /* Allocations ahoy! 
*/ if (mutex_lock_interruptible(&vma->pages_mutex)) { @@ -835,8 +863,8 @@ static void __vma_put_pages(struct i915_vma *vma, unsigned int count) if (atomic_sub_return(count, &vma->pages_count) == 0) { vma->ops->clear_pages(vma); GEM_BUG_ON(vma->pages); - if (vma->obj) - i915_gem_object_unpin_pages(vma->obj); + + i915_gem_object_unpin_pages(vma->obj); } mutex_unlock(&vma->pages_mutex); } @@ -867,12 +895,13 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u64 size, u64 alignment, u64 flags) { struct i915_vma_work *work = NULL; + struct dma_fence *moving = NULL; intel_wakeref_t wakeref = 0; unsigned int bound; int err; #ifdef CONFIG_PROVE_LOCKING - if (debug_locks && !WARN_ON(!ww) && vma->resv) + if (debug_locks && !WARN_ON(!ww)) assert_vma_held(vma); #endif @@ -892,7 +921,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); - if (flags & vma->vm->bind_async_flags) { + moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; + if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww); if (err) @@ -906,6 +936,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, work->vm = i915_vm_get(vma->vm); + dma_fence_work_chain(&work->base, moving); + /* Allocate enough page directories to used PTE */ if (vma->vm->allocate_va_range) { err = i915_vm_alloc_pt_stash(vma->vm, @@ -980,7 +1012,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!vma->pages); err = i915_vma_bind(vma, - vma->obj ? vma->obj->cache_level : 0, + vma->obj->cache_level, flags, work); if (err) goto err_remove; @@ -1010,7 +1042,10 @@ err_fence: err_rpm: if (wakeref) intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); + if (moving) + dma_fence_put(moving); vma_put_pages(vma); + return err; } @@ -1034,7 +1069,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!i915_vma_is_ggtt(vma)); #ifdef CONFIG_LOCKDEP - WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv)); + WARN_ON(!ww && dma_resv_held(vma->obj->base.resv)); #endif do { @@ -1113,6 +1148,7 @@ void i915_vma_reopen(struct i915_vma *vma) void i915_vma_release(struct kref *ref) { struct i915_vma *vma = container_of(ref, typeof(*vma), ref); + struct drm_i915_gem_object *obj = vma->obj; if (drm_mm_node_allocated(&vma->node)) { mutex_lock(&vma->vm->mutex); @@ -1123,15 +1159,11 @@ void i915_vma_release(struct kref *ref) } GEM_BUG_ON(i915_vma_is_active(vma)); - if (vma->obj) { - struct drm_i915_gem_object *obj = vma->obj; - - spin_lock(&obj->vma.lock); - list_del(&vma->obj_link); - if (!RB_EMPTY_NODE(&vma->obj_node)) - rb_erase(&vma->obj_node, &obj->vma.tree); - spin_unlock(&obj->vma.lock); - } + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + if (!RB_EMPTY_NODE(&vma->obj_node)) + rb_erase(&vma->obj_node, &obj->vma.tree); + spin_unlock(&obj->vma.lock); __i915_vma_remove_closed(vma); i915_vm_put(vma->vm); @@ -1256,19 +1288,19 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (fence) { - dma_resv_add_excl_fence(vma->resv, fence); + dma_resv_add_excl_fence(vma->obj->base.resv, fence); obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->read_domains = 0; } } else { if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { - err = dma_resv_reserve_shared(vma->resv, 1); + err = dma_resv_reserve_shared(vma->obj->base.resv, 1); if (unlikely(err)) return err; } if (fence) { - dma_resv_add_shared_fence(vma->resv, fence); 
+ dma_resv_add_shared_fence(vma->obj->base.resv, fence); obj->write_domain = 0; } } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 648dbe744c96..4033aa08d5e4 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -234,16 +234,16 @@ static inline void __i915_vma_put(struct i915_vma *vma) kref_put(&vma->ref, i915_vma_release); } -#define assert_vma_held(vma) dma_resv_assert_held((vma)->resv) +#define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv) static inline void i915_vma_lock(struct i915_vma *vma) { - dma_resv_lock(vma->resv, NULL); + dma_resv_lock(vma->obj->base.resv, NULL); } static inline void i915_vma_unlock(struct i915_vma *vma) { - dma_resv_unlock(vma->resv); + dma_resv_unlock(vma->obj->base.resv); } int __must_check @@ -418,9 +418,6 @@ static inline void i915_vma_clear_scanout(struct i915_vma *vma) list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) -struct i915_vma *i915_vma_alloc(void); -void i915_vma_free(struct i915_vma *vma); - struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); void i915_vma_make_shrinkable(struct i915_vma *vma); void i915_vma_make_purgeable(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.c b/drivers/gpu/drm/i915/i915_vma_snapshot.c new file mode 100644 index 000000000000..2949ceea9884 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_snapshot.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_vma_snapshot.h" +#include "i915_vma_types.h" +#include "i915_vma.h" + +/** + * i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from + * a struct i915_vma. + * @vsnap: The i915_vma_snapshot to init. + * @vma: A struct i915_vma used to initialize @vsnap. + * @name: Name associated with the snapshot. The character pointer needs to + * stay alive over the lifetime of the snapshot + */ +void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name) +{ + if (!i915_vma_is_pinned(vma)) + assert_object_held(vma->obj); + + vsnap->name = name; + vsnap->size = vma->size; + vsnap->obj_size = vma->obj->base.size; + vsnap->gtt_offset = vma->node.start; + vsnap->gtt_size = vma->node.size; + vsnap->page_sizes = vma->page_sizes.gtt; + vsnap->pages = vma->pages; + vsnap->pages_rsgt = NULL; + vsnap->mr = NULL; + if (vma->obj->mm.rsgt) + vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt); + vsnap->mr = vma->obj->mm.region; + kref_init(&vsnap->kref); + vsnap->vma_resource = &vma->active; + vsnap->onstack = false; + vsnap->present = true; +} + +/** + * i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from + * a struct i915_vma, but avoid kfreeing it on last put. + * @vsnap: The i915_vma_snapshot to init. + * @vma: A struct i915_vma used to initialize @vsnap. + * @name: Name associated with the snapshot. 
The character pointer needs to + * stay alive over the lifetime of the snapshot + */ +void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name) +{ + i915_vma_snapshot_init(vsnap, vma, name); + vsnap->onstack = true; +} + +static void vma_snapshot_release(struct kref *ref) +{ + struct i915_vma_snapshot *vsnap = + container_of(ref, typeof(*vsnap), kref); + + vsnap->present = false; + if (vsnap->pages_rsgt) + i915_refct_sgt_put(vsnap->pages_rsgt); + if (!vsnap->onstack) + kfree(vsnap); +} + +/** + * i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference + * @vsnap: The pointer reference + */ +void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap) +{ + kref_put(&vsnap->kref, vma_snapshot_release); +} + +/** + * i915_vma_snapshot_put_onstack - Put an onstack i915_vma_snapshot pointer + * reference and verify that the structure is released + * @vsnap: The pointer reference + * + * This function is intended to be paired with an i915_vma_snapshot_init_onstack() + * and should be called before exiting the scope that declared @vsnap, or + * before freeing the structure that embeds it, to verify that all references + * have been released. + */ +void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap) +{ + if (!kref_put(&vsnap->kref, vma_snapshot_release)) + GEM_BUG_ON(1); +} + +/** + * i915_vma_snapshot_resource_pin - Temporarily block the memory the + * vma snapshot is pointing to from being released. + * @vsnap: The vma snapshot. + * @lockdep_cookie: Pointer to bool needed for lockdep support. This needs + * to be passed to the paired i915_vma_snapshot_resource_unpin. + * + * This function will temporarily try to hold up a fence or similar structure + * and will therefore enter a fence signaling critical section. + * + * Return: true if we succeeded in blocking the memory from being released, + * false otherwise. + */ +bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap, + bool *lockdep_cookie) +{ + bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource); + + if (pinned) + *lockdep_cookie = dma_fence_begin_signalling(); + + return pinned; } + +/** + * i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from + * being released. + * @vsnap: The vma snapshot. + * @lockdep_cookie: Cookie returned from the matching i915_vma_snapshot_resource_pin(). + * + * Might leave a fence signalling critical section and signal a fence. + */ +void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap, + bool lockdep_cookie) +{ + dma_fence_end_signalling(lockdep_cookie); + + return i915_active_release(vsnap->vma_resource); +} diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.h b/drivers/gpu/drm/i915/i915_vma_snapshot.h new file mode 100644 index 000000000000..940581df4622 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_snapshot.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_VMA_SNAPSHOT_H_ +#define _I915_VMA_SNAPSHOT_H_ + +#include <linux/kref.h> +#include <linux/slab.h> +#include <linux/types.h> + +struct i915_active; +struct i915_refct_sgt; +struct i915_vma; +struct intel_memory_region; +struct sg_table; + +/** + * DOC: Simple utilities for snapshotting GPU vma metadata, later used for + * error capture. We use a separate header for this to avoid issues due to + * recursive header includes. + */ + +/** + * struct i915_vma_snapshot - Snapshot of vma metadata. + * @size: The vma size in bytes. 
+ * @obj_size: The size of the underlying object in bytes. + * @gtt_offset: The gtt offset the vma is bound to. + * @gtt_size: The size in bytes allocated for the vma in the GTT. + * @pages: The struct sg_table pointing to the pages bound. + * @pages_rsgt: The refcounted sg_table holding the reference for @pages if any. + * @mr: The memory region pointed for the pages bound. + * @kref: Reference for this structure. + * @vma_resource: FIXME: A means to keep the unbind fence from signaling. + * Temporarily while we have only sync unbinds, and still use the vma + * active, we use that. With async unbinding we need a signaling refcount + * for the unbind fence. + * @page_sizes: The vma GTT page sizes information. + * @onstack: Whether the structure shouldn't be freed on final put. + * @present: Whether the structure is present and initialized. + */ +struct i915_vma_snapshot { + const char *name; + size_t size; + size_t obj_size; + size_t gtt_offset; + size_t gtt_size; + struct sg_table *pages; + struct i915_refct_sgt *pages_rsgt; + struct intel_memory_region *mr; + struct kref kref; + struct i915_active *vma_resource; + u32 page_sizes; + bool onstack:1; + bool present:1; +}; + +void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name); + +void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name); + +void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap); + +void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap); + +bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap, + bool *lockdep_cookie); + +void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap, + bool lockdep_cookie); + +/** + * i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot + * @gfp: Allocation mode. + * + * Return: A pointer to a struct i915_vma_snapshot if successful. + * NULL otherwise. + */ +static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp) +{ + return kmalloc(sizeof(struct i915_vma_snapshot), gfp); +} + +/** + * i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot + * + * Return: A pointer to a struct i915_vma_snapshot. + */ +static inline struct i915_vma_snapshot * +i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap) +{ + kref_get(&vsnap->kref); + return vsnap; +} + +/** + * i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is + * present and initialized. + * + * Return: true if present and initialized; false otherwise. 
+ */ +static inline bool +i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap) +{ + return vsnap && vsnap->present; +} + +#endif diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 4ee6e54799f4..f03fa96a1701 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -187,7 +187,6 @@ struct i915_vma { const struct i915_vma_ops *ops; struct drm_i915_gem_object *obj; - struct dma_resv *resv; /** Alias of obj->resv */ struct sg_table *pages; void __iomem *iomap; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 305facedd284..e6605b5181a5 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -83,33 +83,26 @@ const char *intel_platform_name(enum intel_platform platform) return platform_names[platform]; } -static const char *iommu_name(void) -{ - const char *msg = "n/a"; - -#ifdef CONFIG_INTEL_IOMMU - msg = enableddisabled(intel_iommu_gfx_mapped); -#endif - - return msg; -} - void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { - if (info->graphics_rel) - drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel); + if (info->graphics.rel) + drm_printf(p, "graphics version: %u.%02u\n", info->graphics.ver, + info->graphics.rel); + else + drm_printf(p, "graphics version: %u\n", info->graphics.ver); + + if (info->media.rel) + drm_printf(p, "media version: %u.%02u\n", info->media.ver, info->media.rel); else - drm_printf(p, "graphics version: %u\n", info->graphics_ver); + drm_printf(p, "media version: %u\n", info->media.ver); - if (info->media_rel) - drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel); + if (info->display.rel) + drm_printf(p, "display version: %u.%02u\n", info->display.ver, info->display.rel); else - drm_printf(p, "media version: %u\n", info->media_ver); + drm_printf(p, "display version: %u\n", info->display.ver); - drm_printf(p, "display version: %u\n", info->display.ver); drm_printf(p, "gt: %d\n", info->gt); - drm_printf(p, "iommu: %s\n", iommu_name()); drm_printf(p, "memory-regions: %x\n", info->memory_regions); drm_printf(p, "page-sizes: %x\n", info->page_sizes); drm_printf(p, "platform: %s\n", intel_platform_name(info->platform)); @@ -369,7 +362,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->display.has_dsc = 0; } - if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active()) { + if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active(dev_priv)) { drm_info(&dev_priv->drm, "Disabling ppGTT for VT-d support\n"); info->ppgtt_type = INTEL_PPGTT_NONE; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 8e6f48d1eb7b..669f0d26c3c3 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -166,11 +166,14 @@ enum intel_ppgtt_type { func(overlay_needs_physical); \ func(supports_tv); +struct ip_version { + u8 ver; + u8 rel; +}; + struct intel_device_info { - u8 graphics_ver; - u8 graphics_rel; - u8 media_ver; - u8 media_rel; + struct ip_version graphics; + struct ip_version media; intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ @@ -200,6 +203,7 @@ struct intel_device_info { struct { u8 ver; + u8 rel; #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_DISPLAY_FOR_EACH_FLAG(DEFINE_FLAG); diff --git a/drivers/gpu/drm/i915/intel_memory_region.c 
b/drivers/gpu/drm/i915/intel_memory_region.c index e7f7e6627750..b43121609e25 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -126,7 +126,6 @@ intel_memory_region_create(struct drm_i915_private *i915, goto err_free; } - kref_init(&mem->kref); return mem; err_free: @@ -144,28 +143,17 @@ void intel_memory_region_set_name(struct intel_memory_region *mem, va_end(ap); } -static void __intel_memory_region_destroy(struct kref *kref) +void intel_memory_region_destroy(struct intel_memory_region *mem) { - struct intel_memory_region *mem = - container_of(kref, typeof(*mem), kref); + int ret = 0; if (mem->ops->release) - mem->ops->release(mem); + ret = mem->ops->release(mem); + GEM_WARN_ON(!list_empty_careful(&mem->objects.list)); mutex_destroy(&mem->objects.lock); - kfree(mem); -} - -struct intel_memory_region * -intel_memory_region_get(struct intel_memory_region *mem) -{ - kref_get(&mem->kref); - return mem; -} - -void intel_memory_region_put(struct intel_memory_region *mem) -{ - kref_put(&mem->kref, __intel_memory_region_destroy); + if (!ret) + kfree(mem); } /* Global memory region registration -- only slight layer inversions! */ @@ -234,7 +222,7 @@ void intel_memory_regions_driver_release(struct drm_i915_private *i915) fetch_and_zero(&i915->mm.regions[i]); if (region) - intel_memory_region_put(region); + intel_memory_region_destroy(region); } } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 3feae3353d33..5625c9c38993 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -6,7 +6,6 @@ #ifndef __INTEL_MEMORY_REGION_H__ #define __INTEL_MEMORY_REGION_H__ -#include <linux/kref.h> #include <linux/ioport.h> #include <linux/mutex.h> #include <linux/io-mapping.h> @@ -51,7 +50,7 @@ struct intel_memory_region_ops { unsigned int flags; int (*init)(struct intel_memory_region *mem); - void (*release)(struct intel_memory_region *mem); + int (*release)(struct intel_memory_region *mem); int (*init_object)(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, @@ -71,8 +70,6 @@ struct intel_memory_region { /* For fake LMEM */ struct drm_mm_node fake_mappable; - struct kref kref; - resource_size_t io_start; resource_size_t min_page_size; resource_size_t total; @@ -110,9 +107,7 @@ intel_memory_region_create(struct drm_i915_private *i915, u16 instance, const struct intel_memory_region_ops *ops); -struct intel_memory_region * -intel_memory_region_get(struct intel_memory_region *mem); -void intel_memory_region_put(struct intel_memory_region *mem); +void intel_memory_region_destroy(struct intel_memory_region *mem); int intel_memory_regions_hw_probe(struct drm_i915_private *i915); void intel_memory_regions_driver_release(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e9f2e307b98e..cff0f32bedc9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -98,7 +98,7 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) * "Plane N strech max must be programmed to 11b (x1) * when Async flips are enabled on that plane." 
*/ - if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active()) + if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active(dev_priv)) intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), SKL_PLANE1_STRETCH_MAX_MASK, SKL_PLANE1_STRETCH_MAX_X1); } @@ -7482,11 +7482,34 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) gen12lp_init_clock_gating(dev_priv); /* Wa_1409836686:dg1[a0] */ - if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0)) + if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | DPT_GATING_DIS); } +static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* Wa_22010146351:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0)) + intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGR_DIS); +} + +static void dg2_init_clock_gating(struct drm_i915_private *i915) +{ + /* Wa_22010954014:dg2_g10 */ + if (IS_DG2_G10(i915)) + intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, + SGSI_SIDECLK_DIS); + + /* + * Wa_14010733611:dg2_g10 + * Wa_22010146351:dg2_g10 + */ + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) + intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, + SGR_DIS | SGGI_DIS); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -7530,12 +7553,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) FBC_LLC_FULLY_OPEN); /* WaDisableSDEUnitClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); @@ -7897,6 +7920,8 @@ static const struct drm_i915_clock_gating_funcs platform##_clock_gating_funcs = .init_clock_gating = platform##_init_clock_gating, \ } +CG_FUNCS(dg2); +CG_FUNCS(xehpsdv); CG_FUNCS(adlp); CG_FUNCS(dg1); CG_FUNCS(gen12lp); @@ -7933,7 +7958,11 @@ CG_FUNCS(nop); */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + dev_priv->clock_gating_funcs = &dg2_clock_gating_funcs; + else if (IS_XEHPSDV(dev_priv)) + dev_priv->clock_gating_funcs = &xehpsdv_clock_gating_funcs; + else if (IS_ALDERLAKE_P(dev_priv)) dev_priv->clock_gating_funcs = &adlp_clock_gating_funcs; else if (IS_DG1(dev_priv)) dev_priv->clock_gating_funcs = &dg1_clock_gating_funcs; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 98c7339bf8ba..f2b888c16958 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -104,19 +104,50 @@ int intel_region_ttm_init(struct intel_memory_region *mem) * memory region, and if it was registered with the TTM device, * removes that registration. */ -void intel_region_ttm_fini(struct intel_memory_region *mem) +int intel_region_ttm_fini(struct intel_memory_region *mem) { - int ret; + struct ttm_resource_manager *man = mem->region_private; + int ret = -EBUSY; + int count; + + /* + * Put the region's move fences. 
This releases requests that + * may hold on to contexts and vms that may hold on to buffer + * objects placed in this region. + */ + if (man) + ttm_resource_manager_cleanup(man); + + /* Flush objects from region. */ + for (count = 0; count < 10; ++count) { + i915_gem_flush_free_objects(mem->i915); + + mutex_lock(&mem->objects.lock); + if (list_empty(&mem->objects.list)) + ret = 0; + mutex_unlock(&mem->objects.lock); + if (!ret) + break; + + msleep(20); + flush_delayed_work(&mem->i915->bdev.wq); + } + + /* If we leaked objects, Don't free the region causing use after free */ + if (ret || !man) + return ret; ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, intel_region_to_ttm_type(mem)); GEM_WARN_ON(ret); mem->region_private = NULL; + + return ret; } /** - * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource - * to an sg_table. + * intel_region_ttm_resource_to_rsgt - + * Convert an opaque TTM resource manager resource to a refcounted sg_table. * @mem: The memory region. * @res: The resource manager resource obtained from the TTM resource manager. * @@ -126,17 +157,18 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) * * Return: A malloced sg_table on success, an error pointer on failure. */ -struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, - struct ttm_resource *res) +struct i915_refct_sgt * +intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, + struct ttm_resource *res) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = to_ttm_range_mgr_node(res); - return i915_sg_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + return i915_rsgt_from_mm_node(&range_node->mm_nodes[0], + mem->region.start); } else { - return i915_sg_from_buddy_resource(res, mem->region.start); + return i915_rsgt_from_buddy_resource(res, mem->region.start); } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index 6f44075920f2..fdee5e7bd46c 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -20,10 +20,11 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv); int intel_region_ttm_init(struct intel_memory_region *mem); -void intel_region_ttm_fini(struct intel_memory_region *mem); +int intel_region_ttm_fini(struct intel_memory_region *mem); -struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, - struct ttm_resource *res); +struct i915_refct_sgt * +intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, + struct ttm_resource *res); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 6cf967631395..a4b16b9e2e55 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -23,7 +23,8 @@ * use a macro to define these to make it easier to identify the platforms * where the two steppings can deviate. 
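 *
 * As an illustrative expansion (not part of this patch), with the per-IP
 * step fields an entry such as
 *
 *	[1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
 *
 * is equivalent to
 *
 *	[1] = { .graphics_step = STEP_B0, .media_step = STEP_B0,
 *		.display_step = STEP_B0 },
 *
 * while COMMON_STEP(x) sets all three steppings to the same value at once.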
*/ -#define COMMON_STEP(x) .gt_step = STEP_##x, .display_step = STEP_##x +#define COMMON_STEP(x) .graphics_step = STEP_##x, .display_step = STEP_##x, .media_step = STEP_##x +#define COMMON_GT_MEDIA_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x static const struct intel_step_info skl_revids[] = { [0x6] = { COMMON_STEP(G0) }, @@ -33,13 +34,13 @@ static const struct intel_step_info skl_revids[] = { }; static const struct intel_step_info kbl_revids[] = { - [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [2] = { .gt_step = STEP_C0, .display_step = STEP_B0 }, - [3] = { .gt_step = STEP_D0, .display_step = STEP_B0 }, - [4] = { .gt_step = STEP_F0, .display_step = STEP_C0 }, - [5] = { .gt_step = STEP_C0, .display_step = STEP_B1 }, - [6] = { .gt_step = STEP_D1, .display_step = STEP_B1 }, - [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [2] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, + [3] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_B0 }, + [4] = { COMMON_GT_MEDIA_STEP(F0), .display_step = STEP_C0 }, + [5] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B1 }, + [6] = { COMMON_GT_MEDIA_STEP(D1), .display_step = STEP_B1 }, + [7] = { COMMON_GT_MEDIA_STEP(G0), .display_step = STEP_C0 }, }; static const struct intel_step_info bxt_revids[] = { @@ -63,16 +64,16 @@ static const struct intel_step_info jsl_ehl_revids[] = { }; static const struct intel_step_info tgl_uy_revids[] = { - [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, - [2] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, - [3] = { .gt_step = STEP_C0, .display_step = STEP_D0 }, + [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, + [2] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, + [3] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, }; /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ static const struct intel_step_info tgl_revids[] = { - [0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, - [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 }, + [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_D0 }, }; static const struct intel_step_info rkl_revids[] = { @@ -87,38 +88,38 @@ static const struct intel_step_info dg1_revids[] = { }; static const struct intel_step_info adls_revids[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0, .display_step = STEP_B0 }, - [0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A2 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 }, }; static const struct intel_step_info adlp_revids[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, - [0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [0x4] = { 
COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, }; static const struct intel_step_info xehpsdv_revids[] = { - [0x0] = { .gt_step = STEP_A0 }, - [0x1] = { .gt_step = STEP_A1 }, - [0x4] = { .gt_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0) }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1) }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0) }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0) }, }; static const struct intel_step_info dg2_g10_revid_step_tbl[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, }; static const struct intel_step_info dg2_g11_revid_step_tbl[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, - [0x5] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, + [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, }; void intel_step_init(struct drm_i915_private *i915) @@ -179,7 +180,7 @@ void intel_step_init(struct drm_i915_private *i915) if (!revids) return; - if (revid < size && revids[revid].gt_step != STEP_NONE) { + if (revid < size && revids[revid].graphics_step != STEP_NONE) { step = revids[revid]; } else { drm_warn(&i915->drm, "Unknown revid 0x%02x\n", revid); @@ -192,7 +193,7 @@ void intel_step_init(struct drm_i915_private *i915) * steppings in the array are not monotonically increasing, but * it's better than defaulting to 0. 
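 *
 * A worked example (illustrative, not part of this patch): with the
 * adls_revids[] table above, an unknown revid 0x5 has no entry of its own,
 * so the loop below walks forward to index 0x8 and the part is treated as
 * graphics/media stepping C0 rather than defaulting to A0.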
*/ - while (revid < size && revids[revid].gt_step == STEP_NONE) + while (revid < size && revids[revid].graphics_step == STEP_NONE) revid++; if (revid < size) { @@ -201,12 +202,12 @@ void intel_step_init(struct drm_i915_private *i915) step = revids[revid]; } else { drm_dbg(&i915->drm, "Using future steppings\n"); - step.gt_step = STEP_FUTURE; + step.graphics_step = STEP_FUTURE; step.display_step = STEP_FUTURE; } } - if (drm_WARN_ON(&i915->drm, step.gt_step == STEP_NONE)) + if (drm_WARN_ON(&i915->drm, step.graphics_step == STEP_NONE)) return; RUNTIME_INFO(i915)->step = step; diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index f6641e2a3c77..d71a99bd5179 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -11,8 +11,9 @@ struct drm_i915_private; struct intel_step_info { - u8 gt_step; + u8 graphics_step; u8 display_step; + u8 media_step; }; #define STEP_ENUM_VAL(name) STEP_##name, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 722910d02b5f..abdac78d3976 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2020,7 +2020,7 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static int uncore_mmio_setup(struct intel_uncore *uncore) +int intel_uncore_setup_mmio(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); @@ -2053,7 +2053,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore) return 0; } -static void uncore_mmio_cleanup(struct intel_uncore *uncore) +void intel_uncore_cleanup_mmio(struct intel_uncore *uncore) { struct pci_dev *pdev = to_pci_dev(uncore->i915->drm.dev); @@ -2146,10 +2146,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) struct drm_i915_private *i915 = uncore->i915; int ret; - ret = uncore_mmio_setup(uncore); - if (ret) - return ret; - /* * The boot firmware initializes local memory and assesses its health. 
* If memory training fails, the punit will have been instructed to @@ -2170,7 +2166,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) } else { ret = uncore_forcewake_init(uncore); if (ret) - goto out_mmio_cleanup; + return ret; } /* make sure fw funcs are set if and only if we have fw*/ @@ -2192,11 +2188,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n"); return 0; - -out_mmio_cleanup: - uncore_mmio_cleanup(uncore); - - return ret; } /* @@ -2261,8 +2252,6 @@ void intel_uncore_fini_mmio(struct intel_uncore *uncore) intel_uncore_fw_domains_fini(uncore); iosf_mbi_punit_release(); } - - uncore_mmio_cleanup(uncore); } static const struct reg_whitelist { diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 3248e4e2c540..d1d17b04e29f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -218,11 +218,13 @@ void intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); void intel_uncore_init_early(struct intel_uncore *uncore, struct drm_i915_private *i915); +int intel_uncore_setup_mmio(struct intel_uncore *uncore); int intel_uncore_init_mmio(struct intel_uncore *uncore); void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, struct intel_gt *gt); bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore); bool intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore); +void intel_uncore_cleanup_mmio(struct intel_uncore *uncore); void intel_uncore_fini_mmio(struct intel_uncore *uncore); void intel_uncore_suspend(struct intel_uncore *uncore); void intel_uncore_resume_early(struct intel_uncore *uncore); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c index 23fd86de5a24..6a7d4e2ee138 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c @@ -7,26 +7,29 @@ #include "intel_pxp_irq.h" #include "intel_pxp_pm.h" #include "intel_pxp_session.h" +#include "i915_drv.h" -void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime) +void intel_pxp_suspend_prepare(struct intel_pxp *pxp) { if (!intel_pxp_is_enabled(pxp)) return; pxp->arb_is_valid = false; - /* - * Contexts using protected objects keep a runtime PM reference, so we - * can only runtime suspend when all of them have been either closed - * or banned. Therefore, there is no need to invalidate in that - * scenario. 
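 *
 * A hedged sketch of the resulting call sites (illustrative only; the
 * i915 suspend-path hooks named here are assumptions, not part of this
 * patch):
 *
 *	suspend prepare:  intel_pxp_suspend_prepare(&gt->pxp);
 *	system suspend:   intel_pxp_suspend(&gt->pxp);         (takes its own rpm wakeref)
 *	runtime suspend:  intel_pxp_runtime_suspend(&gt->pxp);  (no invalidate, see above)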
- */ - if (!runtime) - intel_pxp_invalidate(pxp); + intel_pxp_invalidate(pxp); +} - intel_pxp_fini_hw(pxp); +void intel_pxp_suspend(struct intel_pxp *pxp) +{ + intel_wakeref_t wakeref; - pxp->hw_state_invalidated = false; + if (!intel_pxp_is_enabled(pxp)) + return; + + with_intel_runtime_pm(&pxp_to_gt(pxp)->i915->runtime_pm, wakeref) { + intel_pxp_fini_hw(pxp); + pxp->hw_state_invalidated = false; + } } void intel_pxp_resume(struct intel_pxp *pxp) @@ -44,3 +47,15 @@ void intel_pxp_resume(struct intel_pxp *pxp) intel_pxp_init_hw(pxp); } + +void intel_pxp_runtime_suspend(struct intel_pxp *pxp) +{ + if (!intel_pxp_is_enabled(pxp)) + return; + + pxp->arb_is_valid = false; + + intel_pxp_fini_hw(pxp); + + pxp->hw_state_invalidated = false; +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h index c89e97a0c3d0..16990a3f2f85 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h @@ -9,16 +9,29 @@ #include "intel_pxp_types.h" #ifdef CONFIG_DRM_I915_PXP -void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime); +void intel_pxp_suspend_prepare(struct intel_pxp *pxp); +void intel_pxp_suspend(struct intel_pxp *pxp); void intel_pxp_resume(struct intel_pxp *pxp); +void intel_pxp_runtime_suspend(struct intel_pxp *pxp); #else -static inline void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime) +static inline void intel_pxp_suspend_prepare(struct intel_pxp *pxp) +{ +} + +static inline void intel_pxp_suspend(struct intel_pxp *pxp) { } static inline void intel_pxp_resume(struct intel_pxp *pxp) { } -#endif +static inline void intel_pxp_runtime_suspend(struct intel_pxp *pxp) +{ +} +#endif +static inline void intel_pxp_runtime_resume(struct intel_pxp *pxp) +{ + intel_pxp_resume(pxp); +} #endif /* __INTEL_PXP_PM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index f99bb0113726..7e0658a77659 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -442,6 +442,7 @@ static int igt_evict_contexts(void *arg) /* Overfill the GGTT with context objects and so try to evict one. 
*/ for_each_engine(engine, gt, id) { struct i915_sw_fence fence; + struct i915_request *last = NULL; count = 0; onstack_fence_init(&fence); @@ -479,6 +480,9 @@ static int igt_evict_contexts(void *arg) i915_request_add(rq); count++; + if (last) + i915_request_put(last); + last = i915_request_get(rq); err = 0; } while(1); onstack_fence_fini(&fence); @@ -486,6 +490,21 @@ static int igt_evict_contexts(void *arg) count, engine->name); if (err) break; + if (last) { + if (i915_request_wait(last, 0, HZ) < 0) { + err = -EIO; + i915_request_put(last); + pr_err("Failed waiting for last request (on %s)", + engine->name); + break; + } + i915_request_put(last); + } + err = intel_gt_wait_for_idle(engine->gt, HZ * 3); + if (err) { + pr_err("Failed to idle GT (on %s)", engine->name); + break; + } } mutex_lock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index d67710d10615..9979ef9197cd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -209,6 +209,10 @@ static int igt_request_rewind(void *arg) int err = -EINVAL; ctx[0] = mock_context(i915, "A"); + if (!ctx[0]) { + err = -ENOMEM; + goto err_ctx_0; + } ce = i915_gem_context_get_engine(ctx[0], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -223,6 +227,10 @@ static int igt_request_rewind(void *arg) i915_request_add(request); ctx[1] = mock_context(i915, "B"); + if (!ctx[1]) { + err = -ENOMEM; + goto err_ctx_1; + } ce = i915_gem_context_get_engine(ctx[1], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -261,9 +269,11 @@ err: i915_request_put(vip); err_context_1: mock_context_close(ctx[1]); +err_ctx_1: i915_request_put(request); err_context_0: mock_context_close(ctx[0]); +err_ctx_0: mock_device_flush(i915); return err; } @@ -2805,7 +2815,7 @@ static int p_sync0(void *arg) i915_request_add(rq); err = 0; - if (i915_request_wait(rq, 0, HZ / 5) < 0) + if (i915_request_wait(rq, 0, HZ) < 0) err = -ETIME; i915_request_put(rq); if (err) @@ -2876,7 +2886,7 @@ static int p_sync1(void *arg) i915_request_add(rq); err = 0; - if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) + if (prev && i915_request_wait(prev, 0, HZ) < 0) err = -ETIME; i915_request_put(prev); prev = rq; diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index cbf45d85cbff..daa985e5a19b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -28,7 +28,7 @@ #include "../i915_selftest.h" -static int __i915_sw_fence_call +static int fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { switch (state) { diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 9f8590b868a9..a2838c65f8a5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -36,7 +36,7 @@ void igt_global_reset_unlock(struct intel_gt *gt) enum intel_engine_id id; for_each_engine(engine, gt, id) - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); clear_bit(I915_RESET_BACKOFF, >->reset.flags); wake_up_all(>->reset.queue); diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 418caae84759..0d5df0dc7212 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -225,7 +225,7 @@ static int 
igt_mock_reserve(void *arg) out_close: close_objects(mem, &objects); - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); out_free_order: kfree(order); return err; @@ -439,7 +439,7 @@ static int igt_mock_splintered_region(void *arg) out_close: close_objects(mem, &objects); out_put: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return err; } @@ -507,7 +507,7 @@ static int igt_mock_max_segment(void *arg) out_close: close_objects(mem, &objects); out_put: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return err; } @@ -1196,7 +1196,7 @@ int intel_memory_region_mock_selftests(void) err = i915_subtests(tests, mem); - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); out_unref: mock_destroy_device(i915); return err; diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index 080b90b63d16..bf2752cc1e0b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -26,7 +26,7 @@ /* Small library of different fence types useful for writing tests */ -static int __i915_sw_fence_call +static int nop_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { return NOTIFY_DONE; @@ -41,12 +41,12 @@ void __onstack_fence_init(struct i915_sw_fence *fence, __init_waitqueue_head(&fence->wait, name, key); atomic_set(&fence->pending, 1); fence->error = 0; - fence->flags = (unsigned long)nop_fence_notify; + fence->fn = nop_fence_notify; } void onstack_fence_fini(struct i915_sw_fence *fence) { - if (!fence->flags) + if (!fence->fn) return; i915_sw_fence_commit(fence); @@ -89,7 +89,7 @@ struct heap_fence { }; }; -static int __i915_sw_fence_call +static int heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct heap_fence *h = container_of(fence, typeof(*h), fence); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 4f8180146888..d0e2e61de8d4 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -165,7 +165,7 @@ struct drm_i915_private *mock_gem_device(void) /* Using the global GTT may ask questions about KMS users, so prepare */ drm_mode_config_init(&i915->drm); - mkwrite_device_info(i915)->graphics_ver = -1; + mkwrite_device_info(i915)->graphics.ver = -1; mkwrite_device_info(i915)->page_sizes = I915_GTT_PAGE_SIZE_4K | @@ -177,6 +177,8 @@ struct drm_i915_private *mock_gem_device(void) mock_uncore_init(&i915->uncore, i915); + spin_lock_init(&i915->gpu_error.lock); + i915_gem_init__mm(i915); intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 75793008c4ef..19bff8afcaaa 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -15,9 +15,9 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { + i915_refct_sgt_put(obj->mm.rsgt); + obj->mm.rsgt = NULL; intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); - sg_free_table(pages); - kfree(pages); } static int mock_region_get_pages(struct drm_i915_gem_object *obj) @@ -36,12 +36,14 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) if (IS_ERR(obj->mm.res)) return PTR_ERR(obj->mm.res); - pages = 
intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); - if (IS_ERR(pages)) { - err = PTR_ERR(pages); + obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, + obj->mm.res); + if (IS_ERR(obj->mm.rsgt)) { + err = PTR_ERR(obj->mm.rsgt); goto err_free_resource; } + pages = &obj->mm.rsgt->table; __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); return 0; @@ -82,13 +84,16 @@ static int mock_object_init(struct intel_memory_region *mem, return 0; } -static void mock_region_fini(struct intel_memory_region *mem) +static int mock_region_fini(struct intel_memory_region *mem) { struct drm_i915_private *i915 = mem->i915; int instance = mem->instance; + int ret; - intel_region_ttm_fini(mem); + ret = intel_region_ttm_fini(mem); ida_free(&i915->selftest.mock_region_instances, instance); + + return ret; } static const struct intel_memory_region_ops mock_region_ops = { |