From f0c02c1b91888ccac539388eacb0659bf263a557 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 21 Jun 2019 08:08:10 +0100 Subject: drm/i915: Rename i915_timeline to intel_timeline and move under gt Move all timeline code under gt and rename to intel_gt prefix. Signed-off-by: Tvrtko Ursulin Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190621070811.7006-32-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/Makefile.header-test | 1 - drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 4 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine.h | 4 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 14 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 10 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 18 +- drivers/gpu/drm/i915/gt/intel_timeline.c | 591 ++++++++++++++ drivers/gpu/drm/i915/gt/intel_timeline.h | 93 +++ drivers/gpu/drm/i915/gt/intel_timeline_types.h | 67 ++ drivers/gpu/drm/i915/gt/mock_engine.c | 10 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 845 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/selftests/mock_timeline.c | 29 + drivers/gpu/drm/i915/gt/selftests/mock_timeline.h | 15 + drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_request.c | 14 +- drivers/gpu/drm/i915/i915_request.h | 8 +- drivers/gpu/drm/i915/i915_timeline.c | 591 -------------- drivers/gpu/drm/i915/i915_timeline.h | 93 --- drivers/gpu/drm/i915/i915_timeline_types.h | 67 -- .../gpu/drm/i915/selftests/i915_live_selftests.h | 2 +- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 2 +- drivers/gpu/drm/i915/selftests/i915_timeline.c | 845 --------------------- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 6 +- drivers/gpu/drm/i915/selftests/mock_timeline.c | 29 - drivers/gpu/drm/i915/selftests/mock_timeline.h | 15 - 32 files changed, 1702 insertions(+), 1703 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_timeline.c create mode 100644 drivers/gpu/drm/i915/gt/intel_timeline.h create mode 100644 drivers/gpu/drm/i915/gt/intel_timeline_types.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_timeline.c create mode 100644 drivers/gpu/drm/i915/gt/selftests/mock_timeline.c create mode 100644 drivers/gpu/drm/i915/gt/selftests/mock_timeline.h delete mode 100644 drivers/gpu/drm/i915/i915_timeline.c delete mode 100644 drivers/gpu/drm/i915/i915_timeline.h delete mode 100644 drivers/gpu/drm/i915/i915_timeline_types.h delete mode 100644 drivers/gpu/drm/i915/selftests/i915_timeline.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_timeline.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_timeline.h (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 19f8b6745772..84ac0fd1b8d0 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -82,6 +82,7 @@ gt-y += \ gt/intel_ringbuffer.o \ gt/intel_mocs.o \ gt/intel_sseu.o \ + gt/intel_timeline.o \ gt/intel_workarounds.o gt-$(CONFIG_DRM_I915_SELFTEST) += \ gt/mock_engine.o @@ -127,7 +128,6 @@ i915-y += \ i915_query.o \ i915_request.o \ i915_scheduler.o \ - i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_wopcm.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index cb74242f9c3b..b1c3e642f621 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -12,7 +12,6 @@ header_test := \ i915_priolist_types.h \ i915_reg.h \ i915_scheduler_types.h \ - i915_timeline_types.h \ i915_utils.h \ intel_csr.h \ intel_drv.h \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb691535fbf2..628673d1d7f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -316,7 +316,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) - i915_timeline_put(ctx->timeline); + intel_timeline_put(ctx->timeline); kfree(ctx->name); put_pid(ctx->pid); @@ -528,9 +528,9 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; - timeline = i915_timeline_create(&dev_priv->gt, NULL); + timeline = intel_timeline_create(&dev_priv->gt, NULL); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -2015,8 +2015,8 @@ static int clone_timeline(struct i915_gem_context *dst, GEM_BUG_ON(src->timeline == dst->timeline); if (dst->timeline) - i915_timeline_put(dst->timeline); - dst->timeline = i915_timeline_get(src->timeline); + intel_timeline_put(dst->timeline); + dst->timeline = intel_timeline_get(src->timeline); } return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index cc513410eeef..0ee61482ef94 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -26,7 +26,7 @@ struct pid; struct drm_i915_private; struct drm_i915_file_private; struct i915_address_space; -struct i915_timeline; +struct intel_timeline; struct intel_ring; struct i915_gem_engines { @@ -77,7 +77,7 @@ struct i915_gem_context { struct i915_gem_engines __rcu *engines; struct mutex engines_mutex; /* guards writes to engines */ - struct i915_timeline *timeline; + struct intel_timeline *timeline; /** * @vm: unique address space (GTT) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 05011d4a3b88..8f721cf0ab99 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -38,7 +38,7 @@ static void i915_gem_park(struct drm_i915_private *i915) i915_gem_batch_pool_fini(&engine->batch_pool); } - i915_timelines_park(i915); + intel_timelines_park(i915); i915_vma_parked(i915); i915_globals_park(); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 9bb6ff76680e..557b08b13feb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -14,7 +14,7 @@ #include "i915_reg.h" #include "i915_request.h" #include "i915_selftest.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #include "intel_engine_types.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" @@ -200,7 +200,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, + struct intel_timeline *timeline, int size); int intel_ring_pin(struct intel_ring *ring); void intel_ring_reset(struct intel_ring *ring, u32 tail); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 89edf97d8ad7..e30212e219ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -724,7 +724,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) struct measure_breadcrumb { struct i915_request rq; - struct i915_timeline timeline; + struct intel_timeline timeline; struct intel_ring ring; u32 cs[1024]; }; @@ -740,9 +740,9 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) if (!frame) return -ENOMEM; - if (i915_timeline_init(&frame->timeline, - engine->gt, - engine->status_page.vma)) + if (intel_timeline_init(&frame->timeline, + engine->gt, + engine->status_page.vma)) goto out_frame; INIT_LIST_HEAD(&frame->ring.request_list); @@ -757,17 +757,17 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.ring = &frame->ring; frame->rq.timeline = &frame->timeline; - dw = i915_timeline_pin(&frame->timeline); + dw = intel_timeline_pin(&frame->timeline); if (dw < 0) goto out_timeline; dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ - i915_timeline_unpin(&frame->timeline); + intel_timeline_unpin(&frame->timeline); out_timeline: - i915_timeline_fini(&frame->timeline); + intel_timeline_fini(&frame->timeline); out_frame: kfree(frame); return dw; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index fb65e96fa36b..7e056114344e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -20,7 +20,7 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "i915_timeline_types.h" +#include "gt/intel_timeline_types.h" #include "intel_sseu.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" @@ -68,7 +68,7 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct i915_timeline *timeline; + struct intel_timeline *timeline; struct list_head request_list; struct list_head active_link; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 415fdf2eb997..3abcec3e4e0e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3005,13 +3005,13 @@ err_unpin_ctx: return ret; } -static struct i915_timeline * +static struct intel_timeline * get_timeline(struct i915_gem_context *ctx, struct intel_gt *gt) { if (ctx->timeline) - return i915_timeline_get(ctx->timeline); + return intel_timeline_get(ctx->timeline); else - return i915_timeline_create(gt, NULL); + return intel_timeline_create(gt, NULL); } static int execlists_context_deferred_alloc(struct intel_context *ce, @@ -3021,7 +3021,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, struct i915_vma *vma; u32 context_size; struct intel_ring *ring; - struct i915_timeline *timeline; + struct intel_timeline *timeline; int ret; if (ce->state) @@ -3054,7 +3054,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, ring = intel_engine_create_ring(engine, timeline, ce->gem_context->ring_size); - i915_timeline_put(timeline); + intel_timeline_put(timeline); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 5297b3acb56d..3c925af64793 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -851,7 +851,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) { struct i915_gpu_error *error = &i915->gpu_error; - struct i915_timeline *tl; + struct intel_timeline *tl; if (!test_bit(I915_WEDGED, &error->flags)) return true; diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 9a748be0ce0c..aa483bba04bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1156,7 +1156,7 @@ int intel_ring_pin(struct intel_ring *ring) if (atomic_fetch_inc(&ring->pin_count)) return 0; - ret = i915_timeline_pin(ring->timeline); + ret = intel_timeline_pin(ring->timeline); if (ret) goto err_unpin; @@ -1194,7 +1194,7 @@ int intel_ring_pin(struct intel_ring *ring) err_ring: i915_vma_unpin(vma); err_timeline: - i915_timeline_unpin(ring->timeline); + intel_timeline_unpin(ring->timeline); err_unpin: atomic_dec(&ring->pin_count); return ret; @@ -1231,7 +1231,7 @@ void intel_ring_unpin(struct intel_ring *ring) ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); - i915_timeline_unpin(ring->timeline); + intel_timeline_unpin(ring->timeline); } static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) @@ -1267,7 +1267,7 @@ err: struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, + struct intel_timeline *timeline, int size) { struct drm_i915_private *i915 = engine->i915; @@ -1283,7 +1283,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, kref_init(&ring->ref); INIT_LIST_HEAD(&ring->request_list); - ring->timeline = i915_timeline_get(timeline); + ring->timeline = intel_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1313,7 +1313,7 @@ void intel_ring_free(struct kref *ref) i915_vma_close(ring->vma); i915_vma_put(ring->vma); - i915_timeline_put(ring->timeline); + intel_timeline_put(ring->timeline); kfree(ring); } @@ -2269,11 +2269,11 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) int intel_ring_submission_init(struct intel_engine_cs *engine) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; struct intel_ring *ring; int err; - timeline = i915_timeline_create(engine->gt, engine->status_page.vma); + timeline = intel_timeline_create(engine->gt, engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; @@ -2281,7 +2281,7 @@ int intel_ring_submission_init(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->has_initial_breadcrumb); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); - i915_timeline_put(timeline); + intel_timeline_put(timeline); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c new file mode 100644 index 000000000000..1a3f04458730 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -0,0 +1,591 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "gt/intel_gt_types.h" + +#include "i915_drv.h" + +#include "i915_active.h" +#include "i915_syncmap.h" +#include "gt/intel_timeline.h" + +#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) +#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) + +struct intel_timeline_hwsp { + struct intel_gt *gt; + struct i915_gt_timelines *gt_timelines; + struct list_head free_link; + struct i915_vma *vma; + u64 free_bitmap; +}; + +struct intel_timeline_cacheline { + struct i915_active active; + struct intel_timeline_hwsp *hwsp; + void *vaddr; +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS +}; + +static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static struct i915_vma * +hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline) +{ + struct i915_gt_timelines *gt = &timeline->gt->timelines; + struct intel_timeline_hwsp *hwsp; + + BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); + + spin_lock_irq(>->hwsp_lock); + + /* hwsp_free_list only contains HWSP that have available cachelines */ + hwsp = list_first_entry_or_null(>->hwsp_free_list, + typeof(*hwsp), free_link); + if (!hwsp) { + struct i915_vma *vma; + + spin_unlock_irq(>->hwsp_lock); + + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); + if (!hwsp) + return ERR_PTR(-ENOMEM); + + vma = __hwsp_alloc(timeline->gt); + if (IS_ERR(vma)) { + kfree(hwsp); + return vma; + } + + vma->private = hwsp; + hwsp->gt = timeline->gt; + hwsp->vma = vma; + hwsp->free_bitmap = ~0ull; + hwsp->gt_timelines = gt; + + spin_lock_irq(>->hwsp_lock); + list_add(&hwsp->free_link, >->hwsp_free_list); + } + + GEM_BUG_ON(!hwsp->free_bitmap); + *cacheline = __ffs64(hwsp->free_bitmap); + hwsp->free_bitmap &= ~BIT_ULL(*cacheline); + if (!hwsp->free_bitmap) + list_del(&hwsp->free_link); + + spin_unlock_irq(>->hwsp_lock); + + GEM_BUG_ON(hwsp->vma->private != hwsp); + return hwsp->vma; +} + +static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline) +{ + struct i915_gt_timelines *gt = hwsp->gt_timelines; + unsigned long flags; + + spin_lock_irqsave(>->hwsp_lock, flags); + + /* As a cacheline becomes available, publish the HWSP on the freelist */ + if (!hwsp->free_bitmap) + list_add_tail(&hwsp->free_link, >->hwsp_free_list); + + GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); + hwsp->free_bitmap |= BIT_ULL(cacheline); + + /* And if no one is left using it, give the page back to the system */ + if (hwsp->free_bitmap == ~0ull) { + i915_vma_put(hwsp->vma); + list_del(&hwsp->free_link); + kfree(hwsp); + } + + spin_unlock_irqrestore(>->hwsp_lock, flags); +} + +static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) +{ + GEM_BUG_ON(!i915_active_is_idle(&cl->active)); + + i915_gem_object_unpin_map(cl->hwsp->vma->obj); + i915_vma_put(cl->hwsp->vma); + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); + + i915_active_fini(&cl->active); + kfree(cl); +} + +static void __cacheline_retire(struct i915_active *active) +{ + struct intel_timeline_cacheline *cl = + container_of(active, typeof(*cl), active); + + i915_vma_unpin(cl->hwsp->vma); + if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) + __idle_cacheline_free(cl); +} + +static struct intel_timeline_cacheline * +cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) +{ + struct intel_timeline_cacheline *cl; + void *vaddr; + + GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); + + cl = kmalloc(sizeof(*cl), GFP_KERNEL); + if (!cl) + return ERR_PTR(-ENOMEM); + + vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + kfree(cl); + return ERR_CAST(vaddr); + } + + i915_vma_get(hwsp->vma); + cl->hwsp = hwsp; + cl->vaddr = page_pack_bits(vaddr, cacheline); + + i915_active_init(hwsp->gt->i915, &cl->active, __cacheline_retire); + + return cl; +} + +static void cacheline_acquire(struct intel_timeline_cacheline *cl) +{ + if (cl && i915_active_acquire(&cl->active)) + __i915_vma_pin(cl->hwsp->vma); +} + +static void cacheline_release(struct intel_timeline_cacheline *cl) +{ + if (cl) + i915_active_release(&cl->active); +} + +static void cacheline_free(struct intel_timeline_cacheline *cl) +{ + GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); + cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); + + if (i915_active_is_idle(&cl->active)) + __idle_cacheline_free(cl); +} + +int intel_timeline_init(struct intel_timeline *timeline, + struct intel_gt *gt, + struct i915_vma *hwsp) +{ + void *vaddr; + + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + * + * Called during early_init before we know how many engines there are. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + + timeline->gt = gt; + timeline->pin_count = 0; + timeline->has_initial_breadcrumb = !hwsp; + timeline->hwsp_cacheline = NULL; + + if (!hwsp) { + struct intel_timeline_cacheline *cl; + unsigned int cacheline; + + hwsp = hwsp_alloc(timeline, &cacheline); + if (IS_ERR(hwsp)) + return PTR_ERR(hwsp); + + cl = cacheline_alloc(hwsp->private, cacheline); + if (IS_ERR(cl)) { + __idle_hwsp_free(hwsp->private, cacheline); + return PTR_ERR(cl); + } + + timeline->hwsp_cacheline = cl; + timeline->hwsp_offset = cacheline * CACHELINE_BYTES; + + vaddr = page_mask_bits(cl->vaddr); + } else { + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + } + + timeline->hwsp_seqno = + memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); + + timeline->hwsp_ggtt = i915_vma_get(hwsp); + GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); + + timeline->fence_context = dma_fence_context_alloc(1); + + mutex_init(&timeline->mutex); + + INIT_ACTIVE_REQUEST(&timeline->last_request); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); + + return 0; +} + +static void timelines_init(struct intel_gt *gt) +{ + struct i915_gt_timelines *timelines = >->timelines; + + mutex_init(&timelines->mutex); + INIT_LIST_HEAD(&timelines->active_list); + + spin_lock_init(&timelines->hwsp_lock); + INIT_LIST_HEAD(&timelines->hwsp_free_list); + + /* via i915_gem_wait_for_idle() */ + i915_gem_shrinker_taints_mutex(gt->i915, &timelines->mutex); +} + +void intel_timelines_init(struct drm_i915_private *i915) +{ + timelines_init(&i915->gt); +} + +static void timeline_add_to_active(struct intel_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->gt->timelines; + + mutex_lock(>->mutex); + list_add(&tl->link, >->active_list); + mutex_unlock(>->mutex); +} + +static void timeline_remove_from_active(struct intel_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->gt->timelines; + + mutex_lock(>->mutex); + list_del(&tl->link); + mutex_unlock(>->mutex); +} + +static void timelines_park(struct intel_gt *gt) +{ + struct i915_gt_timelines *timelines = >->timelines; + struct intel_timeline *timeline; + + mutex_lock(&timelines->mutex); + list_for_each_entry(timeline, &timelines->active_list, link) { + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. + */ + i915_syncmap_free(&timeline->sync); + } + mutex_unlock(&timelines->mutex); +} + +/** + * intel_timelines_park - called when the driver idles + * @i915: the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void intel_timelines_park(struct drm_i915_private *i915) +{ + timelines_park(&i915->gt); +} + +void intel_timeline_fini(struct intel_timeline *timeline) +{ + GEM_BUG_ON(timeline->pin_count); + GEM_BUG_ON(!list_empty(&timeline->requests)); + + i915_syncmap_free(&timeline->sync); + + if (timeline->hwsp_cacheline) + cacheline_free(timeline->hwsp_cacheline); + else + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + + i915_vma_put(timeline->hwsp_ggtt); +} + +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) +{ + struct intel_timeline *timeline; + int err; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + err = intel_timeline_init(timeline, gt, global_hwsp); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + + kref_init(&timeline->kref); + + return timeline; +} + +int intel_timeline_pin(struct intel_timeline *tl) +{ + int err; + + if (tl->pin_count++) + return 0; + GEM_BUG_ON(!tl->pin_count); + + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto unpin; + + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + + cacheline_acquire(tl->hwsp_cacheline); + timeline_add_to_active(tl); + + return 0; + +unpin: + tl->pin_count = 0; + return err; +} + +static u32 timeline_advance(struct intel_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); + + return tl->seqno += 1 + tl->has_initial_breadcrumb; +} + +static void timeline_rollback(struct intel_timeline *tl) +{ + tl->seqno -= 1 + tl->has_initial_breadcrumb; +} + +static noinline int +__intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + struct intel_timeline_cacheline *cl; + unsigned int cacheline; + struct i915_vma *vma; + void *vaddr; + int err; + + /* + * If there is an outstanding GPU reference to this cacheline, + * such as it being sampled by a HW semaphore on another timeline, + * we cannot wraparound our seqno value (the HW semaphore does + * a strict greater-than-or-equals compare, not i915_seqno_passed). + * So if the cacheline is still busy, we must detach ourselves + * from it and leave it inflight alongside its users. + * + * However, if nobody is watching and we can guarantee that nobody + * will, we could simply reuse the same cacheline. + * + * if (i915_active_request_is_signaled(&tl->last_request) && + * i915_active_is_signaled(&tl->hwsp_cacheline->active)) + * return 0; + * + * That seems unlikely for a busy timeline that needed to wrap in + * the first place, so just replace the cacheline. + */ + + vma = hwsp_alloc(tl, &cacheline); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_rollback; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) { + __idle_hwsp_free(vma->private, cacheline); + goto err_rollback; + } + + cl = cacheline_alloc(vma->private, cacheline); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + __idle_hwsp_free(vma->private, cacheline); + goto err_unpin; + } + GEM_BUG_ON(cl->hwsp->vma != vma); + + /* + * Attach the old cacheline to the current request, so that we only + * free it after the current request is retired, which ensures that + * all writes into the cacheline from previous requests are complete. + */ + err = i915_active_ref(&tl->hwsp_cacheline->active, + tl->fence_context, rq); + if (err) + goto err_cacheline; + + cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ + cacheline_free(tl->hwsp_cacheline); + + i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ + i915_vma_put(tl->hwsp_ggtt); + + tl->hwsp_ggtt = i915_vma_get(vma); + + vaddr = page_mask_bits(cl->vaddr); + tl->hwsp_offset = cacheline * CACHELINE_BYTES; + tl->hwsp_seqno = + memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); + + tl->hwsp_offset += i915_ggtt_offset(vma); + + cacheline_acquire(cl); + tl->hwsp_cacheline = cl; + + *seqno = timeline_advance(tl); + GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); + return 0; + +err_cacheline: + cacheline_free(cl); +err_unpin: + i915_vma_unpin(vma); +err_rollback: + timeline_rollback(tl); + return err; +} + +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + *seqno = timeline_advance(tl); + + /* Replace the HWSP on wraparound for HW semaphores */ + if (unlikely(!*seqno && tl->hwsp_cacheline)) + return __intel_timeline_get_seqno(tl, rq, seqno); + + return 0; +} + +static int cacheline_ref(struct intel_timeline_cacheline *cl, + struct i915_request *rq) +{ + return i915_active_ref(&cl->active, rq->fence.context, rq); +} + +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request *to, + u32 *hwsp) +{ + struct intel_timeline_cacheline *cl = from->hwsp_cacheline; + struct intel_timeline *tl = from->timeline; + int err; + + GEM_BUG_ON(to->timeline == tl); + + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = i915_request_completed(from); + if (!err) + err = cacheline_ref(cl, to); + if (!err) { + if (likely(cl == tl->hwsp_cacheline)) { + *hwsp = tl->hwsp_offset; + } else { /* across a seqno wrap, recover the original offset */ + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * + CACHELINE_BYTES; + } + } + mutex_unlock(&tl->mutex); + + return err; +} + +void intel_timeline_unpin(struct intel_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + if (--tl->pin_count) + return; + + timeline_remove_from_active(tl); + cacheline_release(tl->hwsp_cacheline); + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); + + __i915_vma_unpin(tl->hwsp_ggtt); +} + +void __intel_timeline_free(struct kref *kref) +{ + struct intel_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + intel_timeline_fini(timeline); + kfree(timeline); +} + +static void timelines_fini(struct intel_gt *gt) +{ + struct i915_gt_timelines *timelines = >->timelines; + + GEM_BUG_ON(!list_empty(&timelines->active_list)); + GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); + + mutex_destroy(&timelines->mutex); +} + +void intel_timelines_fini(struct drm_i915_private *i915) +{ + timelines_fini(&i915->gt); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "gt/selftests/mock_timeline.c" +#include "gt/selftest_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h new file mode 100644 index 000000000000..e08cebf64833 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -0,0 +1,93 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H + +#include + +#include "i915_active.h" +#include "i915_syncmap.h" +#include "gt/intel_timeline_types.h" + +int intel_timeline_init(struct intel_timeline *tl, + struct intel_gt *gt, + struct i915_vma *hwsp); +void intel_timeline_fini(struct intel_timeline *tl); + +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); + +static inline struct intel_timeline * +intel_timeline_get(struct intel_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __intel_timeline_free(struct kref *kref); +static inline void intel_timeline_put(struct intel_timeline *timeline) +{ + kref_put(&timeline->kref, __intel_timeline_free); +} + +static inline int __intel_timeline_sync_set(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int intel_timeline_sync_set(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + +int intel_timeline_pin(struct intel_timeline *tl); +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno); +void intel_timeline_unpin(struct intel_timeline *tl); + +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request *until, + u32 *hwsp_offset); + +void intel_timelines_init(struct drm_i915_private *i915); +void intel_timelines_park(struct drm_i915_private *i915); +void intel_timelines_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h new file mode 100644 index 000000000000..9a71aea7a338 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_TIMELINE_TYPES_H__ +#define __I915_TIMELINE_TYPES_H__ + +#include +#include +#include +#include + +#include "i915_active_types.h" + +struct drm_i915_private; +struct i915_vma; +struct intel_timeline_cacheline; +struct i915_syncmap; + +struct intel_timeline { + u64 fence_context; + u32 seqno; + + struct mutex mutex; /* protects the flow of requests */ + + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + struct intel_timeline_cacheline *hwsp_cacheline; + + bool has_initial_breadcrumb; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_active_request_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_active_request last_request; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + + struct list_head link; + struct intel_gt *gt; + + struct kref kref; +}; + +#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 423027aa71cd..bf0974b12f3d 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -33,15 +33,15 @@ struct mock_ring { struct intel_ring base; - struct i915_timeline timeline; + struct intel_timeline timeline; }; -static void mock_timeline_pin(struct i915_timeline *tl) +static void mock_timeline_pin(struct intel_timeline *tl) { tl->pin_count++; } -static void mock_timeline_unpin(struct i915_timeline *tl) +static void mock_timeline_unpin(struct intel_timeline *tl) { GEM_BUG_ON(!tl->pin_count); tl->pin_count--; @@ -56,7 +56,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - if (i915_timeline_init(&ring->timeline, engine->gt, NULL)) { + if (intel_timeline_init(&ring->timeline, engine->gt, NULL)) { kfree(ring); return NULL; } @@ -78,7 +78,7 @@ static void mock_ring_free(struct intel_ring *base) { struct mock_ring *ring = container_of(base, typeof(*ring), base); - i915_timeline_fini(&ring->timeline); + intel_timeline_fini(&ring->timeline); kfree(ring); } diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c new file mode 100644 index 000000000000..193cc564ade2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -0,0 +1,845 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include + +#include "gem/i915_gem_pm.h" + +#include "../selftests/i915_random.h" +#include "../i915_selftest.h" + +#include "../selftests/igt_flush_test.h" +#include "../selftests/mock_gem_device.h" +#include "selftests/mock_timeline.h" + +static struct page *hwsp_page(struct intel_timeline *tl) +{ + struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct intel_timeline *tl) +{ + unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + + return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { + struct drm_i915_private *i915; + struct radix_tree_root cachelines; + struct intel_timeline **history; + unsigned long count, max; + struct rnd_state prng; +}; + +enum { + SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, + unsigned int idx, + struct intel_timeline *tl) +{ + tl = xchg(&state->history[idx], tl); + if (tl) { + radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + intel_timeline_put(tl); + } +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, + unsigned int count, + unsigned int flags) +{ + struct intel_timeline *tl; + unsigned int idx; + + while (count--) { + unsigned long cacheline; + int err; + + tl = intel_timeline_create(&state->i915->gt, NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + cacheline = hwsp_cacheline(tl); + err = radix_tree_insert(&state->cachelines, cacheline, tl); + if (err) { + if (err == -EEXIST) { + pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", + cacheline); + } + intel_timeline_put(tl); + return err; + } + + idx = state->count++ % state->max; + __mock_hwsp_record(state, idx, tl); + } + + if (flags & SHUFFLE) + i915_prandom_shuffle(state->history, + sizeof(*state->history), + min(state->count, state->max), + &state->prng); + + count = i915_prandom_u32_max_state(min(state->count, state->max), + &state->prng); + while (count--) { + idx = --state->count % state->max; + __mock_hwsp_record(state, idx, NULL); + } + + return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ + struct mock_hwsp_freelist state; + const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "linear", 0 }, + { "shuffled", SHUFFLE }, + { }, + }, *p; + unsigned int na; + int err = 0; + + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); + state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + + state.i915 = mock_gem_device(); + if (!state.i915) + return -ENOMEM; + + /* + * Create a bunch of timelines and check that their HWSP do not overlap. + * Free some, and try again. + */ + + state.max = PAGE_SIZE / sizeof(*state.history); + state.count = 0; + state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); + if (!state.history) { + err = -ENOMEM; + goto err_put; + } + + mutex_lock(&state.i915->drm.struct_mutex); + for (p = phases; p->name; p++) { + pr_debug("%s(%s)\n", __func__, p->name); + for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { + err = __mock_hwsp_timeline(&state, na, p->flags); + if (err) + goto out; + } + } + +out: + for (na = 0; na < state.max; na++) + __mock_hwsp_record(&state, na, NULL); + mutex_unlock(&state.i915->drm.struct_mutex); + kfree(state.history); +err_put: + drm_dev_put(&state.i915->drm); + return err; +} + +struct __igt_sync { + const char *name; + u32 seqno; + bool expected; + bool set; +}; + +static int __igt_sync(struct intel_timeline *tl, + u64 ctx, + const struct __igt_sync *p, + const char *name) +{ + int ret; + + if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", + name, p->name, ctx, p->seqno, yesno(p->expected)); + return -EINVAL; + } + + if (p->set) { + ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + if (ret) + return ret; + } + + return 0; +} + +static int igt_sync(void *arg) +{ + const struct __igt_sync pass[] = { + { "unset", 0, false, false }, + { "new", 0, false, true }, + { "0a", 0, true, true }, + { "1a", 1, false, true }, + { "1b", 1, true, true }, + { "0b", 0, true, false }, + { "2a", 2, false, true }, + { "4", 4, false, true }, + { "INT_MAX", INT_MAX, false, true }, + { "INT_MAX-1", INT_MAX-1, true, false }, + { "INT_MAX+1", (u32)INT_MAX+1, false, true }, + { "INT_MAX", INT_MAX, true, false }, + { "UINT_MAX", UINT_MAX, false, true }, + { "wrap", 0, false, true }, + { "unwrap", UINT_MAX, true, false }, + {}, + }, *p; + struct intel_timeline tl; + int order, offset; + int ret = -ENODEV; + + mock_timeline_init(&tl, 0); + for (p = pass; p->name; p++) { + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + ret = __igt_sync(&tl, ctx, p, "1"); + if (ret) + goto out; + } + } + } + mock_timeline_fini(&tl); + + mock_timeline_init(&tl, 0); + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + for (p = pass; p->name; p++) { + ret = __igt_sync(&tl, ctx, p, "2"); + if (ret) + goto out; + } + } + } + +out: + mock_timeline_fini(&tl); + return ret; +} + +static unsigned int random_engine(struct rnd_state *rnd) +{ + return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); +} + +static int bench_sync(void *arg) +{ + struct rnd_state prng; + struct intel_timeline tl; + unsigned long end_time, count; + u64 prng32_1M; + ktime_t kt; + int order, last_order; + + mock_timeline_init(&tl, 0); + + /* Lookups from cache are very fast and so the random number generation + * and the loop itself becomes a significant factor in the per-iteration + * timings. We try to compensate the results by measuring the overhead + * of the prng and subtract it from the reported results. + */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 x; + + /* Make sure the compiler doesn't optimise away the prng call */ + WRITE_ONCE(x, prandom_u32_state(&prng)); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_debug("%s: %lu random evaluations, %lluns/prng\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); + + /* Benchmark (only) setting random context ids */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u64 id = i915_prandom_u64_state(&prng); + + __intel_timeline_sync_set(&tl, id, 0); + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + prandom_seed_state(&prng, i915_selftest.random_seed); + end_time = count; + kt = ktime_get(); + while (end_time--) { + u64 id = i915_prandom_u64_state(&prng); + + if (!__intel_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); + pr_err("Lookup of %llu failed\n", id); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark setting the first N (in order) contexts */ + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + __intel_timeline_sync_set(&tl, count++, 0); + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + end_time = count; + kt = ktime_get(); + while (end_time--) { + if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) { + pr_err("Lookup of %lu failed\n", end_time); + mock_timeline_fini(&tl); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark searching for a random context id and maybe changing it */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 id = random_engine(&prng); + u32 seqno = prandom_u32_state(&prng); + + if (!__intel_timeline_sync_is_later(&tl, id, seqno)) + __intel_timeline_sync_set(&tl, id, seqno); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_fini(&tl); + cond_resched(); + + /* Benchmark searching for a known context id and changing the seqno */ + for (last_order = 1, order = 1; order < 32; + ({ int tmp = last_order; last_order = order; order += tmp; })) { + unsigned int mask = BIT(order) - 1; + + mock_timeline_init(&tl, 0); + + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + /* Without assuming too many details of the underlying + * implementation, try to identify its phase-changes + * (if any)! + */ + u64 id = (u64)(count & mask) << order; + + __intel_timeline_sync_is_later(&tl, id, 0); + __intel_timeline_sync_set(&tl, id, 0); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", + __func__, count, order, + (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_fini(&tl); + cond_resched(); + } + + return 0; +} + +int intel_timeline_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(mock_hwsp_freelist), + SUBTEST(igt_sync), + SUBTEST(bench_sync), + }; + + return i915_subtests(tests, NULL); +} + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = 0; + *cs++ = value; + } else if (INTEL_GEN(rq->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = addr; + *cs++ = value; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = addr; + *cs++ = value; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_request * +tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ + + err = intel_timeline_pin(tl); + if (err) { + rq = ERR_PTR(err); + goto out; + } + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + goto out_unpin; + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_unpin: + intel_timeline_unpin(tl); +out: + if (IS_ERR(rq)) + pr_err("Failed to write to timeline!\n"); + return rq; +} + +static struct intel_timeline * +checked_intel_timeline_create(struct drm_i915_private *i915) +{ + struct intel_timeline *tl; + + tl = intel_timeline_create(&i915->gt, NULL); + if (IS_ERR(tl)) + return tl; + + if (*tl->hwsp_seqno != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + intel_timeline_put(tl); + return ERR_PTR(-EINVAL); + } + + return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct intel_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + for (n = 0; n < NUM_TIMELINES; n++) { + struct intel_timeline *tl; + struct i915_request *rq; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct intel_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + intel_timeline_put(tl); + } + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct intel_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots with adjacent + * engines. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for (n = 0; n < NUM_TIMELINES; n++) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl; + struct i915_request *rq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct intel_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + intel_timeline_put(tl); + } + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_wrap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct intel_timeline *tl; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = 0; + + /* + * Across a seqno wrap, we need to keep the old cacheline alive for + * foreign GPU references. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + tl = intel_timeline_create(&i915->gt, NULL); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out_rpm; + } + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + goto out_free; + + err = intel_timeline_pin(tl); + if (err) + goto out_free; + + for_each_engine(engine, i915, id) { + const u32 *hwsp_seqno[2]; + struct i915_request *rq; + u32 seqno[2]; + + if (!intel_engine_can_store_dword(engine)) + continue; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + tl->seqno = -4u; + + err = intel_timeline_get_seqno(tl, rq, &seqno[0]); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", + seqno[0], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[0] = tl->hwsp_seqno; + + err = intel_timeline_get_seqno(tl, rq, &seqno[1]); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", + seqno[1], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[1] = tl->hwsp_seqno; + + /* With wrap should come a new hwsp */ + GEM_BUG_ON(seqno[1] >= seqno[0]); + GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); + + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + err = -EIO; + goto out; + } + + if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { + pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", + *hwsp_seqno[0], *hwsp_seqno[1], + seqno[0], seqno[1]); + err = -EINVAL; + goto out; + } + + i915_retire_requests(i915); /* recycle HWSP */ + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + intel_timeline_unpin(tl); +out_free: + intel_timeline_put(tl); +out_rpm: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int live_hwsp_recycle(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count; + int err = 0; + + /* + * Check seqno writes into one timeline at a time. We expect to + * recycle the breadcrumb slot between iterations and neither + * want to confuse ourselves or the GPU. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) + continue; + + do { + struct intel_timeline *tl; + struct i915_request *rq; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + intel_timeline_put(tl); + err = -EIO; + goto out; + } + + if (*tl->hwsp_seqno != count) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + count, *tl->hwsp_seqno); + err = -EINVAL; + } + + intel_timeline_put(tl); + count++; + + if (err) + goto out; + + intel_timelines_park(i915); /* Encourage recycling! */ + } while (!__igt_timeout(end_time, NULL)); + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +int intel_timeline_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_recycle), + SUBTEST(live_hwsp_engine), + SUBTEST(live_hwsp_alternate), + SUBTEST(live_hwsp_wrap), + }; + + if (i915_terminally_wedged(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c new file mode 100644 index 000000000000..5c549205828a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include "../intel_timeline.h" + +#include "mock_timeline.h" + +void mock_timeline_init(struct intel_timeline *timeline, u64 context) +{ + timeline->gt = NULL; + timeline->fence_context = context; + + mutex_init(&timeline->mutex); + + INIT_ACTIVE_REQUEST(&timeline->last_request); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); + + INIT_LIST_HEAD(&timeline->link); +} + +void mock_timeline_fini(struct intel_timeline *timeline) +{ + i915_syncmap_free(&timeline->sync); +} diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h new file mode 100644 index 000000000000..689efc66c908 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#ifndef __MOCK_TIMELINE__ +#define __MOCK_TIMELINE__ + +struct intel_timeline; + +void mock_timeline_init(struct intel_timeline *timeline, u64 context); +void mock_timeline_fini(struct intel_timeline *timeline); + +#endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b574aea23581..89a21fa4eac2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -89,7 +89,7 @@ #include "i915_gpu_error.h" #include "i915_request.h" #include "i915_scheduler.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 50d7e1e8d8ad..6e07127242d9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -909,7 +909,7 @@ wait_for_timelines(struct drm_i915_private *i915, unsigned int flags, long timeout) { struct i915_gt_timelines *gt = &i915->gt.timelines; - struct i915_timeline *tl; + struct intel_timeline *tl; mutex_lock(>->mutex); list_for_each_entry(tl, >->active_list, link) { @@ -1487,7 +1487,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); - i915_timelines_init(dev_priv); + intel_timelines_init(dev_priv); ret = i915_gem_init_userptr(dev_priv); if (ret) @@ -1624,7 +1624,7 @@ err_uc_misc: if (ret != -EIO) { i915_gem_cleanup_userptr(dev_priv); - i915_timelines_fini(dev_priv); + intel_timelines_fini(dev_priv); } if (ret == -EIO) { @@ -1688,7 +1688,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); - i915_timelines_fini(dev_priv); + intel_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e8b9ebe50c4e..028be3b44d07 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -47,7 +47,7 @@ #include "i915_request.h" #include "i915_scatterlist.h" #include "i915_selftest.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) #define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0c99694faab7..5ff87c4a0cd5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -607,7 +607,7 @@ out: struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { - struct i915_timeline *tl = ce->ring->timeline; + struct intel_timeline *tl = ce->ring->timeline; struct i915_request *rq; u32 seqno; int ret; @@ -656,7 +656,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - ret = i915_timeline_get_seqno(tl, rq, &seqno); + ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; @@ -775,7 +775,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) return 0; signal = list_prev_entry(signal, ring_link); - if (i915_timeline_sync_is_later(rq->timeline, &signal->fence)) + if (intel_timeline_sync_is_later(rq->timeline, &signal->fence)) return 0; return i915_sw_fence_await_dma_fence(&rq->submit, @@ -829,7 +829,7 @@ emit_semaphore_wait(struct i915_request *to, return err; /* We need to pin the signaler's HWSP until we are finished reading. */ - err = i915_timeline_read_hwsp(from, to, &hwsp_offset); + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); if (err) return err; @@ -940,7 +940,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context != rq->i915->mm.unordered_timeline && - i915_timeline_sync_is_later(rq->timeline, fence)) + intel_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -954,7 +954,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context != rq->i915->mm.unordered_timeline) - i915_timeline_sync_set(rq->timeline, fence); + intel_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -1092,7 +1092,7 @@ void i915_request_skip(struct i915_request *rq, int error) static struct i915_request * __i915_request_add_to_timeline(struct i915_request *rq) { - struct i915_timeline *timeline = rq->timeline; + struct intel_timeline *timeline = rq->timeline; struct i915_request *prev; /* diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index bebc1e9b4a5e..b58ceef92e20 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -41,8 +41,8 @@ struct drm_file; struct drm_i915_gem_object; struct i915_request; -struct i915_timeline; -struct i915_timeline_cacheline; +struct intel_timeline; +struct intel_timeline_cacheline; struct i915_capture_list { struct i915_capture_list *next; @@ -113,7 +113,7 @@ struct i915_request { struct intel_engine_cs *engine; struct intel_context *hw_context; struct intel_ring *ring; - struct i915_timeline *timeline; + struct intel_timeline *timeline; struct list_head signal_link; /* @@ -176,7 +176,7 @@ struct i915_request { * inside the timeline's HWSP vma, but it is only valid while this * request has not completed and guarded by the timeline mutex. */ - struct i915_timeline_cacheline *hwsp_cacheline; + struct intel_timeline_cacheline *hwsp_cacheline; /** Position in the ring of the start of the request */ u32 head; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c deleted file mode 100644 index 3e2c3169dc69..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2016-2018 Intel Corporation - */ - -#include "gt/intel_gt_types.h" - -#include "i915_drv.h" - -#include "i915_active.h" -#include "i915_syncmap.h" -#include "i915_timeline.h" - -#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) -#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) - -struct i915_timeline_hwsp { - struct intel_gt *gt; - struct i915_gt_timelines *gt_timelines; - struct list_head free_link; - struct i915_vma *vma; - u64 free_bitmap; -}; - -struct i915_timeline_cacheline { - struct i915_active active; - struct i915_timeline_hwsp *hwsp; - void *vaddr; -#define CACHELINE_BITS 6 -#define CACHELINE_FREE CACHELINE_BITS -}; - -static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - vma = i915_vma_instance(obj, >->ggtt->vm, NULL); - if (IS_ERR(vma)) - i915_gem_object_put(obj); - - return vma; -} - -static struct i915_vma * -hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) -{ - struct i915_gt_timelines *gt = &timeline->gt->timelines; - struct i915_timeline_hwsp *hwsp; - - BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); - - spin_lock_irq(>->hwsp_lock); - - /* hwsp_free_list only contains HWSP that have available cachelines */ - hwsp = list_first_entry_or_null(>->hwsp_free_list, - typeof(*hwsp), free_link); - if (!hwsp) { - struct i915_vma *vma; - - spin_unlock_irq(>->hwsp_lock); - - hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); - if (!hwsp) - return ERR_PTR(-ENOMEM); - - vma = __hwsp_alloc(timeline->gt); - if (IS_ERR(vma)) { - kfree(hwsp); - return vma; - } - - vma->private = hwsp; - hwsp->gt = timeline->gt; - hwsp->vma = vma; - hwsp->free_bitmap = ~0ull; - hwsp->gt_timelines = gt; - - spin_lock_irq(>->hwsp_lock); - list_add(&hwsp->free_link, >->hwsp_free_list); - } - - GEM_BUG_ON(!hwsp->free_bitmap); - *cacheline = __ffs64(hwsp->free_bitmap); - hwsp->free_bitmap &= ~BIT_ULL(*cacheline); - if (!hwsp->free_bitmap) - list_del(&hwsp->free_link); - - spin_unlock_irq(>->hwsp_lock); - - GEM_BUG_ON(hwsp->vma->private != hwsp); - return hwsp->vma; -} - -static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline) -{ - struct i915_gt_timelines *gt = hwsp->gt_timelines; - unsigned long flags; - - spin_lock_irqsave(>->hwsp_lock, flags); - - /* As a cacheline becomes available, publish the HWSP on the freelist */ - if (!hwsp->free_bitmap) - list_add_tail(&hwsp->free_link, >->hwsp_free_list); - - GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); - hwsp->free_bitmap |= BIT_ULL(cacheline); - - /* And if no one is left using it, give the page back to the system */ - if (hwsp->free_bitmap == ~0ull) { - i915_vma_put(hwsp->vma); - list_del(&hwsp->free_link); - kfree(hwsp); - } - - spin_unlock_irqrestore(>->hwsp_lock, flags); -} - -static void __idle_cacheline_free(struct i915_timeline_cacheline *cl) -{ - GEM_BUG_ON(!i915_active_is_idle(&cl->active)); - - i915_gem_object_unpin_map(cl->hwsp->vma->obj); - i915_vma_put(cl->hwsp->vma); - __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); - - i915_active_fini(&cl->active); - kfree(cl); -} - -static void __cacheline_retire(struct i915_active *active) -{ - struct i915_timeline_cacheline *cl = - container_of(active, typeof(*cl), active); - - i915_vma_unpin(cl->hwsp->vma); - if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) - __idle_cacheline_free(cl); -} - -static struct i915_timeline_cacheline * -cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline) -{ - struct i915_timeline_cacheline *cl; - void *vaddr; - - GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); - - cl = kmalloc(sizeof(*cl), GFP_KERNEL); - if (!cl) - return ERR_PTR(-ENOMEM); - - vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - kfree(cl); - return ERR_CAST(vaddr); - } - - i915_vma_get(hwsp->vma); - cl->hwsp = hwsp; - cl->vaddr = page_pack_bits(vaddr, cacheline); - - i915_active_init(hwsp->gt->i915, &cl->active, __cacheline_retire); - - return cl; -} - -static void cacheline_acquire(struct i915_timeline_cacheline *cl) -{ - if (cl && i915_active_acquire(&cl->active)) - __i915_vma_pin(cl->hwsp->vma); -} - -static void cacheline_release(struct i915_timeline_cacheline *cl) -{ - if (cl) - i915_active_release(&cl->active); -} - -static void cacheline_free(struct i915_timeline_cacheline *cl) -{ - GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); - cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); - - if (i915_active_is_idle(&cl->active)) - __idle_cacheline_free(cl); -} - -int i915_timeline_init(struct i915_timeline *timeline, - struct intel_gt *gt, - struct i915_vma *hwsp) -{ - void *vaddr; - - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. - * - * Called during early_init before we know how many engines there are. - */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); - - timeline->gt = gt; - timeline->pin_count = 0; - timeline->has_initial_breadcrumb = !hwsp; - timeline->hwsp_cacheline = NULL; - - if (!hwsp) { - struct i915_timeline_cacheline *cl; - unsigned int cacheline; - - hwsp = hwsp_alloc(timeline, &cacheline); - if (IS_ERR(hwsp)) - return PTR_ERR(hwsp); - - cl = cacheline_alloc(hwsp->private, cacheline); - if (IS_ERR(cl)) { - __idle_hwsp_free(hwsp->private, cacheline); - return PTR_ERR(cl); - } - - timeline->hwsp_cacheline = cl; - timeline->hwsp_offset = cacheline * CACHELINE_BYTES; - - vaddr = page_mask_bits(cl->vaddr); - } else { - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; - - vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - } - - timeline->hwsp_seqno = - memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); - - timeline->hwsp_ggtt = i915_vma_get(hwsp); - GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); - - timeline->fence_context = dma_fence_context_alloc(1); - - mutex_init(&timeline->mutex); - - INIT_ACTIVE_REQUEST(&timeline->last_request); - INIT_LIST_HEAD(&timeline->requests); - - i915_syncmap_init(&timeline->sync); - - return 0; -} - -static void timelines_init(struct intel_gt *gt) -{ - struct i915_gt_timelines *timelines = >->timelines; - - mutex_init(&timelines->mutex); - INIT_LIST_HEAD(&timelines->active_list); - - spin_lock_init(&timelines->hwsp_lock); - INIT_LIST_HEAD(&timelines->hwsp_free_list); - - /* via i915_gem_wait_for_idle() */ - i915_gem_shrinker_taints_mutex(gt->i915, &timelines->mutex); -} - -void i915_timelines_init(struct drm_i915_private *i915) -{ - timelines_init(&i915->gt); -} - -static void timeline_add_to_active(struct i915_timeline *tl) -{ - struct i915_gt_timelines *gt = &tl->gt->timelines; - - mutex_lock(>->mutex); - list_add(&tl->link, >->active_list); - mutex_unlock(>->mutex); -} - -static void timeline_remove_from_active(struct i915_timeline *tl) -{ - struct i915_gt_timelines *gt = &tl->gt->timelines; - - mutex_lock(>->mutex); - list_del(&tl->link); - mutex_unlock(>->mutex); -} - -static void timelines_park(struct intel_gt *gt) -{ - struct i915_gt_timelines *timelines = >->timelines; - struct i915_timeline *timeline; - - mutex_lock(&timelines->mutex); - list_for_each_entry(timeline, &timelines->active_list, link) { - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. - */ - i915_syncmap_free(&timeline->sync); - } - mutex_unlock(&timelines->mutex); -} - -/** - * i915_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. - */ -void i915_timelines_park(struct drm_i915_private *i915) -{ - timelines_park(&i915->gt); -} - -void i915_timeline_fini(struct i915_timeline *timeline) -{ - GEM_BUG_ON(timeline->pin_count); - GEM_BUG_ON(!list_empty(&timeline->requests)); - - i915_syncmap_free(&timeline->sync); - - if (timeline->hwsp_cacheline) - cacheline_free(timeline->hwsp_cacheline); - else - i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); - - i915_vma_put(timeline->hwsp_ggtt); -} - -struct i915_timeline * -i915_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) -{ - struct i915_timeline *timeline; - int err; - - timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); - if (!timeline) - return ERR_PTR(-ENOMEM); - - err = i915_timeline_init(timeline, gt, global_hwsp); - if (err) { - kfree(timeline); - return ERR_PTR(err); - } - - kref_init(&timeline->kref); - - return timeline; -} - -int i915_timeline_pin(struct i915_timeline *tl) -{ - int err; - - if (tl->pin_count++) - return 0; - GEM_BUG_ON(!tl->pin_count); - - err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (err) - goto unpin; - - tl->hwsp_offset = - i915_ggtt_offset(tl->hwsp_ggtt) + - offset_in_page(tl->hwsp_offset); - - cacheline_acquire(tl->hwsp_cacheline); - timeline_add_to_active(tl); - - return 0; - -unpin: - tl->pin_count = 0; - return err; -} - -static u32 timeline_advance(struct i915_timeline *tl) -{ - GEM_BUG_ON(!tl->pin_count); - GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); - - return tl->seqno += 1 + tl->has_initial_breadcrumb; -} - -static void timeline_rollback(struct i915_timeline *tl) -{ - tl->seqno -= 1 + tl->has_initial_breadcrumb; -} - -static noinline int -__i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno) -{ - struct i915_timeline_cacheline *cl; - unsigned int cacheline; - struct i915_vma *vma; - void *vaddr; - int err; - - /* - * If there is an outstanding GPU reference to this cacheline, - * such as it being sampled by a HW semaphore on another timeline, - * we cannot wraparound our seqno value (the HW semaphore does - * a strict greater-than-or-equals compare, not i915_seqno_passed). - * So if the cacheline is still busy, we must detach ourselves - * from it and leave it inflight alongside its users. - * - * However, if nobody is watching and we can guarantee that nobody - * will, we could simply reuse the same cacheline. - * - * if (i915_active_request_is_signaled(&tl->last_request) && - * i915_active_is_signaled(&tl->hwsp_cacheline->active)) - * return 0; - * - * That seems unlikely for a busy timeline that needed to wrap in - * the first place, so just replace the cacheline. - */ - - vma = hwsp_alloc(tl, &cacheline); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_rollback; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (err) { - __idle_hwsp_free(vma->private, cacheline); - goto err_rollback; - } - - cl = cacheline_alloc(vma->private, cacheline); - if (IS_ERR(cl)) { - err = PTR_ERR(cl); - __idle_hwsp_free(vma->private, cacheline); - goto err_unpin; - } - GEM_BUG_ON(cl->hwsp->vma != vma); - - /* - * Attach the old cacheline to the current request, so that we only - * free it after the current request is retired, which ensures that - * all writes into the cacheline from previous requests are complete. - */ - err = i915_active_ref(&tl->hwsp_cacheline->active, - tl->fence_context, rq); - if (err) - goto err_cacheline; - - cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ - cacheline_free(tl->hwsp_cacheline); - - i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ - i915_vma_put(tl->hwsp_ggtt); - - tl->hwsp_ggtt = i915_vma_get(vma); - - vaddr = page_mask_bits(cl->vaddr); - tl->hwsp_offset = cacheline * CACHELINE_BYTES; - tl->hwsp_seqno = - memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); - - tl->hwsp_offset += i915_ggtt_offset(vma); - - cacheline_acquire(cl); - tl->hwsp_cacheline = cl; - - *seqno = timeline_advance(tl); - GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); - return 0; - -err_cacheline: - cacheline_free(cl); -err_unpin: - i915_vma_unpin(vma); -err_rollback: - timeline_rollback(tl); - return err; -} - -int i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno) -{ - *seqno = timeline_advance(tl); - - /* Replace the HWSP on wraparound for HW semaphores */ - if (unlikely(!*seqno && tl->hwsp_cacheline)) - return __i915_timeline_get_seqno(tl, rq, seqno); - - return 0; -} - -static int cacheline_ref(struct i915_timeline_cacheline *cl, - struct i915_request *rq) -{ - return i915_active_ref(&cl->active, rq->fence.context, rq); -} - -int i915_timeline_read_hwsp(struct i915_request *from, - struct i915_request *to, - u32 *hwsp) -{ - struct i915_timeline_cacheline *cl = from->hwsp_cacheline; - struct i915_timeline *tl = from->timeline; - int err; - - GEM_BUG_ON(to->timeline == tl); - - mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); - err = i915_request_completed(from); - if (!err) - err = cacheline_ref(cl, to); - if (!err) { - if (likely(cl == tl->hwsp_cacheline)) { - *hwsp = tl->hwsp_offset; - } else { /* across a seqno wrap, recover the original offset */ - *hwsp = i915_ggtt_offset(cl->hwsp->vma) + - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * - CACHELINE_BYTES; - } - } - mutex_unlock(&tl->mutex); - - return err; -} - -void i915_timeline_unpin(struct i915_timeline *tl) -{ - GEM_BUG_ON(!tl->pin_count); - if (--tl->pin_count) - return; - - timeline_remove_from_active(tl); - cacheline_release(tl->hwsp_cacheline); - - /* - * Since this timeline is idle, all bariers upon which we were waiting - * must also be complete and so we can discard the last used barriers - * without loss of information. - */ - i915_syncmap_free(&tl->sync); - - __i915_vma_unpin(tl->hwsp_ggtt); -} - -void __i915_timeline_free(struct kref *kref) -{ - struct i915_timeline *timeline = - container_of(kref, typeof(*timeline), kref); - - i915_timeline_fini(timeline); - kfree(timeline); -} - -static void timelines_fini(struct intel_gt *gt) -{ - struct i915_gt_timelines *timelines = >->timelines; - - GEM_BUG_ON(!list_empty(&timelines->active_list)); - GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); - - mutex_destroy(&timelines->mutex); -} - -void i915_timelines_fini(struct drm_i915_private *i915) -{ - timelines_fini(&i915->gt); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_timeline.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h deleted file mode 100644 index a454d49f229f..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef I915_TIMELINE_H -#define I915_TIMELINE_H - -#include - -#include "i915_active.h" -#include "i915_syncmap.h" -#include "i915_timeline_types.h" - -int i915_timeline_init(struct i915_timeline *tl, - struct intel_gt *gt, - struct i915_vma *hwsp); -void i915_timeline_fini(struct i915_timeline *tl); - -struct i915_timeline * -i915_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); - -static inline struct i915_timeline * -i915_timeline_get(struct i915_timeline *timeline) -{ - kref_get(&timeline->kref); - return timeline; -} - -void __i915_timeline_free(struct kref *kref); -static inline void i915_timeline_put(struct i915_timeline *timeline) -{ - kref_put(&timeline->kref, __i915_timeline_free); -} - -static inline int __i915_timeline_sync_set(struct i915_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_set(&tl->sync, context, seqno); -} - -static inline int i915_timeline_sync_set(struct i915_timeline *tl, - const struct dma_fence *fence) -{ - return __i915_timeline_sync_set(tl, fence->context, fence->seqno); -} - -static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_is_later(&tl->sync, context, seqno); -} - -static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, - const struct dma_fence *fence) -{ - return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); -} - -int i915_timeline_pin(struct i915_timeline *tl); -int i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno); -void i915_timeline_unpin(struct i915_timeline *tl); - -int i915_timeline_read_hwsp(struct i915_request *from, - struct i915_request *until, - u32 *hwsp_offset); - -void i915_timelines_init(struct drm_i915_private *i915); -void i915_timelines_park(struct drm_i915_private *i915); -void i915_timelines_fini(struct drm_i915_private *i915); - -#endif diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h deleted file mode 100644 index 931585e12d41..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2016 Intel Corporation - */ - -#ifndef __I915_TIMELINE_TYPES_H__ -#define __I915_TIMELINE_TYPES_H__ - -#include -#include -#include -#include - -#include "i915_active_types.h" - -struct drm_i915_private; -struct i915_vma; -struct i915_timeline_cacheline; -struct i915_syncmap; - -struct i915_timeline { - u64 fence_context; - u32 seqno; - - struct mutex mutex; /* protects the flow of requests */ - - unsigned int pin_count; - const u32 *hwsp_seqno; - struct i915_vma *hwsp_ggtt; - u32 hwsp_offset; - - struct i915_timeline_cacheline *hwsp_cacheline; - - bool has_initial_breadcrumb; - - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head requests; - - /* Contains an RCU guarded pointer to the last request. No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. - */ - struct i915_active_request last_request; - - /** - * We track the most recent seqno that we wait on in every context so - * that we only have to emit a new await and dependency on a more - * recent sync point. As the contexts may be executed out-of-order, we - * have to track each individually and can not rely on an absolute - * global_seqno. When we know that all tracked fences are completed - * (i.e. when the driver is idle), we know that the syncmap is - * redundant and we can discard it without loss of generality. - */ - struct i915_syncmap *sync; - - struct list_head link; - struct intel_gt *gt; - - struct kref kref; -}; - -#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index d5dc4427d664..2b31a4ee0b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -12,7 +12,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) -selftest(timelines, i915_timeline_live_selftests) +selftest(timelines, intel_timeline_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 510eb176bb2c..b55da4d9ccba 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,7 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) -selftest(timelines, i915_timeline_mock_selftests) +selftest(timelines, intel_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(phys, i915_gem_phys_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c deleted file mode 100644 index 44d031446f08..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation - */ - -#include - -#include "gem/i915_gem_pm.h" - -#include "i915_random.h" -#include "i915_selftest.h" - -#include "igt_flush_test.h" -#include "mock_gem_device.h" -#include "mock_timeline.h" - -static struct page *hwsp_page(struct i915_timeline *tl) -{ - struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; - - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - return sg_page(obj->mm.pages->sgl); -} - -static unsigned long hwsp_cacheline(struct i915_timeline *tl) -{ - unsigned long address = (unsigned long)page_address(hwsp_page(tl)); - - return (address + tl->hwsp_offset) / CACHELINE_BYTES; -} - -#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) - -struct mock_hwsp_freelist { - struct drm_i915_private *i915; - struct radix_tree_root cachelines; - struct i915_timeline **history; - unsigned long count, max; - struct rnd_state prng; -}; - -enum { - SHUFFLE = BIT(0), -}; - -static void __mock_hwsp_record(struct mock_hwsp_freelist *state, - unsigned int idx, - struct i915_timeline *tl) -{ - tl = xchg(&state->history[idx], tl); - if (tl) { - radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); - i915_timeline_put(tl); - } -} - -static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, - unsigned int count, - unsigned int flags) -{ - struct i915_timeline *tl; - unsigned int idx; - - while (count--) { - unsigned long cacheline; - int err; - - tl = i915_timeline_create(&state->i915->gt, NULL); - if (IS_ERR(tl)) - return PTR_ERR(tl); - - cacheline = hwsp_cacheline(tl); - err = radix_tree_insert(&state->cachelines, cacheline, tl); - if (err) { - if (err == -EEXIST) { - pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", - cacheline); - } - i915_timeline_put(tl); - return err; - } - - idx = state->count++ % state->max; - __mock_hwsp_record(state, idx, tl); - } - - if (flags & SHUFFLE) - i915_prandom_shuffle(state->history, - sizeof(*state->history), - min(state->count, state->max), - &state->prng); - - count = i915_prandom_u32_max_state(min(state->count, state->max), - &state->prng); - while (count--) { - idx = --state->count % state->max; - __mock_hwsp_record(state, idx, NULL); - } - - return 0; -} - -static int mock_hwsp_freelist(void *arg) -{ - struct mock_hwsp_freelist state; - const struct { - const char *name; - unsigned int flags; - } phases[] = { - { "linear", 0 }, - { "shuffled", SHUFFLE }, - { }, - }, *p; - unsigned int na; - int err = 0; - - INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); - state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); - - state.i915 = mock_gem_device(); - if (!state.i915) - return -ENOMEM; - - /* - * Create a bunch of timelines and check that their HWSP do not overlap. - * Free some, and try again. - */ - - state.max = PAGE_SIZE / sizeof(*state.history); - state.count = 0; - state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); - if (!state.history) { - err = -ENOMEM; - goto err_put; - } - - mutex_lock(&state.i915->drm.struct_mutex); - for (p = phases; p->name; p++) { - pr_debug("%s(%s)\n", __func__, p->name); - for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { - err = __mock_hwsp_timeline(&state, na, p->flags); - if (err) - goto out; - } - } - -out: - for (na = 0; na < state.max; na++) - __mock_hwsp_record(&state, na, NULL); - mutex_unlock(&state.i915->drm.struct_mutex); - kfree(state.history); -err_put: - drm_dev_put(&state.i915->drm); - return err; -} - -struct __igt_sync { - const char *name; - u32 seqno; - bool expected; - bool set; -}; - -static int __igt_sync(struct i915_timeline *tl, - u64 ctx, - const struct __igt_sync *p, - const char *name) -{ - int ret; - - if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { - pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", - name, p->name, ctx, p->seqno, yesno(p->expected)); - return -EINVAL; - } - - if (p->set) { - ret = __i915_timeline_sync_set(tl, ctx, p->seqno); - if (ret) - return ret; - } - - return 0; -} - -static int igt_sync(void *arg) -{ - const struct __igt_sync pass[] = { - { "unset", 0, false, false }, - { "new", 0, false, true }, - { "0a", 0, true, true }, - { "1a", 1, false, true }, - { "1b", 1, true, true }, - { "0b", 0, true, false }, - { "2a", 2, false, true }, - { "4", 4, false, true }, - { "INT_MAX", INT_MAX, false, true }, - { "INT_MAX-1", INT_MAX-1, true, false }, - { "INT_MAX+1", (u32)INT_MAX+1, false, true }, - { "INT_MAX", INT_MAX, true, false }, - { "UINT_MAX", UINT_MAX, false, true }, - { "wrap", 0, false, true }, - { "unwrap", UINT_MAX, true, false }, - {}, - }, *p; - struct i915_timeline tl; - int order, offset; - int ret = -ENODEV; - - mock_timeline_init(&tl, 0); - for (p = pass; p->name; p++) { - for (order = 1; order < 64; order++) { - for (offset = -1; offset <= (order > 1); offset++) { - u64 ctx = BIT_ULL(order) + offset; - - ret = __igt_sync(&tl, ctx, p, "1"); - if (ret) - goto out; - } - } - } - mock_timeline_fini(&tl); - - mock_timeline_init(&tl, 0); - for (order = 1; order < 64; order++) { - for (offset = -1; offset <= (order > 1); offset++) { - u64 ctx = BIT_ULL(order) + offset; - - for (p = pass; p->name; p++) { - ret = __igt_sync(&tl, ctx, p, "2"); - if (ret) - goto out; - } - } - } - -out: - mock_timeline_fini(&tl); - return ret; -} - -static unsigned int random_engine(struct rnd_state *rnd) -{ - return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); -} - -static int bench_sync(void *arg) -{ - struct rnd_state prng; - struct i915_timeline tl; - unsigned long end_time, count; - u64 prng32_1M; - ktime_t kt; - int order, last_order; - - mock_timeline_init(&tl, 0); - - /* Lookups from cache are very fast and so the random number generation - * and the loop itself becomes a significant factor in the per-iteration - * timings. We try to compensate the results by measuring the overhead - * of the prng and subtract it from the reported results. - */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u32 x; - - /* Make sure the compiler doesn't optimise away the prng call */ - WRITE_ONCE(x, prandom_u32_state(&prng)); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_debug("%s: %lu random evaluations, %lluns/prng\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); - - /* Benchmark (only) setting random context ids */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u64 id = i915_prandom_u64_state(&prng); - - __i915_timeline_sync_set(&tl, id, 0); - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu random insertions, %lluns/insert\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - /* Benchmark looking up the exact same context ids as we just set */ - prandom_seed_state(&prng, i915_selftest.random_seed); - end_time = count; - kt = ktime_get(); - while (end_time--) { - u64 id = i915_prandom_u64_state(&prng); - - if (!__i915_timeline_sync_is_later(&tl, id, 0)) { - mock_timeline_fini(&tl); - pr_err("Lookup of %llu failed\n", id); - return -EINVAL; - } - } - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu random lookups, %lluns/lookup\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - mock_timeline_fini(&tl); - cond_resched(); - - mock_timeline_init(&tl, 0); - - /* Benchmark setting the first N (in order) contexts */ - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - __i915_timeline_sync_set(&tl, count++, 0); - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu in-order insertions, %lluns/insert\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - /* Benchmark looking up the exact same context ids as we just set */ - end_time = count; - kt = ktime_get(); - while (end_time--) { - if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { - pr_err("Lookup of %lu failed\n", end_time); - mock_timeline_fini(&tl); - return -EINVAL; - } - } - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu in-order lookups, %lluns/lookup\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - mock_timeline_fini(&tl); - cond_resched(); - - mock_timeline_init(&tl, 0); - - /* Benchmark searching for a random context id and maybe changing it */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u32 id = random_engine(&prng); - u32 seqno = prandom_u32_state(&prng); - - if (!__i915_timeline_sync_is_later(&tl, id, seqno)) - __i915_timeline_sync_set(&tl, id, seqno); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_fini(&tl); - cond_resched(); - - /* Benchmark searching for a known context id and changing the seqno */ - for (last_order = 1, order = 1; order < 32; - ({ int tmp = last_order; last_order = order; order += tmp; })) { - unsigned int mask = BIT(order) - 1; - - mock_timeline_init(&tl, 0); - - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - /* Without assuming too many details of the underlying - * implementation, try to identify its phase-changes - * (if any)! - */ - u64 id = (u64)(count & mask) << order; - - __i915_timeline_sync_is_later(&tl, id, 0); - __i915_timeline_sync_set(&tl, id, 0); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", - __func__, count, order, - (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_fini(&tl); - cond_resched(); - } - - return 0; -} - -int i915_timeline_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_hwsp_freelist), - SUBTEST(igt_sync), - SUBTEST(bench_sync), - }; - - return i915_subtests(tests, NULL); -} - -static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (INTEL_GEN(rq->i915) >= 8) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = addr; - *cs++ = 0; - *cs++ = value; - } else if (INTEL_GEN(rq->i915) >= 4) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = 0; - *cs++ = addr; - *cs++ = value; - } else { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = addr; - *cs++ = value; - *cs++ = MI_NOOP; - } - - intel_ring_advance(rq, cs); - - return 0; -} - -static struct i915_request * -tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) -{ - struct i915_request *rq; - int err; - - lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ - - err = i915_timeline_pin(tl); - if (err) { - rq = ERR_PTR(err); - goto out; - } - - rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) - goto out_unpin; - - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); - i915_request_add(rq); - if (err) - rq = ERR_PTR(err); - -out_unpin: - i915_timeline_unpin(tl); -out: - if (IS_ERR(rq)) - pr_err("Failed to write to timeline!\n"); - return rq; -} - -static struct i915_timeline * -checked_i915_timeline_create(struct drm_i915_private *i915) -{ - struct i915_timeline *tl; - - tl = i915_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) - return tl; - - if (*tl->hwsp_seqno != tl->seqno) { - pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", - *tl->hwsp_seqno, tl->seqno); - i915_timeline_put(tl); - return ERR_PTR(-EINVAL); - } - - return tl; -} - -static int live_hwsp_engine(void *arg) -{ -#define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; - struct i915_timeline **timelines; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - unsigned long count, n; - int err = 0; - - /* - * Create a bunch of timelines and check we can write - * independently to each of their breadcrumb slots. - */ - - timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, - sizeof(*timelines), - GFP_KERNEL); - if (!timelines) - return -ENOMEM; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - count = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) - continue; - - for (n = 0; n < NUM_TIMELINES; n++) { - struct i915_timeline *tl; - struct i915_request *rq; - - tl = checked_i915_timeline_create(i915); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out; - } - - rq = tl_write(tl, engine, count); - if (IS_ERR(rq)) { - i915_timeline_put(tl); - err = PTR_ERR(rq); - goto out; - } - - timelines[count++] = tl; - } - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - - for (n = 0; n < count; n++) { - struct i915_timeline *tl = timelines[n]; - - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - n, *tl->hwsp_seqno); - err = -EINVAL; - } - i915_timeline_put(tl); - } - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - kvfree(timelines); - - return err; -#undef NUM_TIMELINES -} - -static int live_hwsp_alternate(void *arg) -{ -#define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; - struct i915_timeline **timelines; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - unsigned long count, n; - int err = 0; - - /* - * Create a bunch of timelines and check we can write - * independently to each of their breadcrumb slots with adjacent - * engines. - */ - - timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, - sizeof(*timelines), - GFP_KERNEL); - if (!timelines) - return -ENOMEM; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - count = 0; - for (n = 0; n < NUM_TIMELINES; n++) { - for_each_engine(engine, i915, id) { - struct i915_timeline *tl; - struct i915_request *rq; - - if (!intel_engine_can_store_dword(engine)) - continue; - - tl = checked_i915_timeline_create(i915); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out; - } - - rq = tl_write(tl, engine, count); - if (IS_ERR(rq)) { - i915_timeline_put(tl); - err = PTR_ERR(rq); - goto out; - } - - timelines[count++] = tl; - } - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - - for (n = 0; n < count; n++) { - struct i915_timeline *tl = timelines[n]; - - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - n, *tl->hwsp_seqno); - err = -EINVAL; - } - i915_timeline_put(tl); - } - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - kvfree(timelines); - - return err; -#undef NUM_TIMELINES -} - -static int live_hwsp_wrap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_timeline *tl; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = 0; - - /* - * Across a seqno wrap, we need to keep the old cacheline alive for - * foreign GPU references. - */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - tl = i915_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out_rpm; - } - if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) - goto out_free; - - err = i915_timeline_pin(tl); - if (err) - goto out_free; - - for_each_engine(engine, i915, id) { - const u32 *hwsp_seqno[2]; - struct i915_request *rq; - u32 seqno[2]; - - if (!intel_engine_can_store_dword(engine)) - continue; - - rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - tl->seqno = -4u; - - err = i915_timeline_get_seqno(tl, rq, &seqno[0]); - if (err) { - i915_request_add(rq); - goto out; - } - pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", - seqno[0], tl->hwsp_offset); - - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); - if (err) { - i915_request_add(rq); - goto out; - } - hwsp_seqno[0] = tl->hwsp_seqno; - - err = i915_timeline_get_seqno(tl, rq, &seqno[1]); - if (err) { - i915_request_add(rq); - goto out; - } - pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", - seqno[1], tl->hwsp_offset); - - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); - if (err) { - i915_request_add(rq); - goto out; - } - hwsp_seqno[1] = tl->hwsp_seqno; - - /* With wrap should come a new hwsp */ - GEM_BUG_ON(seqno[1] >= seqno[0]); - GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); - - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - pr_err("Wait for timeline writes timed out!\n"); - err = -EIO; - goto out; - } - - if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { - pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", - *hwsp_seqno[0], *hwsp_seqno[1], - seqno[0], seqno[1]); - err = -EINVAL; - goto out; - } - - i915_retire_requests(i915); /* recycle HWSP */ - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - - i915_timeline_unpin(tl); -out_free: - i915_timeline_put(tl); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - return err; -} - -static int live_hwsp_recycle(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - unsigned long count; - int err = 0; - - /* - * Check seqno writes into one timeline at a time. We expect to - * recycle the breadcrumb slot between iterations and neither - * want to confuse ourselves or the GPU. - */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - count = 0; - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - - if (!intel_engine_can_store_dword(engine)) - continue; - - do { - struct i915_timeline *tl; - struct i915_request *rq; - - tl = checked_i915_timeline_create(i915); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out; - } - - rq = tl_write(tl, engine, count); - if (IS_ERR(rq)) { - i915_timeline_put(tl); - err = PTR_ERR(rq); - goto out; - } - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - pr_err("Wait for timeline writes timed out!\n"); - i915_timeline_put(tl); - err = -EIO; - goto out; - } - - if (*tl->hwsp_seqno != count) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - count, *tl->hwsp_seqno); - err = -EINVAL; - } - - i915_timeline_put(tl); - count++; - - if (err) - goto out; - - i915_timelines_park(i915); /* Encourage recycling! */ - } while (!__igt_timeout(end_time, NULL)); - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - return err; -} - -int i915_timeline_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_hwsp_recycle), - SUBTEST(live_hwsp_engine), - SUBTEST(live_hwsp_alternate), - SUBTEST(live_hwsp_wrap), - }; - - if (i915_terminally_wedged(i915)) - return 0; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 021ba42a3a00..2741805b56c2 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -68,7 +68,7 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_fini(i915); mutex_unlock(&i915->drm.struct_mutex); - i915_timelines_fini(i915); + intel_timelines_fini(i915); drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); @@ -199,7 +199,7 @@ struct drm_i915_private *mock_gem_device(void) i915->gt.awake = true; - i915_timelines_init(i915); + intel_timelines_init(i915); mutex_lock(&i915->drm.struct_mutex); @@ -230,7 +230,7 @@ err_engine: mock_engine_free(i915->engine[RCS0]); err_unlock: mutex_unlock(&i915->drm.struct_mutex); - i915_timelines_fini(i915); + intel_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: drm_mode_config_cleanup(&i915->drm); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c deleted file mode 100644 index c80ac0fbdd3b..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation - */ - -#include "../i915_timeline.h" - -#include "mock_timeline.h" - -void mock_timeline_init(struct i915_timeline *timeline, u64 context) -{ - timeline->gt = NULL; - timeline->fence_context = context; - - mutex_init(&timeline->mutex); - - INIT_ACTIVE_REQUEST(&timeline->last_request); - INIT_LIST_HEAD(&timeline->requests); - - i915_syncmap_init(&timeline->sync); - - INIT_LIST_HEAD(&timeline->link); -} - -void mock_timeline_fini(struct i915_timeline *timeline) -{ - i915_syncmap_free(&timeline->sync); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h deleted file mode 100644 index b6deaa61110d..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation - */ - -#ifndef __MOCK_TIMELINE__ -#define __MOCK_TIMELINE__ - -struct i915_timeline; - -void mock_timeline_init(struct i915_timeline *timeline, u64 context); -void mock_timeline_fini(struct i915_timeline *timeline); - -#endif /* !__MOCK_TIMELINE__ */ -- cgit v1.2.3