summary refs log tree commit diff stats
path: root/drivers/gpu/drm/i915/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- drivers/gpu/drm/i915/intel_lrc.c 215
1 files changed, 124 insertions, 91 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 251143361f31..bc86585b9fbb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -226,10 +226,16 @@ enum {
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
+#define WA_TAIL_DWORDS 2
+
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
static int intel_lr_context_pin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
+static void execlists_init_reg_state(u32 *reg_state,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring);
/**
* intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -269,8 +275,7 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
engine->disable_lite_restore_wa =
- (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
- IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+ IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) &&
(engine->id == VCS || engine->id == VCS2);
engine->ctx_desc_template = GEN8_CTX_VALID;
@@ -621,6 +626,10 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
request->ring = ce->ring;
+ ret = intel_lr_context_pin(request->ctx, engine);
+ if (ret)
+ return ret;
+
if (i915.enable_guc_submission) {
/*
* Check that the GuC has space for the request before
@@ -629,21 +638,17 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
*/
ret = i915_guc_wq_reserve(request);
if (ret)
- return ret;
+ goto err_unpin;
}
- ret = intel_lr_context_pin(request->ctx, engine);
- if (ret)
- return ret;
-
ret = intel_ring_begin(request, 0);
if (ret)
- goto err_unpin;
+ goto err_unreserve;
if (!ce->initialised) {
ret = engine->init_context(request);
if (ret)
- goto err_unpin;
+ goto err_unreserve;
ce->initialised = true;
}
@@ -658,6 +663,9 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
+err_unreserve:
+ if (i915.enable_guc_submission)
+ i915_guc_wq_unreserve(request);
err_unpin:
intel_lr_context_unpin(request->ctx, engine);
return ret;
@@ -708,7 +716,6 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
{
struct intel_context *ce = &ctx->engine[engine->id];
void *vaddr;
- u32 *lrc_reg_state;
int ret;
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
@@ -727,17 +734,16 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
goto unpin_vma;
}
- lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
-
ret = intel_ring_pin(ce->ring);
if (ret)
goto unpin_map;
intel_lr_context_descriptor_update(ctx, engine);
- lrc_reg_state[CTX_RING_BUFFER_START+1] =
+ ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
i915_ggtt_offset(ce->ring->vma);
- ce->lrc_reg_state = lrc_reg_state;
+
ce->state->obj->dirty = true;
/* Invalidate GuC TLB. */
@@ -846,13 +852,12 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
/*
- * WaDisableLSQCROPERFforOCL:skl,kbl
+ * WaDisableLSQCROPERFforOCL:kbl
* This WA is implemented in skl_init_clock_gating() but since
* this batch updates GEN8_L3SQCREG4 with default value we need to
* set this bit here to retain the WA during flush.
*/
- if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) ||
- IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
+ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
@@ -995,9 +1000,8 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
struct drm_i915_private *dev_priv = engine->i915;
uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
- /* WaDisableCtxRestoreArbitration:skl,bxt */
- if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_D0) ||
- IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
+ /* WaDisableCtxRestoreArbitration:bxt */
+ if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
@@ -1068,9 +1072,8 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
{
uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
- /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
- if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_B0) ||
- IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) {
+ /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
+ if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) {
wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
wa_ctx_emit(batch, index,
@@ -1097,9 +1100,8 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
wa_ctx_emit(batch, index, MI_NOOP);
}
- /* WaDisableCtxRestoreArbitration:skl,bxt */
- if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_D0) ||
- IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
+ /* WaDisableCtxRestoreArbitration:bxt */
+ if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
@@ -1231,7 +1233,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
lrc_init_hws(engine);
- intel_engine_reset_irq(engine);
+ intel_engine_reset_breadcrumbs(engine);
I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
@@ -1243,8 +1245,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
intel_engine_init_hangcheck(engine);
- if (!execlists_elsp_idle(engine))
+ /* After a GPU reset, we may have requests to replay */
+ if (!execlists_elsp_idle(engine)) {
+ engine->execlist_port[0].count = 0;
+ engine->execlist_port[1].count = 0;
execlists_submit_ports(engine);
+ }
return 0;
}
@@ -1289,8 +1295,21 @@ static void reset_common_ring(struct intel_engine_cs *engine,
struct execlist_port *port = engine->execlist_port;
struct intel_context *ce = &request->ctx->engine[engine->id];
+ /* We want a simple context + ring to execute the breadcrumb update.
+ * We cannot rely on the context being intact across the GPU hang,
+ * so clear it and rebuild just what we need for the breadcrumb.
+ * All pending requests for this context will be zapped, and any
+ * future request will be after userspace has had the opportunity
+ * to recreate its own state.
+ */
+ execlists_init_reg_state(ce->lrc_reg_state,
+ request->ctx, engine, ce->ring);
+
/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
+ ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
+ i915_ggtt_offset(ce->ring->vma);
ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+
request->ring->head = request->postfix;
request->ring->last_retired_head = -1;
intel_ring_update_space(request->ring);
@@ -1306,10 +1325,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
memset(&port[1], 0, sizeof(port[1]));
}
- /* CS is stopped, and we will resubmit both ports on resume */
GEM_BUG_ON(request->ctx != port[0].request->ctx);
- port[0].count = 0;
- port[1].count = 0;
+
+ /* Reset WaIdleLiteRestore:bdw,skl as well */
+ request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
}
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -1547,7 +1566,6 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
* used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore).
*/
-#define WA_TAIL_DWORDS 2
static int gen8_emit_request(struct drm_i915_gem_request *request)
{
@@ -1630,9 +1648,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv;
- if (!intel_engine_initialized(engine))
- return;
-
/*
* Tasklet cannot be active at this point due intel_mark_active/idle
* so this is just for documentation.
@@ -1659,13 +1674,16 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
lrc_destroy_wa_ctx_obj(engine);
engine->i915 = NULL;
+ dev_priv->engine[engine->id] = NULL;
+ kfree(engine);
}
void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
+ enum intel_engine_id id;
- for_each_engine(engine, dev_priv)
+ for_each_engine(engine, dev_priv, id)
engine->submit_request = execlists_submit_request;
}
@@ -1894,38 +1912,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
return indirect_ctx_offset;
}
-static int
-populate_lr_context(struct i915_gem_context *ctx,
- struct drm_i915_gem_object *ctx_obj,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
+static void execlists_init_reg_state(u32 *reg_state,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
{
- struct drm_i915_private *dev_priv = ctx->i915;
- struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
- void *vaddr;
- u32 *reg_state;
- int ret;
-
- if (!ppgtt)
- ppgtt = dev_priv->mm.aliasing_ppgtt;
-
- ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
- if (ret) {
- DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
- return ret;
- }
-
- vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
- return ret;
- }
- ctx_obj->dirty = true;
-
- /* The second page of the context object contains some fields which must
- * be set up prior to the first execution. */
- reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
* commands followed by (reg, value) pairs. The values we are setting here are
@@ -1939,19 +1932,16 @@ populate_lr_context(struct i915_gem_context *ctx,
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
(HAS_RESOURCE_STREAMER(dev_priv) ?
- CTX_CTRL_RS_CTX_ENABLE : 0)));
+ CTX_CTRL_RS_CTX_ENABLE : 0)));
ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base),
0);
ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base),
0);
- /* Ring buffer start address is not known until the buffer is pinned.
- * It is written to the context image in execlists_update_context()
- */
ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START,
RING_START(engine->mmio_base), 0);
ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
RING_CTL(engine->mmio_base),
- ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
+ RING_CTL_SIZE(ring->size) | RING_VALID);
ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U,
RING_BBADDR_UDW(engine->mmio_base), 0);
ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L,
@@ -2029,6 +2019,36 @@ populate_lr_context(struct i915_gem_context *ctx,
ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
make_rpcs(dev_priv));
}
+}
+
+static int
+populate_lr_context(struct i915_gem_context *ctx,
+ struct drm_i915_gem_object *ctx_obj,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
+{
+ void *vaddr;
+ int ret;
+
+ ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
+ return ret;
+ }
+
+ vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ ret = PTR_ERR(vaddr);
+ DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
+ return ret;
+ }
+ ctx_obj->dirty = true;
+
+ /* The second page of the context object contains some fields which must
+ * be set up prior to the first execution. */
+
+ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ ctx, engine, ring);
i915_gem_object_unpin_map(ctx_obj);
@@ -2129,30 +2149,43 @@ error_deref_obj:
void intel_lr_context_resume(struct drm_i915_private *dev_priv)
{
- struct i915_gem_context *ctx = dev_priv->kernel_context;
struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ enum intel_engine_id id;
+
+ /* Because we emit WA_TAIL_DWORDS there may be a disparity
+ * between our bookkeeping in ce->ring->head and ce->ring->tail and
+ * that stored in context. As we only write new commands from
+ * ce->ring->tail onwards, everything before that is junk. If the GPU
+ * starts reading from its RING_HEAD from the context, it may try to
+ * execute that junk and die.
+ *
+ * So to avoid that we reset the context images upon resume. For
+ * simplicity, we just zero RING_HEAD/RING_TAIL and our ring head/tail.
+ */
+ list_for_each_entry(ctx, &dev_priv->context_list, link) {
+ for_each_engine(engine, dev_priv, id) {
+ struct intel_context *ce = &ctx->engine[engine->id];
+ u32 *reg;
- for_each_engine(engine, dev_priv) {
- struct intel_context *ce = &ctx->engine[engine->id];
- void *vaddr;
- uint32_t *reg_state;
-
- if (!ce->state)
- continue;
-
- vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
- if (WARN_ON(IS_ERR(vaddr)))
- continue;
+ if (!ce->state)
+ continue;
- reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ reg = i915_gem_object_pin_map(ce->state->obj,
+ I915_MAP_WB);
+ if (WARN_ON(IS_ERR(reg)))
+ continue;
- reg_state[CTX_RING_HEAD+1] = 0;
- reg_state[CTX_RING_TAIL+1] = 0;
+ reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
+ reg[CTX_RING_HEAD+1] = 0;
+ reg[CTX_RING_TAIL+1] = 0;
- ce->state->obj->dirty = true;
- i915_gem_object_unpin_map(ce->state->obj);
+ ce->state->obj->dirty = true;
+ i915_gem_object_unpin_map(ce->state->obj);
- ce->ring->head = 0;
- ce->ring->tail = 0;
+ ce->ring->head = ce->ring->tail = 0;
+ ce->ring->last_retired_head = -1;
+ intel_ring_update_space(ce->ring);
+ }
}
}