diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2020-01-10 13:30:56 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2020-01-10 16:34:33 +0100 |
commit | 742379c0c4001fd2a6e02005c1ffa1ff611b28fa (patch) | |
tree | 6b553daa422f74ff42de00abc3b08710a886f19b /drivers/gpu/drm/i915/i915_gpu_error.h | |
parent | drm/i915/gt: Mark ring->vma as active while pinned (diff) | |
download | linux-742379c0c4001fd2a6e02005c1ffa1ff611b28fa.tar.xz linux-742379c0c4001fd2a6e02005c1ffa1ff611b28fa.zip |
drm/i915: Start chopping up the GPU error capture
In the near future, we will want to start a GPU error capture from a new
context, from inside the softirq region of a forced preemption. To do
so requires us to break up the monolithic error capture to provide new
entry points with finer control; in particular focusing on one
engine/gt, and being able to compose an error state from little pieces
of HW capture.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Andi Shyti <andi.shyti@intel.com>
Acked-by: Andi Shyti <andi.shyti@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200110123059.1348712-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.h')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.h | 328 |
1 files changed, 209 insertions, 119 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 5d2c3372ff99..0df9d8c32056 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -25,43 +25,105 @@ #include "i915_scheduler.h" struct drm_i915_private; +struct i915_vma_compress; +struct intel_engine_capture_vma; struct intel_overlay_error_state; struct intel_display_error_state; -struct i915_gpu_state { - struct kref ref; - ktime_t time; - ktime_t boottime; - ktime_t uptime; - unsigned long capture; +struct i915_vma_coredump { + struct i915_vma_coredump *next; - struct drm_i915_private *i915; + char name[20]; + + u64 gtt_offset; + u64 gtt_size; + u32 gtt_page_sizes; + + int num_pages; + int page_count; + int unused; + u32 *pages[0]; +}; + +struct i915_request_coredump { + unsigned long flags; + pid_t pid; + u32 context; + u32 seqno; + u32 start; + u32 head; + u32 tail; + struct i915_sched_attr sched_attr; +}; + +struct intel_engine_coredump { + const struct intel_engine_cs *engine; - char error_msg[128]; bool simulated; - bool awake; - bool wakelock; - bool suspended; - int iommu; + int num_requests; u32 reset_count; - u32 suspend_count; - struct intel_device_info device_info; - struct intel_runtime_info runtime_info; - struct intel_driver_caps driver_caps; - struct i915_params params; - struct i915_error_uc { - struct intel_uc_fw guc_fw; - struct intel_uc_fw huc_fw; - struct drm_i915_error_object *guc_log; - } uc; + /* position of active request inside the ring */ + u32 rq_head, rq_post, rq_tail; + + /* our own tracking of ring head and tail */ + u32 cpu_ring_head; + u32 cpu_ring_tail; + + /* Register state */ + u32 ccid; + u32 start; + u32 tail; + u32 head; + u32 ctl; + u32 mode; + u32 hws; + u32 ipeir; + u32 ipehr; + u32 bbstate; + u32 instpm; + u32 instps; + u64 bbaddr; + u64 acthd; + u32 fault_reg; + u64 faddr; + u32 rc_psmi; /* sleep state */ + struct intel_instdone instdone; + + struct i915_gem_context_coredump { + char comm[TASK_COMM_LEN]; + pid_t pid; + int active; + int guilty; + struct i915_sched_attr sched_attr; + } context; + + struct i915_vma_coredump *vma; + + struct i915_request_coredump *requests, execlist[EXECLIST_MAX_PORTS]; + unsigned int num_ports; + + struct { + u32 gfx_mode; + union { + u64 pdp[4]; + u32 pp_dir_base; + }; + } vm_info; + + struct intel_engine_coredump *next; +}; + +struct intel_gt_coredump { + const struct intel_gt *_gt; + bool awake; + bool simulated; /* Generic register state */ u32 eir; u32 pgtbl_er; u32 ier; u32 gtier[6], ngtier; - u32 ccid; u32 derrmr; u32 forcewake; u32 error; /* gen6+ */ @@ -80,91 +142,45 @@ struct i915_gpu_state { u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; + + struct intel_engine_coredump *engine; + + struct intel_uc_coredump { + struct intel_uc_fw guc_fw; + struct intel_uc_fw huc_fw; + struct i915_vma_coredump *guc_log; + } *uc; + + struct intel_gt_coredump *next; +}; + +struct i915_gpu_coredump { + struct kref ref; + ktime_t time; + ktime_t boottime; + ktime_t uptime; + unsigned long capture; + + struct drm_i915_private *i915; + + struct intel_gt_coredump *gt; + + char error_msg[128]; + bool simulated; + bool wakelock; + bool suspended; + int iommu; + u32 reset_count; + u32 suspend_count; + + struct intel_device_info device_info; + struct intel_runtime_info runtime_info; + struct intel_driver_caps driver_caps; + struct i915_params params; + struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; - struct drm_i915_error_engine { - const struct intel_engine_cs *engine; - - /* Software tracked state */ - bool idle; - int num_requests; - u32 reset_count; - - /* position of active request inside the ring */ - u32 rq_head, rq_post, rq_tail; - - /* our own tracking of ring head and tail */ - u32 cpu_ring_head; - u32 cpu_ring_tail; - - /* Register state */ - u32 start; - u32 tail; - u32 head; - u32 ctl; - u32 mode; - u32 hws; - u32 ipeir; - u32 ipehr; - u32 bbstate; - u32 instpm; - u32 instps; - u64 bbaddr; - u64 acthd; - u32 fault_reg; - u64 faddr; - u32 rc_psmi; /* sleep state */ - struct intel_instdone instdone; - - struct drm_i915_error_context { - char comm[TASK_COMM_LEN]; - pid_t pid; - int active; - int guilty; - struct i915_sched_attr sched_attr; - } context; - - struct drm_i915_error_object { - u64 gtt_offset; - u64 gtt_size; - u32 gtt_page_sizes; - int num_pages; - int page_count; - int unused; - u32 *pages[0]; - } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; - - struct drm_i915_error_object **user_bo; - long user_bo_count; - - struct drm_i915_error_object *wa_ctx; - struct drm_i915_error_object *default_state; - - struct drm_i915_error_request { - unsigned long flags; - long jiffies; - pid_t pid; - u32 context; - u32 seqno; - u32 start; - u32 head; - u32 tail; - struct i915_sched_attr sched_attr; - } *requests, execlist[EXECLIST_MAX_PORTS]; - unsigned int num_ports; - - struct { - u32 gfx_mode; - union { - u64 pdp[4]; - u32 pp_dir_base; - }; - } vm_info; - - struct drm_i915_error_engine *next; - } *engine; - struct scatterlist *sgl, *fit; }; @@ -172,7 +188,7 @@ struct i915_gpu_error { /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct i915_gpu_state *first_error; + struct i915_gpu_coredump *first_error; atomic_t pending_fb_pin; @@ -200,29 +216,54 @@ struct drm_i915_error_state_buf { __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); -void i915_capture_error_state(struct drm_i915_private *dev_priv, - intel_engine_mask_t engine_mask, - const char *error_msg); +struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915); +void i915_capture_error_state(struct drm_i915_private *dev_priv); + +struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp); + +struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp); + +struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp); -static inline struct i915_gpu_state * -i915_gpu_state_get(struct i915_gpu_state *gpu) +struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp); + +void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress *compress); + +struct i915_vma_compress * +i915_vma_capture_prepare(struct intel_gt_coredump *gt); + +void i915_vma_capture_finish(struct intel_gt_coredump *gt, + struct i915_vma_compress *compress); + +void i915_error_state_store(struct i915_gpu_coredump *error); + +static inline struct i915_gpu_coredump * +i915_gpu_coredump_get(struct i915_gpu_coredump *gpu) { kref_get(&gpu->ref); return gpu; } -ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, - char *buf, loff_t offset, size_t count); +ssize_t +i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, + char *buf, loff_t offset, size_t count); -void __i915_gpu_state_free(struct kref *kref); -static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +void __i915_gpu_coredump_free(struct kref *kref); +static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) { if (gpu) - kref_put(&gpu->ref, __i915_gpu_state_free); + kref_put(&gpu->ref, __i915_gpu_coredump_free); } -struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); void i915_reset_error_state(struct drm_i915_private *i915); void i915_disable_error_state(struct drm_i915_private *i915, int err); @@ -234,7 +275,56 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, { } -static inline struct i915_gpu_state * +static inline struct i915_gpu_coredump * +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_gt_coredump * +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_engine_coredump * +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) +{ + return NULL; +} + +static inline struct intel_engine_capture_vma * +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, + struct i915_request *rq, + gfp_t gfp) +{ + return NULL; +} + +static inline void +intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, + struct intel_engine_capture_vma *capture, + struct i915_vma_compress *compress) +{ +} + +static inline struct i915_vma_compress * +i915_vma_compress_prepare(struct intel_gt_coredump *gt) +{ + return NULL; +} + +void i915_vma_compress_prepare(struct i915_vma_compress *compress) +{ +} + +static inline void +i915_error_state_store(struct drm_i915_private *i915, + struct i915_gpu_coredump *error) +{ +} + +static inline struct i915_gpu_coredump * i915_first_error_state(struct drm_i915_private *i915) { return ERR_PTR(-ENODEV); |