summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gpu_error.h
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2020-01-10 13:30:56 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2020-01-10 16:34:33 +0100
commit742379c0c4001fd2a6e02005c1ffa1ff611b28fa (patch)
tree6b553daa422f74ff42de00abc3b08710a886f19b /drivers/gpu/drm/i915/i915_gpu_error.h
parentdrm/i915/gt: Mark ring->vma as active while pinned (diff)
downloadlinux-742379c0c4001fd2a6e02005c1ffa1ff611b28fa.tar.xz
linux-742379c0c4001fd2a6e02005c1ffa1ff611b28fa.zip
drm/i915: Start chopping up the GPU error capture
In the near future, we will want to start a GPU error capture from a new context, from inside the softirq region of a forced preemption. To do so requires us to break up the monolithic error capture to provide new entry points with finer control; in particular focusing on one engine/gt, and being able to compose an error state from little pieces of HW capture. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Andi Shyti <andi.shyti@intel.com> Acked-by: Andi Shyti <andi.shyti@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200110123059.1348712-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.h')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h328
1 files changed, 209 insertions, 119 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 5d2c3372ff99..0df9d8c32056 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -25,43 +25,105 @@
#include "i915_scheduler.h"
struct drm_i915_private;
+struct i915_vma_compress;
+struct intel_engine_capture_vma;
struct intel_overlay_error_state;
struct intel_display_error_state;
-struct i915_gpu_state {
- struct kref ref;
- ktime_t time;
- ktime_t boottime;
- ktime_t uptime;
- unsigned long capture;
+struct i915_vma_coredump {
+ struct i915_vma_coredump *next;
- struct drm_i915_private *i915;
+ char name[20];
+
+ u64 gtt_offset;
+ u64 gtt_size;
+ u32 gtt_page_sizes;
+
+ int num_pages;
+ int page_count;
+ int unused;
+ u32 *pages[0];
+};
+
+struct i915_request_coredump {
+ unsigned long flags;
+ pid_t pid;
+ u32 context;
+ u32 seqno;
+ u32 start;
+ u32 head;
+ u32 tail;
+ struct i915_sched_attr sched_attr;
+};
+
+struct intel_engine_coredump {
+ const struct intel_engine_cs *engine;
- char error_msg[128];
bool simulated;
- bool awake;
- bool wakelock;
- bool suspended;
- int iommu;
+ int num_requests;
u32 reset_count;
- u32 suspend_count;
- struct intel_device_info device_info;
- struct intel_runtime_info runtime_info;
- struct intel_driver_caps driver_caps;
- struct i915_params params;
- struct i915_error_uc {
- struct intel_uc_fw guc_fw;
- struct intel_uc_fw huc_fw;
- struct drm_i915_error_object *guc_log;
- } uc;
+ /* position of active request inside the ring */
+ u32 rq_head, rq_post, rq_tail;
+
+ /* our own tracking of ring head and tail */
+ u32 cpu_ring_head;
+ u32 cpu_ring_tail;
+
+ /* Register state */
+ u32 ccid;
+ u32 start;
+ u32 tail;
+ u32 head;
+ u32 ctl;
+ u32 mode;
+ u32 hws;
+ u32 ipeir;
+ u32 ipehr;
+ u32 bbstate;
+ u32 instpm;
+ u32 instps;
+ u64 bbaddr;
+ u64 acthd;
+ u32 fault_reg;
+ u64 faddr;
+ u32 rc_psmi; /* sleep state */
+ struct intel_instdone instdone;
+
+ struct i915_gem_context_coredump {
+ char comm[TASK_COMM_LEN];
+ pid_t pid;
+ int active;
+ int guilty;
+ struct i915_sched_attr sched_attr;
+ } context;
+
+ struct i915_vma_coredump *vma;
+
+ struct i915_request_coredump *requests, execlist[EXECLIST_MAX_PORTS];
+ unsigned int num_ports;
+
+ struct {
+ u32 gfx_mode;
+ union {
+ u64 pdp[4];
+ u32 pp_dir_base;
+ };
+ } vm_info;
+
+ struct intel_engine_coredump *next;
+};
+
+struct intel_gt_coredump {
+ const struct intel_gt *_gt;
+ bool awake;
+ bool simulated;
/* Generic register state */
u32 eir;
u32 pgtbl_er;
u32 ier;
u32 gtier[6], ngtier;
- u32 ccid;
u32 derrmr;
u32 forcewake;
u32 error; /* gen6+ */
@@ -80,91 +142,45 @@ struct i915_gpu_state {
u32 nfence;
u64 fence[I915_MAX_NUM_FENCES];
+
+ struct intel_engine_coredump *engine;
+
+ struct intel_uc_coredump {
+ struct intel_uc_fw guc_fw;
+ struct intel_uc_fw huc_fw;
+ struct i915_vma_coredump *guc_log;
+ } *uc;
+
+ struct intel_gt_coredump *next;
+};
+
+struct i915_gpu_coredump {
+ struct kref ref;
+ ktime_t time;
+ ktime_t boottime;
+ ktime_t uptime;
+ unsigned long capture;
+
+ struct drm_i915_private *i915;
+
+ struct intel_gt_coredump *gt;
+
+ char error_msg[128];
+ bool simulated;
+ bool wakelock;
+ bool suspended;
+ int iommu;
+ u32 reset_count;
+ u32 suspend_count;
+
+ struct intel_device_info device_info;
+ struct intel_runtime_info runtime_info;
+ struct intel_driver_caps driver_caps;
+ struct i915_params params;
+
struct intel_overlay_error_state *overlay;
struct intel_display_error_state *display;
- struct drm_i915_error_engine {
- const struct intel_engine_cs *engine;
-
- /* Software tracked state */
- bool idle;
- int num_requests;
- u32 reset_count;
-
- /* position of active request inside the ring */
- u32 rq_head, rq_post, rq_tail;
-
- /* our own tracking of ring head and tail */
- u32 cpu_ring_head;
- u32 cpu_ring_tail;
-
- /* Register state */
- u32 start;
- u32 tail;
- u32 head;
- u32 ctl;
- u32 mode;
- u32 hws;
- u32 ipeir;
- u32 ipehr;
- u32 bbstate;
- u32 instpm;
- u32 instps;
- u64 bbaddr;
- u64 acthd;
- u32 fault_reg;
- u64 faddr;
- u32 rc_psmi; /* sleep state */
- struct intel_instdone instdone;
-
- struct drm_i915_error_context {
- char comm[TASK_COMM_LEN];
- pid_t pid;
- int active;
- int guilty;
- struct i915_sched_attr sched_attr;
- } context;
-
- struct drm_i915_error_object {
- u64 gtt_offset;
- u64 gtt_size;
- u32 gtt_page_sizes;
- int num_pages;
- int page_count;
- int unused;
- u32 *pages[0];
- } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
-
- struct drm_i915_error_object **user_bo;
- long user_bo_count;
-
- struct drm_i915_error_object *wa_ctx;
- struct drm_i915_error_object *default_state;
-
- struct drm_i915_error_request {
- unsigned long flags;
- long jiffies;
- pid_t pid;
- u32 context;
- u32 seqno;
- u32 start;
- u32 head;
- u32 tail;
- struct i915_sched_attr sched_attr;
- } *requests, execlist[EXECLIST_MAX_PORTS];
- unsigned int num_ports;
-
- struct {
- u32 gfx_mode;
- union {
- u64 pdp[4];
- u32 pp_dir_base;
- };
- } vm_info;
-
- struct drm_i915_error_engine *next;
- } *engine;
-
struct scatterlist *sgl, *fit;
};
@@ -172,7 +188,7 @@ struct i915_gpu_error {
/* For reset and error_state handling. */
spinlock_t lock;
/* Protected by the above dev->gpu_error.lock. */
- struct i915_gpu_state *first_error;
+ struct i915_gpu_coredump *first_error;
atomic_t pending_fb_pin;
@@ -200,29 +216,54 @@ struct drm_i915_error_state_buf {
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
-struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
- intel_engine_mask_t engine_mask,
- const char *error_msg);
+struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
+void i915_capture_error_state(struct drm_i915_private *dev_priv);
+
+struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
+
+struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
+
+struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
-static inline struct i915_gpu_state *
-i915_gpu_state_get(struct i915_gpu_state *gpu)
+struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+ struct i915_request *rq,
+ gfp_t gfp);
+
+void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+ struct intel_engine_capture_vma *capture,
+ struct i915_vma_compress *compress);
+
+struct i915_vma_compress *
+i915_vma_capture_prepare(struct intel_gt_coredump *gt);
+
+void i915_vma_capture_finish(struct intel_gt_coredump *gt,
+ struct i915_vma_compress *compress);
+
+void i915_error_state_store(struct i915_gpu_coredump *error);
+
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
{
kref_get(&gpu->ref);
return gpu;
}
-ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
- char *buf, loff_t offset, size_t count);
+ssize_t
+i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
+ char *buf, loff_t offset, size_t count);
-void __i915_gpu_state_free(struct kref *kref);
-static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
+void __i915_gpu_coredump_free(struct kref *kref);
+static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
if (gpu)
- kref_put(&gpu->ref, __i915_gpu_state_free);
+ kref_put(&gpu->ref, __i915_gpu_coredump_free);
}
-struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
+struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
void i915_disable_error_state(struct drm_i915_private *i915, int err);
@@ -234,7 +275,56 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
{
}
-static inline struct i915_gpu_state *
+static inline struct i915_gpu_coredump *
+i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
+{
+ return NULL;
+}
+
+static inline struct intel_gt_coredump *
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+{
+ return NULL;
+}
+
+static inline struct intel_engine_coredump *
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+{
+ return NULL;
+}
+
+static inline struct intel_engine_capture_vma *
+intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
+ struct i915_request *rq,
+ gfp_t gfp)
+{
+ return NULL;
+}
+
+static inline void
+intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
+ struct intel_engine_capture_vma *capture,
+ struct i915_vma_compress *compress)
+{
+}
+
+static inline struct i915_vma_compress *
+i915_vma_compress_prepare(struct intel_gt_coredump *gt)
+{
+ return NULL;
+}
+
+void i915_vma_compress_prepare(struct i915_vma_compress *compress)
+{
+}
+
+static inline void
+i915_error_state_store(struct drm_i915_private *i915,
+ struct i915_gpu_coredump *error)
+{
+}
+
+static inline struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915)
{
return ERR_PTR(-ENODEV);