[v5,07/18] drm/i915/tdr: Add engine reset count to error state

Submitted by Michel Thierry on March 25, 2017, 1:29 a.m.

Details

Message ID 20170325013010.36244-8-michel.thierry@intel.com
State New
Headers show
Series "Gen8+ engine-reset" ( rev: 1 ) in Intel GFX

Browsing this patch as part of:
"Gen8+ engine-reset" rev 1 in Intel GFX
<< prev patch [7/18] next patch >>

Commit Message

Michel Thierry March 25, 2017, 1:29 a.m.
From: Arun Siluvery <arun.siluvery@linux.intel.com>

The driver maintains a count of how many times a given engine has been
reset; it is useful to capture this in the error state as well. It gives an
idea of how the engine was coping with the workloads it was executing before
this error state was captured.

A follow-up patch will provide this information in debugfs.

v2: s/engine_reset/reset_engine/ (Chris)
    Define count as unsigned int (Tvrtko)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c       | 2 ++
 drivers/gpu/drm/i915/i915_drv.h       | 8 ++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
 3 files changed, 13 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9c7bdcf03c13..a111b39bbc12 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1967,6 +1967,8 @@  int i915_reset_engine(struct intel_engine_cs *engine)
 	if (ret)
 		goto error;
 
+	error->reset_engine_count[engine->id]++;
+
 wakeup:
 	enable_irq(dev_priv->drm.irq);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 67201ecd98e5..fbb4f200756a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -999,6 +999,7 @@  struct i915_gpu_state {
 		enum intel_engine_hangcheck_action hangcheck_action;
 		struct i915_address_space *vm;
 		int num_requests;
+		u32 reset_count;
 
 		/* position of active request inside the ring */
 		u32 rq_head, rq_post, rq_tail;
@@ -1639,6 +1640,7 @@  struct i915_gpu_error {
 
 	/* if available, engine-specific reset is tried before full gpu reset */
 	u32 reset_engine_mask;
+	u32 reset_engine_count[I915_NUM_ENGINES];
 
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
@@ -3450,6 +3452,12 @@  static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
+static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
+					  struct intel_engine_cs *engine)
+{
+	return READ_ONCE(error->reset_engine_count[engine->id]);
+}
+
 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv,
 			   unsigned int engine_mask);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5099b3599c34..5d015bcc7484 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -463,6 +463,7 @@  static void error_print_engine(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
 		   ee->hangcheck_timestamp,
 		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	error_print_request(m, "  ELSP[0]: ", &ee->execlist[0]);
 	error_print_request(m, "  ELSP[1]: ", &ee->execlist[1]);
@@ -1236,6 +1237,8 @@  static void error_record_engine_registers(struct i915_gpu_state *error,
 	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
 	ee->hangcheck_action = engine->hangcheck.action;
 	ee->hangcheck_stalled = engine->hangcheck.stalled;
+	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
+						  engine);
 
 	if (USES_PPGTT(dev_priv)) {
 		int i;