[v9,21/21] drm/i915: Watchdog timeout: Export media reset count from GuC to debugfs

Submitted by Michel Thierry on June 15, 2017, 8:18 p.m.

Details

Message ID 20170615201828.23144-22-michel.thierry@intel.com
State New
Headers show
Series "Gen8+ engine-reset" ( rev: 13 ) in Intel GFX

Browsing this patch as part of:
"Gen8+ engine-reset" rev 13 in Intel GFX
<< prev patch [21/21] next patch >>

Commit Message

Michel Thierry June 15, 2017, 8:18 p.m.
Starting with firmware v8.8, the GuC provides the count of media engine
resets (watchdog timeout). This information is available in the GuC shared
context data struct, which resides in the first page of the default
(kernel) LRC context.

Since GuC-handled engine resets are transparent to both the kernel and the
user, provide a simple debugfs entry to see the number of times a media
reset has happened.

v2: Remove unnecessary struct_mutex, _get_dirty_page and kmap_atomic;
use READ_ONCE. (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c   | 22 ++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_fwif.h | 18 ++++++++++++++++++
 2 files changed, 40 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e9c5527b7bff..13353e7c397f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1403,6 +1403,26 @@  static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static u32 i915_watchdog_reset_count(struct drm_i915_private *dev_priv)
+{
+	struct i915_gem_context *ctx;
+	struct page *page;
+	struct guc_shared_ctx_data *guc_shared_data;
+	u32 guc_media_reset_count;
+
+	if (!i915.enable_guc_submission)
+		return 0;
+
+	ctx = dev_priv->kernel_context;
+	page = i915_gem_object_get_page(ctx->engine[RCS].state->obj,
+					LRC_GUCSHR_PN);
+	guc_shared_data = kmap(page);
+	guc_media_reset_count = READ_ONCE(guc_shared_data->media_reset_count);
+	kunmap(page);
+
+	return guc_media_reset_count;
+}
+
 static int i915_reset_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -1411,6 +1431,8 @@  static int i915_reset_info(struct seq_file *m, void *unused)
 	enum intel_engine_id id;
 
 	seq_printf(m, "full gpu reset = %u\n", i915_reset_count(error));
+	seq_printf(m, "GuC watchdog/media reset = %u\n",
+		   i915_watchdog_reset_count(dev_priv));
 
 	for_each_engine(engine, dev_priv, id) {
 		seq_printf(m, "%s = %u\n", engine->name,
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 2a42ef5f40f0..526c70614b38 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -543,6 +543,24 @@  union guc_log_control {
 	u32 value;
 } __packed;
 
+/* GuC Shared Context Data Struct */
+struct guc_shared_ctx_data {
+	u32 addr_of_last_preempted_data_low;
+	u32 addr_of_last_preempted_data_high;
+	u32 addr_of_last_preempted_data_high_tmp;
+	u32 padding;
+	u32 is_mapped_to_proxy;
+	u32 proxy_ctx_id;
+	u32 engine_reset_ctx_id;
+	u32 media_reset_count;
+	u32 reserved[8];
+	u32 uk_last_ctx_switch_reason;
+	u32 was_reset;
+	u32 lrca_gpu_addr;
+	u32 execlist_ctx;
+	u32 reserved1[32];
+} __packed;
+
 /* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */
 enum intel_guc_action {
 	INTEL_GUC_ACTION_DEFAULT = 0x0,