[1/3] drm/i915: Capture dmc firmware information before reset

Submitted by Michel Thierry on Feb. 24, 2017, 7:05 p.m.

Details

Message ID 20170224190530.31482-1-michel.thierry@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Michel Thierry Feb. 24, 2017, 7:05 p.m.
The firmware may change between the hang and cat /sys/class/drm/card0/error

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 22 ++++++++++++++++------
 2 files changed, 20 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a74b87b1b5a9..3b0cff05f5f7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -953,6 +953,10 @@  struct i915_gpu_state {
 	u32 gab_ctl;
 	u32 gfx_mode;
 
+	/* Firmware load state */
+	u32 dmc_loaded;
+	u32 dmc_version;
+
 	u32 nfence;
 	u64 fence[I915_MAX_NUM_FENCES];
 	struct intel_overlay_error_state *overlay;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2b1d15668192..ae6f0092b046 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -623,13 +623,10 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
 
 	if (HAS_CSR(dev_priv)) {
-		struct intel_csr *csr = &dev_priv->csr;
-
-		err_printf(m, "DMC loaded: %s\n",
-			   yesno(csr->dmc_payload != NULL));
+		err_printf(m, "DMC loaded: %s\n", yesno(error->dmc_loaded));
 		err_printf(m, "DMC fw version: %d.%d\n",
-			   CSR_VERSION_MAJOR(csr->version),
-			   CSR_VERSION_MINOR(csr->version));
+			   CSR_VERSION_MAJOR(error->dmc_version),
+			   CSR_VERSION_MINOR(error->dmc_version));
 	}
 
 	err_printf(m, "EIR: 0x%08x\n", error->eir);
@@ -1585,6 +1582,18 @@  static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
 	error->pgtbl_er = I915_READ(PGTBL_ER);
 }
 
+/* Capture all firmware related information. */
+static void i915_capture_fw_state(struct drm_i915_private *dev_priv,
+				  struct i915_gpu_state *error)
+{
+	if (HAS_CSR(dev_priv)) {
+		struct intel_csr *csr = &dev_priv->csr;
+
+		error->dmc_loaded = (csr->dmc_payload != NULL);
+		error->dmc_version = csr->version;
+	}
+}
+
 static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
 				   struct i915_gpu_state *error,
 				   u32 engine_mask,
@@ -1650,6 +1659,7 @@  static int capture(void *data)
 
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
+	i915_capture_fw_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);
 	i915_gem_record_rings(error->i915, error);
 	i915_capture_active_buffers(error->i915, error);

Comments

On Fri, Feb 24, 2017 at 11:05:28AM -0800, Michel Thierry wrote:
> The firmware may change between the hang and cat /sys/class/drm/card0/error
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---

Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>

-Michal
On Fri, Feb 24, 2017 at 11:05:28AM -0800, Michel Thierry wrote:
> The firmware may change between the hang and cat /sys/class/drm/card0/error
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>

Looks sane...

> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
>  drivers/gpu/drm/i915/i915_gpu_error.c | 22 ++++++++++++++++------
>  2 files changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a74b87b1b5a9..3b0cff05f5f7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -953,6 +953,10 @@ struct i915_gpu_state {
>  	u32 gab_ctl;
>  	u32 gfx_mode;
>  
> +	/* Firmware load state */
> +	u32 dmc_loaded;

Can dmc_version be zero? i.e. Does !0 => dmc_loaded?

> +	u32 dmc_version;
On 2/24/2017 11:22 AM, Chris Wilson wrote:
> On Fri, Feb 24, 2017 at 11:05:28AM -0800, Michel Thierry wrote:
>> The firmware may change between the hang and cat /sys/class/drm/card0/error
>>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>
> Looks sane...
>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
>>  drivers/gpu/drm/i915/i915_gpu_error.c | 22 ++++++++++++++++------
>>  2 files changed, 20 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index a74b87b1b5a9..3b0cff05f5f7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -953,6 +953,10 @@ struct i915_gpu_state {
>>  	u32 gab_ctl;
>>  	u32 gfx_mode;
>>
>> +	/* Firmware load state */
>> +	u32 dmc_loaded;
>
> Can dmc_version be zero? i.e. Does !0 => dmc_loaded?
>

Nothing would stop them to build a v0.0 firmware, but in this case, we 
can save one variable.
On Fri, Feb 24, 2017 at 11:25:29AM -0800, Michel Thierry wrote:
> On 2/24/2017 11:22 AM, Chris Wilson wrote:
> >On Fri, Feb 24, 2017 at 11:05:28AM -0800, Michel Thierry wrote:
> >>The firmware may change between the hang and cat /sys/class/drm/card0/error
> >>
> >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> >
> >Looks sane...
> >
> >>---
> >> drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
> >> drivers/gpu/drm/i915/i915_gpu_error.c | 22 ++++++++++++++++------
> >> 2 files changed, 20 insertions(+), 6 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>index a74b87b1b5a9..3b0cff05f5f7 100644
> >>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>@@ -953,6 +953,10 @@ struct i915_gpu_state {
> >> 	u32 gab_ctl;
> >> 	u32 gfx_mode;
> >>
> >>+	/* Firmware load state */
> >>+	u32 dmc_loaded;
> >
> >Can dmc_version be zero? i.e. Does !0 => dmc_loaded?
> >
> 
> Nothing would stop them to build a v0.0 firmware, but in this case,
> we can save one variable.

On the other guc thread, the suggestion is to use 0 as an invalid
version...
-Chris
On 2/24/2017 11:34 AM, Chris Wilson wrote:
> On Fri, Feb 24, 2017 at 11:25:29AM -0800, Michel Thierry wrote:
>> On 2/24/2017 11:22 AM, Chris Wilson wrote:
>>> On Fri, Feb 24, 2017 at 11:05:28AM -0800, Michel Thierry wrote:
>>>> The firmware may change between the hang and cat /sys/class/drm/card0/error
>>>>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>>>
>>> Looks sane...
>>>
>>>> ---
>>>> drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
>>>> drivers/gpu/drm/i915/i915_gpu_error.c | 22 ++++++++++++++++------
>>>> 2 files changed, 20 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>>> index a74b87b1b5a9..3b0cff05f5f7 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -953,6 +953,10 @@ struct i915_gpu_state {
>>>> 	u32 gab_ctl;
>>>> 	u32 gfx_mode;
>>>>
>>>> +	/* Firmware load state */
>>>> +	u32 dmc_loaded;
>>>
>>> Can dmc_version be zero? i.e. Does !0 => dmc_loaded?
>>>
>>
>> Nothing would stop them to build a v0.0 firmware, but in this case,
>> we can save one variable.
>
> On the other guc thread, the suggestion is to use 0 as an invalid
> version...
> -Chris
>
ok, let's go with that, I'll remove *_loaded.