drm/i915: Extend rpm wakelock during i915_handle_error()

Submitted by Chris Wilson on March 14, 2017, 5:18 p.m.

Details

Message ID 20170314171840.25706-1-chris@chris-wilson.co.uk
State Accepted
Commit 1604a86d08053f936e2820208ed952807fb4c4f0
Headers show
Series "drm/i915: get a runtime PM ref in i915_wedged_set" ( rev: 2 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson March 14, 2017, 5:18 p.m.
We take the runtime pm wakelock during i915_handle_error() to ensure
that all paths that reach the error capture keep the device awake during
the hw reads. However, we need to extend that from the reset handler to
include the earlier capture routines.

Reported-by: Antonio Argenziano <antonio.argenziano@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 31f0d7c8992f..52e1fe8a98fb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2664,14 +2664,6 @@  static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("resetting chip\n");
 	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
 
-	/*
-	 * In most cases it's guaranteed that we get here with an RPM
-	 * reference held, for example because there is a pending GPU
-	 * request that won't finish until the reset is done. This
-	 * isn't the case at least when we get here by doing a
-	 * simulated reset via debugs, so get an RPM reference.
-	 */
-	intel_runtime_pm_get(dev_priv);
 	intel_prepare_reset(dev_priv);
 
 	do {
@@ -2693,7 +2685,6 @@  static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 				     HZ));
 
 	intel_finish_reset(dev_priv);
-	intel_runtime_pm_put(dev_priv);
 
 	if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
 		kobject_uevent_env(kobj,
@@ -2780,15 +2771,24 @@  void i915_handle_error(struct drm_i915_private *dev_priv,
 	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
 	va_end(args);
 
+	/*
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugfs, so get an RPM reference.
+	 */
+	intel_runtime_pm_get(dev_priv);
+
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
 	i915_clear_error_registers(dev_priv);
 
 	if (!engine_mask)
-		return;
+		goto out;
 
 	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
 			     &dev_priv->gpu_error.flags))
-		return;
+		goto out;
 
 	/*
 	 * Wakeup waiting processes so that the reset function
@@ -2805,6 +2805,9 @@  void i915_handle_error(struct drm_i915_private *dev_priv,
 	i915_error_wake_up(dev_priv);
 
 	i915_reset_and_wakeup(dev_priv);
+
+out:
+	intel_runtime_pm_put(dev_priv);
 }
 
 /* Called from drm generic code, passed 'crtc' which

Comments

On 3/14/2017 10:18 AM, Chris Wilson wrote:
> We take the runtime pm wakelock during i915_handle_error() to ensure
> that all paths that reach the error capture keep the device awake during
> the hw reads. However, we need to extend that from the reset handler to
> include the earlier capture routines.
>
> Reported-by: Antonio Argenziano <antonio.argenziano@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michel Thierry <michel.thierry@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 25 ++++++++++++++-----------
>  1 file changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 31f0d7c8992f..52e1fe8a98fb 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2664,14 +2664,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  	DRM_DEBUG_DRIVER("resetting chip\n");
>  	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
>
> -	/*
> -	 * In most cases it's guaranteed that we get here with an RPM
> -	 * reference held, for example because there is a pending GPU
> -	 * request that won't finish until the reset is done. This
> -	 * isn't the case at least when we get here by doing a
> -	 * simulated reset via debugs, so get an RPM reference.
> -	 */
> -	intel_runtime_pm_get(dev_priv);
>  	intel_prepare_reset(dev_priv);
>
>  	do {
> @@ -2693,7 +2685,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  				     HZ));
>
>  	intel_finish_reset(dev_priv);
> -	intel_runtime_pm_put(dev_priv);
>
>  	if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
>  		kobject_uevent_env(kobj,
> @@ -2780,15 +2771,24 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
>  	va_end(args);
>
> +	/*
> +	 * In most cases it's guaranteed that we get here with an RPM
> +	 * reference held, for example because there is a pending GPU
> +	 * request that won't finish until the reset is done. This
> +	 * isn't the case at least when we get here by doing a
> +	 * simulated reset via debugfs, so get an RPM reference.
> +	 */
> +	intel_runtime_pm_get(dev_priv);
> +
>  	i915_capture_error_state(dev_priv, engine_mask, error_msg);
>  	i915_clear_error_registers(dev_priv);
>
>  	if (!engine_mask)
> -		return;
> +		goto out;
>
>  	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
>  			     &dev_priv->gpu_error.flags))
> -		return;
> +		goto out;
>
>  	/*
>  	 * Wakeup waiting processes so that the reset function
> @@ -2805,6 +2805,9 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  	i915_error_wake_up(dev_priv);
>
>  	i915_reset_and_wakeup(dev_priv);
> +
> +out:
> +	intel_runtime_pm_put(dev_priv);
>  }
>
>  /* Called from drm generic code, passed 'crtc' which
>

Reviewed-by: Michel Thierry <michel.thierry@intel.com>

On Tue, Mar 14, 2017 at 10:24:42AM -0700, Michel Thierry wrote:
> 
> 
> On 3/14/2017 10:18 AM, Chris Wilson wrote:
> >We take the runtime pm wakelock during i915_handle_error() to ensure
> >that all paths that reach the error capture keep the device awake during
> >the hw reads. However, we need to extend that from the reset handler to
> >include the earlier capture routines.
> >
> >Reported-by: Antonio Argenziano <antonio.argenziano@intel.com>
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Michel Thierry <michel.thierry@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_irq.c | 25 ++++++++++++++-----------
> > 1 file changed, 14 insertions(+), 11 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> >index 31f0d7c8992f..52e1fe8a98fb 100644
> >--- a/drivers/gpu/drm/i915/i915_irq.c
> >+++ b/drivers/gpu/drm/i915/i915_irq.c
> >@@ -2664,14 +2664,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
> > 	DRM_DEBUG_DRIVER("resetting chip\n");
> > 	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
> >
> >-	/*
> >-	 * In most cases it's guaranteed that we get here with an RPM
> >-	 * reference held, for example because there is a pending GPU
> >-	 * request that won't finish until the reset is done. This
> >-	 * isn't the case at least when we get here by doing a
> >-	 * simulated reset via debugs, so get an RPM reference.
> >-	 */
> >-	intel_runtime_pm_get(dev_priv);
> > 	intel_prepare_reset(dev_priv);
> >
> > 	do {
> >@@ -2693,7 +2685,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
> > 				     HZ));
> >
> > 	intel_finish_reset(dev_priv);
> >-	intel_runtime_pm_put(dev_priv);
> >
> > 	if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
> > 		kobject_uevent_env(kobj,
> >@@ -2780,15 +2771,24 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
> > 	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
> > 	va_end(args);
> >
> >+	/*
> >+	 * In most cases it's guaranteed that we get here with an RPM
> >+	 * reference held, for example because there is a pending GPU
> >+	 * request that won't finish until the reset is done. This
> >+	 * isn't the case at least when we get here by doing a
> >+	 * simulated reset via debugfs, so get an RPM reference.
> >+	 */
> >+	intel_runtime_pm_get(dev_priv);
> >+
> > 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
> > 	i915_clear_error_registers(dev_priv);
> >
> > 	if (!engine_mask)
> >-		return;
> >+		goto out;
> >
> > 	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
> > 			     &dev_priv->gpu_error.flags))
> >-		return;
> >+		goto out;
> >
> > 	/*
> > 	 * Wakeup waiting processes so that the reset function
> >@@ -2805,6 +2805,9 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
> > 	i915_error_wake_up(dev_priv);
> >
> > 	i915_reset_and_wakeup(dev_priv);
> >+
> >+out:
> >+	intel_runtime_pm_put(dev_priv);
> > }
> >
> > /* Called from drm generic code, passed 'crtc' which
> >
> 
> Reviewed-by: Michel Thierry <michel.thierry@intel.com>

Thanks for the original patch; now that CI has an idle moment, pushed.
-Chris