[2/2] drm/i915: Reorganise legacy context switch to cope with late failure

Submitted by Chris Wilson on April 13, 2016, 2:16 p.m.

Details

Message ID 1460556991-8712-2-git-send-email-chris@chris-wilson.co.uk
State New
Headers show
Series "Series without cover letter" ( rev: 3 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson April 13, 2016, 2:16 p.m.
After mi_set_context() succeeds, we need to update the state of the
engine's last_context. This ensures that we hold a pin on the context
whilst the hardware may write to it. However, since we didn't complete
the post-switch setup of the context, we need to force the subsequent
use of the same context to complete the setup (which means updating
should_skip_switch()).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 147 ++++++++++++++++----------------
 1 file changed, 74 insertions(+), 73 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index c027240aacf3..702780d9af57 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -609,18 +609,19 @@  mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	return ret;
 }
 
-static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
-				   struct intel_context *from,
+static inline bool skip_rcs_switch(struct intel_context *from,
 				   struct intel_context *to)
 {
 	if (to->remap_slice)
 		return false;
 
-	if (to->ppgtt && from == to &&
-	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
-		return true;
+	if (!to->legacy_hw_ctx.initialized)
+		return false;
 
-	return false;
+	if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
+		return false;
+
+	return to == from;
 }
 
 static bool
@@ -643,18 +644,12 @@  needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
 }
 
 static bool
-needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to,
-		   u32 hw_flags)
+needs_pd_load_post(struct intel_context *to, u32 hw_flags)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-
 	if (!to->ppgtt)
 		return false;
 
-	if (!IS_GEN8(engine->dev))
-		return false;
-
-	if (engine != &dev_priv->engine[RCS])
+	if (!IS_GEN8(to->i915))
 		return false;
 
 	if (hw_flags & MI_RESTORE_INHIBIT)
@@ -667,12 +662,11 @@  static int do_rcs_switch(struct drm_i915_gem_request *req)
 {
 	struct intel_context *to = req->ctx;
 	struct intel_engine_cs *engine = req->engine;
-	struct intel_context *from = engine->last_context;
-	u32 hw_flags = 0;
-	bool uninitialized = false;
+	struct intel_context *from;
+	u32 hw_flags;
 	int ret, i;
 
-	if (skip_rcs_switch(engine, from, to))
+	if (skip_rcs_switch(engine->last_context, to))
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
@@ -686,9 +680,23 @@  static int do_rcs_switch(struct drm_i915_gem_request *req)
 	 * Pin can switch back to the default context if we end up calling into
 	 * evict_everything - as a last ditch gtt defrag effort that also
 	 * switches to the default context. Hence we need to reload from here.
+	 *
+	 * XXX: Doing so is painfully broken!
 	 */
 	from = engine->last_context;
 
+	/*
+	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
+	 * that thanks to write = false in this call and us not setting any gpu
+	 * write domains when putting a context object onto the active list
+	 * (when switching away from it), this won't block.
+	 *
+	 * XXX: We need a real interface to do this instead of trickery.
+	 */
+	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
+	if (ret)
+		goto unpin_out;
+
 	if (needs_pd_load_pre(engine, to)) {
 		/* Older GENs and non render rings still want the load first,
 		 * "PP_DCLV followed by PP_DIR_BASE register through Load
@@ -700,70 +708,36 @@  static int do_rcs_switch(struct drm_i915_gem_request *req)
 			goto unpin_out;
 
 		/* Doing a PD load always reloads the page dirs */
-		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
 	}
 
-	/*
-	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
-	 * that thanks to write = false in this call and us not setting any gpu
-	 * write domains when putting a context object onto the active list
-	 * (when switching away from it), this won't block.
-	 *
-	 * XXX: We need a real interface to do this instead of trickery.
-	 */
-	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
-	if (ret)
-		goto unpin_out;
-
-	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
-		hw_flags |= MI_RESTORE_INHIBIT;
+	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
 		/* NB: If we inhibit the restore, the context is not allowed to
 		 * die because future work may end up depending on valid address
 		 * space. This means we must enforce that a page table load
 		 * occur when this occurs. */
-	} else if (to->ppgtt &&
-		   (intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) {
-		hw_flags |= MI_FORCE_RESTORE;
-		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
-	}
+		hw_flags = MI_RESTORE_INHIBIT;
+	else if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
+		hw_flags = MI_FORCE_RESTORE;
+	else
+		hw_flags = 0;
 
 	/* We should never emit switch_mm more than once */
 	WARN_ON(needs_pd_load_pre(engine, to) &&
-		needs_pd_load_post(engine, to, hw_flags));
-
-	ret = mi_set_context(req, hw_flags);
-	if (ret)
-		goto unpin_out;
+		needs_pd_load_post(to, hw_flags));
 
-	/* GEN8 does *not* require an explicit reload if the PDPs have been
-	 * setup, and we do not wish to move them.
-	 */
-	if (needs_pd_load_post(engine, to, hw_flags)) {
-		trace_switch_mm(engine, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
-		/* The hardware context switch is emitted, but we haven't
-		 * actually changed the state - so it's probably safe to bail
-		 * here. Still, let the user know something dangerous has
-		 * happened.
-		 */
+	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
+		ret = mi_set_context(req, hw_flags);
 		if (ret) {
-			DRM_ERROR("Failed to change address space on context switch\n");
+			/* Force the reload of this and the previous mm */
+			if (needs_pd_load_pre(engine, to))
+				to->ppgtt->pd_dirty_rings |= 1 << RCS;
+			if (from && needs_pd_load_pre(engine, from))
+				from->ppgtt->pd_dirty_rings |= 1 << RCS;
 			goto unpin_out;
 		}
 	}
 
-	for (i = 0; i < MAX_L3_SLICES; i++) {
-		if (!(to->remap_slice & (1<<i)))
-			continue;
-
-		ret = i915_gem_l3_remap(req, i);
-		/* If it failed, try again next round */
-		if (ret)
-			DRM_DEBUG_DRIVER("L3 remapping failed\n");
-		else
-			to->remap_slice &= ~(1<<i);
-	}
-
 	/* The backing object for the context is done after switching to the
 	 * *next* context. Therefore we cannot retire the previous context until
 	 * the next context has already started running. In fact, the below code
@@ -786,19 +760,46 @@  static int do_rcs_switch(struct drm_i915_gem_request *req)
 		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
 		i915_gem_context_unreference(from);
 	}
-
-	uninitialized = !to->legacy_hw_ctx.initialized;
-	to->legacy_hw_ctx.initialized = true;
-
 	i915_gem_context_reference(to);
 	engine->last_context = to;
 
-	if (uninitialized) {
+	/* GEN8 does *not* require an explicit reload if the PDPs have been
+	 * setup, and we do not wish to move them.
+	 */
+	if (needs_pd_load_post(to, hw_flags)) {
+		trace_switch_mm(engine, to);
+		ret = to->ppgtt->switch_mm(to->ppgtt, req);
+		/* The hardware context switch is emitted, but we haven't
+		 * actually changed the state - so it's probably safe to bail
+		 * here. Still, let the user know something dangerous has
+		 * happened.
+		 */
+		if (ret)
+			return ret;
+
+		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
+	}
+	if (hw_flags & MI_FORCE_RESTORE)
+		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
+
+	for (i = 0; i < MAX_L3_SLICES; i++) {
+		if (!(to->remap_slice & (1<<i)))
+			continue;
+
+		ret = i915_gem_l3_remap(req, i);
+		if (ret)
+			return ret;
+
+		to->remap_slice &= ~(1<<i);
+	}
+
+	if (!to->legacy_hw_ctx.initialized) {
 		if (engine->init_context) {
 			ret = engine->init_context(req);
 			if (ret)
-				DRM_ERROR("ring init context: %d\n", ret);
+				return ret;
 		}
+		to->legacy_hw_ctx.initialized = true;
 	}
 
 	return 0;

Comments

> @@ -609,18 +609,19 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>  	return ret;
>  }
>  
> -static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
> -				   struct intel_context *from,
> +static inline bool skip_rcs_switch(struct intel_context *from,
>  				   struct intel_context *to)
>  {
>  	if (to->remap_slice)
>  		return false;
>  
> -	if (to->ppgtt && from == to &&
> -	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
> -		return true;
> +	if (!to->legacy_hw_ctx.initialized)
> +		return false;
>  
> -	return false;
> +	if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
> +		return false;
> +
> +	return to == from;
>  }
>  
>  static bool
> @@ -643,18 +644,12 @@ needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
>  }
>  
>  static bool
> -needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to,
> -		   u32 hw_flags)
> +needs_pd_load_post(struct intel_context *to, u32 hw_flags)
>  {
> -	struct drm_i915_private *dev_priv = engine->dev->dev_private;
> -
>  	if (!to->ppgtt)
>  		return false;
>  
> -	if (!IS_GEN8(engine->dev))
> -		return false;
> -
> -	if (engine != &dev_priv->engine[RCS])
> +	if (!IS_GEN8(to->i915))
>  		return false;
>  
>  	if (hw_flags & MI_RESTORE_INHIBIT)

Above hunk might be better in the previous patch, but meh.

> @@ -667,12 +662,11 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  {
>  	struct intel_context *to = req->ctx;
>  	struct intel_engine_cs *engine = req->engine;
> -	struct intel_context *from = engine->last_context;
> -	u32 hw_flags = 0;
> -	bool uninitialized = false;
> +	struct intel_context *from;
> +	u32 hw_flags;
>  	int ret, i;
>  
> -	if (skip_rcs_switch(engine, from, to))
> +	if (skip_rcs_switch(engine->last_context, to))
>  		return 0;
>  
>  	/* Trying to pin first makes error handling easier. */
> @@ -686,9 +680,23 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  	 * Pin can switch back to the default context if we end up calling into
>  	 * evict_everything - as a last ditch gtt defrag effort that also
>  	 * switches to the default context. Hence we need to reload from here.
> +	 *
> +	 * XXX: Doing so is painfully broken!

Why the XXX addition here? I thought the point of this patch is to fix
this ...

>  	 */
>  	from = engine->last_context;
>  
> +	/*
> +	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
> +	 * that thanks to write = false in this call and us not setting any gpu
> +	 * write domains when putting a context object onto the active list
> +	 * (when switching away from it), this won't block.
> +	 *
> +	 * XXX: We need a real interface to do this instead of trickery.
> +	 */
> +	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
> +	if (ret)
> +		goto unpin_out;
> +
>  	if (needs_pd_load_pre(engine, to)) {
>  		/* Older GENs and non render rings still want the load first,
>  		 * "PP_DCLV followed by PP_DIR_BASE register through Load
> @@ -700,70 +708,36 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  			goto unpin_out;
>  
>  		/* Doing a PD load always reloads the page dirs */
> -		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> +		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);

Superfluous change for the imo worse.

>  	}
>  
> -	/*
> -	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
> -	 * that thanks to write = false in this call and us not setting any gpu
> -	 * write domains when putting a context object onto the active list
> -	 * (when switching away from it), this won't block.
> -	 *
> -	 * XXX: We need a real interface to do this instead of trickery.
> -	 */
> -	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
> -	if (ret)
> -		goto unpin_out;
> -
> -	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
> -		hw_flags |= MI_RESTORE_INHIBIT;
> +	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
>  		/* NB: If we inhibit the restore, the context is not allowed to
>  		 * die because future work may end up depending on valid address
>  		 * space. This means we must enforce that a page table load
>  		 * occur when this occurs. */
> -	} else if (to->ppgtt &&
> -		   (intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) {
> -		hw_flags |= MI_FORCE_RESTORE;
> -		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> -	}
> +		hw_flags = MI_RESTORE_INHIBIT;
> +	else if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
> +		hw_flags = MI_FORCE_RESTORE;
> +	else
> +		hw_flags = 0;
>  
>  	/* We should never emit switch_mm more than once */
>  	WARN_ON(needs_pd_load_pre(engine, to) &&
> -		needs_pd_load_post(engine, to, hw_flags));
> -
> -	ret = mi_set_context(req, hw_flags);
> -	if (ret)
> -		goto unpin_out;
> +		needs_pd_load_post(to, hw_flags));
>  
> -	/* GEN8 does *not* require an explicit reload if the PDPs have been
> -	 * setup, and we do not wish to move them.
> -	 */
> -	if (needs_pd_load_post(engine, to, hw_flags)) {
> -		trace_switch_mm(engine, to);
> -		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> -		/* The hardware context switch is emitted, but we haven't
> -		 * actually changed the state - so it's probably safe to bail
> -		 * here. Still, let the user know something dangerous has
> -		 * happened.
> -		 */
> +	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
> +		ret = mi_set_context(req, hw_flags);
>  		if (ret) {
> -			DRM_ERROR("Failed to change address space on context switch\n");
> +			/* Force the reload of this and the previous mm */
> +			if (needs_pd_load_pre(engine, to))
> +				to->ppgtt->pd_dirty_rings |= 1 << RCS;
> +			if (from && needs_pd_load_pre(engine, from))
> +				from->ppgtt->pd_dirty_rings |= 1 << RCS;
>  			goto unpin_out;
>  		}
>  	}
>  
> -	for (i = 0; i < MAX_L3_SLICES; i++) {
> -		if (!(to->remap_slice & (1<<i)))
> -			continue;
> -
> -		ret = i915_gem_l3_remap(req, i);
> -		/* If it failed, try again next round */
> -		if (ret)
> -			DRM_DEBUG_DRIVER("L3 remapping failed\n");
> -		else
> -			to->remap_slice &= ~(1<<i);
> -	}
> -
>  	/* The backing object for the context is done after switching to the
>  	 * *next* context. Therefore we cannot retire the previous context until
>  	 * the next context has already started running. In fact, the below code
> @@ -786,19 +760,46 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
>  		i915_gem_context_unreference(from);
>  	}
> -
> -	uninitialized = !to->legacy_hw_ctx.initialized;
> -	to->legacy_hw_ctx.initialized = true;
> -
>  	i915_gem_context_reference(to);
>  	engine->last_context = to;
>  
> -	if (uninitialized) {
> +	/* GEN8 does *not* require an explicit reload if the PDPs have been
> +	 * setup, and we do not wish to move them.
> +	 */
> +	if (needs_pd_load_post(to, hw_flags)) {
> +		trace_switch_mm(engine, to);
> +		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> +		/* The hardware context switch is emitted, but we haven't
> +		 * actually changed the state - so it's probably safe to bail
> +		 * here. Still, let the user know something dangerous has
> +		 * happened.
> +		 */
> +		if (ret)
> +			return ret;
> +
> +		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
> +	}
> +	if (hw_flags & MI_FORCE_RESTORE)
> +		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
> +
> +	for (i = 0; i < MAX_L3_SLICES; i++) {
> +		if (!(to->remap_slice & (1<<i)))
> +			continue;
> +
> +		ret = i915_gem_l3_remap(req, i);
> +		if (ret)
> +			return ret;
> +
> +		to->remap_slice &= ~(1<<i);
> +	}
> +
> +	if (!to->legacy_hw_ctx.initialized) {
>  		if (engine->init_context) {
>  			ret = engine->init_context(req);
>  			if (ret)
> -				DRM_ERROR("ring init context: %d\n", ret);
> +				return ret;
>  		}
> +		to->legacy_hw_ctx.initialized = true;

Otherwise Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

>  	}
>  
>  	return 0;
> -- 
> 2.8.0.rc3
>
On Wed, Apr 13, 2016 at 05:05:05PM +0200, Daniel Vetter wrote:
> >  static bool
> > -needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to,
> > -		   u32 hw_flags)
> > +needs_pd_load_post(struct intel_context *to, u32 hw_flags)
> >  {
> > -	struct drm_i915_private *dev_priv = engine->dev->dev_private;
> > -
> >  	if (!to->ppgtt)
> >  		return false;
> >  
> > -	if (!IS_GEN8(engine->dev))
> > -		return false;
> > -
> > -	if (engine != &dev_priv->engine[RCS])
> > +	if (!IS_GEN8(to->i915))
> >  		return false;
> >  
> >  	if (hw_flags & MI_RESTORE_INHIBIT)
> 
> Above hunk might be better in the previous patch, but meh.

Previous patch changed needs_pd_load_pre() :-p

> > @@ -667,12 +662,11 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> >  {
> >  	struct intel_context *to = req->ctx;
> >  	struct intel_engine_cs *engine = req->engine;
> > -	struct intel_context *from = engine->last_context;
> > -	u32 hw_flags = 0;
> > -	bool uninitialized = false;
> > +	struct intel_context *from;
> > +	u32 hw_flags;
> >  	int ret, i;
> >  
> > -	if (skip_rcs_switch(engine, from, to))
> > +	if (skip_rcs_switch(engine->last_context, to))
> >  		return 0;
> >  
> >  	/* Trying to pin first makes error handling easier. */
> > @@ -686,9 +680,23 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> >  	 * Pin can switch back to the default context if we end up calling into
> >  	 * evict_everything - as a last ditch gtt defrag effort that also
> >  	 * switches to the default context. Hence we need to reload from here.
> > +	 *
> > +	 * XXX: Doing so is painfully broken!
> 
> Why the XXX addition here? I thought the point of this patch is to fix
> this ...

No, that requires my seqno->requests patches....

To fix this we have to move the context pinning into request allocation,
so that if we do have to emit a switch to the default context we do so
before we take ownership of the ring for ourselves.

  	/* Trying to pin first makes error handling easier. */

is not kidding.

> >  	 */
> >  	from = engine->last_context;
> >  
> > +	/*
> > +	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
> > +	 * that thanks to write = false in this call and us not setting any gpu
> > +	 * write domains when putting a context object onto the active list
> > +	 * (when switching away from it), this won't block.
> > +	 *
> > +	 * XXX: We need a real interface to do this instead of trickery.
> > +	 */
> > +	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
> > +	if (ret)
> > +		goto unpin_out;
> > +
> >  	if (needs_pd_load_pre(engine, to)) {
> >  		/* Older GENs and non render rings still want the load first,
> >  		 * "PP_DCLV followed by PP_DIR_BASE register through Load
> > @@ -700,70 +708,36 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> >  			goto unpin_out;
> >  
> >  		/* Doing a PD load always reloads the page dirs */
> > -		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> > +		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
> 
> Superfluous change for the imo worse.

* shrug. I was just trying to reinforce this was RCS, not any random
engine.
-Chris