drm/i915: Change context lifecycle

Submitted by Nick Hoath on Nov. 25, 2015, 12:57 p.m.

Details

Message ID 1448456237-31421-1-git-send-email-nicholas.hoath@intel.com
State New
Headers show
Series "drm/i915: Change context lifecycle" ( rev: 3 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Nick Hoath Nov. 25, 2015, 12:57 p.m.
Use the first retired request on a new context to unpin
the old context. This ensures that the hw context remains
bound until it has been written back to by the GPU.
Now that the context is pinned until later in the request/context
lifecycle, it no longer needs to be pinned from context_queue to
retire_requests.

v2: Moved the new pin to cover GuC submission (Alex Dai)
    Moved the new unpin to request_retire to fix coverage leak
v3: Added switch to default context if freeing a still pinned
    context just in case the hw was actually still using it
v4: Unwrapped context unpin to allow calling without a request
v5: Only create a switch to idle context if the ring doesn't
    already have a request pending on it (Alex Dai)
    Rename unsaved to dirty to avoid double negatives (Dave Gordon)
    Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
    Split out per engine cleanup from context_free as it
    was getting unwieldy
    Corrected locking (Dave Gordon)

Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
Issue: VIZ-4277
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/i915_gem.c  |   3 +
 drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 4 files changed, 105 insertions(+), 24 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5cf30b..e82717a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -889,6 +889,7 @@  struct intel_context {
 	struct {
 		struct drm_i915_gem_object *state;
 		struct intel_ringbuffer *ringbuf;
+		bool dirty;
 		int pin_count;
 	} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e955499..3829bc1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1354,6 +1354,9 @@  static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
 	trace_i915_gem_request_retire(request);
 
+	if (i915.enable_execlists)
+		intel_lr_context_complete_check(request);
+
 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
 	 * of tail of the request to update the last known position
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06180dc..03d5bca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,9 +566,6 @@  static int execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	if (request->ctx != ring->default_context)
-		intel_lr_context_pin(request);
-
 	i915_gem_request_reference(request);
 
 	spin_lock_irq(&ring->execlist_lock);
@@ -732,6 +729,13 @@  intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	if (intel_ring_stopped(ring))
 		return;
 
+	if (request->ctx != ring->default_context) {
+		if (!request->ctx->engine[ring->id].dirty) {
+			intel_lr_context_pin(request);
+			request->ctx->engine[ring->id].dirty = true;
+		}
+	}
+
 	if (dev_priv->guc.execbuf_client)
 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
 	else
@@ -958,12 +962,6 @@  void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 	spin_unlock_irq(&ring->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		struct intel_context *ctx = req->ctx;
-		struct drm_i915_gem_object *ctx_obj =
-				ctx->engine[ring->id].state;
-
-		if (ctx_obj && (ctx != ring->default_context))
-			intel_lr_context_unpin(req);
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
 	}
@@ -1058,21 +1056,39 @@  reset_pin_count:
 	return ret;
 }
 
-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
 {
-	struct intel_engine_cs *ring = rq->ring;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct intel_ringbuffer *ringbuf = rq->ringbuf;
-
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
 	if (ctx_obj) {
 		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--rq->ctx->engine[ring->id].pin_count == 0) {
+		if (--ctx->engine[ring->id].pin_count == 0) {
 			intel_unpin_ringbuffer_obj(ringbuf);
 			i915_gem_object_ggtt_unpin(ctx_obj);
 		}
 	}
 }
 
+void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+{
+	__intel_lr_context_unpin(rq->ring, rq->ctx);
+}
+
+void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
+{
+	struct intel_engine_cs *ring = req->ring;
+
+	if (ring->last_context && ring->last_context != req->ctx &&
+			ring->last_context->engine[ring->id].dirty) {
+		__intel_lr_context_unpin(
+				ring,
+				ring->last_context);
+		ring->last_context->engine[ring->id].dirty = false;
+	}
+	ring->last_context = req->ctx;
+}
+
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
 	int ret, i;
@@ -2367,6 +2383,62 @@  populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 }
 
 /**
+ * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
+ * @ctx: the LR context being freed.
+ * @ring: the engine being cleaned
+ * @ctx_obj: the hw context being unreferenced
+ * @ringbuf: the ringbuf being freed
+ *
+ * Take care of cleaning up the per-engine backing
+ * objects and the logical ringbuffer.
+ */
+static void
+intel_lr_context_clean_ring(struct intel_context *ctx,
+			    struct intel_engine_cs *ring,
+			    struct drm_i915_gem_object *ctx_obj,
+			    struct intel_ringbuffer *ringbuf)
+{
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	if (ctx == ring->default_context) {
+		intel_unpin_ringbuffer_obj(ringbuf);
+		i915_gem_object_ggtt_unpin(ctx_obj);
+	}
+
+	if (ctx->engine[ring->id].dirty) {
+		struct drm_i915_gem_request *req = NULL;
+
+		/**
+		 * If there is already a request pending on
+		 * this ring, wait for that to complete,
+		 * otherwise create a switch to idle request
+		 */
+		if (list_empty(&ring->request_list)) {
+			int ret;
+
+			ret = i915_gem_request_alloc(
+					ring,
+					ring->default_context,
+					&req);
+			if (!ret)
+				i915_add_request(req);
+			else
+				DRM_DEBUG("Failed to ensure context saved");
+		} else {
+			req = list_first_entry(
+					&ring->request_list,
+					typeof(*req), list);
+		}
+		if (req)
+			i915_wait_request(req);
+	}
+
+	WARN_ON(ctx->engine[ring->id].pin_count);
+	intel_ringbuffer_free(ringbuf);
+	drm_gem_object_unreference(&ctx_obj->base);
+}
+
+/**
  * intel_lr_context_free() - free the LRC specific bits of a context
  * @ctx: the LR context to free.
  *
@@ -2378,21 +2450,18 @@  void intel_lr_context_free(struct intel_context *ctx)
 {
 	int i;
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
+	for (i = 0; i < I915_NUM_RINGS; ++i) {
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
 
-		if (ctx_obj) {
+		if (ctx->engine[i].state) {
 			struct intel_ringbuffer *ringbuf =
 					ctx->engine[i].ringbuf;
 			struct intel_engine_cs *ring = ringbuf->ring;
 
-			if (ctx == ring->default_context) {
-				intel_unpin_ringbuffer_obj(ringbuf);
-				i915_gem_object_ggtt_unpin(ctx_obj);
-			}
-			WARN_ON(ctx->engine[ring->id].pin_count);
-			intel_ringbuffer_free(ringbuf);
-			drm_gem_object_unreference(&ctx_obj->base);
+			intel_lr_context_clean_ring(ctx,
+						    ring,
+						    ctx_obj,
+						    ringbuf);
 		}
 	}
 }
@@ -2554,5 +2623,12 @@  void intel_lr_context_reset(struct drm_device *dev,
 
 		ringbuf->head = 0;
 		ringbuf->tail = 0;
+
+		if (ctx->engine[ring->id].dirty) {
+			__intel_lr_context_unpin(
+					ring,
+					ctx);
+			ctx->engine[ring->id].dirty = false;
+		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4e60d54..cbc42b8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -86,6 +86,7 @@  void intel_lr_context_reset(struct drm_device *dev,
 			struct intel_context *ctx);
 uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
+void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);

Comments

Nick Hoath <nicholas.hoath@intel.com> writes:

> Use the first retired request on a new context to unpin
> the old context. This ensures that the hw context remains
> bound until it has been written back to by the GPU.
> Now that the context is pinned until later in the request/context
> lifecycle, it no longer needs to be pinned from context_queue to
> retire_requests.
>
> v2: Moved the new pin to cover GuC submission (Alex Dai)
>     Moved the new unpin to request_retire to fix coverage leak
> v3: Added switch to default context if freeing a still pinned
>     context just in case the hw was actually still using it
> v4: Unwrapped context unpin to allow calling without a request
> v5: Only create a switch to idle context if the ring doesn't
>     already have a request pending on it (Alex Dai)
>     Rename unsaved to dirty to avoid double negatives (Dave Gordon)
>     Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
>     Split out per engine cleanup from context_free as it
>     was getting unwieldy
>     Corrected locking (Dave Gordon)
>
> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> Issue: VIZ-4277
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: David Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Alex Dai <yu.dai@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |   1 +
>  drivers/gpu/drm/i915/i915_gem.c  |   3 +
>  drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_lrc.h |   1 +
>  4 files changed, 105 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d5cf30b..e82717a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -889,6 +889,7 @@ struct intel_context {
>  	struct {
>  		struct drm_i915_gem_object *state;
>  		struct intel_ringbuffer *ringbuf;
> +		bool dirty;
>  		int pin_count;
>  	} engine[I915_NUM_RINGS];
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e955499..3829bc1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  {
>  	trace_i915_gem_request_retire(request);
>  
> +	if (i915.enable_execlists)
> +		intel_lr_context_complete_check(request);
> +
>  	/* We know the GPU must have read the request to have
>  	 * sent us the seqno + interrupt, so use the position
>  	 * of tail of the request to update the last known position
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 06180dc..03d5bca 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>  	struct drm_i915_gem_request *cursor;
>  	int num_elements = 0;
>  
> -	if (request->ctx != ring->default_context)
> -		intel_lr_context_pin(request);
> -
>  	i915_gem_request_reference(request);
>  
>  	spin_lock_irq(&ring->execlist_lock);
> @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>  	if (intel_ring_stopped(ring))
>  		return;
>  
> +	if (request->ctx != ring->default_context) {
> +		if (!request->ctx->engine[ring->id].dirty) {
> +			intel_lr_context_pin(request);
> +			request->ctx->engine[ring->id].dirty = true;
> +		}
> +	}
> +
>  	if (dev_priv->guc.execbuf_client)
>  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
>  	else
> @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>  	spin_unlock_irq(&ring->execlist_lock);
>  
>  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		struct intel_context *ctx = req->ctx;
> -		struct drm_i915_gem_object *ctx_obj =
> -				ctx->engine[ring->id].state;
> -
> -		if (ctx_obj && (ctx != ring->default_context))
> -			intel_lr_context_unpin(req);
>  		list_del(&req->execlist_link);
>  		i915_gem_request_unreference(req);
>  	}
> @@ -1058,21 +1056,39 @@ reset_pin_count:
>  	return ret;
>  }
>  
> -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx)
>  {
> -	struct intel_engine_cs *ring = rq->ring;
> -	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> -	struct intel_ringbuffer *ringbuf = rq->ringbuf;
> -
> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>  	if (ctx_obj) {
>  		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> -		if (--rq->ctx->engine[ring->id].pin_count == 0) {
> +		if (--ctx->engine[ring->id].pin_count == 0) {
>  			intel_unpin_ringbuffer_obj(ringbuf);
>  			i915_gem_object_ggtt_unpin(ctx_obj);
>  		}
>  	}
>  }
>  
> +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> +{
> +	__intel_lr_context_unpin(rq->ring, rq->ctx);
> +}
> +
> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> +{
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	if (ring->last_context && ring->last_context != req->ctx &&
> +			ring->last_context->engine[ring->id].dirty) {
> +		__intel_lr_context_unpin(
> +				ring,
> +				ring->last_context);
> +		ring->last_context->engine[ring->id].dirty = false;
> +	}
> +	ring->last_context = req->ctx;

What ensures that the last_context stays alive?

> +}
> +
>  static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>  {
>  	int ret, i;
> @@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>  }
>  
>  /**
> + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> + * @ctx: the LR context being freed.
> + * @ring: the engine being cleaned
> + * @ctx_obj: the hw context being unreferenced
> + * @ringbuf: the ringbuf being freed
> + *
> + * Take care of cleaning up the per-engine backing
> + * objects and the logical ringbuffer.
> + */
> +static void
> +intel_lr_context_clean_ring(struct intel_context *ctx,
> +			    struct intel_engine_cs *ring,
> +			    struct drm_i915_gem_object *ctx_obj,
> +			    struct intel_ringbuffer *ringbuf)
> +{
> +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +
> +	if (ctx == ring->default_context) {
> +		intel_unpin_ringbuffer_obj(ringbuf);
> +		i915_gem_object_ggtt_unpin(ctx_obj);
> +	}
> +
> +	if (ctx->engine[ring->id].dirty) {
> +		struct drm_i915_gem_request *req = NULL;
> +
> +		/**
> +		 * If there is already a request pending on
> +		 * this ring, wait for that to complete,
> +		 * otherwise create a switch to idle request
> +		 */
> +		if (list_empty(&ring->request_list)) {
> +			int ret;
> +
> +			ret = i915_gem_request_alloc(
> +					ring,
> +					ring->default_context,
> +					&req);
> +			if (!ret)
> +				i915_add_request(req);
> +			else
> +				DRM_DEBUG("Failed to ensure context saved");
> +		} else {
> +			req = list_first_entry(
> +					&ring->request_list,
> +					typeof(*req), list);
> +		}
> +		if (req)
> +			i915_wait_request(req);
> +	}
> +
> +	WARN_ON(ctx->engine[ring->id].pin_count);
> +	intel_ringbuffer_free(ringbuf);
> +	drm_gem_object_unreference(&ctx_obj->base);
> +}
> +
> +/**
>   * intel_lr_context_free() - free the LRC specific bits of a context
>   * @ctx: the LR context to free.
>   *
> @@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
>  {
>  	int i;
>  
> -	for (i = 0; i < I915_NUM_RINGS; i++) {
> +	for (i = 0; i < I915_NUM_RINGS; ++i) {
>  		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>  
> -		if (ctx_obj) {
> +		if (ctx->engine[i].state) {

++i and the above are superfluous?

-Mika

>  			struct intel_ringbuffer *ringbuf =
>  					ctx->engine[i].ringbuf;
>  			struct intel_engine_cs *ring = ringbuf->ring;
>  
> -			if (ctx == ring->default_context) {
> -				intel_unpin_ringbuffer_obj(ringbuf);
> -				i915_gem_object_ggtt_unpin(ctx_obj);
> -			}
> -			WARN_ON(ctx->engine[ring->id].pin_count);
> -			intel_ringbuffer_free(ringbuf);
> -			drm_gem_object_unreference(&ctx_obj->base);
> +			intel_lr_context_clean_ring(ctx,
> +						    ring,
> +						    ctx_obj,
> +						    ringbuf);
>  		}
>  	}
>  }
> @@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
>  
>  		ringbuf->head = 0;
>  		ringbuf->tail = 0;
> +
> +		if (ctx->engine[ring->id].dirty) {
> +			__intel_lr_context_unpin(
> +					ring,
> +					ctx);
> +			ctx->engine[ring->id].dirty = false;
> +		}
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 4e60d54..cbc42b8 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
>  			struct intel_context *ctx);
>  uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>  				     struct intel_engine_cs *ring);
> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
>  
>  /* Execlists */
>  int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
OK, here is my understanding. We pin/unpin the context when a request is
created/freed, or when it is submitted/retired, subject to the dirty
check. So the context pin count will be the number of outstanding
requests, plus one if the context is dirty. Dirty means the context is
likely still being accessed by the HW, while not-dirty means the HW is
not accessing the LRC at that moment. This extra pin count is held until
we retire a request from a different context.

The switch to idle context and wait looks good to me too. I tested it 
out and it fixes the hang issue when GuC is enabled.

Reviewed-by: Alex Dai <yu.dai@intel.com>

Thanks,
Alex

On 11/25/2015 04:57 AM, Nick Hoath wrote:
> Use the first retired request on a new context to unpin
> the old context. This ensures that the hw context remains
> bound until it has been written back to by the GPU.
> Now that the context is pinned until later in the request/context
> lifecycle, it no longer needs to be pinned from context_queue to
> retire_requests.
>
> v2: Moved the new pin to cover GuC submission (Alex Dai)
>      Moved the new unpin to request_retire to fix coverage leak
> v3: Added switch to default context if freeing a still pinned
>      context just in case the hw was actually still using it
> v4: Unwrapped context unpin to allow calling without a request
> v5: Only create a switch to idle context if the ring doesn't
>      already have a request pending on it (Alex Dai)
>      Rename unsaved to dirty to avoid double negatives (Dave Gordon)
>      Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
>      Split out per engine cleanup from context_free as it
>      was getting unwieldy
>      Corrected locking (Dave Gordon)
>
> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> Issue: VIZ-4277
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: David Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Alex Dai <yu.dai@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  |   1 +
>   drivers/gpu/drm/i915/i915_gem.c  |   3 +
>   drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
>   drivers/gpu/drm/i915/intel_lrc.h |   1 +
>   4 files changed, 105 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d5cf30b..e82717a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -889,6 +889,7 @@ struct intel_context {
>   	struct {
>   		struct drm_i915_gem_object *state;
>   		struct intel_ringbuffer *ringbuf;
> +		bool dirty;
>   		int pin_count;
>   	} engine[I915_NUM_RINGS];
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e955499..3829bc1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   {
>   	trace_i915_gem_request_retire(request);
>   
> +	if (i915.enable_execlists)
> +		intel_lr_context_complete_check(request);
> +
>   	/* We know the GPU must have read the request to have
>   	 * sent us the seqno + interrupt, so use the position
>   	 * of tail of the request to update the last known position
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 06180dc..03d5bca 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>   	struct drm_i915_gem_request *cursor;
>   	int num_elements = 0;
>   
> -	if (request->ctx != ring->default_context)
> -		intel_lr_context_pin(request);
> -
>   	i915_gem_request_reference(request);
>   
>   	spin_lock_irq(&ring->execlist_lock);
> @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	if (intel_ring_stopped(ring))
>   		return;
>   
> +	if (request->ctx != ring->default_context) {
> +		if (!request->ctx->engine[ring->id].dirty) {
> +			intel_lr_context_pin(request);
> +			request->ctx->engine[ring->id].dirty = true;
> +		}
> +	}
> +
>   	if (dev_priv->guc.execbuf_client)
>   		i915_guc_submit(dev_priv->guc.execbuf_client, request);
>   	else
> @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>   	spin_unlock_irq(&ring->execlist_lock);
>   
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		struct intel_context *ctx = req->ctx;
> -		struct drm_i915_gem_object *ctx_obj =
> -				ctx->engine[ring->id].state;
> -
> -		if (ctx_obj && (ctx != ring->default_context))
> -			intel_lr_context_unpin(req);
>   		list_del(&req->execlist_link);
>   		i915_gem_request_unreference(req);
>   	}
> @@ -1058,21 +1056,39 @@ reset_pin_count:
>   	return ret;
>   }
>   
> -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx)
>   {
> -	struct intel_engine_cs *ring = rq->ring;
> -	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> -	struct intel_ringbuffer *ringbuf = rq->ringbuf;
> -
> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>   	if (ctx_obj) {
>   		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> -		if (--rq->ctx->engine[ring->id].pin_count == 0) {
> +		if (--ctx->engine[ring->id].pin_count == 0) {
>   			intel_unpin_ringbuffer_obj(ringbuf);
>   			i915_gem_object_ggtt_unpin(ctx_obj);
>   		}
>   	}
>   }
>   
> +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> +{
> +	__intel_lr_context_unpin(rq->ring, rq->ctx);
> +}
> +
> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> +{
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	if (ring->last_context && ring->last_context != req->ctx &&
> +			ring->last_context->engine[ring->id].dirty) {
> +		__intel_lr_context_unpin(
> +				ring,
> +				ring->last_context);
> +		ring->last_context->engine[ring->id].dirty = false;
> +	}
> +	ring->last_context = req->ctx;
> +}
> +
>   static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>   {
>   	int ret, i;
> @@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>   }
>   
>   /**
> + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> + * @ctx: the LR context being freed.
> + * @ring: the engine being cleaned
> + * @ctx_obj: the hw context being unreferenced
> + * @ringbuf: the ringbuf being freed
> + *
> + * Take care of cleaning up the per-engine backing
> + * objects and the logical ringbuffer.
> + */
> +static void
> +intel_lr_context_clean_ring(struct intel_context *ctx,
> +			    struct intel_engine_cs *ring,
> +			    struct drm_i915_gem_object *ctx_obj,
> +			    struct intel_ringbuffer *ringbuf)
> +{
> +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +
> +	if (ctx == ring->default_context) {
> +		intel_unpin_ringbuffer_obj(ringbuf);
> +		i915_gem_object_ggtt_unpin(ctx_obj);
> +	}
> +
> +	if (ctx->engine[ring->id].dirty) {
> +		struct drm_i915_gem_request *req = NULL;
> +
> +		/**
> +		 * If there is already a request pending on
> +		 * this ring, wait for that to complete,
> +		 * otherwise create a switch to idle request
> +		 */
> +		if (list_empty(&ring->request_list)) {
> +			int ret;
> +
> +			ret = i915_gem_request_alloc(
> +					ring,
> +					ring->default_context,
> +					&req);
> +			if (!ret)
> +				i915_add_request(req);
> +			else
> +				DRM_DEBUG("Failed to ensure context saved");
> +		} else {
> +			req = list_first_entry(
> +					&ring->request_list,
> +					typeof(*req), list);
> +		}
> +		if (req)
> +			i915_wait_request(req);
> +	}
> +
> +	WARN_ON(ctx->engine[ring->id].pin_count);
> +	intel_ringbuffer_free(ringbuf);
> +	drm_gem_object_unreference(&ctx_obj->base);
> +}
> +
> +/**
>    * intel_lr_context_free() - free the LRC specific bits of a context
>    * @ctx: the LR context to free.
>    *
> @@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
>   {
>   	int i;
>   
> -	for (i = 0; i < I915_NUM_RINGS; i++) {
> +	for (i = 0; i < I915_NUM_RINGS; ++i) {
>   		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>   
> -		if (ctx_obj) {
> +		if (ctx->engine[i].state) {
>   			struct intel_ringbuffer *ringbuf =
>   					ctx->engine[i].ringbuf;
>   			struct intel_engine_cs *ring = ringbuf->ring;
>   
> -			if (ctx == ring->default_context) {
> -				intel_unpin_ringbuffer_obj(ringbuf);
> -				i915_gem_object_ggtt_unpin(ctx_obj);
> -			}
> -			WARN_ON(ctx->engine[ring->id].pin_count);
> -			intel_ringbuffer_free(ringbuf);
> -			drm_gem_object_unreference(&ctx_obj->base);
> +			intel_lr_context_clean_ring(ctx,
> +						    ring,
> +						    ctx_obj,
> +						    ringbuf);
>   		}
>   	}
>   }
> @@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
>   
>   		ringbuf->head = 0;
>   		ringbuf->tail = 0;
> +
> +		if (ctx->engine[ring->id].dirty) {
> +			__intel_lr_context_unpin(
> +					ring,
> +					ctx);
> +			ctx->engine[ring->id].dirty = false;
> +		}
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 4e60d54..cbc42b8 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
>   			struct intel_context *ctx);
>   uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>   				     struct intel_engine_cs *ring);
> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
>   
>   /* Execlists */
>   int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
On 11/25/2015 07:02 AM, Mika Kuoppala wrote:
> Nick Hoath <nicholas.hoath@intel.com> writes:
>
> > Use the first retired request on a new context to unpin
> > the old context. This ensures that the hw context remains
> > bound until it has been written back to by the GPU.
> > Now that the context is pinned until later in the request/context
> > lifecycle, it no longer needs to be pinned from context_queue to
> > retire_requests.
> >
> > v2: Moved the new pin to cover GuC submission (Alex Dai)
> >     Moved the new unpin to request_retire to fix coverage leak
> > v3: Added switch to default context if freeing a still pinned
> >     context just in case the hw was actually still using it
> > v4: Unwrapped context unpin to allow calling without a request
> > v5: Only create a switch to idle context if the ring doesn't
> >     already have a request pending on it (Alex Dai)
> >     Rename unsaved to dirty to avoid double negatives (Dave Gordon)
> >     Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
> >     Split out per engine cleanup from context_free as it
> >     was getting unwieldy
> >     Corrected locking (Dave Gordon)
> >
> > Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> > Issue: VIZ-4277
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: David Gordon <david.s.gordon@intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Alex Dai <yu.dai@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |   1 +
> >  drivers/gpu/drm/i915/i915_gem.c  |   3 +
> >  drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
> >  drivers/gpu/drm/i915/intel_lrc.h |   1 +
> >  4 files changed, 105 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index d5cf30b..e82717a 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -889,6 +889,7 @@ struct intel_context {
> >  	struct {
> >  		struct drm_i915_gem_object *state;
> >  		struct intel_ringbuffer *ringbuf;
> > +		bool dirty;
> >  		int pin_count;
> >  	} engine[I915_NUM_RINGS];
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index e955499..3829bc1 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> >  {
> >  	trace_i915_gem_request_retire(request);
> >
> > +	if (i915.enable_execlists)
> > +		intel_lr_context_complete_check(request);
> > +
> >  	/* We know the GPU must have read the request to have
> >  	 * sent us the seqno + interrupt, so use the position
> >  	 * of tail of the request to update the last known position
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 06180dc..03d5bca 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
> >  	struct drm_i915_gem_request *cursor;
> >  	int num_elements = 0;
> >
> > -	if (request->ctx != ring->default_context)
> > -		intel_lr_context_pin(request);
> > -
> >  	i915_gem_request_reference(request);
> >
> >  	spin_lock_irq(&ring->execlist_lock);
> > @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> >  	if (intel_ring_stopped(ring))
> >  		return;
> >
> > +	if (request->ctx != ring->default_context) {
> > +		if (!request->ctx->engine[ring->id].dirty) {
> > +			intel_lr_context_pin(request);
> > +			request->ctx->engine[ring->id].dirty = true;
> > +		}
> > +	}
> > +
> >  	if (dev_priv->guc.execbuf_client)
> >  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >  	else
> > @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
> >  	spin_unlock_irq(&ring->execlist_lock);
> >
> >  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> > -		struct intel_context *ctx = req->ctx;
> > -		struct drm_i915_gem_object *ctx_obj =
> > -				ctx->engine[ring->id].state;
> > -
> > -		if (ctx_obj && (ctx != ring->default_context))
> > -			intel_lr_context_unpin(req);
> >  		list_del(&req->execlist_link);
> >  		i915_gem_request_unreference(req);
> >  	}
> > @@ -1058,21 +1056,39 @@ reset_pin_count:
> >  	return ret;
> >  }
> >
> > -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> > +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> > +		struct intel_context *ctx)
> >  {
> > -	struct intel_engine_cs *ring = rq->ring;
> > -	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> > -	struct intel_ringbuffer *ringbuf = rq->ringbuf;
> > -
> > +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> > +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> >  	if (ctx_obj) {
> >  		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> > -		if (--rq->ctx->engine[ring->id].pin_count == 0) {
> > +		if (--ctx->engine[ring->id].pin_count == 0) {
> >  			intel_unpin_ringbuffer_obj(ringbuf);
> >  			i915_gem_object_ggtt_unpin(ctx_obj);
> >  		}
> >  	}
> >  }
> >
> > +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> > +{
> > +	__intel_lr_context_unpin(rq->ring, rq->ctx);
> > +}
> > +
> > +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> > +{
> > +	struct intel_engine_cs *ring = req->ring;
> > +
> > +	if (ring->last_context && ring->last_context != req->ctx &&
> > +			ring->last_context->engine[ring->id].dirty) {
> > +		__intel_lr_context_unpin(
> > +				ring,
> > +				ring->last_context);
> > +		ring->last_context->engine[ring->id].dirty = false;
> > +	}
> > +	ring->last_context = req->ctx;
>
> What ensures that the last_context stays alive?

The wait in intel_lr_context_clean_ring(). If the last_context is the one
to be released, it will wait for the retiring of a request from a different
context. If there is no request in the queue, we will switch to the
default_context. So, ring->last_context could be ring->default_context.
However, the 'dirty' bit of the default_context is never set. Therefore,
it won't be unpinned here anyway.

However, this does make me think about one thing: in
i915_gem_context_fini(), we unreference ring->last_context. We probably
need to make sure the default context is not unreferenced twice.

Thanks,
Alex
> > +}
> > +
> >  static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> >  {
> >  	int ret, i;
> > @@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> >  }
> >
> >  /**
> > + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> > + * @ctx: the LR context being freed.
> > + * @ring: the engine being cleaned
> > + * @ctx_obj: the hw context being unreferenced
> > + * @ringbuf: the ringbuf being freed
> > + *
> > + * Take care of cleaning up the per-engine backing
> > + * objects and the logical ringbuffer.
> > + */
> > +static void
> > +intel_lr_context_clean_ring(struct intel_context *ctx,
> > +			    struct intel_engine_cs *ring,
> > +			    struct drm_i915_gem_object *ctx_obj,
> > +			    struct intel_ringbuffer *ringbuf)
> > +{
> > +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> > +
> > +	if (ctx == ring->default_context) {
> > +		intel_unpin_ringbuffer_obj(ringbuf);
> > +		i915_gem_object_ggtt_unpin(ctx_obj);
> > +	}
> > +
> > +	if (ctx->engine[ring->id].dirty) {
> > +		struct drm_i915_gem_request *req = NULL;
> > +
> > +		/**
> > +		 * If there is already a request pending on
> > +		 * this ring, wait for that to complete,
> > +		 * otherwise create a switch to idle request
> > +		 */
> > +		if (list_empty(&ring->request_list)) {
> > +			int ret;
> > +
> > +			ret = i915_gem_request_alloc(
> > +					ring,
> > +					ring->default_context,
> > +					&req);
> > +			if (!ret)
> > +				i915_add_request(req);
> > +			else
> > +				DRM_DEBUG("Failed to ensure context saved");
> > +		} else {
> > +			req = list_first_entry(
> > +					&ring->request_list,
> > +					typeof(*req), list);
> > +		}
> > +		if (req)
> > +			i915_wait_request(req);
> > +	}
> > +
> > +	WARN_ON(ctx->engine[ring->id].pin_count);
> > +	intel_ringbuffer_free(ringbuf);
> > +	drm_gem_object_unreference(&ctx_obj->base);
> > +}
> > +
> > +/**
> >   * intel_lr_context_free() - free the LRC specific bits of a context
> >   * @ctx: the LR context to free.
> >   *
> > @@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
> >  {
> >  	int i;
> >
> > -	for (i = 0; i < I915_NUM_RINGS; i++) {
> > +	for (i = 0; i < I915_NUM_RINGS; ++i) {
> >  		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> >
> > -		if (ctx_obj) {
> > +		if (ctx->engine[i].state) {
>
> ++i and the above are superfluous?
>
> -Mika
>
> >  			struct intel_ringbuffer *ringbuf =
> >  					ctx->engine[i].ringbuf;
> >  			struct intel_engine_cs *ring = ringbuf->ring;
> >
> > -			if (ctx == ring->default_context) {
> > -				intel_unpin_ringbuffer_obj(ringbuf);
> > -				i915_gem_object_ggtt_unpin(ctx_obj);
> > -			}
> > -			WARN_ON(ctx->engine[ring->id].pin_count);
> > -			intel_ringbuffer_free(ringbuf);
> > -			drm_gem_object_unreference(&ctx_obj->base);
> > +			intel_lr_context_clean_ring(ctx,
> > +						    ring,
> > +						    ctx_obj,
> > +						    ringbuf);
> >  		}
> >  	}
> >  }
> > @@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
> >
> >  		ringbuf->head = 0;
> >  		ringbuf->tail = 0;
> > +
> > +		if (ctx->engine[ring->id].dirty) {
> > +			__intel_lr_context_unpin(
> > +					ring,
> > +					ctx);
> > +			ctx->engine[ring->id].dirty = false;
> > +		}
> >  	}
> >  }
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> > index 4e60d54..cbc42b8 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.h
> > +++ b/drivers/gpu/drm/i915/intel_lrc.h
> > @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
> >  			struct intel_context *ctx);
> >  uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
> >  				     struct intel_engine_cs *ring);
> > +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
> >
> >  /* Execlists */
> >  int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:
> Nick Hoath <nicholas.hoath@intel.com> writes:
> 
> > Use the first retired request on a new context to unpin
> > the old context. This ensures that the hw context remains
> > bound until it has been written back to by the GPU.
> > Now that the context is pinned until later in the request/context
> > lifecycle, it no longer needs to be pinned from context_queue to
> > retire_requests.
> >
> > v2: Moved the new pin to cover GuC submission (Alex Dai)
> >     Moved the new unpin to request_retire to fix coverage leak
> > v3: Added switch to default context if freeing a still pinned
> >     context just in case the hw was actually still using it
> > v4: Unwrapped context unpin to allow calling without a request
> > v5: Only create a switch to idle context if the ring doesn't
> >     already have a request pending on it (Alex Dai)
> >     Rename unsaved to dirty to avoid double negatives (Dave Gordon)
> >     Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
> >     Split out per engine cleanup from context_free as it
> >     was getting unwieldy
> >     Corrected locking (Dave Gordon)
> >
> > Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> > Issue: VIZ-4277
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: David Gordon <david.s.gordon@intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Alex Dai <yu.dai@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |   1 +
> >  drivers/gpu/drm/i915/i915_gem.c  |   3 +
> >  drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
> >  drivers/gpu/drm/i915/intel_lrc.h |   1 +
> >  4 files changed, 105 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index d5cf30b..e82717a 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -889,6 +889,7 @@ struct intel_context {
> >  	struct {
> >  		struct drm_i915_gem_object *state;
> >  		struct intel_ringbuffer *ringbuf;
> > +		bool dirty;
> >  		int pin_count;
> >  	} engine[I915_NUM_RINGS];
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index e955499..3829bc1 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> >  {
> >  	trace_i915_gem_request_retire(request);
> >  
> > +	if (i915.enable_execlists)
> > +		intel_lr_context_complete_check(request);
> > +
> >  	/* We know the GPU must have read the request to have
> >  	 * sent us the seqno + interrupt, so use the position
> >  	 * of tail of the request to update the last known position
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 06180dc..03d5bca 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
> >  	struct drm_i915_gem_request *cursor;
> >  	int num_elements = 0;
> >  
> > -	if (request->ctx != ring->default_context)
> > -		intel_lr_context_pin(request);
> > -
> >  	i915_gem_request_reference(request);
> >  
> >  	spin_lock_irq(&ring->execlist_lock);
> > @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> >  	if (intel_ring_stopped(ring))
> >  		return;
> >  
> > +	if (request->ctx != ring->default_context) {
> > +		if (!request->ctx->engine[ring->id].dirty) {
> > +			intel_lr_context_pin(request);
> > +			request->ctx->engine[ring->id].dirty = true;
> > +		}
> > +	}
> > +
> >  	if (dev_priv->guc.execbuf_client)
> >  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >  	else
> > @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
> >  	spin_unlock_irq(&ring->execlist_lock);
> >  
> >  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> > -		struct intel_context *ctx = req->ctx;
> > -		struct drm_i915_gem_object *ctx_obj =
> > -				ctx->engine[ring->id].state;
> > -
> > -		if (ctx_obj && (ctx != ring->default_context))
> > -			intel_lr_context_unpin(req);
> >  		list_del(&req->execlist_link);
> >  		i915_gem_request_unreference(req);
> >  	}
> > @@ -1058,21 +1056,39 @@ reset_pin_count:
> >  	return ret;
> >  }
> >  
> > -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> > +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> > +		struct intel_context *ctx)
> >  {
> > -	struct intel_engine_cs *ring = rq->ring;
> > -	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> > -	struct intel_ringbuffer *ringbuf = rq->ringbuf;
> > -
> > +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> > +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> >  	if (ctx_obj) {
> >  		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> > -		if (--rq->ctx->engine[ring->id].pin_count == 0) {
> > +		if (--ctx->engine[ring->id].pin_count == 0) {
> >  			intel_unpin_ringbuffer_obj(ringbuf);
> >  			i915_gem_object_ggtt_unpin(ctx_obj);
> >  		}
> >  	}
> >  }
> >  
> > +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> > +{
> > +	__intel_lr_context_unpin(rq->ring, rq->ctx);
> > +}
> > +
> > +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> > +{
> > +	struct intel_engine_cs *ring = req->ring;
> > +
> > +	if (ring->last_context && ring->last_context != req->ctx &&
> > +			ring->last_context->engine[ring->id].dirty) {
> > +		__intel_lr_context_unpin(
> > +				ring,
> > +				ring->last_context);
> > +		ring->last_context->engine[ring->id].dirty = false;
> > +	}
> > +	ring->last_context = req->ctx;
> 
> What ensures that the last_context stays alive?

The other part that's missing compared to the legacy context cycling logic
is move_to_active. Without that the shrinker can't eat into retired
contexts, which renders this exercise fairly moot imo.

The overall idea is that since i915 does dynamic memory management, and
that infects everything in the code that deals with gpu objects, we need
to make sure that everyone is using the same logic to cycle through
active objects. Otherwise the shrinker will be an unmaintainable hydra
with per-feature special cases.
-Daniel

> 
> > +}
> > +
> >  static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> >  {
> >  	int ret, i;
> > @@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> >  }
> >  
> >  /**
> > + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> > + * @ctx: the LR context being freed.
> > + * @ring: the engine being cleaned
> > + * @ctx_obj: the hw context being unreferenced
> > + * @ringbuf: the ringbuf being freed
> > + *
> > + * Take care of cleaning up the per-engine backing
> > + * objects and the logical ringbuffer.
> > + */
> > +static void
> > +intel_lr_context_clean_ring(struct intel_context *ctx,
> > +			    struct intel_engine_cs *ring,
> > +			    struct drm_i915_gem_object *ctx_obj,
> > +			    struct intel_ringbuffer *ringbuf)
> > +{
> > +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> > +
> > +	if (ctx == ring->default_context) {
> > +		intel_unpin_ringbuffer_obj(ringbuf);
> > +		i915_gem_object_ggtt_unpin(ctx_obj);
> > +	}
> > +
> > +	if (ctx->engine[ring->id].dirty) {
> > +		struct drm_i915_gem_request *req = NULL;
> > +
> > +		/**
> > +		 * If there is already a request pending on
> > +		 * this ring, wait for that to complete,
> > +		 * otherwise create a switch to idle request
> > +		 */
> > +		if (list_empty(&ring->request_list)) {
> > +			int ret;
> > +
> > +			ret = i915_gem_request_alloc(
> > +					ring,
> > +					ring->default_context,
> > +					&req);
> > +			if (!ret)
> > +				i915_add_request(req);
> > +			else
> > +				DRM_DEBUG("Failed to ensure context saved");
> > +		} else {
> > +			req = list_first_entry(
> > +					&ring->request_list,
> > +					typeof(*req), list);
> > +		}
> > +		if (req)
> > +			i915_wait_request(req);
> > +	}
> > +
> > +	WARN_ON(ctx->engine[ring->id].pin_count);
> > +	intel_ringbuffer_free(ringbuf);
> > +	drm_gem_object_unreference(&ctx_obj->base);
> > +}
> > +
> > +/**
> >   * intel_lr_context_free() - free the LRC specific bits of a context
> >   * @ctx: the LR context to free.
> >   *
> > @@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
> >  {
> >  	int i;
> >  
> > -	for (i = 0; i < I915_NUM_RINGS; i++) {
> > +	for (i = 0; i < I915_NUM_RINGS; ++i) {
> >  		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> >  
> > -		if (ctx_obj) {
> > +		if (ctx->engine[i].state) {
> 
> ++i and the above are superfluous?
> 
> -Mika
> 
> >  			struct intel_ringbuffer *ringbuf =
> >  					ctx->engine[i].ringbuf;
> >  			struct intel_engine_cs *ring = ringbuf->ring;
> >  
> > -			if (ctx == ring->default_context) {
> > -				intel_unpin_ringbuffer_obj(ringbuf);
> > -				i915_gem_object_ggtt_unpin(ctx_obj);
> > -			}
> > -			WARN_ON(ctx->engine[ring->id].pin_count);
> > -			intel_ringbuffer_free(ringbuf);
> > -			drm_gem_object_unreference(&ctx_obj->base);
> > +			intel_lr_context_clean_ring(ctx,
> > +						    ring,
> > +						    ctx_obj,
> > +						    ringbuf);
> >  		}
> >  	}
> >  }
> > @@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
> >  
> >  		ringbuf->head = 0;
> >  		ringbuf->tail = 0;
> > +
> > +		if (ctx->engine[ring->id].dirty) {
> > +			__intel_lr_context_unpin(
> > +					ring,
> > +					ctx);
> > +			ctx->engine[ring->id].dirty = false;
> > +		}
> >  	}
> >  }
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> > index 4e60d54..cbc42b8 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.h
> > +++ b/drivers/gpu/drm/i915/intel_lrc.h
> > @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
> >  			struct intel_context *ctx);
> >  uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
> >  				     struct intel_engine_cs *ring);
> > +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
> >  
> >  /* Execlists */
> >  int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> > -- 
> > 1.9.1
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 26/11/2015 08:48, Daniel Vetter wrote:
> On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:
>> Nick Hoath <nicholas.hoath@intel.com> writes:
>>
>>> Use the first retired request on a new context to unpin
>>> the old context. This ensures that the hw context remains
>>> bound until it has been written back to by the GPU.
>>> Now that the context is pinned until later in the request/context
>>> lifecycle, it no longer needs to be pinned from context_queue to
>>> retire_requests.
>>>
>>> v2: Moved the new pin to cover GuC submission (Alex Dai)
>>>      Moved the new unpin to request_retire to fix coverage leak
>>> v3: Added switch to default context if freeing a still pinned
>>>      context just in case the hw was actually still using it
>>> v4: Unwrapped context unpin to allow calling without a request
>>> v5: Only create a switch to idle context if the ring doesn't
>>>      already have a request pending on it (Alex Dai)
>>>      Rename unsaved to dirty to avoid double negatives (Dave Gordon)
>>>      Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
>>>      Split out per engine cleanup from context_free as it
>>>      was getting unwieldy
>>>      Corrected locking (Dave Gordon)
>>>
>>> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
>>> Issue: VIZ-4277
>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> Cc: David Gordon <david.s.gordon@intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Alex Dai <yu.dai@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h  |   1 +
>>>   drivers/gpu/drm/i915/i915_gem.c  |   3 +
>>>   drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
>>>   drivers/gpu/drm/i915/intel_lrc.h |   1 +
>>>   4 files changed, 105 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index d5cf30b..e82717a 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -889,6 +889,7 @@ struct intel_context {
>>>   	struct {
>>>   		struct drm_i915_gem_object *state;
>>>   		struct intel_ringbuffer *ringbuf;
>>> +		bool dirty;
>>>   		int pin_count;
>>>   	} engine[I915_NUM_RINGS];
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index e955499..3829bc1 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>>>   {
>>>   	trace_i915_gem_request_retire(request);
>>>
>>> +	if (i915.enable_execlists)
>>> +		intel_lr_context_complete_check(request);
>>> +
>>>   	/* We know the GPU must have read the request to have
>>>   	 * sent us the seqno + interrupt, so use the position
>>>   	 * of tail of the request to update the last known position
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 06180dc..03d5bca 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>>>   	struct drm_i915_gem_request *cursor;
>>>   	int num_elements = 0;
>>>
>>> -	if (request->ctx != ring->default_context)
>>> -		intel_lr_context_pin(request);
>>> -
>>>   	i915_gem_request_reference(request);
>>>
>>>   	spin_lock_irq(&ring->execlist_lock);
>>> @@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>>>   	if (intel_ring_stopped(ring))
>>>   		return;
>>>
>>> +	if (request->ctx != ring->default_context) {
>>> +		if (!request->ctx->engine[ring->id].dirty) {
>>> +			intel_lr_context_pin(request);
>>> +			request->ctx->engine[ring->id].dirty = true;
>>> +		}
>>> +	}
>>> +
>>>   	if (dev_priv->guc.execbuf_client)
>>>   		i915_guc_submit(dev_priv->guc.execbuf_client, request);
>>>   	else
>>> @@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
>>>   	spin_unlock_irq(&ring->execlist_lock);
>>>
>>>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
>>> -		struct intel_context *ctx = req->ctx;
>>> -		struct drm_i915_gem_object *ctx_obj =
>>> -				ctx->engine[ring->id].state;
>>> -
>>> -		if (ctx_obj && (ctx != ring->default_context))
>>> -			intel_lr_context_unpin(req);
>>>   		list_del(&req->execlist_link);
>>>   		i915_gem_request_unreference(req);
>>>   	}
>>> @@ -1058,21 +1056,39 @@ reset_pin_count:
>>>   	return ret;
>>>   }
>>>
>>> -void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>>> +static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
>>> +		struct intel_context *ctx)
>>>   {
>>> -	struct intel_engine_cs *ring = rq->ring;
>>> -	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
>>> -	struct intel_ringbuffer *ringbuf = rq->ringbuf;
>>> -
>>> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>>> +	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>>>   	if (ctx_obj) {
>>>   		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>>> -		if (--rq->ctx->engine[ring->id].pin_count == 0) {
>>> +		if (--ctx->engine[ring->id].pin_count == 0) {
>>>   			intel_unpin_ringbuffer_obj(ringbuf);
>>>   			i915_gem_object_ggtt_unpin(ctx_obj);
>>>   		}
>>>   	}
>>>   }
>>>
>>> +void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>>> +{
>>> +	__intel_lr_context_unpin(rq->ring, rq->ctx);
>>> +}
>>> +
>>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
>>> +{
>>> +	struct intel_engine_cs *ring = req->ring;
>>> +
>>> +	if (ring->last_context && ring->last_context != req->ctx &&
>>> +			ring->last_context->engine[ring->id].dirty) {
>>> +		__intel_lr_context_unpin(
>>> +				ring,
>>> +				ring->last_context);
>>> +		ring->last_context->engine[ring->id].dirty = false;
>>> +	}
>>> +	ring->last_context = req->ctx;
>>
>> What ensures that the last_context stays alive?
>
> The other part that's missing compared to the legacy context cycling logic
> is move_to_active. Without that the shrinker can't eat into retired
> contexts, which renders this exercise fairly moot imo.
>
> The overall idea is that since i915 does dynamic memory management, and
> that infects everything in the code that deals with gpu objects, we need
> to make sure that everyone is using the same logic to cycle through
> active objects. Otherwise the shrinker will be an unmaintainable hydra
> with per-feature special cases.
> -Daniel

This is the first part of the VIZ-4277 fix changeset that I'm working
on. It has been fast-tracked because it is needed to fix a lockup with GuC
submission. If you like, I can update the commit message to explain that?

>
>>
>>> +}
>>> +
>>>   static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>>>   {
>>>   	int ret, i;
>>> @@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>>>   }
>>>
>>>   /**
>>> + * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
>>> + * @ctx: the LR context being freed.
>>> + * @ring: the engine being cleaned
>>> + * @ctx_obj: the hw context being unreferenced
>>> + * @ringbuf: the ringbuf being freed
>>> + *
>>> + * Take care of cleaning up the per-engine backing
>>> + * objects and the logical ringbuffer.
>>> + */
>>> +static void
>>> +intel_lr_context_clean_ring(struct intel_context *ctx,
>>> +			    struct intel_engine_cs *ring,
>>> +			    struct drm_i915_gem_object *ctx_obj,
>>> +			    struct intel_ringbuffer *ringbuf)
>>> +{
>>> +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>>> +
>>> +	if (ctx == ring->default_context) {
>>> +		intel_unpin_ringbuffer_obj(ringbuf);
>>> +		i915_gem_object_ggtt_unpin(ctx_obj);
>>> +	}
>>> +
>>> +	if (ctx->engine[ring->id].dirty) {
>>> +		struct drm_i915_gem_request *req = NULL;
>>> +
>>> +		/**
>>> +		 * If there is already a request pending on
>>> +		 * this ring, wait for that to complete,
>>> +		 * otherwise create a switch to idle request
>>> +		 */
>>> +		if (list_empty(&ring->request_list)) {
>>> +			int ret;
>>> +
>>> +			ret = i915_gem_request_alloc(
>>> +					ring,
>>> +					ring->default_context,
>>> +					&req);
>>> +			if (!ret)
>>> +				i915_add_request(req);
>>> +			else
>>> +				DRM_DEBUG("Failed to ensure context saved");
>>> +		} else {
>>> +			req = list_first_entry(
>>> +					&ring->request_list,
>>> +					typeof(*req), list);
>>> +		}
>>> +		if (req)
>>> +			i915_wait_request(req);
>>> +	}
>>> +
>>> +	WARN_ON(ctx->engine[ring->id].pin_count);
>>> +	intel_ringbuffer_free(ringbuf);
>>> +	drm_gem_object_unreference(&ctx_obj->base);
>>> +}
>>> +
>>> +/**
>>>    * intel_lr_context_free() - free the LRC specific bits of a context
>>>    * @ctx: the LR context to free.
>>>    *
>>> @@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
>>>   {
>>>   	int i;
>>>
>>> -	for (i = 0; i < I915_NUM_RINGS; i++) {
>>> +	for (i = 0; i < I915_NUM_RINGS; ++i) {
>>>   		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>>>
>>> -		if (ctx_obj) {
>>> +		if (ctx->engine[i].state) {
>>
>> ++i and the above are superfluous?
>>
>> -Mika
>>
>>>   			struct intel_ringbuffer *ringbuf =
>>>   					ctx->engine[i].ringbuf;
>>>   			struct intel_engine_cs *ring = ringbuf->ring;
>>>
>>> -			if (ctx == ring->default_context) {
>>> -				intel_unpin_ringbuffer_obj(ringbuf);
>>> -				i915_gem_object_ggtt_unpin(ctx_obj);
>>> -			}
>>> -			WARN_ON(ctx->engine[ring->id].pin_count);
>>> -			intel_ringbuffer_free(ringbuf);
>>> -			drm_gem_object_unreference(&ctx_obj->base);
>>> +			intel_lr_context_clean_ring(ctx,
>>> +						    ring,
>>> +						    ctx_obj,
>>> +						    ringbuf);
>>>   		}
>>>   	}
>>>   }
>>> @@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
>>>
>>>   		ringbuf->head = 0;
>>>   		ringbuf->tail = 0;
>>> +
>>> +		if (ctx->engine[ring->id].dirty) {
>>> +			__intel_lr_context_unpin(
>>> +					ring,
>>> +					ctx);
>>> +			ctx->engine[ring->id].dirty = false;
>>> +		}
>>>   	}
>>>   }
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>>> index 4e60d54..cbc42b8 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
>>>   			struct intel_context *ctx);
>>>   uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>>>   				     struct intel_engine_cs *ring);
>>> +void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
>>>
>>>   /* Execlists */
>>>   int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
>>> --
>>> 1.9.1
>>>
>>> _______________________________________________
>>> Intel-gfx mailing list
>>> Intel-gfx@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
On Thu, Nov 26, 2015 at 09:19:44AM +0000, Nick Hoath wrote:
> On 26/11/2015 08:48, Daniel Vetter wrote:
> >On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:
> >>Nick Hoath <nicholas.hoath@intel.com> writes:
> >>
> >>>Use the first retired request on a new context to unpin
> >>>the old context. This ensures that the hw context remains
> >>>bound until it has been written back to by the GPU.
> >>>Now that the context is pinned until later in the request/context
> >>>lifecycle, it no longer needs to be pinned from context_queue to
> >>>retire_requests.
> >>>
> >>>v2: Moved the new pin to cover GuC submission (Alex Dai)
> >>>     Moved the new unpin to request_retire to fix coverage leak
> >>>v3: Added switch to default context if freeing a still pinned
> >>>     context just in case the hw was actually still using it
> >>>v4: Unwrapped context unpin to allow calling without a request
> >>>v5: Only create a switch to idle context if the ring doesn't
> >>>     already have a request pending on it (Alex Dai)
> >>>     Rename unsaved to dirty to avoid double negatives (Dave Gordon)
> >>>     Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
> >>>     Split out per engine cleanup from context_free as it
> >>>     was getting unwieldy
> >>>     Corrected locking (Dave Gordon)
> >>>
> >>>Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> >>>Issue: VIZ-4277
> >>>Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> >>>Cc: David Gordon <david.s.gordon@intel.com>
> >>>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>>Cc: Alex Dai <yu.dai@intel.com>
> >>>---
> >>>  drivers/gpu/drm/i915/i915_drv.h  |   1 +
> >>>  drivers/gpu/drm/i915/i915_gem.c  |   3 +
> >>>  drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
> >>>  drivers/gpu/drm/i915/intel_lrc.h |   1 +
> >>>  4 files changed, 105 insertions(+), 24 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>>index d5cf30b..e82717a 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>@@ -889,6 +889,7 @@ struct intel_context {
> >>>  	struct {
> >>>  		struct drm_i915_gem_object *state;
> >>>  		struct intel_ringbuffer *ringbuf;
> >>>+		bool dirty;
> >>>  		int pin_count;
> >>>  	} engine[I915_NUM_RINGS];
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>>index e955499..3829bc1 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gem.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>@@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> >>>  {
> >>>  	trace_i915_gem_request_retire(request);
> >>>
> >>>+	if (i915.enable_execlists)
> >>>+		intel_lr_context_complete_check(request);
> >>>+
> >>>  	/* We know the GPU must have read the request to have
> >>>  	 * sent us the seqno + interrupt, so use the position
> >>>  	 * of tail of the request to update the last known position
> >>>diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>>index 06180dc..03d5bca 100644
> >>>--- a/drivers/gpu/drm/i915/intel_lrc.c
> >>>+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>>@@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
> >>>  	struct drm_i915_gem_request *cursor;
> >>>  	int num_elements = 0;
> >>>
> >>>-	if (request->ctx != ring->default_context)
> >>>-		intel_lr_context_pin(request);
> >>>-
> >>>  	i915_gem_request_reference(request);
> >>>
> >>>  	spin_lock_irq(&ring->execlist_lock);
> >>>@@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> >>>  	if (intel_ring_stopped(ring))
> >>>  		return;
> >>>
> >>>+	if (request->ctx != ring->default_context) {
> >>>+		if (!request->ctx->engine[ring->id].dirty) {
> >>>+			intel_lr_context_pin(request);
> >>>+			request->ctx->engine[ring->id].dirty = true;
> >>>+		}
> >>>+	}
> >>>+
> >>>  	if (dev_priv->guc.execbuf_client)
> >>>  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >>>  	else
> >>>@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
> >>>  	spin_unlock_irq(&ring->execlist_lock);
> >>>
> >>>  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> >>>-		struct intel_context *ctx = req->ctx;
> >>>-		struct drm_i915_gem_object *ctx_obj =
> >>>-				ctx->engine[ring->id].state;
> >>>-
> >>>-		if (ctx_obj && (ctx != ring->default_context))
> >>>-			intel_lr_context_unpin(req);
> >>>  		list_del(&req->execlist_link);
> >>>  		i915_gem_request_unreference(req);
> >>>  	}
> >>>@@ -1058,21 +1056,39 @@ reset_pin_count:
> >>>  	return ret;
> >>>  }
> >>>
> >>>-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> >>>+static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> >>>+		struct intel_context *ctx)
> >>>  {
> >>>-	struct intel_engine_cs *ring = rq->ring;
> >>>-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> >>>-	struct intel_ringbuffer *ringbuf = rq->ringbuf;
> >>>-
> >>>+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> >>>+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> >>>  	if (ctx_obj) {
> >>>  		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> >>>-		if (--rq->ctx->engine[ring->id].pin_count == 0) {
> >>>+		if (--ctx->engine[ring->id].pin_count == 0) {
> >>>  			intel_unpin_ringbuffer_obj(ringbuf);
> >>>  			i915_gem_object_ggtt_unpin(ctx_obj);
> >>>  		}
> >>>  	}
> >>>  }
> >>>
> >>>+void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> >>>+{
> >>>+	__intel_lr_context_unpin(rq->ring, rq->ctx);
> >>>+}
> >>>+
> >>>+void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> >>>+{
> >>>+	struct intel_engine_cs *ring = req->ring;
> >>>+
> >>>+	if (ring->last_context && ring->last_context != req->ctx &&
> >>>+			ring->last_context->engine[ring->id].dirty) {
> >>>+		__intel_lr_context_unpin(
> >>>+				ring,
> >>>+				ring->last_context);
> >>>+		ring->last_context->engine[ring->id].dirty = false;
> >>>+	}
> >>>+	ring->last_context = req->ctx;
> >>
> >>What ensures that the last_context stays alive?
> >
> >The other part that's missing compared to the legacy context cycling logic
> >is move_to_active. Without that the shrinker can't eat into retired
> >contexts, which renders this exercise fairly moot imo.
> >
> >The overall idea is that since i915 does dynamic memory management, and
> >that infects everything in the code that deals with gpu objects, we need
> >to make sure that everyone is using the same logic to cycle through
> >active objects. Otherwise the shrinker will be an unmaintainable hydra
> >with per-feature special cases.
> >-Daniel
> 
> This is the first part of the VIZ-4277 fix changeset that I'm working on. It
> has been 'fasttracked' as it is needed to fix a lockup with GuC submission.
> If you like, I can update the commit message to explain that?

Yeah if this is a minimal bugfix for some issue then this definitely
should be mentioned in the commit message, including a precise description
of what and how exactly the current code blows up.
-Daniel

> 
> >
> >>
> >>>+}
> >>>+
> >>>  static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> >>>  {
> >>>  	int ret, i;
> >>>@@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> >>>  }
> >>>
> >>>  /**
> >>>+ * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> >>>+ * @ctx: the LR context being freed.
> >>>+ * @ring: the engine being cleaned
> >>>+ * @ctx_obj: the hw context being unreferenced
> >>>+ * @ringbuf: the ringbuf being freed
> >>>+ *
> >>>+ * Take care of cleaning up the per-engine backing
> >>>+ * objects and the logical ringbuffer.
> >>>+ */
> >>>+static void
> >>>+intel_lr_context_clean_ring(struct intel_context *ctx,
> >>>+			    struct intel_engine_cs *ring,
> >>>+			    struct drm_i915_gem_object *ctx_obj,
> >>>+			    struct intel_ringbuffer *ringbuf)
> >>>+{
> >>>+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> >>>+
> >>>+	if (ctx == ring->default_context) {
> >>>+		intel_unpin_ringbuffer_obj(ringbuf);
> >>>+		i915_gem_object_ggtt_unpin(ctx_obj);
> >>>+	}
> >>>+
> >>>+	if (ctx->engine[ring->id].dirty) {
> >>>+		struct drm_i915_gem_request *req = NULL;
> >>>+
> >>>+		/**
> >>>+		 * If there is already a request pending on
> >>>+		 * this ring, wait for that to complete,
> >>>+		 * otherwise create a switch to idle request
> >>>+		 */
> >>>+		if (list_empty(&ring->request_list)) {
> >>>+			int ret;
> >>>+
> >>>+			ret = i915_gem_request_alloc(
> >>>+					ring,
> >>>+					ring->default_context,
> >>>+					&req);
> >>>+			if (!ret)
> >>>+				i915_add_request(req);
> >>>+			else
> >>>+				DRM_DEBUG("Failed to ensure context saved");
> >>>+		} else {
> >>>+			req = list_first_entry(
> >>>+					&ring->request_list,
> >>>+					typeof(*req), list);
> >>>+		}
> >>>+		if (req)
> >>>+			i915_wait_request(req);
> >>>+	}
> >>>+
> >>>+	WARN_ON(ctx->engine[ring->id].pin_count);
> >>>+	intel_ringbuffer_free(ringbuf);
> >>>+	drm_gem_object_unreference(&ctx_obj->base);
> >>>+}
> >>>+
> >>>+/**
> >>>   * intel_lr_context_free() - free the LRC specific bits of a context
> >>>   * @ctx: the LR context to free.
> >>>   *
> >>>@@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
> >>>  {
> >>>  	int i;
> >>>
> >>>-	for (i = 0; i < I915_NUM_RINGS; i++) {
> >>>+	for (i = 0; i < I915_NUM_RINGS; ++i) {
> >>>  		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> >>>
> >>>-		if (ctx_obj) {
> >>>+		if (ctx->engine[i].state) {
> >>
> >>++i and the above are superflouous?
> >>
> >>-Mika
> >>
> >>>  			struct intel_ringbuffer *ringbuf =
> >>>  					ctx->engine[i].ringbuf;
> >>>  			struct intel_engine_cs *ring = ringbuf->ring;
> >>>
> >>>-			if (ctx == ring->default_context) {
> >>>-				intel_unpin_ringbuffer_obj(ringbuf);
> >>>-				i915_gem_object_ggtt_unpin(ctx_obj);
> >>>-			}
> >>>-			WARN_ON(ctx->engine[ring->id].pin_count);
> >>>-			intel_ringbuffer_free(ringbuf);
> >>>-			drm_gem_object_unreference(&ctx_obj->base);
> >>>+			intel_lr_context_clean_ring(ctx,
> >>>+						    ring,
> >>>+						    ctx_obj,
> >>>+						    ringbuf);
> >>>  		}
> >>>  	}
> >>>  }
> >>>@@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
> >>>
> >>>  		ringbuf->head = 0;
> >>>  		ringbuf->tail = 0;
> >>>+
> >>>+		if (ctx->engine[ring->id].dirty) {
> >>>+			__intel_lr_context_unpin(
> >>>+					ring,
> >>>+					ctx);
> >>>+			ctx->engine[ring->id].dirty = false;
> >>>+		}
> >>>  	}
> >>>  }
> >>>diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> >>>index 4e60d54..cbc42b8 100644
> >>>--- a/drivers/gpu/drm/i915/intel_lrc.h
> >>>+++ b/drivers/gpu/drm/i915/intel_lrc.h
> >>>@@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
> >>>  			struct intel_context *ctx);
> >>>  uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
> >>>  				     struct intel_engine_cs *ring);
> >>>+void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
> >>>
> >>>  /* Execlists */
> >>>  int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> >>>--
> >>>1.9.1
> >>>
> >>>_______________________________________________
> >>>Intel-gfx mailing list
> >>>Intel-gfx@lists.freedesktop.org
> >>>http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >>_______________________________________________
> >>Intel-gfx mailing list
> >>Intel-gfx@lists.freedesktop.org
> >>http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
>