[v2] drm/i915: Differentiate between sw write location into ring and last hw read

Submitted by Chris Wilson on April 25, 2017, 1 p.m.

Details

Message ID 20170425130049.26147-1-chris@chris-wilson.co.uk
State Accepted
Commit e6ba9992de6c63fe86c028b4876338e1cb7dac34
Headers show
Series "Series without cover letter" ( rev: 2 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson April 25, 2017, 1 p.m.
We need to keep track of the last location we ask the hw to read up to
(RING_TAIL) separately from our last write location into the ring, so
that in the event of a GPU reset we do not tell the HW to proceed into
a partially written request (which can happen if that request is waiting
for an external signal before being executed).

v2: Refactor intel_ring_reset() (Mika)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144
Testcase: igt/gem_exec_fence/await-hang
Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Fixes: d55ac5bf97c6 ("drm/i915: Defer transfer onto execution timeline to actual hw submission")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c    | 16 +++++++++---
 drivers/gpu/drm/i915/i915_guc_submission.c |  4 +--
 drivers/gpu/drm/i915/intel_lrc.c           |  6 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 41 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 19 ++++++++++++--
 5 files changed, 59 insertions(+), 27 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e2ec42b2bf24..126cd13abf54 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -283,10 +283,18 @@  static void advance_ring(struct drm_i915_gem_request *request)
 	 * Note this requires that we are always called in request
 	 * completion order.
 	 */
-	if (list_is_last(&request->ring_link, &request->ring->request_list))
-		tail = request->ring->tail;
-	else
+	if (list_is_last(&request->ring_link, &request->ring->request_list)) {
+		/* We may race here with execlists resubmitting this request
+		 * as we retire it. The resubmission will move the ring->tail
+		 * forwards (to request->wa_tail). We either read the
+		 * current value that was written to hw, or the value that
+		 * is just about to be. Either works, if we miss the last two
+		 * noops - they are safe to be replayed on a reset.
+		 */
+		tail = READ_ONCE(request->ring->tail);
+	} else {
 		tail = request->postfix;
+	}
 	list_del(&request->ring_link);
 
 	request->ring->head = tail;
@@ -651,7 +659,7 @@  i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	req->head = req->ring->tail;
+	req->head = req->ring->emit;
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..ab5140ba108d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@  static void guc_wq_item_append(struct i915_guc_client *client,
 	GEM_BUG_ON(freespace < wqi_size);
 
 	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->tail;
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	tail >>= 3;
+	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7df278fe492e..13057a37390b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,8 +326,7 @@  static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
+	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
@@ -2054,8 +2053,7 @@  void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);
 
-			ce->ring->head = ce->ring->tail = 0;
-			intel_ring_update_space(ce->ring);
+			intel_ring_reset(ce->ring, 0);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 32afac6c754f..227dfcf1764e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,7 +49,7 @@  static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }
 
 static int
@@ -774,8 +774,8 @@  static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
 	i915_gem_request_submit(request);
 
-	assert_ring_tail_valid(request->ring, request->tail);
-	I915_WRITE_TAIL(request->engine, request->tail);
+	I915_WRITE_TAIL(request->engine,
+			intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1319,11 +1319,23 @@  int intel_ring_pin(struct intel_ring *ring,
 	return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	GEM_BUG_ON(!list_empty(&ring->request_list));
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);
 
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->tail);
+
 	if (i915_vma_is_map_and_fenceable(ring->vma))
 		i915_vma_unpin_iomap(ring->vma);
 	else
@@ -1555,8 +1567,9 @@  void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/* Restart from the beginning of the rings for convenience */
 	for_each_engine(engine, dev_priv, id)
-		engine->buffer->head = engine->buffer->tail;
+		intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1609,7 +1622,7 @@  static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 		unsigned space;
 
 		/* Would completion of this request free enough space? */
-		space = __intel_ring_space(target->postfix, ring->tail,
+		space = __intel_ring_space(target->postfix, ring->emit,
 					   ring->size);
 		if (space >= bytes)
 			break;
@@ -1634,8 +1647,8 @@  static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->tail;
-	int remain_usable = ring->effective_size - ring->tail;
+	int remain_actual = ring->size - ring->emit;
+	int remain_usable = ring->effective_size - ring->emit;
 	int bytes = num_dwords * sizeof(u32);
 	int total_bytes, wait_bytes;
 	bool need_wrap = false;
@@ -1671,17 +1684,17 @@  u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 
 	if (unlikely(need_wrap)) {
 		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->tail, 0, remain_actual);
-		ring->tail = 0;
+		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		ring->emit = 0;
 		ring->space -= remain_actual;
 	}
 
-	GEM_BUG_ON(ring->tail > ring->size - bytes);
-	cs = ring->vaddr + ring->tail;
-	ring->tail += bytes;
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	cs = ring->vaddr + ring->emit;
+	ring->emit += bytes;
 	ring->space -= bytes;
 	GEM_BUG_ON(ring->space < 0);
 
@@ -1692,7 +1705,7 @@  u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
 	int num_dwords =
-		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+		(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	u32 *cs;
 
 	if (num_dwords == 0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 00d36aa4e26d..96710b616efb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -143,6 +143,7 @@  struct intel_ring {
 
 	u32 head;
 	u32 tail;
+	u32 emit;
 
 	int space;
 	int size;
@@ -494,6 +495,8 @@  intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 int intel_ring_pin(struct intel_ring *ring,
 		   struct drm_i915_private *i915,
 		   unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -517,7 +520,7 @@  intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 	 * reserved for the command packet (i.e. the value passed to
 	 * intel_ring_begin()).
 	 */
-	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -546,7 +549,19 @@  assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 	GEM_BUG_ON(tail >= ring->size);
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly order, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_gem_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 

Comments

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We need to keep track of the last location we ask the hw to read up to
> (RING_TAIL) separately from our last write location into the ring, so
> that in the event of a GPU reset we do not tell the HW to proceed into
> a partially written request (which can happen if that request is waiting
> for an external signal before being executed).
>
> v2: Refactor intel_ring_reset() (Mika)
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144
> Testcase: igt/gem_exec_fence/await-hang
> Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> Fixes: d55ac5bf97c6 ("drm/i915: Defer transfer onto execution timeline to actual hw submission")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_request.c    | 16 +++++++++---
>  drivers/gpu/drm/i915/i915_guc_submission.c |  4 +--
>  drivers/gpu/drm/i915/intel_lrc.c           |  6 ++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c    | 41 ++++++++++++++++++++----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 19 ++++++++++++--
>  5 files changed, 59 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index e2ec42b2bf24..126cd13abf54 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -283,10 +283,18 @@ static void advance_ring(struct drm_i915_gem_request *request)
>  	 * Note this requires that we are always called in request
>  	 * completion order.
>  	 */
> -	if (list_is_last(&request->ring_link, &request->ring->request_list))
> -		tail = request->ring->tail;
> -	else
> +	if (list_is_last(&request->ring_link, &request->ring->request_list)) {
> +		/* We may race here with execlists resubmitting this request
> +		 * as we retire it. The resubmission will move the ring->tail
> +		 * forwards (to request->wa_tail). We either read the
> +		 * current value that was written to hw, or the value that
> +		 * is just about to be. Either works, if we miss the last two
> +		 * noops - they are safe to be replayed on a reset.
> +		 */
> +		tail = READ_ONCE(request->ring->tail);
> +	} else {
>  		tail = request->postfix;
> +	}
>  	list_del(&request->ring_link);
>  
>  	request->ring->head = tail;
> @@ -651,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	 * GPU processing the request, we never over-estimate the
>  	 * position of the head.
>  	 */
> -	req->head = req->ring->tail;
> +	req->head = req->ring->emit;
>  
>  	/* Check that we didn't interrupt ourselves with a new request */
>  	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 1642fff9cf13..ab5140ba108d 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
>  	GEM_BUG_ON(freespace < wqi_size);
>  
>  	/* The GuC firmware wants the tail index in QWords, not bytes */
> -	tail = rq->tail;
> -	assert_ring_tail_valid(rq->ring, rq->tail);
> -	tail >>= 3;
> +	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
>  	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
>  
>  	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 7df278fe492e..13057a37390b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
>  		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
>  	u32 *reg_state = ce->lrc_reg_state;
>  
> -	assert_ring_tail_valid(rq->ring, rq->tail);
> -	reg_state[CTX_RING_TAIL+1] = rq->tail;
> +	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
>  
>  	/* True 32b PPGTT with dynamic page allocation: update PDP
>  	 * registers and point the unallocated PDPs to scratch page.
> @@ -2054,8 +2053,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>  			ce->state->obj->mm.dirty = true;
>  			i915_gem_object_unpin_map(ce->state->obj);
>  
> -			ce->ring->head = ce->ring->tail = 0;
> -			intel_ring_update_space(ce->ring);
> +			intel_ring_reset(ce->ring, 0);
>  		}
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 32afac6c754f..227dfcf1764e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
>  
>  void intel_ring_update_space(struct intel_ring *ring)
>  {
> -	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
> +	ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
>  }
>  
>  static int
> @@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
>  
>  	i915_gem_request_submit(request);
>  
> -	assert_ring_tail_valid(request->ring, request->tail);
> -	I915_WRITE_TAIL(request->engine, request->tail);
> +	I915_WRITE_TAIL(request->engine,
> +			intel_ring_set_tail(request->ring, request->tail));
>  }
>  
>  static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
> @@ -1319,11 +1319,23 @@ int intel_ring_pin(struct intel_ring *ring,
>  	return PTR_ERR(addr);
>  }
>  
> +void intel_ring_reset(struct intel_ring *ring, u32 tail)
> +{
> +	GEM_BUG_ON(!list_empty(&ring->request_list));
> +	ring->tail = tail;
> +	ring->head = tail;
> +	ring->emit = tail;
> +	intel_ring_update_space(ring);
> +}
> +
>  void intel_ring_unpin(struct intel_ring *ring)
>  {
>  	GEM_BUG_ON(!ring->vma);
>  	GEM_BUG_ON(!ring->vaddr);
>  
> +	/* Discard any unused bytes beyond that submitted to hw. */
> +	intel_ring_reset(ring, ring->tail);
> +
>  	if (i915_vma_is_map_and_fenceable(ring->vma))
>  		i915_vma_unpin_iomap(ring->vma);
>  	else
> @@ -1555,8 +1567,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>  
> +	/* Restart from the beginning of the rings for convenience */
>  	for_each_engine(engine, dev_priv, id)
> -		engine->buffer->head = engine->buffer->tail;
> +		intel_ring_reset(engine->buffer, 0);
>  }
>  
>  static int ring_request_alloc(struct drm_i915_gem_request *request)
> @@ -1609,7 +1622,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>  		unsigned space;
>  
>  		/* Would completion of this request free enough space? */
> -		space = __intel_ring_space(target->postfix, ring->tail,
> +		space = __intel_ring_space(target->postfix, ring->emit,
>  					   ring->size);
>  		if (space >= bytes)
>  			break;
> @@ -1634,8 +1647,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>  u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  {
>  	struct intel_ring *ring = req->ring;
> -	int remain_actual = ring->size - ring->tail;
> -	int remain_usable = ring->effective_size - ring->tail;
> +	int remain_actual = ring->size - ring->emit;
> +	int remain_usable = ring->effective_size - ring->emit;
>  	int bytes = num_dwords * sizeof(u32);
>  	int total_bytes, wait_bytes;
>  	bool need_wrap = false;
> @@ -1671,17 +1684,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  
>  	if (unlikely(need_wrap)) {
>  		GEM_BUG_ON(remain_actual > ring->space);
> -		GEM_BUG_ON(ring->tail + remain_actual > ring->size);
> +		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
>  
>  		/* Fill the tail with MI_NOOP */
> -		memset(ring->vaddr + ring->tail, 0, remain_actual);
> -		ring->tail = 0;
> +		memset(ring->vaddr + ring->emit, 0, remain_actual);
> +		ring->emit = 0;
>  		ring->space -= remain_actual;
>  	}
>  
> -	GEM_BUG_ON(ring->tail > ring->size - bytes);
> -	cs = ring->vaddr + ring->tail;
> -	ring->tail += bytes;
> +	GEM_BUG_ON(ring->emit > ring->size - bytes);
> +	cs = ring->vaddr + ring->emit;
> +	ring->emit += bytes;
>  	ring->space -= bytes;
>  	GEM_BUG_ON(ring->space < 0);
>  
> @@ -1692,7 +1705,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
>  {
>  	int num_dwords =
> -		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
> +		(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
>  	u32 *cs;
>  
>  	if (num_dwords == 0)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 00d36aa4e26d..96710b616efb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -143,6 +143,7 @@ struct intel_ring {
>  
>  	u32 head;
>  	u32 tail;
> +	u32 emit;
>  
>  	int space;
>  	int size;
> @@ -494,6 +495,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size);
>  int intel_ring_pin(struct intel_ring *ring,
>  		   struct drm_i915_private *i915,
>  		   unsigned int offset_bias);
> +void intel_ring_reset(struct intel_ring *ring, u32 tail);
> +void intel_ring_update_space(struct intel_ring *ring);
>  void intel_ring_unpin(struct intel_ring *ring);
>  void intel_ring_free(struct intel_ring *ring);
>  
> @@ -517,7 +520,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
>  	 * reserved for the command packet (i.e. the value passed to
>  	 * intel_ring_begin()).
>  	 */
> -	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
> +	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
>  }
>  
>  static inline u32
> @@ -546,7 +549,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
>  	GEM_BUG_ON(tail >= ring->size);
>  }
>  
> -void intel_ring_update_space(struct intel_ring *ring);
> +static inline unsigned int
> +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
> +{
> +	/* Whilst writes to the tail are strictly order, there is no
> +	 * serialisation between readers and the writers. The tail may be
> +	 * read by i915_gem_request_retire() just as it is being updated
> +	 * by execlists, as although the breadcrumb is complete, the context
> +	 * switch hasn't been seen.
> +	 */
> +	assert_ring_tail_valid(ring, tail);
> +	ring->tail = tail;
> +	return tail;
> +}
>  
>  void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
>  
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Tue, Apr 25, 2017 at 04:50:15PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > We need to keep track of the last location we ask the hw to read up to
> > (RING_TAIL) separately from our last write location into the ring, so
> > that in the event of a GPU reset we do not tell the HW to proceed into
> > a partially written request (which can happen if that request is waiting
> > for an external signal before being executed).
> >
> > v2: Refactor intel_ring_reset() (Mika)
> >
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144
> > Testcase: igt/gem_exec_fence/await-hang
> > Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> > Fixes: d55ac5bf97c6 ("drm/i915: Defer transfer onto execution timeline to actual hw submission")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> 
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

Pushed the first 2 to get the bug fix. That just leaves the
micro-optimisation of intel_ring_space.
-Chris