drm/i915: Workaround to avoid lite restore with HEAD==TAIL

Submitted by Michel Thierry on April 10, 2015, 9:32 a.m.

Details

Message ID 1428658353-20692-1-git-send-email-michel.thierry@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Michel Thierry April 10, 2015, 9:32 a.m.
WaIdleLiteRestore is an execlists-only workaround, and requires the driver
to ensure that any context always has HEAD!=TAIL when attempting lite
restore.

Add two extra MI_NOOP instructions at the end of each request, but keep
the requests tail pointing before the MI_NOOPs.

If we submit a context to the ELSP which has previously been submitted,
move the tail pointer past the MI_NOOPs. This ensures HEAD!=TAIL.

Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c  | 11 +++++++++++
 drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ca522c9..a44070a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2382,6 +2382,17 @@  int __i915_add_request(struct intel_engine_cs *ring,
 	request->head = request_start;
 	request->tail = intel_ring_get_tail(ringbuf);
 
+	if (i915.enable_execlists &&
+	    (IS_GEN8(ring->dev) || IS_GEN9(ring->dev))) {
+		/*
+		 * Here we add two extra NOOPs as padding to avoid
+		 * lite restore of a context with HEAD==TAIL.
+		 */
+		intel_logical_ring_emit(ringbuf, MI_NOOP);
+		intel_logical_ring_emit(ringbuf, MI_NOOP);
+		intel_logical_ring_advance(ringbuf);
+	}
+
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
 	 * request is retired will the the batch_obj be moved onto the
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f18f96c..8acedca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -413,6 +413,26 @@  static void execlists_context_unqueue(struct intel_engine_cs *ring)
 		}
 	}
 
+	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
+		/*
+		 * WaIdleLiteRestore: make sure we never cause a lite
+		 * restore with HEAD==TAIL
+		 */
+		if (req0 && req0->elsp_submitted == 1) {
+			/*
+			 * Consume the buffer NOOPs to ensure HEAD != TAIL when
+			 * submitting. elsp_submitted can only be >1 after
+			 * reset, in which case we don't need the workaround as
+			 * a lite restore will not occur.
+			 */
+			struct intel_ringbuffer *ringbuf;
+
+			ringbuf = req0->ctx->engine[ring->id].ringbuf;
+			req0->tail += 8;
+			req0->tail &= ringbuf->size - 1;
+		}
+	}
+
 	WARN_ON(req1 && req1->elsp_submitted);
 
 	execlists_submit_contexts(ring, req0->ctx, req0->tail,
@@ -829,6 +849,15 @@  static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
+	if (IS_GEN8(dev) || IS_GEN9(dev)) {
+		/*
+		 * Reserve space for 2 NOOPs at the end of each request to be
+		 * used as a workaround for not being allowed to do lite
+		 * restore with HEAD==TAIL.
+		 */
+		num_dwords += 2;
+	}
+
 	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
 				   dev_priv->mm.interruptible);
 	if (ret)

Comments

On Fri, Apr 10, 2015 at 10:32:33AM +0100, Michel Thierry wrote:
> WaIdleLiteRestore is an execlists-only workaround, and requires the driver
> to ensure that any context always has HEAD!=TAIL when attempting lite
> restore.
> 
> Add two extra MI_NOOP instructions at the end of each request, but keep
> the requests tail pointing before the MI_NOOPs.
> 
> If we submit a context to the ELSP which has previously been submitted,
> move the tail pointer past the MI_NOOPs. This ensures HEAD!=TAIL.
> 
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>

Is there an igt to provoke this? I'm thinking of submitting tiny noop
batches on 2 alternating contexts. That should be able to make this go
boom pretty reliable, we only need a slight delay in processing the
execlist completion interrupt.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c  | 11 +++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++++++
>  2 files changed, 40 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ca522c9..a44070a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2382,6 +2382,17 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  	request->head = request_start;
>  	request->tail = intel_ring_get_tail(ringbuf);
>  
> +	if (i915.enable_execlists &&
> +	    (IS_GEN8(ring->dev) || IS_GEN9(ring->dev))) {
> +		/*
> +		 * Here we add two extra NOOPs as padding to avoid
> +		 * lite restore of a context with HEAD==TAIL.
> +		 */
> +		intel_logical_ring_emit(ringbuf, MI_NOOP);
> +		intel_logical_ring_emit(ringbuf, MI_NOOP);
> +		intel_logical_ring_advance(ringbuf);
> +	}
> +
>  	/* Whilst this request exists, batch_obj will be on the
>  	 * active_list, and so will hold the active reference. Only when this
>  	 * request is retired will the the batch_obj be moved onto the
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f18f96c..8acedca 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -413,6 +413,26 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>  		}
>  	}
>  
> +	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
> +		/*
> +		 * WaIdleLiteRestore: make sure we never cause a lite
> +		 * restore with HEAD==TAIL
> +		 */
> +		if (req0 && req0->elsp_submitted == 1) {
> +			/*
> +			 * Consume the buffer NOOPs to ensure HEAD != TAIL when
> +			 * submitting. elsp_submitted can only be >1 after
> +			 * reset, in which case we don't need the workaround as
> +			 * a lite restore will not occur.
> +			 */
> +			struct intel_ringbuffer *ringbuf;
> +
> +			ringbuf = req0->ctx->engine[ring->id].ringbuf;
> +			req0->tail += 8;
> +			req0->tail &= ringbuf->size - 1;
> +		}
> +	}
> +
>  	WARN_ON(req1 && req1->elsp_submitted);
>  
>  	execlists_submit_contexts(ring, req0->ctx, req0->tail,
> @@ -829,6 +849,15 @@ static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	int ret;
>  
> +	if (IS_GEN8(dev) || IS_GEN9(dev)) {
> +		/*
> +		 * Reserve space for 2 NOOPs at the end of each request to be
> +		 * used as a workaround for not being allowed to do lite
> +		 * restore with HEAD==TAIL.
> +		 */
> +		num_dwords += 2;
> +	}
> +
>  	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
>  				   dev_priv->mm.interruptible);
>  	if (ret)
> -- 
> 2.1.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Fri, Apr 10, 2015 at 12:01:08PM +0200, Daniel Vetter wrote:
> On Fri, Apr 10, 2015 at 10:32:33AM +0100, Michel Thierry wrote:
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c  | 11 +++++++++++
> >  drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++++++
> >  2 files changed, 40 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index ca522c9..a44070a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2382,6 +2382,17 @@ int __i915_add_request(struct intel_engine_cs *ring,
> >  	request->head = request_start;
> >  	request->tail = intel_ring_get_tail(ringbuf);
> >  
> > +	if (i915.enable_execlists &&
> > +	    (IS_GEN8(ring->dev) || IS_GEN9(ring->dev))) {
> > +		/*
> > +		 * Here we add two extra NOOPs as padding to avoid
> > +		 * lite restore of a context with HEAD==TAIL.
> > +		 */
> > +		intel_logical_ring_emit(ringbuf, MI_NOOP);
> > +		intel_logical_ring_emit(ringbuf, MI_NOOP);
> > +		intel_logical_ring_advance(ringbuf);
> > +	}

Move this to gen8_emit_request() and remove the permanent overallocation
in ring_begin. The tail pointer adjustment can then also be localised.
-Chris
On 4/10/2015 11:01 AM, Daniel Vetter wrote:
> On Fri, Apr 10, 2015 at 10:32:33AM +0100, Michel Thierry wrote:
>> WaIdleLiteRestore is an execlists-only workaround, and requires the driver
>> to ensure that any context always has HEAD!=TAIL when attempting lite
>> restore.
>>
>> Add two extra MI_NOOP instructions at the end of each request, but keep
>> the requests tail pointing before the MI_NOOPs.
>>
>> If we submit a context to the ELSP which has previously been submitted,
>> move the tail pointer past the MI_NOOPs. This ensures HEAD!=TAIL.
>>
>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> Is there an igt to provoke this? I'm thinking of submitting tiny noop
> batches on 2 alternating contexts. That should be able to make this go
> boom pretty reliable, we only need a slight delay in processing the
> execlist completion interrupt.
> -Daniel
I've seen it when running multiple contexts in parallel or when the 
wa-batchbuffer was enabled.
I'll write an igt for it, we only need several small batches queued to 
trigger the light restore.

--Michel

>> ---
>>   drivers/gpu/drm/i915/i915_gem.c  | 11 +++++++++++
>>   drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++++++
>>   2 files changed, 40 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index ca522c9..a44070a 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2382,6 +2382,17 @@ int __i915_add_request(struct intel_engine_cs *ring,
>>   	request->head = request_start;
>>   	request->tail = intel_ring_get_tail(ringbuf);
>>   
>> +	if (i915.enable_execlists &&
>> +	    (IS_GEN8(ring->dev) || IS_GEN9(ring->dev))) {
>> +		/*
>> +		 * Here we add two extra NOOPs as padding to avoid
>> +		 * lite restore of a context with HEAD==TAIL.
>> +		 */
>> +		intel_logical_ring_emit(ringbuf, MI_NOOP);
>> +		intel_logical_ring_emit(ringbuf, MI_NOOP);
>> +		intel_logical_ring_advance(ringbuf);
>> +	}
>> +
>>   	/* Whilst this request exists, batch_obj will be on the
>>   	 * active_list, and so will hold the active reference. Only when this
>>   	 * request is retired will the the batch_obj be moved onto the
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index f18f96c..8acedca 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -413,6 +413,26 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>>   		}
>>   	}
>>   
>> +	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
>> +		/*
>> +		 * WaIdleLiteRestore: make sure we never cause a lite
>> +		 * restore with HEAD==TAIL
>> +		 */
>> +		if (req0 && req0->elsp_submitted == 1) {
>> +			/*
>> +			 * Consume the buffer NOOPs to ensure HEAD != TAIL when
>> +			 * submitting. elsp_submitted can only be >1 after
>> +			 * reset, in which case we don't need the workaround as
>> +			 * a lite restore will not occur.
>> +			 */
>> +			struct intel_ringbuffer *ringbuf;
>> +
>> +			ringbuf = req0->ctx->engine[ring->id].ringbuf;
>> +			req0->tail += 8;
>> +			req0->tail &= ringbuf->size - 1;
>> +		}
>> +	}
>> +
>>   	WARN_ON(req1 && req1->elsp_submitted);
>>   
>>   	execlists_submit_contexts(ring, req0->ctx, req0->tail,
>> @@ -829,6 +849,15 @@ static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>   	int ret;
>>   
>> +	if (IS_GEN8(dev) || IS_GEN9(dev)) {
>> +		/*
>> +		 * Reserve space for 2 NOOPs at the end of each request to be
>> +		 * used as a workaround for not being allowed to do lite
>> +		 * restore with HEAD==TAIL.
>> +		 */
>> +		num_dwords += 2;
>> +	}
>> +
>>   	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
>>   				   dev_priv->mm.interruptible);
>>   	if (ret)
>> -- 
>> 2.1.1
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6171
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                 -3              270/270              267/270
ILK                                  303/303              303/303
SNB                 -21              304/304              283/304
IVB                                  337/337              337/337
BYT                 -1              287/287              286/287
HSW                                  361/361              361/361
BDW                                  309/309              309/309
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*PNV  igt@gem_fence_thrash@bo-write-verify-none      PASS(2)      FAIL(1)PASS(1)
*PNV  igt@gem_fence_thrash@bo-write-verify-threaded-none      PASS(3)      CRASH(1)PASS(1)
 PNV  igt@gem_userptr_blits@coherency-sync      CRASH(5)PASS(3)      CRASH(1)PASS(1)
 SNB  igt@kms_cursor_crc@cursor-size-change      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@kms_mmio_vs_cs_flip@setcrtc_vs_cs_flip      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@kms_mmio_vs_cs_flip@setplane_vs_cs_flip      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@kms_rotation_crc@primary-rotation      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@kms_rotation_crc@sprite-rotation      NSPT(3)PASS(3)      NSPT(2)
 SNB  igt@pm_rpm@cursor      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@cursor-dpms      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@dpms-mode-unset-non-lpsp      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@dpms-non-lpsp      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@drm-resources-equal      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@fences      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@fences-dpms      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@gem-execbuf      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@gem-mmap-cpu      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@gem-mmap-gtt      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@gem-pread      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@i2c      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@modeset-non-lpsp      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@modeset-non-lpsp-stress-no-wait      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@pci-d3-state      NSPT(3)PASS(1)      NSPT(2)
 SNB  igt@pm_rpm@rte      NSPT(3)PASS(1)      NSPT(2)
*BYT  igt@gem_exec_bad_domains@conflicting-write-domain      PASS(3)      FAIL(1)PASS(1)
Note: You need to pay more attention to line start with '*'