[4/4] drm/i915: Detect small loops in hangcheck

Submitted by Mika Kuoppala on Nov. 30, 2015, 4:53 p.m.

Details

Message ID 1448902389-12477-4-git-send-email-mika.kuoppala@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Mika Kuoppala Nov. 30, 2015, 4:53 p.m.
If there is very small loop in batch, the chances are quite high
that we sample the same head value twice in a row leading the
hangcheck score to be incremented with hung engine status, instead of
active loop which would have been more correct.

Try to resample the actual head few times to detect small loops
instead of jumping into conclusions.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c1d1400..7c1168b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2914,12 +2914,8 @@  static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 }
 
 static enum intel_ring_hangcheck_action
-head_stuck(struct intel_engine_cs *ring, u64 acthd)
+head_action(struct intel_engine_cs *ring, u64 acthd)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 head;
-
 	if (acthd != ring->hangcheck.acthd) {
 		if (acthd > ring->hangcheck.max_acthd) {
 			ring->hangcheck.max_acthd = acthd;
@@ -2929,6 +2925,21 @@  head_stuck(struct intel_engine_cs *ring, u64 acthd)
 		return HANGCHECK_ACTIVE_LOOP;
 	}
 
+	return HANGCHECK_HUNG;
+}
+
+static enum intel_ring_hangcheck_action
+head_stuck(struct intel_engine_cs *ring, u64 acthd)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	static enum intel_ring_hangcheck_action ha;
+	u32 head, retries = 5;
+
+	ha = head_action(ring, acthd);
+	if (ha != HANGCHECK_HUNG)
+		return ha;
+
 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
 
 	/* Some operations, like pipe flush, can take a long time.
@@ -2938,6 +2949,17 @@  head_stuck(struct intel_engine_cs *ring, u64 acthd)
 	if (lower_32_bits(acthd) == head)
 		return HANGCHECK_ACTIVE_LOOP;
 
+	do {
+		msleep(20);
+
+		ring->hangcheck.acthd = acthd;
+		acthd = intel_ring_get_active_head(ring);
+
+		ha = head_action(ring, acthd);
+		if (ha != HANGCHECK_HUNG)
+			return ha;
+	} while (retries--);
+
 	return HANGCHECK_HUNG;
 }