drm/i915: Slaughter the thundering i915_wait_request herd

Submitted by Chris Wilson on Dec. 1, 2015, 11:14 a.m.

Details

Message ID 1448968458-25243-1-git-send-email-chris@chris-wilson.co.uk
State New
Headers show
Series "drm/i915: Slaughter the thundering i915_wait_request herd" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson Dec. 1, 2015, 11:14 a.m.
One particularly stressful scenario consists of many independent tasks
all competing for GPU time and waiting upon the results (e.g. realtime
transcoding of many, many streams). One bottleneck in particular is that
each client waits on its own results, but every client is woken up after
every batchbuffer - hence the thunder of hooves as then every client must
do its heavyweight dance to read a coherent seqno to see if it is the
lucky one. Alternatively, we can have one kthread responsible for waking
after an interrupt, checking the seqno and only waking up the waiting
clients who are complete. The disadvantage is that in the uncontended
scenario (i.e. only one waiter) we incur an extra context switch in the
wakeup path - though that should be mitigated somewhat by the busy-wait
we do first before sleeping.

v2: Convert from a kworker per engine into a dedicated kthread for the
bottom-half.
v3: Rename request members and tweak comments.
v4: Use a per-engine spinlock in the breadcrumbs bottom-half.
v5: Fix race in locklessly checking waiter status and kicking the task on
adding a new waiter.
v6: Fix deciding when to force the timer to hide missing interrupts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile            |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |   2 +-
 drivers/gpu/drm/i915/i915_dma.c          |   9 +-
 drivers/gpu/drm/i915/i915_drv.h          |  42 ++++-
 drivers/gpu/drm/i915/i915_gem.c          |  93 +++-------
 drivers/gpu/drm/i915/i915_gpu_error.c    |   8 +-
 drivers/gpu/drm/i915/i915_irq.c          |  20 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 286 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   3 +-
 11 files changed, 378 insertions(+), 91 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_breadcrumbs.c

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0851de07bd13..d3b9d3618719 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@  i915-y += i915_cmd_parser.o \
 	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
 	  i915_trace_points.o \
+	  intel_breadcrumbs.o \
 	  intel_lrc.o \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0583eda642d7..74e2ab46fe99 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2323,7 +2323,7 @@  static int count_irq_waiters(struct drm_i915_private *i915)
 	int i;
 
 	for_each_ring(ring, i915, i)
-		count += ring->irq_refcount;
+		count += intel_breadcrumbs_has_waiter(i915, i);
 
 	return count;
 }
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a81c76603544..93c6762d87b4 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -388,12 +388,16 @@  static int i915_load_modeset_init(struct drm_device *dev)
 	if (ret)
 		goto cleanup_vga_client;
 
+	ret = intel_breadcrumbs_init(dev_priv);
+	if (ret)
+		goto cleanup_vga_switcheroo;
+
 	/* Initialise stolen first so that we may reserve preallocated
 	 * objects for the BIOS to KMS transition.
 	 */
 	ret = i915_gem_init_stolen(dev);
 	if (ret)
-		goto cleanup_vga_switcheroo;
+		goto cleanup_breadcrumbs;
 
 	intel_power_domains_init_hw(dev_priv, false);
 
@@ -454,6 +458,8 @@  cleanup_irq:
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
 	i915_gem_cleanup_stolen(dev);
+cleanup_breadcrumbs:
+	intel_breadcrumbs_fini(dev_priv);
 cleanup_vga_switcheroo:
 	vga_switcheroo_unregister_client(dev->pdev);
 cleanup_vga_client:
@@ -1193,6 +1199,7 @@  int i915_driver_unload(struct drm_device *dev)
 	mutex_unlock(&dev->struct_mutex);
 	intel_fbc_cleanup_cfb(dev_priv);
 	i915_gem_cleanup_stolen(dev);
+	intel_breadcrumbs_fini(dev_priv);
 
 	intel_csr_ucode_fini(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e1980212ba37..7edf7c18eaa1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -564,6 +564,7 @@  struct drm_i915_error_state {
 			long jiffies;
 			u32 seqno;
 			u32 tail;
+			u32 waiters;
 		} *requests;
 
 		struct {
@@ -1383,7 +1384,7 @@  struct i915_gpu_error {
 #define I915_STOP_RING_ALLOW_WARN      (1 << 30)
 
 	/* For missed irq/seqno simulation. */
-	unsigned int test_irq_rings;
+	unsigned long test_irq_rings;
 };
 
 enum modeset_restore {
@@ -1695,6 +1696,27 @@  struct drm_i915_private {
 
 	struct intel_uncore uncore;
 
+	/* Rather than have every client wait upon all user interrupts,
+	 * with the herd waking after every interrupt and each doing the
+	 * heavyweight seqno dance, we delegate the task to a bottom-half
+	 * for the user interrupt (one bh handling all engines). Now,
+	 * just one task is woken up and does the coherent seqno read and
+	 * then wakes up all the waiters registered with the bottom-half
+	 * that are complete. This avoid the thundering herd problem
+	 * with lots of concurrent waiters, at the expense of an extra
+	 * context switch between the interrupt and the client. That should
+	 * be mitigated somewhat by the busyspin in the client before we go to
+	 * sleep waiting for an interrupt from the bottom-half.
+	 */
+	struct intel_breadcrumbs {
+		struct task_struct *task; /* bh for all user interrupts */
+		struct intel_breadcrumbs_engine {
+			spinlock_t lock; /* protects the lists of requests */
+			struct rb_root requests; /* sorted by retirement */
+			struct drm_i915_gem_request *first_waiter;
+		} engine[I915_NUM_RINGS];
+	} breadcrumbs;
+
 	struct i915_virtual_gpu vgpu;
 
 	struct intel_guc guc;
@@ -2228,6 +2250,13 @@  struct drm_i915_gem_request {
 	/** global list entry for this request */
 	struct list_head list;
 
+	/** Entry into the breadcrumb list of waiting requests. */
+	struct rb_node breadcrumb_node;
+	/** List of clients waiting for completion of this request. */
+	wait_queue_head_t waiters;
+	/** Count of clients waiting for completion of this request. */
+	unsigned waiters_count;
+
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
@@ -2800,6 +2829,17 @@  ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
 }
 
 
+/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
+int intel_breadcrumbs_init(struct drm_i915_private *i915);
+bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request);
+void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request);
+void intel_breadcrumbs_fini(struct drm_i915_private *i915);
+static inline bool intel_breadcrumbs_has_waiter(struct drm_i915_private *i915,
+						int engine)
+{
+	return READ_ONCE(i915->breadcrumbs.engine[engine].first_waiter);
+}
+
 /* i915_gem.c */
 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6e051402e340..58ad57def94c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1125,17 +1125,6 @@  i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 	return 0;
 }
 
-static void fake_irq(unsigned long data)
-{
-	wake_up_process((struct task_struct *)data);
-}
-
-static bool missed_irq(struct drm_i915_private *dev_priv,
-		       struct intel_engine_cs *ring)
-{
-	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
-}
-
 static unsigned long local_clock_us(unsigned *cpu)
 {
 	unsigned long t;
@@ -1183,9 +1172,6 @@  static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	if (req->ring->irq_refcount)
-		return -EBUSY;
-
 	/* Only spin if we know the GPU is processing this request */
 	if (!i915_gem_request_started(req, false))
 		return -EAGAIN;
@@ -1231,26 +1217,19 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 			s64 *timeout,
 			struct intel_rps_client *rps)
 {
-	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const bool irq_test_in_progress =
-		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
 	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
-	unsigned long timeout_expire;
+	unsigned long timeout_remain;
 	s64 before, now;
 	int ret;
 
-	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
-
 	if (list_empty(&req->list))
 		return 0;
 
 	if (i915_gem_request_completed(req, true))
 		return 0;
 
-	timeout_expire = 0;
+	timeout_remain = MAX_SCHEDULE_TIMEOUT;
 	if (timeout) {
 		if (WARN_ON(*timeout < 0))
 			return -EINVAL;
@@ -1258,34 +1237,37 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 		if (*timeout == 0)
 			return -ETIME;
 
-		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
 	}
 
-	if (INTEL_INFO(dev_priv)->gen >= 6)
-		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
-
 	/* Record current time in case interrupted by signal, or wedged */
 	trace_i915_gem_request_wait_begin(req);
 	before = ktime_get_raw_ns();
 
-	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req, state);
-	if (ret == 0)
-		goto out;
+	if (INTEL_INFO(req->i915)->gen >= 6)
+		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
-	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
-		ret = -ENODEV;
-		goto out;
+	/* Optimistic spin for the next jiffie before touching IRQs */
+	if (intel_breadcrumbs_add_waiter(req)) {
+		ret = __i915_spin_request(req, state);
+		if (ret == 0)
+			goto out;
 	}
 
 	for (;;) {
-		struct timer_list timer;
+		prepare_to_wait(&req->waiters, &wait, state);
 
-		prepare_to_wait(&ring->irq_queue, &wait, state);
+		/* When the bottom-half handler for the user interrupts runs,
+		 * it will perform a coherent read of seqno before waking us.
+		 * When we are then woken by the bottom-half, we can just
+		 * inspect the CPU cache for the seqno.
+		 */
+		if (i915_gem_request_completed(req, true)) {
+			ret = 0;
+			break;
+		}
 
-		/* We need to check whether any gpu reset happened in between
-		 * the caller grabbing the seqno and now ... */
-		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
+		if (unlikely(req->reset_counter != i915_reset_counter(&req->i915->gpu_error))) {
 			/* As we do not requeue the request over a GPU reset,
 			 * if one does occur we know that the request is
 			 * effectively complete.
@@ -1294,46 +1276,21 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (i915_gem_request_completed(req, false)) {
-			ret = 0;
-			break;
-		}
-
 		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
 
-		if (timeout && time_after_eq(jiffies, timeout_expire)) {
+		timeout_remain = io_schedule_timeout(timeout_remain);
+		if (timeout_remain == 0) {
 			ret = -ETIME;
 			break;
 		}
-
-		i915_queue_hangcheck(dev_priv);
-
-		timer.function = NULL;
-		if (timeout || missed_irq(dev_priv, ring)) {
-			unsigned long expire;
-
-			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
-			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
-			mod_timer(&timer, expire);
-		}
-
-		io_schedule();
-
-		if (timer.function) {
-			del_singleshot_timer_sync(&timer);
-			destroy_timer_on_stack(&timer);
-		}
 	}
-	if (!irq_test_in_progress)
-		ring->irq_put(ring);
-
-	finish_wait(&ring->irq_queue, &wait);
-
+	finish_wait(&req->waiters, &wait);
 out:
 	now = ktime_get_raw_ns();
+	intel_breadcrumbs_remove_waiter(req);
 	trace_i915_gem_request_wait_end(req);
 
 	if (timeout) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 06ca4082735b..d7d971eb955f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -453,10 +453,11 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->ring[i].name,
 				   error->ring[i].num_requests);
 			for (j = 0; j < error->ring[i].num_requests; j++) {
-				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x, waiters? %d\n",
 					   error->ring[i].requests[j].seqno,
 					   error->ring[i].requests[j].jiffies,
-					   error->ring[i].requests[j].tail);
+					   error->ring[i].requests[j].tail,
+					   error->ring[i].requests[j].waiters);
 			}
 		}
 
@@ -900,7 +901,7 @@  static void i915_record_ring_state(struct drm_device *dev,
 		ering->instdone = I915_READ(GEN2_INSTDONE);
 	}
 
-	ering->waiting = waitqueue_active(&ring->irq_queue);
+	ering->waiting = intel_breadcrumbs_has_waiter(dev_priv, ring->id);
 	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
 	ering->seqno = ring->get_seqno(ring, false);
 	ering->acthd = intel_ring_get_active_head(ring);
@@ -1105,6 +1106,7 @@  static void i915_gem_record_rings(struct drm_device *dev,
 			erq->seqno = request->seqno;
 			erq->jiffies = request->emitted_jiffies;
 			erq->tail = request->postfix;
+			erq->waiters = request->waiters_count;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 28c0329f8281..bc62e076ba5a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -996,12 +996,11 @@  static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 
 static void notify_ring(struct intel_engine_cs *ring)
 {
-	if (!intel_ring_initialized(ring))
+	if (ring->i915 == NULL)
 		return;
 
 	trace_i915_gem_request_notify(ring);
-
-	wake_up_all(&ring->irq_queue);
+	wake_up_process(ring->i915->breadcrumbs.task);
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -1083,7 +1082,7 @@  static bool any_waiters(struct drm_i915_private *dev_priv)
 	int i;
 
 	for_each_ring(ring, dev_priv, i)
-		if (ring->irq_refcount)
+		if (intel_breadcrumbs_has_waiter(dev_priv, i))
 			return true;
 
 	return false;
@@ -2399,9 +2398,6 @@  static irqreturn_t gen8_irq_handler(int irq, void *arg)
 static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 			       bool reset_completed)
 {
-	struct intel_engine_cs *ring;
-	int i;
-
 	/*
 	 * Notify all waiters for GPU completion events that reset state has
 	 * been changed, and that they need to restart their wait after
@@ -2410,8 +2406,7 @@  static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 	 */
 
 	/* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
-	for_each_ring(ring, dev_priv, i)
-		wake_up_all(&ring->irq_queue);
+	wake_up_process(dev_priv->breadcrumbs.task);
 
 	/* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
 	wake_up_all(&dev_priv->pending_flip_queue);
@@ -2986,16 +2981,17 @@  static void i915_hangcheck_elapsed(struct work_struct *work)
 			if (ring_idle(ring, seqno)) {
 				ring->hangcheck.action = HANGCHECK_IDLE;
 
-				if (waitqueue_active(&ring->irq_queue)) {
+				if (intel_breadcrumbs_has_waiter(dev_priv,
+								 ring->id)) {
 					/* Issue a wake-up to catch stuck h/w. */
 					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
-						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
+						if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings))
 							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
 								  ring->name);
 						else
 							DRM_INFO("Fake missed irq on %s\n",
 								 ring->name);
-						wake_up_all(&ring->irq_queue);
+						wake_up_process(dev_priv->breadcrumbs.task);
 					}
 					/* Safeguard against driver failure */
 					ring->hangcheck.score += BUSY;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
new file mode 100644
index 000000000000..207c916e6026
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -0,0 +1,286 @@ 
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>
+
+#include "i915_drv.h"
+
+static bool __irq_enable(struct intel_engine_cs *engine)
+{
+	if (test_bit(engine->id, &engine->i915->gpu_error.test_irq_rings))
+		return false;
+
+	if (!intel_irqs_enabled(engine->i915))
+		return false;
+
+	return engine->irq_get(engine);
+}
+
+static struct drm_i915_gem_request *to_request(struct rb_node *rb)
+{
+	if (rb == NULL)
+		return NULL;
+
+	return rb_entry(rb, struct drm_i915_gem_request, breadcrumb_node);
+}
+
+static unsigned noirq(struct drm_i915_private *i915, unsigned irq)
+{
+	/* Report which engines for whom we have not successfully enabled the
+	 * user interrupt, or if the interrupt is suspect and often absent.
+	 */
+	return ~irq | i915->gpu_error.missed_irq_rings;
+}
+
+/*
+ * intel_breadcrumbs_irq() acts as the bottom-half for the user interrupt,
+ * which we use as the breadcrumb after every request. In order to scale
+ * to many concurrent waiters, we delegate the task of reading the coherent
+ * seqno to this bottom-half. (Otherwise every waiter is woken after each
+ * interrupt and they all attempt to perform the heavyweight coherent
+ * seqno check.) Individual clients register with the bottom-half when
+ * waiting on a request, and are then woken when the bottom-half notices
+ * their seqno is complete. This incurs an extra context switch from the
+ * interrupt to the client in the uncontended case.
+ */
+static int intel_breadcrumbs_irq(void *data)
+{
+	struct drm_i915_private *i915 = data;
+	struct intel_breadcrumbs *b = &i915->breadcrumbs;
+	unsigned engines_active = 0;
+	unsigned irqs_enabled = 0;
+	int i;
+
+	while (!unlikely(kthread_should_stop())) {
+		unsigned long timeout_jiffies;
+		unsigned reset_counter;
+		bool idling = false;
+
+		/* On every wakeup, walk the seqno-ordered list of requests
+		 * and for each retired request wakeup its clients. If we
+		 * find an unfinished request or none, go back to sleep.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* Note carefully that we do not hold a reference for the
+		 * requests on these lists. Therefore we only inspect them
+		 * whilst holding the spinlock to ensure that they are not
+		 * freed in the meantime, and the client must remove the
+		 * request from the list if it is interrupted (before it
+		 * itself releases its reference).
+		 */
+		reset_counter = i915_reset_counter(&i915->gpu_error);
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			struct intel_engine_cs *engine = &i915->ring[i];
+			struct intel_breadcrumbs_engine *be = &b->engine[i];
+			struct drm_i915_gem_request *request;
+
+			if (READ_ONCE(be->first_waiter) == NULL) {
+				/* No more waiters, mask the user interrupt
+				 * (if we were able to enable it!) and
+				 * release the wakelock.
+				 */
+				if (engines_active & (1 << i)) {
+					if (irqs_enabled & (1 << i)) {
+						engine->irq_put(engine);
+						irqs_enabled &= ~ (1 << i);
+					}
+					intel_runtime_pm_put(i915);
+					engines_active &= ~(1 << i);
+				}
+
+				continue;
+			}
+
+			/* When enabling waiting upon an engine, we need
+			 * to unmask the user interrupt, and before we
+			 * do so we need to hold a wakelock for our
+			 * hardware access (and the interrupts thereafter).
+			 */
+			if ((engines_active & (1 << i)) == 0) {
+				intel_runtime_pm_get(i915);
+				irqs_enabled |= __irq_enable(engine) << i;
+				engines_active |= 1 << i;
+			}
+
+			spin_lock(&be->lock);
+			request = be->first_waiter;
+			while (request) {
+				struct rb_node *next;
+
+				if (request->reset_counter == reset_counter &&
+				    !i915_gem_request_completed(request, false))
+					break;
+
+				next = rb_next(&request->breadcrumb_node);
+				rb_erase(&request->breadcrumb_node,
+					 &be->requests);
+				RB_CLEAR_NODE(&request->breadcrumb_node);
+
+				wake_up_all(&request->waiters);
+
+				request = to_request(next);
+			}
+			be->first_waiter = request;
+			spin_unlock(&be->lock);
+
+			/* Make sure the hangcheck timer is armed in case
+			 * the GPU hangs and we are never woken up.
+			 */
+			if (request)
+				i915_queue_hangcheck(i915);
+			else
+				idling = true;
+		}
+
+		/* If we don't have interrupts available (either we have
+		 * detected the hardware is missing interrupts or the
+		 * interrupt delivery was disabled by the user), wake up
+		 * every jiffie to check for request completion.
+		 *
+		 * If we have processed all requests for one engine, we
+		 * also wish to wake up in a jiffie to turn off the
+		 * breadcrumb interrupt for that engine. We delay
+		 * switching off the interrupt in order to allow another
+		 * waiter to start without incurring additional latency
+		 * enabling the interrupt.
+		 */
+		timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+		if (idling || engines_active & noirq(i915, irqs_enabled))
+			timeout_jiffies = 1;
+
+		/* Unlike the individual clients, we do not want this
+		 * background thread to contribute to the system load,
+		 * i.e. we do not want to use io_schedule() here.
+		 */
+		schedule_timeout(timeout_jiffies);
+	}
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (engines_active & (1 << i)) {
+			if (irqs_enabled & (1 << i)) {
+				struct intel_engine_cs *engine = &i915->ring[i];
+				engine->irq_put(engine);
+			}
+			intel_runtime_pm_put(i915);
+		}
+	}
+
+	return 0;
+}
+
+bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
+	struct intel_breadcrumbs_engine *be = &b->engine[request->ring->id];
+	bool first = false;
+
+	spin_lock(&be->lock);
+	if (request->waiters_count++ == 0) {
+		bool was_idle = be->first_waiter == NULL;
+		struct rb_node **p, *parent;
+
+		init_waitqueue_head(&request->waiters);
+
+		/* Insert this request into the retirement ordered list of
+		 * waiters by walking the rbtree. We also track the oldest
+		 * request (first to be retired) in order to be able to
+		 * do a linear walk in the bottom-half, and conveniently
+		 * check whether there are any waiters.
+		 */
+		first = true;
+		parent = NULL;
+		p = &be->requests.rb_node;
+		while (*p) {
+			struct drm_i915_gem_request *__req;
+
+			parent = *p;
+			__req = rb_entry(parent,
+					 typeof(*__req),
+					 breadcrumb_node);
+
+			if (i915_seqno_passed(request->seqno, __req->seqno)) {
+				p = &parent->rb_right;
+				first = false;
+			} else
+				p = &parent->rb_left;
+		}
+		if (first)
+			be->first_waiter = request;
+
+		rb_link_node(&request->breadcrumb_node, parent, p);
+		rb_insert_color(&request->breadcrumb_node, &be->requests);
+
+		if (was_idle)
+			wake_up_process(b->task);
+	}
+	spin_unlock(&be->lock);
+
+	return first;
+}
+
+void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
+	struct intel_breadcrumbs_engine *be = &b->engine[request->ring->id];
+
+	spin_lock(&be->lock);
+	if (--request->waiters_count == 0 &&
+	    !RB_EMPTY_NODE(&request->breadcrumb_node)) {
+		if (be->first_waiter == request)
+			be->first_waiter =
+				to_request(rb_next(&request->breadcrumb_node));
+		rb_erase(&request->breadcrumb_node, &be->requests);
+	}
+	spin_unlock(&be->lock);
+}
+
+int intel_breadcrumbs_init(struct drm_i915_private *i915)
+{
+	struct intel_breadcrumbs *b = &i915->breadcrumbs;
+	struct task_struct *task;
+	int i;
+
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		spin_lock_init(&b->engine[i].lock);
+
+	task = kthread_run(intel_breadcrumbs_irq, i915, "irq/i915");
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	b->task = task;
+
+	return 0;
+}
+
+void intel_breadcrumbs_fini(struct drm_i915_private *i915)
+{
+	struct intel_breadcrumbs *b = &i915->breadcrumbs;
+
+	if (b->task == NULL)
+		return;
+
+	kthread_stop(b->task);
+	b->task = NULL;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08b982a6ae81..ab6a590926d5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1920,10 +1920,10 @@  static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->buffer = NULL;
 
 	ring->dev = dev;
+	ring->i915 = to_i915(dev);
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
-	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->buffers);
 	INIT_LIST_HEAD(&ring->execlist_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 511efe556d73..5a5a7195dd54 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2157,6 +2157,7 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 	WARN_ON(ring->buffer);
 
 	ring->dev = dev;
+	ring->i915 = to_i915(dev);
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
@@ -2164,8 +2165,6 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 
-	init_waitqueue_head(&ring->irq_queue);
-
 	ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
 	if (IS_ERR(ringbuf))
 		return PTR_ERR(ringbuf);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5d1eb206151d..49b5bded2767 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -157,6 +157,7 @@  struct  intel_engine_cs {
 #define LAST_USER_RING (VECS + 1)
 	u32		mmio_base;
 	struct		drm_device *dev;
+	struct drm_i915_private *i915;
 	struct intel_ringbuffer *buffer;
 	struct list_head buffers;
 
@@ -303,8 +304,6 @@  struct  intel_engine_cs {
 
 	bool gpu_caches_dirty;
 
-	wait_queue_head_t irq_queue;
-
 	struct intel_context *default_context;
 	struct intel_context *last_context;