[v4] drm/i915: Slaughter the thundering i915_wait_request herd

Submitted by Chris Wilson on Nov. 30, 2015, 2:34 p.m.

Details

Message ID 1448894080-22511-1-git-send-email-chris@chris-wilson.co.uk
State New
Headers show
Series "Series without cover letter" ( rev: 2 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson Nov. 30, 2015, 2:34 p.m.
One particularly stressful scenario consists of many independent tasks
all competing for GPU time and waiting upon the results (e.g. realtime
transcoding of many, many streams). One bottleneck in particular is that
each client waits on its own results, but every client is woken up after
every batchbuffer - hence the thunder of hooves as then every client must
do its heavyweight dance to read a coherent seqno to see if it is the
lucky one. Alternatively, we can have one kthread responsible for waking
after an interrupt, checking the seqno and only waking up the waiting
clients who are complete. The disadvantage is that in the uncontended
scenario (i.e. only one waiter) we incur an extra context switch in the
wakeup path - though that should be mitigated somewhat by the busy-wait
we do first before sleeping.

v2: Convert from a kworker per engine into a dedicated kthread for the
bottom-half.
v3: Rename request members and tweak comments.
v4: Use a per-engine spinlock in the breadcrumbs bottom-half.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile            |   1 +
 drivers/gpu/drm/i915/i915_dma.c          |   9 +-
 drivers/gpu/drm/i915/i915_drv.h          |  37 ++++-
 drivers/gpu/drm/i915/i915_gem.c          |  93 +++--------
 drivers/gpu/drm/i915/i915_gpu_error.c    |   9 +-
 drivers/gpu/drm/i915/i915_irq.c          |  24 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 270 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   3 +-
 10 files changed, 361 insertions(+), 90 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_breadcrumbs.c

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0851de07bd13..d3b9d3618719 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@  i915-y += i915_cmd_parser.o \
 	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
 	  i915_trace_points.o \
+	  intel_breadcrumbs.o \
 	  intel_lrc.o \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a81c76603544..93c6762d87b4 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -388,12 +388,16 @@  static int i915_load_modeset_init(struct drm_device *dev)
 	if (ret)
 		goto cleanup_vga_client;
 
+	ret = intel_breadcrumbs_init(dev_priv);
+	if (ret)
+		goto cleanup_vga_switcheroo;
+
 	/* Initialise stolen first so that we may reserve preallocated
 	 * objects for the BIOS to KMS transition.
 	 */
 	ret = i915_gem_init_stolen(dev);
 	if (ret)
-		goto cleanup_vga_switcheroo;
+		goto cleanup_breadcrumbs;
 
 	intel_power_domains_init_hw(dev_priv, false);
 
@@ -454,6 +458,8 @@  cleanup_irq:
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
 	i915_gem_cleanup_stolen(dev);
+cleanup_breadcrumbs:
+	intel_breadcrumbs_fini(dev_priv);
 cleanup_vga_switcheroo:
 	vga_switcheroo_unregister_client(dev->pdev);
 cleanup_vga_client:
@@ -1193,6 +1199,7 @@  int i915_driver_unload(struct drm_device *dev)
 	mutex_unlock(&dev->struct_mutex);
 	intel_fbc_cleanup_cfb(dev_priv);
 	i915_gem_cleanup_stolen(dev);
+	intel_breadcrumbs_fini(dev_priv);
 
 	intel_csr_ucode_fini(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e1980212ba37..ab8c694a9ba5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -564,6 +564,7 @@  struct drm_i915_error_state {
 			long jiffies;
 			u32 seqno;
 			u32 tail;
+			u32 waiters;
 		} *requests;
 
 		struct {
@@ -1383,7 +1384,7 @@  struct i915_gpu_error {
 #define I915_STOP_RING_ALLOW_WARN      (1 << 30)
 
 	/* For missed irq/seqno simulation. */
-	unsigned int test_irq_rings;
+	unsigned long test_irq_rings;
 };
 
 enum modeset_restore {
@@ -1695,6 +1696,27 @@  struct drm_i915_private {
 
 	struct intel_uncore uncore;
 
+	/* Rather than have every client wait upon all user interrupts,
+	 * with the herd waking after every interrupt and each doing the
+	 * heavyweight seqno dance, we delegate the task to a bottom-half
+	 * for the user interrupt (one bh handling all engines). Now,
+	 * just one task is woken up and does the coherent seqno read and
+	 * then wakes up all the waiters registered with the bottom-half
+	 * that are complete. This avoid the thundering herd problem
+	 * with lots of concurrent waiters, at the expense of an extra
+	 * context switch between the interrupt and the client. That should
+	 * be mitigated somewhat by the busyspin in the client before we go to
+	 * sleep waiting for an interrupt from the bottom-half.
+	 */
+	struct intel_breadcrumbs {
+		struct task_struct *task; /* bh for all user interrupts */
+		struct intel_breadcrumbs_engine {
+			spinlock_t lock; /* protects the lists of requests */
+			struct rb_root requests; /* sorted by retirement */
+			struct drm_i915_gem_request *first_waiter;
+		} engine[I915_NUM_RINGS];
+	} breadcrumbs;
+
 	struct i915_virtual_gpu vgpu;
 
 	struct intel_guc guc;
@@ -2228,6 +2250,13 @@  struct drm_i915_gem_request {
 	/** global list entry for this request */
 	struct list_head list;
 
+	/** Entry into the breadcrumb list of waiting requests. */
+	struct rb_node breadcrumb_node;
+	/** List of clients waiting for completion of this request. */
+	wait_queue_head_t waiters;
+	/** Count of clients waiting for completion of this request. */
+	unsigned waiters_count;
+
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
@@ -2800,6 +2829,12 @@  ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
 }
 
 
+/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
+int intel_breadcrumbs_init(struct drm_i915_private *i915);
+bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request);
+void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request);
+void intel_breadcrumbs_fini(struct drm_i915_private *i915);
+
 /* i915_gem.c */
 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 65d101b47d8e..cec1bc460f57 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1126,17 +1126,6 @@  i915_gem_check_wedge(unsigned reset_counter,
 	return 0;
 }
 
-static void fake_irq(unsigned long data)
-{
-	wake_up_process((struct task_struct *)data);
-}
-
-static bool missed_irq(struct drm_i915_private *dev_priv,
-		       struct intel_engine_cs *ring)
-{
-	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
-}
-
 static unsigned long local_clock_us(unsigned *cpu)
 {
 	unsigned long t;
@@ -1184,9 +1173,6 @@  static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	if (req->ring->irq_refcount)
-		return -EBUSY;
-
 	/* Only spin if we know the GPU is processing this request */
 	if (!i915_gem_request_started(req, false))
 		return -EAGAIN;
@@ -1232,26 +1218,19 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 			s64 *timeout,
 			struct intel_rps_client *rps)
 {
-	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const bool irq_test_in_progress =
-		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
 	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
-	unsigned long timeout_expire;
+	unsigned long timeout_remain;
 	s64 before, now;
 	int ret;
 
-	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
-
 	if (list_empty(&req->list))
 		return 0;
 
 	if (i915_gem_request_completed(req, true))
 		return 0;
 
-	timeout_expire = 0;
+	timeout_remain = MAX_SCHEDULE_TIMEOUT;
 	if (timeout) {
 		if (WARN_ON(*timeout < 0))
 			return -EINVAL;
@@ -1259,34 +1238,37 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 		if (*timeout == 0)
 			return -ETIME;
 
-		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
 	}
 
-	if (INTEL_INFO(dev_priv)->gen >= 6)
-		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
-
 	/* Record current time in case interrupted by signal, or wedged */
 	trace_i915_gem_request_wait_begin(req);
 	before = ktime_get_raw_ns();
 
-	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req, state);
-	if (ret == 0)
-		goto out;
+	if (INTEL_INFO(req->i915)->gen >= 6)
+		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
-	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
-		ret = -ENODEV;
-		goto out;
+	/* Optimistic spin for the next jiffie before touching IRQs */
+	if (intel_breadcrumbs_add_waiter(req)) {
+		ret = __i915_spin_request(req, state);
+		if (ret == 0)
+			goto out;
 	}
 
 	for (;;) {
-		struct timer_list timer;
+		prepare_to_wait(&req->waiters, &wait, state);
 
-		prepare_to_wait(&ring->irq_queue, &wait, state);
+		/* When the bottom-half handler for the user interrupts runs,
+		 * it will perform a coherent read of seqno before waking us.
+		 * When we are then woken by the bottom-half, we can just
+		 * inspect the CPU cache for the seqno.
+		 */
+		if (i915_gem_request_completed(req, true)) {
+			ret = 0;
+			break;
+		}
 
-		/* We need to check whether any gpu reset happened in between
-		 * the caller grabbing the seqno and now ... */
-		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
+		if (unlikely(req->reset_counter != i915_reset_counter(&req->i915->gpu_error))) {
 			/* As we do not requeue the request over a GPU reset,
 			 * if one does occur we know that the request is
 			 * effectively complete.
@@ -1295,46 +1277,21 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (i915_gem_request_completed(req, false)) {
-			ret = 0;
-			break;
-		}
-
 		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
 
-		if (timeout && time_after_eq(jiffies, timeout_expire)) {
+		timeout_remain = io_schedule_timeout(timeout_remain);
+		if (timeout_remain == 0) {
 			ret = -ETIME;
 			break;
 		}
-
-		i915_queue_hangcheck(dev_priv);
-
-		timer.function = NULL;
-		if (timeout || missed_irq(dev_priv, ring)) {
-			unsigned long expire;
-
-			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
-			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
-			mod_timer(&timer, expire);
-		}
-
-		io_schedule();
-
-		if (timer.function) {
-			del_singleshot_timer_sync(&timer);
-			destroy_timer_on_stack(&timer);
-		}
 	}
-	if (!irq_test_in_progress)
-		ring->irq_put(ring);
-
-	finish_wait(&ring->irq_queue, &wait);
-
+	finish_wait(&req->waiters, &wait);
 out:
 	now = ktime_get_raw_ns();
+	intel_breadcrumbs_remove_waiter(req);
 	trace_i915_gem_request_wait_end(req);
 
 	if (timeout) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 06ca4082735b..0e63b6ebddda 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -453,10 +453,11 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->ring[i].name,
 				   error->ring[i].num_requests);
 			for (j = 0; j < error->ring[i].num_requests; j++) {
-				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x, waiters? %d\n",
 					   error->ring[i].requests[j].seqno,
 					   error->ring[i].requests[j].jiffies,
-					   error->ring[i].requests[j].tail);
+					   error->ring[i].requests[j].tail,
+					   error->ring[i].requests[j].waiters);
 			}
 		}
 
@@ -900,7 +901,8 @@  static void i915_record_ring_state(struct drm_device *dev,
 		ering->instdone = I915_READ(GEN2_INSTDONE);
 	}
 
-	ering->waiting = waitqueue_active(&ring->irq_queue);
+	ering->waiting =
+	       	READ_ONCE(dev_priv->breadcrumbs.engine[ring->id].first_waiter);
 	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
 	ering->seqno = ring->get_seqno(ring, false);
 	ering->acthd = intel_ring_get_active_head(ring);
@@ -1105,6 +1107,7 @@  static void i915_gem_record_rings(struct drm_device *dev,
 			erq->seqno = request->seqno;
 			erq->jiffies = request->emitted_jiffies;
 			erq->tail = request->postfix;
+			erq->waiters = request->waiters_count;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 28c0329f8281..182cc9efe9ba 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -996,12 +996,11 @@  static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 
 static void notify_ring(struct intel_engine_cs *ring)
 {
-	if (!intel_ring_initialized(ring))
+	if (ring->i915 == NULL)
 		return;
 
 	trace_i915_gem_request_notify(ring);
-
-	wake_up_all(&ring->irq_queue);
+	wake_up_process(ring->i915->breadcrumbs.task);
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -1077,13 +1076,18 @@  static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	return events;
 }
 
+static bool any_waiters_on_engine(struct intel_engine_cs *engine)
+{
+	return READ_ONCE(engine->i915->breadcrumbs.engine[engine->id].first_waiter);
+}
+
 static bool any_waiters(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *ring;
 	int i;
 
 	for_each_ring(ring, dev_priv, i)
-		if (ring->irq_refcount)
+		if (any_waiters_on_engine(ring))
 			return true;
 
 	return false;
@@ -2399,9 +2403,6 @@  static irqreturn_t gen8_irq_handler(int irq, void *arg)
 static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 			       bool reset_completed)
 {
-	struct intel_engine_cs *ring;
-	int i;
-
 	/*
 	 * Notify all waiters for GPU completion events that reset state has
 	 * been changed, and that they need to restart their wait after
@@ -2410,8 +2411,7 @@  static void i915_error_wake_up(struct drm_i915_private *dev_priv,
 	 */
 
 	/* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
-	for_each_ring(ring, dev_priv, i)
-		wake_up_all(&ring->irq_queue);
+	wake_up_process(dev_priv->breadcrumbs.task);
 
 	/* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
 	wake_up_all(&dev_priv->pending_flip_queue);
@@ -2986,16 +2986,16 @@  static void i915_hangcheck_elapsed(struct work_struct *work)
 			if (ring_idle(ring, seqno)) {
 				ring->hangcheck.action = HANGCHECK_IDLE;
 
-				if (waitqueue_active(&ring->irq_queue)) {
+				if (any_waiters_on_engine(ring)){
 					/* Issue a wake-up to catch stuck h/w. */
 					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
-						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
+						if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings))
 							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
 								  ring->name);
 						else
 							DRM_INFO("Fake missed irq on %s\n",
 								 ring->name);
-						wake_up_all(&ring->irq_queue);
+						wake_up_process(dev_priv->breadcrumbs.task);
 					}
 					/* Safeguard against driver failure */
 					ring->hangcheck.score += BUSY;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
new file mode 100644
index 000000000000..77af6240573d
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -0,0 +1,270 @@ 
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>
+
+#include "i915_drv.h"
+
+static bool __irq_enable(struct intel_engine_cs *engine)
+{
+	if (test_bit(engine->id, &engine->i915->gpu_error.test_irq_rings))
+		return false;
+
+	if (!intel_irqs_enabled(engine->i915))
+		return false;
+
+	return engine->irq_get(engine);
+}
+
+static struct drm_i915_gem_request *to_request(struct rb_node *rb)
+{
+	if (rb == NULL)
+		return NULL;
+
+	return rb_entry(rb, struct drm_i915_gem_request, breadcrumb_node);
+}
+
+/*
+ * intel_breadcrumbs_irq() acts as the bottom-half for the user interrupt,
+ * which we use as the breadcrumb after every request. In order to scale
+ * to many concurrent waiters, we delegate the task of reading the coherent
+ * seqno to this bottom-half. (Otherwise every waiter is woken after each
+ * interrupt and they all attempt to perform the heavyweight coherent
+ * seqno check.) Individual clients register with the bottom-half when
+ * waiting on a request, and are then woken when the bottom-half notices
+ * their seqno is complete. This incurs an extra context switch from the
+ * interrupt to the client in the uncontended case.
+ */
+static int intel_breadcrumbs_irq(void *data)
+{
+	struct drm_i915_private *i915 = data;
+	struct intel_breadcrumbs *b = &i915->breadcrumbs;
+	unsigned engines_active = 0;
+	unsigned irqs_enabled = 0;
+	int i;
+
+	while (!kthread_should_stop()) {
+		unsigned reset_counter = i915_reset_counter(&i915->gpu_error);
+		unsigned long timeout_jiffies;
+		bool idling = false;
+
+		/* On every wakeup, walk the seqno-ordered list of requests
+		 * and for each retired request wakeup its clients. If we
+		 * find an unfinished request or none, go back to sleep.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* Note carefully that we do not hold a reference for the
+		 * requests on this list. Therefore we only inspect them
+		 * whilst holding the spinlock to ensure that they are not
+		 * freed in the meantime, and the client must remove the
+		 * request from the list if it is interrupted (before it
+		 * itself releases its reference).
+		 */
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			struct intel_engine_cs *engine = &i915->ring[i];
+			struct intel_breadcrumbs_engine *be = &b->engine[i];
+			struct drm_i915_gem_request *request;
+
+			if (READ_ONCE(be->first_waiter) == NULL) {
+				/* No more waiters, mask the user interrupt
+				 * (if we were able to enable it!) and
+				 * release the wakelock.
+				 */
+				if (engines_active & (1 << i)) {
+					if (irqs_enabled & (1 << i)) {
+						engine->irq_put(engine);
+						irqs_enabled &= ~ (1 << i);
+					}
+					intel_runtime_pm_put(i915);
+					engines_active &= ~(1 << i);
+				}
+
+				continue;
+			}
+
+			/* When enabling waiting upon an engine, we need
+			 * to unmask the user interrupt, and before we
+			 * do so we need to hold a wakelock for our
+			 * hardware access (and the interrupts thereafter).
+			 */
+			if ((engines_active & (1 << i)) == 0) {
+				intel_runtime_pm_get(i915);
+				irqs_enabled |= __irq_enable(engine) << i;
+				engines_active |= 1 << i;
+			}
+
+			spin_lock(&be->lock);
+			request = be->first_waiter;
+			while (request) {
+				struct rb_node *next;
+
+				if (request->reset_counter == reset_counter &&
+				    !i915_gem_request_completed(request, false))
+					break;
+
+				next = rb_next(&request->breadcrumb_node);
+				rb_erase(&request->breadcrumb_node,
+					 &be->requests);
+				RB_CLEAR_NODE(&request->breadcrumb_node);
+
+				wake_up_all(&request->waiters);
+
+				request = to_request(next);
+			}
+			be->first_waiter = request;
+			spin_unlock(&be->lock);
+
+			/* Make sure the hangcheck timer is armed in case
+			 * the GPU hangs and we are never woken up.
+			 */
+			if (request)
+				i915_queue_hangcheck(i915);
+			else
+				idling = true;
+		}
+
+		/* If we don't have interrupts available (either we have
+		 * detected the hardware is missing interrupts or the
+		 * interrupt delivery was disabled by the user), wake up
+		 * every jiffie to check for request completion.
+		 *
+		 * If we have processed all requests for one engine, we
+		 * also wish to wake up in a jiffie to turn off the
+		 * breadcrumb interrupt for that engine. We delay
+		 * switching off the interrupt in order to allow another
+		 * waiter to start without incurring additional latency
+		 * enabling the interrupt.
+		 */
+		timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+		if (idling || i915->gpu_error.missed_irq_rings & irqs_enabled)
+			timeout_jiffies = 1;
+
+		/* Unlike the individual clients, we do not want this
+		 * background thread to contribute to the system load,
+		 * i.e. we do not want to use io_schedule() here.
+		 */
+		schedule_timeout(timeout_jiffies);
+	}
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (engines_active & (1 << i)) {
+			if (irqs_enabled & (1 << i)) {
+				struct intel_engine_cs *engine = &i915->ring[i];
+				engine->irq_put(engine);
+			}
+			intel_runtime_pm_put(i915);
+		}
+	}
+
+	return 0;
+}
+
+bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
+	struct intel_breadcrumbs_engine *be = &b->engine[request->ring->id];
+	bool first = false;
+
+	spin_lock(&be->lock);
+	if (request->waiters_count++ == 0) {
+		struct rb_node **p, *parent;
+
+		if (be->first_waiter == NULL)
+			wake_up_process(b->task);
+
+		init_waitqueue_head(&request->waiters);
+
+		first = true;
+		parent = NULL;
+		p = &be->requests.rb_node;
+		while (*p) {
+			struct drm_i915_gem_request *__req;
+
+			parent = *p;
+			__req = rb_entry(parent,
+					 typeof(*__req),
+					 breadcrumb_node);
+
+			if (i915_seqno_passed(request->seqno, __req->seqno)) {
+				p = &parent->rb_right;
+				first = false;
+			} else
+				p = &parent->rb_left;
+		}
+		if (first)
+			be->first_waiter = request;
+
+		rb_link_node(&request->breadcrumb_node, parent, p);
+		rb_insert_color(&request->breadcrumb_node, &be->requests);
+	}
+	spin_unlock(&be->lock);
+
+	return first;
+}
+
+void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
+	struct intel_breadcrumbs_engine *be = &b->engine[request->ring->id];
+
+	spin_lock(&be->lock);
+	if (--request->waiters_count == 0 &&
+	    !RB_EMPTY_NODE(&request->breadcrumb_node)) {
+		if (be->first_waiter == request)
+			be->first_waiter =
+				to_request(rb_next(&request->breadcrumb_node));
+		rb_erase(&request->breadcrumb_node, &be->requests);
+	}
+	spin_unlock(&be->lock);
+}
+
+int intel_breadcrumbs_init(struct drm_i915_private *i915)
+{
+	struct intel_breadcrumbs *b = &i915->breadcrumbs;
+	struct task_struct *task;
+	int i;
+
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		spin_lock_init(&b->engine[i].lock);
+
+	task = kthread_run(intel_breadcrumbs_irq, i915, "irq/i915");
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	b->task = task;
+
+	return 0;
+}
+
+void intel_breadcrumbs_fini(struct drm_i915_private *i915)
+{
+	struct intel_breadcrumbs *b = &i915->breadcrumbs;
+
+	if (b->task == NULL)
+		return;
+
+	kthread_stop(b->task);
+	b->task = NULL;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b40ffc3607e7..604c081b0a0f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1922,10 +1922,10 @@  static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->buffer = NULL;
 
 	ring->dev = dev;
+	ring->i915 = to_i915(dev);
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
-	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->buffers);
 	INIT_LIST_HEAD(&ring->execlist_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 511efe556d73..5a5a7195dd54 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2157,6 +2157,7 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 	WARN_ON(ring->buffer);
 
 	ring->dev = dev;
+	ring->i915 = to_i915(dev);
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
@@ -2164,8 +2165,6 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 
-	init_waitqueue_head(&ring->irq_queue);
-
 	ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
 	if (IS_ERR(ringbuf))
 		return PTR_ERR(ringbuf);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5d1eb206151d..49b5bded2767 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -157,6 +157,7 @@  struct  intel_engine_cs {
 #define LAST_USER_RING (VECS + 1)
 	u32		mmio_base;
 	struct		drm_device *dev;
+	struct drm_i915_private *i915;
 	struct intel_ringbuffer *buffer;
 	struct list_head buffers;
 
@@ -303,8 +304,6 @@  struct  intel_engine_cs {
 
 	bool gpu_caches_dirty;
 
-	wait_queue_head_t irq_queue;
-
 	struct intel_context *default_context;
 	struct intel_context *last_context;
 

Comments

On Mon, Nov 30, 2015 at 02:34:40PM +0000, Chris Wilson wrote:
> +bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request)
> +{
> +	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
> +	struct intel_breadcrumbs_engine *be = &b->engine[request->ring->id];
> +	bool first = false;
> +
> +	spin_lock(&be->lock);
> +	if (request->waiters_count++ == 0) {
> +		struct rb_node **p, *parent;
> +
> +		if (be->first_waiter == NULL)
> +			wake_up_process(b->task);

Oh, the irony.

Converting the other side to a lockless READ_ONCE() means that I do know
have to kick after setting be->first_waiter and not rely on the spinlock
serialising the reads.
-Chris
On Mon, Nov 30, 2015 at 04:30:57PM +0000, Chris Wilson wrote:
> On Mon, Nov 30, 2015 at 02:34:40PM +0000, Chris Wilson wrote:
> > +bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request)
> > +{
> > +	struct intel_breadcrumbs *b = &request->i915->breadcrumbs;
> > +	struct intel_breadcrumbs_engine *be = &b->engine[request->ring->id];
> > +	bool first = false;
> > +
> > +	spin_lock(&be->lock);
> > +	if (request->waiters_count++ == 0) {
> > +		struct rb_node **p, *parent;
> > +
> > +		if (be->first_waiter == NULL)
> > +			wake_up_process(b->task);
> 
> Oh, the irony.
> 
> Converting the other side to a lockless READ_ONCE() means that I do know

And that's the second mispelling of "now" today!
-Chris
On 30/11/15 14:34, Chris Wilson wrote:
> One particularly stressful scenario consists of many independent tasks
> all competing for GPU time and waiting upon the results (e.g. realtime
> transcoding of many, many streams). One bottleneck in particular is that
> each client waits on its own results, but every client is woken up after
> every batchbuffer - hence the thunder of hooves as then every client must
> do its heavyweight dance to read a coherent seqno to see if it is the
> lucky one. Alternatively, we can have one kthread responsible for waking
> after an interrupt, checking the seqno and only waking up the waiting
> clients who are complete. The disadvantage is that in the uncontended
> scenario (i.e. only one waiter) we incur an extra context switch in the
> wakeup path - though that should be mitigated somewhat by the busy-wait
> we do first before sleeping.

This discussion reminds me about an approach we took in [another OS], 
where the interrupt handler always just woke the first waiter, but that 
thread, if the wakeup wasn't of interest to itself, then did the extra 
work to figure out which other thread /should/ be woken. That both 
minimised latency for the single-waiter scenario, and avoided wake_all() 
from interrupt code in the multiple-waiter case. Oh, and IIRC we had a 
yield_to() in there so that the spuriously-woken first waiter went back 
to waiting and the correctly-woken thread immediately got to take over 
the CPU :)

I don't know how practical that would be inside Linux though ...

.Dave.