[39/41] drm/i915: Enable multiple timelines

Submitted by Chris Wilson on Oct. 14, 2016, 12:18 p.m.

Details

Message ID 20161014121833.439-40-chris@chris-wilson.co.uk
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Browsing this patch as part of:
"Series without cover letter" rev 1 in Intel GFX
<< prev patch [39/41] next patch >>

Commit Message

Chris Wilson Oct. 14, 2016, 12:18 p.m.
With the infrastructure converted over to tracking multiple timelines in
the GEM API whilst preserving the efficiency of using a single execution
timeline internally, we can now assign a separate timeline to every
context with full-ppgtt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 10 ++++++
 drivers/gpu/drm/i915/i915_gem.c          | 10 +++---
 drivers/gpu/drm/i915/i915_gem_context.c  |  4 +--
 drivers/gpu/drm/i915/i915_gem_evict.c    | 11 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c      | 19 ++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h      |  4 ++-
 drivers/gpu/drm/i915/i915_gem_request.c  | 59 +++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_timeline.c |  1 +
 drivers/gpu/drm/i915/i915_gem_timeline.h |  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  5 ---
 10 files changed, 76 insertions(+), 50 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4de46a69cca8..bfb67842af23 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3529,6 +3529,16 @@  static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_free);
 }
 
+static inline struct intel_timeline *
+i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
+				 struct intel_engine_cs *engine)
+{
+	struct i915_address_space *vm;
+
+	vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
+	return &vm->timeline.engine[engine->id];
+}
+
 static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
 {
 	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7064d092523e..4f8858d8abe2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2576,12 +2576,9 @@  i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
 	list_for_each_entry(request, &engine->timeline->requests, link) {
-		if (i915_gem_request_completed(request))
+		if (__i915_gem_request_completed(request))
 			continue;
 
-		if (!i915_sw_fence_done(&request->submit))
-			break;
-
 		return request;
 	}
 
@@ -2609,6 +2606,7 @@  static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
 	struct i915_gem_context *incomplete_ctx;
+	struct intel_timeline *timeline;
 	bool ring_hung;
 
 	if (engine->irq_seqno_barrier)
@@ -2647,6 +2645,10 @@  static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
 		if (request->ctx == incomplete_ctx)
 			reset_request(request);
+
+	timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+	list_for_each_entry(request, &timeline->requests, link)
+		reset_request(request);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d3118db244c4..461aece6c5bd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -365,9 +365,9 @@  i915_gem_create_context(struct drm_device *dev,
 		return ctx;
 
 	if (USES_FULL_PPGTT(dev)) {
-		struct i915_hw_ppgtt *ppgtt =
-			i915_ppgtt_create(to_i915(dev), file_priv);
+		struct i915_hw_ppgtt *ppgtt;
 
+		ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 79b964152cd9..bd08814b015c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -33,14 +33,17 @@ 
 #include "intel_drv.h"
 #include "i915_trace.h"
 
-static bool
-gpu_is_idle(struct drm_i915_private *dev_priv)
+static bool ggtt_is_idle(struct drm_i915_private *dev_priv)
 {
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
 	for_each_engine(engine, dev_priv, id) {
-		if (intel_engine_is_active(engine))
+		struct intel_timeline *tl;
+
+		tl = &ggtt->base.timeline.engine[engine->id];
+		if (i915_gem_active_isset(&tl->last_request))
 			return false;
 	}
 
@@ -154,7 +157,7 @@  search_again:
 	if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
 		return -ENOSPC;
 
-	if (gpu_is_idle(dev_priv)) {
+	if (ggtt_is_idle(dev_priv)) {
 		/* If we still have pending pageflip completions, drop
 		 * back to userspace to give our workqueues time to
 		 * acquire our locks and unpin the old scanouts.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1b1a459e2b68..e7afad585929 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2185,8 +2185,10 @@  static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 }
 
 static void i915_address_space_init(struct i915_address_space *vm,
-				    struct drm_i915_private *dev_priv)
+				    struct drm_i915_private *dev_priv,
+				    const char *name)
 {
+	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
 	drm_mm_init(&vm->mm, vm->start, vm->total);
 	INIT_LIST_HEAD(&vm->active_list);
 	INIT_LIST_HEAD(&vm->inactive_list);
@@ -2215,14 +2217,15 @@  static void gtt_write_workarounds(struct drm_device *dev)
 
 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 			   struct drm_i915_private *dev_priv,
-			   struct drm_i915_file_private *file_priv)
+			   struct drm_i915_file_private *file_priv,
+			   const char *name)
 {
 	int ret;
 
 	ret = __hw_ppgtt_init(ppgtt, dev_priv);
 	if (ret == 0) {
 		kref_init(&ppgtt->ref);
-		i915_address_space_init(&ppgtt->base, dev_priv);
+		i915_address_space_init(&ppgtt->base, dev_priv, name);
 		ppgtt->base.file = file_priv;
 	}
 
@@ -2258,7 +2261,8 @@  int i915_ppgtt_init_hw(struct drm_device *dev)
 
 struct i915_hw_ppgtt *
 i915_ppgtt_create(struct drm_i915_private *dev_priv,
-		  struct drm_i915_file_private *fpriv)
+		  struct drm_i915_file_private *fpriv,
+		  const char *name)
 {
 	struct i915_hw_ppgtt *ppgtt;
 	int ret;
@@ -2267,7 +2271,7 @@  i915_ppgtt_create(struct drm_i915_private *dev_priv,
 	if (!ppgtt)
 		return ERR_PTR(-ENOMEM);
 
-	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
+	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name);
 	if (ret) {
 		kfree(ppgtt);
 		return ERR_PTR(ret);
@@ -2290,6 +2294,7 @@  void  i915_ppgtt_release(struct kref *kref)
 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
 	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
 
+	i915_gem_timeline_fini(&ppgtt->base.timeline);
 	list_del(&ppgtt->base.global_link);
 	drm_mm_takedown(&ppgtt->base.mm);
 
@@ -3232,11 +3237,13 @@  int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	/* Subtract the guard page before address space initialization to
 	 * shrink the range used by drm_mm.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	ggtt->base.total -= PAGE_SIZE;
-	i915_address_space_init(&ggtt->base, dev_priv);
+	i915_address_space_init(&ggtt->base, dev_priv, "[global]");
 	ggtt->base.total += PAGE_SIZE;
 	if (!HAS_LLC(dev_priv))
 		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
 				dev_priv->ggtt.mappable_base,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9f0327e5176a..518e75b64290 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -342,6 +342,7 @@  struct i915_pml4 {
 
 struct i915_address_space {
 	struct drm_mm mm;
+	struct i915_gem_timeline timeline;
 	struct drm_device *dev;
 	/* Every address space belongs to a struct file - except for the global
 	 * GTT that is owned by the driver (and so @file is set to NULL). In
@@ -613,7 +614,8 @@  void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
 int i915_ppgtt_init_hw(struct drm_device *dev);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
-					struct drm_i915_file_private *fpriv);
+					struct drm_i915_file_private *fpriv,
+					const char *name);
 static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index c54fee57c8fb..a7b69ae0b673 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -34,12 +34,6 @@  static const char *i915_fence_get_driver_name(struct fence *fence)
 
 static const char *i915_fence_get_timeline_name(struct fence *fence)
 {
-	/* Timelines are bound by eviction to a VM. However, since
-	 * we only have a global seqno at the moment, we only have
-	 * a single timeline. Note that each timeline will have
-	 * multiple execution contexts (fence contexts) as we allow
-	 * engines within a single timeline to execute in parallel.
-	 */
 	return to_request(fence)->timeline->common->name;
 }
 
@@ -64,18 +58,6 @@  static signed long i915_fence_wait(struct fence *fence,
 	return i915_wait_request(to_request(fence), interruptible, timeout);
 }
 
-static void i915_fence_value_str(struct fence *fence, char *str, int size)
-{
-	snprintf(str, size, "%u", fence->seqno);
-}
-
-static void i915_fence_timeline_value_str(struct fence *fence, char *str,
-					  int size)
-{
-	snprintf(str, size, "%u",
-		 intel_engine_get_seqno(to_request(fence)->engine));
-}
-
 static void i915_fence_release(struct fence *fence)
 {
 	struct drm_i915_gem_request *req = to_request(fence);
@@ -90,8 +72,6 @@  const struct fence_ops i915_fence_ops = {
 	.signaled = i915_fence_signaled,
 	.wait = i915_fence_wait,
 	.release = i915_fence_release,
-	.fence_value_str = i915_fence_value_str,
-	.timeline_value_str = i915_fence_timeline_value_str,
 };
 
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
@@ -147,7 +127,10 @@  static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(!i915_gem_request_completed(request));
 
 	trace_i915_gem_request_retire(request);
+
+	spin_lock_irq(&request->engine->timeline->lock);
 	list_del_init(&request->link);
+	spin_unlock_irq(&request->engine->timeline->lock);
 
 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
@@ -313,6 +296,12 @@  static int reserve_global_seqno(struct drm_i915_private *i915)
 	return 0;
 }
 
+static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
+{
+	/* next_seqno only incremented under a mutex */
+	return ++tl->next_seqno.counter;
+}
+
 static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
 {
 	return atomic_inc_return(&tl->next_seqno);
@@ -325,6 +314,7 @@  submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 		container_of(fence, typeof(*request), submit);
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_timeline *timeline;
+	unsigned long flags;
 	u32 seqno;
 
 	if (state != FENCE_COMPLETE)
@@ -332,9 +322,12 @@  submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 
 	/* Will be called from irq-context when using foreign DMA fences */
 
-	timeline = request->timeline;
+	timeline = engine->timeline;
+	GEM_BUG_ON(timeline == request->timeline);
 
-	seqno = request->fence.seqno;
+	spin_lock_irqsave(&timeline->lock, flags);
+
+	seqno = timeline_get_seqno(timeline->common);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
@@ -353,6 +346,12 @@  submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 				request->ring->vaddr + request->postfix);
 	engine->submit_request(request);
 
+	spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
+	list_move_tail(&request->link, &timeline->requests);
+	spin_unlock(&request->timeline->lock);
+
+	spin_unlock_irqrestore(&timeline->lock, flags);
+
 	return NOTIFY_DONE;
 }
 
@@ -393,7 +392,7 @@  i915_gem_request_alloc(struct intel_engine_cs *engine,
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
-	if (req && i915_gem_request_completed(req))
+	if (req && __i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
 
 	/* Beware: Dragons be flying overhead.
@@ -430,14 +429,15 @@  i915_gem_request_alloc(struct intel_engine_cs *engine,
 		goto err_unreserve;
 	}
 
-	req->timeline = engine->timeline;
+	req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
+	GEM_BUG_ON(req->timeline == engine->timeline);
 
 	spin_lock_init(&req->lock);
 	fence_init(&req->fence,
 		   &i915_fence_ops,
 		   &req->lock,
 		   req->timeline->fence_context,
-		   timeline_get_seqno(req->timeline->common));
+		   __timeline_get_seqno(req->timeline->common));
 
 	i915_sw_fence_init(&req->submit, submit_notify);
 
@@ -721,9 +721,14 @@  void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 
+	spin_lock_irq(&timeline->lock);
 	list_add_tail(&request->link, &timeline->requests);
+	spin_unlock_irq(&timeline->lock);
+
+	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+				     request->fence.seqno));
 
-	timeline->last_pending_seqno = request->fence.seqno;
+	timeline->last_submitted_seqno = request->fence.seqno;
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
@@ -987,7 +992,7 @@  static void engine_retire_requests(struct intel_engine_cs *engine)
 
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
-		if (!i915_gem_request_completed(request))
+		if (!__i915_gem_request_completed(request))
 			return;
 
 		i915_gem_request_retire(request);
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index a4579c109066..40d9f009673f 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -48,6 +48,7 @@  int i915_gem_timeline_init(struct drm_i915_private *i915,
 		tl->fence_context = fences++;
 		tl->common = timeline;
 
+		spin_lock_init(&tl->lock);
 		init_request_active(&tl->last_request, NULL);
 		INIT_LIST_HEAD(&tl->requests);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 18e603980dd9..f2bf7b1d49a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -34,7 +34,8 @@  struct i915_gem_timeline;
 struct intel_timeline {
 	u64 fence_context;
 	u32 last_submitted_seqno;
-	u32 last_pending_seqno;
+
+	spinlock_t lock;
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 891629caab6c..dcb145c67e89 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -569,9 +569,4 @@  void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned int intel_kick_waiters(struct drm_i915_private *i915);
 unsigned int intel_kick_signalers(struct drm_i915_private *i915);
 
-static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
-{
-	return i915_gem_active_isset(&engine->timeline->last_request);
-}
-
 #endif /* _INTEL_RINGBUFFER_H_ */

Comments

On pe, 2016-10-14 at 13:18 +0100, Chris Wilson wrote:
> With the infrastructure converted over to tracking multiple timelines in
> the GEM API whilst preserving the efficiency of using a single execution
> timeline internally, we can now assign a separate timeline to every
> context with full-ppgtt.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Changelog would be nice, but seems to address the major issues.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
On Thu, Oct 20, 2016 at 06:26:04PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-14 at 13:18 +0100, Chris Wilson wrote:
> > With the infrastructure converted over to tracking multiple timelines in
> > the GEM API whilst preserving the efficiency of using a single execution
> > timeline internally, we can now assign a separate timeline to every
> > context with full-ppgtt.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Changelog would be nice, but seems to address the major issues.

    v2: Add a comment to indicate the xfer between timelines upon
    submission.

? Are we on the right thread?

Most the earliest issues where addressed by doing them before we got to
the enabling patch.
-Chris