[v9,1/6] drm/i915: Add per context timelines for fence objects

Submitted by John Harrison on June 1, 2016, 5:07 p.m.

Details

Message ID 1464800848-36672-2-git-send-email-John.C.Harrison@Intel.com
State New
Headers show
Series "Convert requests to use struct fence" ( rev: 6 ) in Intel GFX

Not browsing as part of any series.

Commit Message

John Harrison June 1, 2016, 5:07 p.m.
From: John Harrison <John.C.Harrison@Intel.com>

The purpose of this patch series is to convert the request structure to
use fence objects for the underlying completion tracking. The fence
object requires a sequence number. The ultimate aim is to use the same
sequence number as for the request itself (or rather, to remove the
request's seqno field and just use the fence's value throughout the
driver). However, this is not currently possible and so this patch
introduces a separate numbering scheme as an intermediate step.

A major advantage of using the fence object is that it can be passed
outside of the i915 driver and used externally. The fence API allows
for various operations such as combining multiple fences. This
requires that fence seqnos within a single fence context be guaranteed
in-order. The GPU scheduler that is coming can re-order request
execution but not within a single GPU context. Thus the fence context
must be tied to the i915 context (and the engine within the context as
each engine runs asynchronously).

On the other hand, the driver as a whole currently only works with
request seqnos that are allocated from a global in-order timeline. It
will require a fair chunk of re-work to allow multiple independent
seqno timelines to be used. Hence the introduction of a temporary,
fence specific timeline. Once the work to update the rest of the
driver has been completed then the request can use the fence seqno
instead.

v2: New patch in series.

v3: Renamed/retyped timeline structure fields after review comments by
Tvrtko Ursulin.

Added context information to the timeline's name string for better
identification in debugfs output.

v5: Line wrapping and other white space fixes to keep style checker
happy.

v7: Updated to newer nightly (lots of ring -> engine renaming).

v8: Moved to earlier in patch series so no longer needs to remove the
quick hack timeline that was being added before.

v9: Updated to another newer nightly (changes to context structure
naming). Also updated commit message to match previous changes.

For: VIZ-5190
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
 drivers/gpu/drm/i915/i915_gem.c         | 40 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.c | 16 +++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        |  8 +++++++
 4 files changed, 78 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a88a46..a5f8ad8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -831,6 +831,19 @@  struct i915_ctx_hang_stats {
 	bool banned;
 };
 
+struct i915_fence_timeline {
+	char        name[32];
+	unsigned    fence_context;
+	unsigned    next;
+
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs *engine;
+};
+
+int i915_create_fence_timeline(struct drm_device *dev,
+			       struct i915_gem_context *ctx,
+			       struct intel_engine_cs *ring);
+
 /* This must match up with the value previously used for execbuf2.rsvd1. */
 #define DEFAULT_CONTEXT_HANDLE 0
 
@@ -875,6 +888,7 @@  struct i915_gem_context {
 		u64 lrc_desc;
 		int pin_count;
 		bool initialised;
+		struct i915_fence_timeline fence_timeline;
 	} engine[I915_NUM_ENGINES];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5ffc6fa..57d3593 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2743,6 +2743,46 @@  void i915_gem_request_free(struct kref *req_ref)
 	kmem_cache_free(req->i915->requests, req);
 }
 
+int i915_create_fence_timeline(struct drm_device *dev,
+			       struct i915_gem_context *ctx,
+			       struct intel_engine_cs *engine)
+{
+	struct i915_fence_timeline *timeline;
+
+	timeline = &ctx->engine[engine->id].fence_timeline;
+
+	if (timeline->engine)
+		return 0;
+
+	timeline->fence_context = fence_context_alloc(1);
+
+	/*
+	 * Start the timeline from seqno 0 as this is a special value
+	 * that is reserved for invalid sync points.
+	 */
+	timeline->next       = 1;
+	timeline->ctx        = ctx;
+	timeline->engine     = engine;
+
+	snprintf(timeline->name, sizeof(timeline->name), "%d>%s:%d",
+		 timeline->fence_context, engine->name, ctx->user_handle);
+
+	return 0;
+}
+
+unsigned i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *timeline)
+{
+	unsigned seqno;
+
+	seqno = timeline->next;
+
+	/* Reserve zero for invalid */
+	if (++timeline->next == 0)
+		timeline->next = 1;
+
+	return seqno;
+}
+
 static inline int
 __i915_gem_request_alloc(struct intel_engine_cs *engine,
 			 struct i915_gem_context *ctx,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d0e7fc6..07d8c63 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -320,6 +320,22 @@  i915_gem_create_context(struct drm_device *dev,
 	if (IS_ERR(ctx))
 		return ctx;
 
+	if (!i915.enable_execlists) {
+		struct intel_engine_cs *engine;
+
+		/* Create a per context timeline for fences */
+		for_each_engine(engine, to_i915(dev)) {
+			int ret = i915_create_fence_timeline(dev, ctx, engine);
+			if (ret) {
+				DRM_ERROR("Fence timeline creation failed for legacy %s: %p\n",
+					  engine->name, ctx);
+				idr_remove(&file_priv->context_idr, ctx->user_handle);
+				i915_gem_context_unreference(ctx);
+				return ERR_PTR(ret);
+			}
+		}
+	}
+
 	if (USES_FULL_PPGTT(dev)) {
 		struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5c191a1..14bcfb7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2496,6 +2496,14 @@  static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 		goto error_ringbuf;
 	}
 
+	/* Create a per context timeline for fences */
+	ret = i915_create_fence_timeline(ctx->i915->dev, ctx, engine);
+	if (ret) {
+		DRM_ERROR("Fence timeline creation failed for engine %s, ctx %p\n",
+			  engine->name, ctx);
+		goto error_ringbuf;
+	}
+
 	ce->ringbuf = ringbuf;
 	ce->state = ctx_obj;
 	ce->initialised = engine->init_context == NULL;

Comments

On 01/06/16 18:07, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The purpose of this patch series is to convert the request structure to
> use fence objects for the underlying completion tracking. The fence
> object requires a sequence number. The ultimate aim is to use the same
> sequence number as for the request itself (or rather, to remove the
> request's seqno field and just use the fence's value throughout the
> driver). However, this is not currently possible and so this patch
> introduces a separate numbering scheme as an intermediate step.
>
> A major advantage of using the fence object is that it can be passed
> outside of the i915 driver and used externally. The fence API allows
> for various operations such as combining multiple fences. This
> requires that fence seqnos within a single fence context be guaranteed
> in-order. The GPU scheduler that is coming can re-order request
> execution but not within a single GPU context. Thus the fence context
> must be tied to the i915 context (and the engine within the context as
> each engine runs asynchronously).
>
> On the other hand, the driver as a whole currently only works with
> request seqnos that are allocated from a global in-order timeline. It
> will require a fair chunk of re-work to allow multiple independent
> seqno timelines to be used. Hence the introduction of a temporary,
> fence specific timeline. Once the work to update the rest of the
> driver has been completed then the request can use the fence seqno
> instead.
>
> v2: New patch in series.
>
> v3: Renamed/retyped timeline structure fields after review comments by
> Tvrtko Ursulin.
>
> Added context information to the timeline's name string for better
> identification in debugfs output.
>
> v5: Line wrapping and other white space fixes to keep style checker
> happy.
>
> v7: Updated to newer nightly (lots of ring -> engine renaming).
>
> v8: Moved to earlier in patch series so no longer needs to remove the
> quick hack timeline that was being added before.
>
> v9: Updated to another newer nightly (changes to context structure
> naming). Also updated commit message to match previous changes.
>
> For: VIZ-5190
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
>   drivers/gpu/drm/i915/i915_gem.c         | 40 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.c | 16 +++++++++++++
>   drivers/gpu/drm/i915/intel_lrc.c        |  8 +++++++
>   4 files changed, 78 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2a88a46..a5f8ad8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -831,6 +831,19 @@ struct i915_ctx_hang_stats {
>   	bool banned;
>   };
>
> +struct i915_fence_timeline {
> +	char        name[32];
> +	unsigned    fence_context;
> +	unsigned    next;
> +
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs *engine;

Are these backpointers used in the patch series? I did a quick search 
with the "timeline->" string and did not find anything.

> +};
> +
> +int i915_create_fence_timeline(struct drm_device *dev,
> +			       struct i915_gem_context *ctx,
> +			       struct intel_engine_cs *ring);
> +
>   /* This must match up with the value previously used for execbuf2.rsvd1. */
>   #define DEFAULT_CONTEXT_HANDLE 0
>
> @@ -875,6 +888,7 @@ struct i915_gem_context {
>   		u64 lrc_desc;
>   		int pin_count;
>   		bool initialised;
> +		struct i915_fence_timeline fence_timeline;
>   	} engine[I915_NUM_ENGINES];
>
>   	struct list_head link;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 5ffc6fa..57d3593 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2743,6 +2743,46 @@ void i915_gem_request_free(struct kref *req_ref)
>   	kmem_cache_free(req->i915->requests, req);
>   }
>
> +int i915_create_fence_timeline(struct drm_device *dev,

dev is not used in the function. Maybe it will be in later patches? In 
which case I think dev_priv is the current bkm for i915 
specific/internal code.

> +			       struct i915_gem_context *ctx,
> +			       struct intel_engine_cs *engine)
> +{
> +	struct i915_fence_timeline *timeline;
> +
> +	timeline = &ctx->engine[engine->id].fence_timeline;
> +
> +	if (timeline->engine)
> +		return 0;

Is this an expected case? Unless I am missing something it shouldn't be 
so maybe a WARN_ON would be warranted?

> +
> +	timeline->fence_context = fence_context_alloc(1);
> +
> +	/*
> +	 * Start the timeline from seqno 0 as this is a special value
> +	 * that is reserved for invalid sync points.
> +	 */

Comment and init to 1 below look in disagreement. Maybe comment should 
be something like "Start the timeline from seqno 1 as 0 is a special 
value.." ?

> +	timeline->next       = 1;
> +	timeline->ctx        = ctx;
> +	timeline->engine     = engine;
> +
> +	snprintf(timeline->name, sizeof(timeline->name), "%d>%s:%d",
> +		 timeline->fence_context, engine->name, ctx->user_handle);
> +

For rings like "video enhancement ring" name is 22 chars on its own, 
leaving only 9 for the two integers. If a lot of contexts or long 
runtime I suppose that could overflow. It is a bit of a stretch but 
perhaps 32 is not enough, maybe available space for the name should be 
better defined as longest ring name (with a comment) plus maximum for 
two integers.

I think timeline->name is only for debug but still feels better to make 
sure it will fit rather than truncate it.

And we should probably just shorten the "video enhancement ring" to "vecs 
ring"...

> +	return 0;
> +}
> +
> +unsigned i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *timeline)

It is strange to add a public function in this patch which is even 
unused especially since the following patch makes it private.

Would it make more sense for it to be static straight away and maybe 
even called from __i915_gem_request_alloc unconditionally so that the 
patch does not add dead code?

Don't know, not terribly important but would perhaps look more logical 
as a patch series.

> +{
> +	unsigned seqno;
> +
> +	seqno = timeline->next;
> +
> +	/* Reserve zero for invalid */
> +	if (++timeline->next == 0)
> +		timeline->next = 1;
> +
> +	return seqno;
> +}
> +
>   static inline int
>   __i915_gem_request_alloc(struct intel_engine_cs *engine,
>   			 struct i915_gem_context *ctx,
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index d0e7fc6..07d8c63 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -320,6 +320,22 @@ i915_gem_create_context(struct drm_device *dev,
>   	if (IS_ERR(ctx))
>   		return ctx;
>
> +	if (!i915.enable_execlists) {
> +		struct intel_engine_cs *engine;
> +
> +		/* Create a per context timeline for fences */
> +		for_each_engine(engine, to_i915(dev)) {
> +			int ret = i915_create_fence_timeline(dev, ctx, engine);
> +			if (ret) {
> +				DRM_ERROR("Fence timeline creation failed for legacy %s: %p\n",
> +					  engine->name, ctx);
> +				idr_remove(&file_priv->context_idr, ctx->user_handle);
> +				i915_gem_context_unreference(ctx);
> +				return ERR_PTR(ret);
> +			}
> +		}
> +	}
> +
>   	if (USES_FULL_PPGTT(dev)) {
>   		struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5c191a1..14bcfb7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2496,6 +2496,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   		goto error_ringbuf;
>   	}
>
> +	/* Create a per context timeline for fences */
> +	ret = i915_create_fence_timeline(ctx->i915->dev, ctx, engine);
> +	if (ret) {
> +		DRM_ERROR("Fence timeline creation failed for engine %s, ctx %p\n",

"engine %s" will log something like "engine render ring" which will be 
weird.

Also pointer to the context is not that interesting as DRM_ERROR I 
think. ctx->user_handle instead? Same in the legacy mode.

> +			  engine->name, ctx);
> +		goto error_ringbuf;
> +	}
> +
>   	ce->ringbuf = ringbuf;
>   	ce->state = ctx_obj;
>   	ce->initialised = engine->init_context == NULL;
>

So in summary just some minor things, otherwise it looks OK I think.

Regards,

Tvrtko
Op 01-06-16 om 19:07 schreef John.C.Harrison@Intel.com:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The purpose of this patch series is to convert the request structure to
> use fence objects for the underlying completion tracking. The fence
> object requires a sequence number. The ultimate aim is to use the same
> sequence number as for the request itself (or rather, to remove the
> request's seqno field and just use the fence's value throughout the
> driver). However, this is not currently possible and so this patch
> introduces a separate numbering scheme as an intermediate step.
>
> A major advantage of using the fence object is that it can be passed
> outside of the i915 driver and used externally. The fence API allows
> for various operations such as combining multiple fences. This
> requires that fence seqnos within a single fence context be guaranteed
> in-order. The GPU scheduler that is coming can re-order request
> execution but not within a single GPU context. Thus the fence context
> must be tied to the i915 context (and the engine within the context as
> each engine runs asynchronously).
>
> On the other hand, the driver as a whole currently only works with
> request seqnos that are allocated from a global in-order timeline. It
> will require a fair chunk of re-work to allow multiple independent
> seqno timelines to be used. Hence the introduction of a temporary,
> fence specific timeline. Once the work to update the rest of the
> driver has been completed then the request can use the fence seqno
> instead.
>
> v2: New patch in series.
>
> v3: Renamed/retyped timeline structure fields after review comments by
> Tvrtko Ursulin.
>
> Added context information to the timeline's name string for better
> identification in debugfs output.
>
> v5: Line wrapping and other white space fixes to keep style checker
> happy.
>
> v7: Updated to newer nightly (lots of ring -> engine renaming).
>
> v8: Moved to earlier in patch series so no longer needs to remove the
> quick hack timeline that was being added before.
>
> v9: Updated to another newer nightly (changes to context structure
> naming). Also updated commit message to match previous changes.
>
> For: VIZ-5190
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
>  drivers/gpu/drm/i915/i915_gem.c         | 40 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_context.c | 16 +++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c        |  8 +++++++
>  4 files changed, 78 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2a88a46..a5f8ad8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -831,6 +831,19 @@ struct i915_ctx_hang_stats {
>  	bool banned;
>  };
>  
> +struct i915_fence_timeline {
> +	char        name[32];
> +	unsigned    fence_context;
Should be a u64 now, since commit 76bf0db5543976ef50362db7071da367cb118532
> +	unsigned    next;
> +
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs *engine;
> +};
> +
> +int i915_create_fence_timeline(struct drm_device *dev,
> +			       struct i915_gem_context *ctx,
> +			       struct intel_engine_cs *ring);
> +
>  /* This must match up with the value previously used for execbuf2.rsvd1. */
>  #define DEFAULT_CONTEXT_HANDLE 0
>  
> @@ -875,6 +888,7 @@ struct i915_gem_context {
>  		u64 lrc_desc;
>  		int pin_count;
>  		bool initialised;
> +		struct i915_fence_timeline fence_timeline;
>  	} engine[I915_NUM_ENGINES];
>  
>  	struct list_head link;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 5ffc6fa..57d3593 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2743,6 +2743,46 @@ void i915_gem_request_free(struct kref *req_ref)
>  	kmem_cache_free(req->i915->requests, req);
>  }
>  
> +int i915_create_fence_timeline(struct drm_device *dev,
> +			       struct i915_gem_context *ctx,
> +			       struct intel_engine_cs *engine)
> +{
> +	struct i915_fence_timeline *timeline;
> +
> +	timeline = &ctx->engine[engine->id].fence_timeline;
> +
> +	if (timeline->engine)
> +		return 0;
Do you ever expect a reinit?
> +	timeline->fence_context = fence_context_alloc(1);
> +
> +	/*
> +	 * Start the timeline from seqno 0 as this is a special value
> +	 * that is reserved for invalid sync points.
> +	 */
> +	timeline->next       = 1;
> +	timeline->ctx        = ctx;
> +	timeline->engine     = engine;
> +
> +	snprintf(timeline->name, sizeof(timeline->name), "%d>%s:%d",
> +		 timeline->fence_context, engine->name, ctx->user_handle);
> +
> +	return 0;
> +}
> +
On top of the other comments, you might want to add a TODO comment that there should be only one timeline for each context,
with each engine having only a unique fence->context.

~Maarten
On 02/06/2016 11:28, Tvrtko Ursulin wrote:
>
> On 01/06/16 18:07, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> The purpose of this patch series is to convert the request structure to
>> use fence objects for the underlying completion tracking. The fence
>> object requires a sequence number. The ultimate aim is to use the same
>> sequence number as for the request itself (or rather, to remove the
>> request's seqno field and just use the fence's value throughout the
>> driver). However, this is not currently possible and so this patch
>> introduces a separate numbering scheme as an intermediate step.
>>
>> A major advantage of using the fence object is that it can be passed
>> outside of the i915 driver and used externally. The fence API allows
>> for various operations such as combining multiple fences. This
>> requires that fence seqnos within a single fence context be guaranteed
>> in-order. The GPU scheduler that is coming can re-order request
>> execution but not within a single GPU context. Thus the fence context
>> must be tied to the i915 context (and the engine within the context as
>> each engine runs asynchronously).
>>
>> On the other hand, the driver as a whole currently only works with
>> request seqnos that are allocated from a global in-order timeline. It
>> will require a fair chunk of re-work to allow multiple independent
>> seqno timelines to be used. Hence the introduction of a temporary,
>> fence specific timeline. Once the work to update the rest of the
>> driver has been completed then the request can use the fence seqno
>> instead.
>>
>> v2: New patch in series.
>>
>> v3: Renamed/retyped timeline structure fields after review comments by
>> Tvrtko Ursulin.
>>
>> Added context information to the timeline's name string for better
>> identification in debugfs output.
>>
>> v5: Line wrapping and other white space fixes to keep style checker
>> happy.
>>
>> v7: Updated to newer nightly (lots of ring -> engine renaming).
>>
>> v8: Moved to earlier in patch series so no longer needs to remove the
>> quick hack timeline that was being added before.
>>
>> v9: Updated to another newer nightly (changes to context structure
>> naming). Also updated commit message to match previous changes.
>>
>> For: VIZ-5190
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
>>   drivers/gpu/drm/i915/i915_gem.c         | 40 
>> +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gem_context.c | 16 +++++++++++++
>>   drivers/gpu/drm/i915/intel_lrc.c        |  8 +++++++
>>   4 files changed, 78 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 2a88a46..a5f8ad8 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -831,6 +831,19 @@ struct i915_ctx_hang_stats {
>>       bool banned;
>>   };
>>
>> +struct i915_fence_timeline {
>> +    char        name[32];
>> +    unsigned    fence_context;
>> +    unsigned    next;
>> +
>> +    struct i915_gem_context *ctx;
>> +    struct intel_engine_cs *engine;
>
> Are these backpointers used in the patch series? I did a quick search 
> with the "timeline->" string and did not find anything.
Hmm, not any more it seems. Will remove them.

>
>> +};
>> +
>> +int i915_create_fence_timeline(struct drm_device *dev,
>> +                   struct i915_gem_context *ctx,
>> +                   struct intel_engine_cs *ring);
>> +
>>   /* This must match up with the value previously used for 
>> execbuf2.rsvd1. */
>>   #define DEFAULT_CONTEXT_HANDLE 0
>>
>> @@ -875,6 +888,7 @@ struct i915_gem_context {
>>           u64 lrc_desc;
>>           int pin_count;
>>           bool initialised;
>> +        struct i915_fence_timeline fence_timeline;
>>       } engine[I915_NUM_ENGINES];
>>
>>       struct list_head link;
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 5ffc6fa..57d3593 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2743,6 +2743,46 @@ void i915_gem_request_free(struct kref *req_ref)
>>       kmem_cache_free(req->i915->requests, req);
>>   }
>>
>> +int i915_create_fence_timeline(struct drm_device *dev,
>
> dev is not used in the function. Maybe it will be in later patches? In 
> which case I think dev_priv is the current bkm for i915 
> specific/internal code.
Again, it is left over from a previous implementation and could actually 
be removed.

>
>> +                   struct i915_gem_context *ctx,
>> +                   struct intel_engine_cs *engine)
>> +{
>> +    struct i915_fence_timeline *timeline;
>> +
>> +    timeline = &ctx->engine[engine->id].fence_timeline;
>> +
>> +    if (timeline->engine)
>> +        return 0;
>
> Is this an expected case? Unless I am missing something it shouldn't 
> be so maybe a WARN_ON would be warranted?
No. Will make it a WARN instead.

>
>> +
>> +    timeline->fence_context = fence_context_alloc(1);
>> +
>> +    /*
>> +     * Start the timeline from seqno 0 as this is a special value
>> +     * that is reserved for invalid sync points.
>> +     */
>
> Comment and init to 1 below look in disagreement. Maybe comment should 
> be something like "Start the timeline from seqno 1 as 0 is a special 
> value.." ?
Hmm not sure what happened there. Will update the comment so it actually 
makes sense.

>
>> +    timeline->next       = 1;
>> +    timeline->ctx        = ctx;
>> +    timeline->engine     = engine;
>> +
>> +    snprintf(timeline->name, sizeof(timeline->name), "%d>%s:%d",
>> +         timeline->fence_context, engine->name, ctx->user_handle);
>> +
>
> For rings like "video enhancement ring" name is 22 chars on its own, 
> leaving only 9 for the two integers. If a lot of contexts or long 
> runtime I suppose that could overflow. It is a bit of a stretch but 
> perhaps 32 is not enough, maybe available space for the name should be 
> better defined as longest ring name (with a comment) plus maximum for 
> two integers.
>
> I think timeline->name is only for debug but still feels better to 
> make sure it will fit rather than truncate it.
Yes, it is only used for debug purposes (trace events and debugfs state 
dump). There is no 'maximum ring name length' value anywhere is there? 
And making it a dynamic allocation seems like excessive complication for 
a debug string. I could bump the static size up to 40 or maybe 64?

>
> And we should probably just shorten the "video enhancement ring" to 
> "vecs ring"...
I generally run with a local patch to convert all ring names to a TLA 
(RCS, BCS, etc.) just to make the debug output more readable.

>
>> +    return 0;
>> +}
>> +
>> +unsigned i915_fence_timeline_get_next_seqno(struct 
>> i915_fence_timeline *timeline)
>
> It is strange to add a public function in this patch which is even 
> unused especially since the following patch makes it private.
>
> Would it make more sense for it to be static straight away and maybe 
> even called from __i915_gem_request_alloc unconditionally so that the 
> patch does not add dead code?

It's the old question of how to split a series up into patches that 
don't do everything all at once but still make sense individually. I 
think it makes more sense to include the code in this prep patch as it 
is a rather fundamental part of the timeline code. However, as none of 
it is wired up yet it can't be made static. The next patch hooks up the 
timeline code into various places at which point it this function can 
become static because it is only used in the file. I could be done 
differently but this seemed like the most sensible option to me.

> Don't know, not terribly important but would perhaps look more logical 
> as a patch series.
>
>> +{
>> +    unsigned seqno;
>> +
>> +    seqno = timeline->next;
>> +
>> +    /* Reserve zero for invalid */
>> +    if (++timeline->next == 0)
>> +        timeline->next = 1;
>> +
>> +    return seqno;
>> +}
>> +
>>   static inline int
>>   __i915_gem_request_alloc(struct intel_engine_cs *engine,
>>                struct i915_gem_context *ctx,
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index d0e7fc6..07d8c63 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -320,6 +320,22 @@ i915_gem_create_context(struct drm_device *dev,
>>       if (IS_ERR(ctx))
>>           return ctx;
>>
>> +    if (!i915.enable_execlists) {
>> +        struct intel_engine_cs *engine;
>> +
>> +        /* Create a per context timeline for fences */
>> +        for_each_engine(engine, to_i915(dev)) {
>> +            int ret = i915_create_fence_timeline(dev, ctx, engine);
>> +            if (ret) {
>> +                DRM_ERROR("Fence timeline creation failed for legacy 
>> %s: %p\n",
>> +                      engine->name, ctx);
>> +                idr_remove(&file_priv->context_idr, ctx->user_handle);
>> +                i915_gem_context_unreference(ctx);
>> +                return ERR_PTR(ret);
>> +            }
>> +        }
>> +    }
>> +
>>       if (USES_FULL_PPGTT(dev)) {
>>           struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, 
>> file_priv);
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 5c191a1..14bcfb7 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2496,6 +2496,14 @@ static int 
>> execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>>           goto error_ringbuf;
>>       }
>>
>> +    /* Create a per context timeline for fences */
>> +    ret = i915_create_fence_timeline(ctx->i915->dev, ctx, engine);
>> +    if (ret) {
>> +        DRM_ERROR("Fence timeline creation failed for engine %s, ctx 
>> %p\n",
>
> "engine %s" will log something like "engine render ring" which will be 
> weird.
>
> Also pointer to the context is not that interesting as DRM_ERROR I 
> think. ctxc->user_handle instead? Same in the legacy mode.
Will drop the 'engine' and add in user_handle.

>
>> +              engine->name, ctx);
>> +        goto error_ringbuf;
>> +    }
>> +
>>       ce->ringbuf = ringbuf;
>>       ce->state = ctx_obj;
>>       ce->initialised = engine->init_context == NULL;
>>
>
> So in summary just some minor things, otherwise it looks OK I think.
>
> Regards,
>
> Tvrtko
>
>
On 07/06/2016 12:17, Maarten Lankhorst wrote:
> Op 01-06-16 om 19:07 schreef John.C.Harrison@Intel.com:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> The purpose of this patch series is to convert the request structure to
>> use fence objects for the underlying completion tracking. The fence
>> object requires a sequence number. The ultimate aim is to use the same
>> sequence number as for the request itself (or rather, to remove the
>> request's seqno field and just use the fence's value throughout the
>> driver). However, this is not currently possible and so this patch
>> introduces a separate numbering scheme as an intermediate step.
>>
>> A major advantage of using the fence object is that it can be passed
>> outside of the i915 driver and used externally. The fence API allows
>> for various operations such as combining multiple fences. This
>> requires that fence seqnos within a single fence context be guaranteed
>> in-order. The GPU scheduler that is coming can re-order request
>> execution but not within a single GPU context. Thus the fence context
>> must be tied to the i915 context (and the engine within the context as
>> each engine runs asynchronously).
>>
>> On the other hand, the driver as a whole currently only works with
>> request seqnos that are allocated from a global in-order timeline. It
>> will require a fair chunk of re-work to allow multiple independent
>> seqno timelines to be used. Hence the introduction of a temporary,
>> fence specific timeline. Once the work to update the rest of the
>> driver has been completed then the request can use the fence seqno
>> instead.
>>
>> v2: New patch in series.
>>
>> v3: Renamed/retyped timeline structure fields after review comments by
>> Tvrtko Ursulin.
>>
>> Added context information to the timeline's name string for better
>> identification in debugfs output.
>>
>> v5: Line wrapping and other white space fixes to keep style checker
>> happy.
>>
>> v7: Updated to newer nightly (lots of ring -> engine renaming).
>>
>> v8: Moved to earlier in patch series so no longer needs to remove the
>> quick hack timeline that was being added before.
>>
>> v9: Updated to another newer nightly (changes to context structure
>> naming). Also updated commit message to match previous changes.
>>
>> For: VIZ-5190
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
>>   drivers/gpu/drm/i915/i915_gem.c         | 40 +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gem_context.c | 16 +++++++++++++
>>   drivers/gpu/drm/i915/intel_lrc.c        |  8 +++++++
>>   4 files changed, 78 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 2a88a46..a5f8ad8 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -831,6 +831,19 @@ struct i915_ctx_hang_stats {
>>   	bool banned;
>>   };
>>   
>> +struct i915_fence_timeline {
>> +	char        name[32];
>> +	unsigned    fence_context;
> Should be a u64 now, since commit 76bf0db5543976ef50362db7071da367cb118532
Yeah, that's newer than the tree these patches are based on. Will update 
and rebase...

>> +	unsigned    next;
>> +
>> +	struct i915_gem_context *ctx;
>> +	struct intel_engine_cs *engine;
>> +};
>> +
>> +int i915_create_fence_timeline(struct drm_device *dev,
>> +			       struct i915_gem_context *ctx,
>> +			       struct intel_engine_cs *ring);
>> +
>>   /* This must match up with the value previously used for execbuf2.rsvd1. */
>>   #define DEFAULT_CONTEXT_HANDLE 0
>>   
>> @@ -875,6 +888,7 @@ struct i915_gem_context {
>>   		u64 lrc_desc;
>>   		int pin_count;
>>   		bool initialised;
>> +		struct i915_fence_timeline fence_timeline;
>>   	} engine[I915_NUM_ENGINES];
>>   
>>   	struct list_head link;
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 5ffc6fa..57d3593 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2743,6 +2743,46 @@ void i915_gem_request_free(struct kref *req_ref)
>>   	kmem_cache_free(req->i915->requests, req);
>>   }
>>   
>> +int i915_create_fence_timeline(struct drm_device *dev,
>> +			       struct i915_gem_context *ctx,
>> +			       struct intel_engine_cs *engine)
>> +{
>> +	struct i915_fence_timeline *timeline;
>> +
>> +	timeline = &ctx->engine[engine->id].fence_timeline;
>> +
>> +	if (timeline->engine)
>> +		return 0;
> Do you ever expect a reinit?
No. Will change to a WARN_ON as per Tvrtko's comment.

>> +	timeline->fence_context = fence_context_alloc(1);
>> +
>> +	/*
>> +	 * Start the timeline from seqno 0 as this is a special value
>> +	 * that is reserved for invalid sync points.
>> +	 */
>> +	timeline->next       = 1;
>> +	timeline->ctx        = ctx;
>> +	timeline->engine     = engine;
>> +
>> +	snprintf(timeline->name, sizeof(timeline->name), "%d>%s:%d",
>> +		 timeline->fence_context, engine->name, ctx->user_handle);
>> +
>> +	return 0;
>> +}
>> +
> On top of the other comments, you might want to add a TODO comment that there should be only one timeline for each context,
> with each engine having only a unique fence->context.
That is not the plan. This patch implements the safest possible option 
of a timeline per engine context. Chris's idea was that the timeline 
should be per VM (struct i915_address_space) instead. Although to me 
that sounds like it would cause problems with requests of multiple 
contexts within a single VM being reordered by the scheduler. Thus 
causing out of order completion on a single timeline. So for the moment, 
I am leaving it as is.

>
> ~Maarten