drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence

Submitted by Marek Olšák on Jan. 28, 2019, 9:52 p.m.

Details

Message ID 20190128215239.32498-1-maraeo@gmail.com
State New
Headers show
Series "drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Marek Olšák Jan. 28, 2019, 9:52 p.m.
From: Marek Olšák <marek.olsak@amd.com>

Normal syncobjs signal when an IB finishes. Start syncobjs signal when
an IB starts.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
 include/uapi/drm/amdgpu_drm.h           | 13 ++++++++++++-
 4 files changed, 33 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..8e2f7e558bc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -453,20 +453,21 @@  struct amdgpu_cs_parser {
 	struct dma_fence		*fence;
 	uint64_t			bytes_moved_threshold;
 	uint64_t			bytes_moved_vis_threshold;
 	uint64_t			bytes_moved;
 	uint64_t			bytes_moved_vis;
 	struct amdgpu_bo_list_entry	*evictable;
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
 
+	bool				get_start_syncobj;
 	unsigned num_post_dep_syncobjs;
 	struct drm_syncobj **post_dep_syncobjs;
 };
 
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
 				      uint32_t ib_idx, int idx)
 {
 	return p->job->ibs[ib_idx].ptr[idx];
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b8266d69..917f3818c61c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1022,20 +1022,23 @@  static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
 					  chunk_ib->ip_instance, chunk_ib->ring,
 					  &entity);
 		if (r)
 			return r;
 
 		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
 			parser->job->preamble_status |=
 				AMDGPU_PREAMBLE_IB_PRESENT;
 
+		if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
+			parser->get_start_syncobj = true;
+
 		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
 		parser->entity = entity;
 
 		ring = to_amdgpu_ring(entity->rq->sched);
 		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
 				   chunk_ib->ib_bytes : 0, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
@@ -1227,20 +1230,35 @@  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	amdgpu_mn_lock(p->mn);
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
 			r = -ERESTARTSYS;
 			goto error_abort;
 		}
 	}
 
+	if (p->get_start_syncobj) {
+		struct drm_syncobj *syncobj;
+
+		r = drm_syncobj_create(&syncobj, 0,
+				       &job->base.s_fence->scheduled);
+		if (r)
+			goto error_abort;
+
+		r = drm_syncobj_get_handle(p->filp, syncobj,
+					   &cs->out.start_syncobj);
+		if (r)
+			goto error_abort;
+		drm_syncobj_put(syncobj);
+	}
+
 	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
 	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
 	    !p->ctx->preamble_presented) {
 		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
 		p->ctx->preamble_presented = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..a230a30722d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,23 +64,24 @@ 
  * - 3.18.0 - Export gpu always on cu bitmap
  * - 3.19.0 - Add support for UVD MJPEG decode
  * - 3.20.0 - Add support for local BOs
  * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
  * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
  * - 3.23.0 - Add query for VRAM lost counter
  * - 3.24.0 - Add high priority compute support for gfx9
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	28
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
 int amdgpu_vis_vram_limit = 0;
 int amdgpu_gart_size = -1; /* auto */
 int amdgpu_gtt_size = -1; /* auto */
 int amdgpu_moverate = -1; /* auto */
 int amdgpu_benchmarking = 0;
 int amdgpu_testing = 0;
 int amdgpu_audio = -1;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 662d379ea624..d0e0c99cea32 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -538,21 +538,23 @@  struct drm_amdgpu_cs_in {
 	__u32		ctx_id;
 	/**  Handle of resource list associated with CS */
 	__u32		bo_list_handle;
 	__u32		num_chunks;
 	__u32		_pad;
 	/** this points to __u64 * which point to cs chunks */
 	__u64		chunks;
 };
 
 struct drm_amdgpu_cs_out {
-	__u64 handle;
+	__u64 handle; /* sequence number */
+	__u32 start_syncobj; /* signalled when IB execution begins */
+	__u32 _pad;
 };
 
 union drm_amdgpu_cs {
 	struct drm_amdgpu_cs_in in;
 	struct drm_amdgpu_cs_out out;
 };
 
 /* Specify flags to be used for IB */
 
 /* This IB should be submitted to CE */
@@ -566,20 +568,29 @@  union drm_amdgpu_cs {
 
 /* The IB fence should do the L2 writeback but not invalidate any shader
  * caches (L2/vL1/sL1/I$). */
 #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
 
 /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
  * This will reset wave ID counters for the IB.
  */
 #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
 
+/* The CS ioctl will return a syncobj representing when all IBs begin
+ * execution. If set, this applies to all IBs. The returned syncobj can be
+ * used as an IB dependency for other IBs.
+ *
+ * This is used for GPU deadlock prevention when userspace uses mid-IB fences
+ * to wait for mid-IB work on other rings.
+ */
+#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
+
 struct drm_amdgpu_cs_chunk_ib {
 	__u32 _pad;
 	/** AMDGPU_IB_FLAG_* */
 	__u32 flags;
 	/** Virtual address to begin IB execution */
 	__u64 va_start;
 	/** Size of submission */
 	__u32 ib_bytes;
 	/** HW IP to submit to */
 	__u32 ip_type;

Comments

Am 28.01.19 um 22:52 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com>
>
> Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> an IB starts.

That approach has quite a number of problems (for example you can't 
allocate memory at this point).

Better add a flag that we should only sync on scheduling for a 
dependency/syncobj instead.

Christian.

>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
>   include/uapi/drm/amdgpu_drm.h           | 13 ++++++++++++-
>   4 files changed, 33 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d67f8b1dfe80..8e2f7e558bc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
>   	struct dma_fence		*fence;
>   	uint64_t			bytes_moved_threshold;
>   	uint64_t			bytes_moved_vis_threshold;
>   	uint64_t			bytes_moved;
>   	uint64_t			bytes_moved_vis;
>   	struct amdgpu_bo_list_entry	*evictable;
>   
>   	/* user fence */
>   	struct amdgpu_bo_list_entry	uf_entry;
>   
> +	bool				get_start_syncobj;
>   	unsigned num_post_dep_syncobjs;
>   	struct drm_syncobj **post_dep_syncobjs;
>   };
>   
>   static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
>   				      uint32_t ib_idx, int idx)
>   {
>   	return p->job->ibs[ib_idx].ptr[idx];
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1c49b8266d69..917f3818c61c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
>   					  chunk_ib->ip_instance, chunk_ib->ring,
>   					  &entity);
>   		if (r)
>   			return r;
>   
>   		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
>   			parser->job->preamble_status |=
>   				AMDGPU_PREAMBLE_IB_PRESENT;
>   
> +		if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
> +			parser->get_start_syncobj = true;
> +
>   		if (parser->entity && parser->entity != entity)
>   			return -EINVAL;
>   
>   		parser->entity = entity;
>   
>   		ring = to_amdgpu_ring(entity->rq->sched);
>   		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
>   				   chunk_ib->ib_bytes : 0, ib);
>   		if (r) {
>   			DRM_ERROR("Failed to get ib !\n");
> @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	amdgpu_mn_lock(p->mn);
>   	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>   		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>   
>   		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>   			r = -ERESTARTSYS;
>   			goto error_abort;
>   		}
>   	}
>   
> +	if (p->get_start_syncobj) {
> +		struct drm_syncobj *syncobj;
> +
> +		r = drm_syncobj_create(&syncobj, 0,
> +				       &job->base.s_fence->scheduled);
> +		if (r)
> +			goto error_abort;
> +
> +		r = drm_syncobj_get_handle(p->filp, syncobj,
> +					   &cs->out.start_syncobj);
> +		if (r)
> +			goto error_abort;
> +		drm_syncobj_put(syncobj);
> +	}
> +
>   	job->owner = p->filp;
>   	p->fence = dma_fence_get(&job->base.s_fence->finished);
>   
>   	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
>   	amdgpu_cs_post_dependencies(p);
>   
>   	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
>   	    !p->ctx->preamble_presented) {
>   		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
>   		p->ctx->preamble_presented = true;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index c806f984bcc5..a230a30722d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -64,23 +64,24 @@
>    * - 3.18.0 - Export gpu always on cu bitmap
>    * - 3.19.0 - Add support for UVD MJPEG decode
>    * - 3.20.0 - Add support for local BOs
>    * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
>    * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
>    * - 3.23.0 - Add query for VRAM lost counter
>    * - 3.24.0 - Add high priority compute support for gfx9
>    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>    * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
> + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
>    */
>   #define KMS_DRIVER_MAJOR	3
> -#define KMS_DRIVER_MINOR	27
> +#define KMS_DRIVER_MINOR	28
>   #define KMS_DRIVER_PATCHLEVEL	0
>   
>   int amdgpu_vram_limit = 0;
>   int amdgpu_vis_vram_limit = 0;
>   int amdgpu_gart_size = -1; /* auto */
>   int amdgpu_gtt_size = -1; /* auto */
>   int amdgpu_moverate = -1; /* auto */
>   int amdgpu_benchmarking = 0;
>   int amdgpu_testing = 0;
>   int amdgpu_audio = -1;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 662d379ea624..d0e0c99cea32 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {
>   	__u32		ctx_id;
>   	/**  Handle of resource list associated with CS */
>   	__u32		bo_list_handle;
>   	__u32		num_chunks;
>   	__u32		_pad;
>   	/** this points to __u64 * which point to cs chunks */
>   	__u64		chunks;
>   };
>   
>   struct drm_amdgpu_cs_out {
> -	__u64 handle;
> +	__u64 handle; /* sequence number */
> +	__u32 start_syncobj; /* signalled when IB execution begins */
> +	__u32 _pad;
>   };
>   
>   union drm_amdgpu_cs {
>   	struct drm_amdgpu_cs_in in;
>   	struct drm_amdgpu_cs_out out;
>   };
>   
>   /* Specify flags to be used for IB */
>   
>   /* This IB should be submitted to CE */
> @@ -566,20 +568,29 @@ union drm_amdgpu_cs {
>   
>   /* The IB fence should do the L2 writeback but not invalidate any shader
>    * caches (L2/vL1/sL1/I$). */
>   #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
>   
>   /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
>    * This will reset wave ID counters for the IB.
>    */
>   #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
>   
> +/* The CS ioctl will return a syncobj representing when all IBs begin
> + * execution. If set, this applies to all IBs. The returned syncobj can be
> + * used as an IB dependency for other IBs.
> + *
> + * This is used for GPU deadlock prevention when userspace uses mid-IB fences
> + * to wait for mid-IB work on other rings.
> + */
> +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
> +
>   struct drm_amdgpu_cs_chunk_ib {
>   	__u32 _pad;
>   	/** AMDGPU_IB_FLAG_* */
>   	__u32 flags;
>   	/** Virtual address to begin IB execution */
>   	__u64 va_start;
>   	/** Size of submission */
>   	__u32 ib_bytes;
>   	/** HW IP to submit to */
>   	__u32 ip_type;
On Tue, Jan 29, 2019, 3:01 AM Christian König <
ckoenig.leichtzumerken@gmail.com wrote:

> Am 28.01.19 um 22:52 schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak@amd.com>
> >
> > Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> > an IB starts.
>
> That approach has quite a number of problems (for example you can't
> allocate memory at this point).
>
> Better add a flag that we should only sync on scheduling for a
> dependency/syncobj instead.
>

I don't understand. Can you give me an example of the interface and how the
implementation would look?

Thanks,
Marek


> Christian.
>
> >
> > Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++++++++++++++++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
> >   include/uapi/drm/amdgpu_drm.h           | 13 ++++++++++++-
> >   4 files changed, 33 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index d67f8b1dfe80..8e2f7e558bc9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
> >       struct dma_fence                *fence;
> >       uint64_t                        bytes_moved_threshold;
> >       uint64_t                        bytes_moved_vis_threshold;
> >       uint64_t                        bytes_moved;
> >       uint64_t                        bytes_moved_vis;
> >       struct amdgpu_bo_list_entry     *evictable;
> >
> >       /* user fence */
> >       struct amdgpu_bo_list_entry     uf_entry;
> >
> > +     bool                            get_start_syncobj;
> >       unsigned num_post_dep_syncobjs;
> >       struct drm_syncobj **post_dep_syncobjs;
> >   };
> >
> >   static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
> >                                     uint32_t ib_idx, int idx)
> >   {
> >       return p->job->ibs[ib_idx].ptr[idx];
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 1c49b8266d69..917f3818c61c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct
> amdgpu_device *adev,
> >               r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
> >                                         chunk_ib->ip_instance,
> chunk_ib->ring,
> >                                         &entity);
> >               if (r)
> >                       return r;
> >
> >               if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> >                       parser->job->preamble_status |=
> >                               AMDGPU_PREAMBLE_IB_PRESENT;
> >
> > +             if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
> > +                     parser->get_start_syncobj = true;
> > +
> >               if (parser->entity && parser->entity != entity)
> >                       return -EINVAL;
> >
> >               parser->entity = entity;
> >
> >               ring = to_amdgpu_ring(entity->rq->sched);
> >               r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
> >                                  chunk_ib->ib_bytes : 0, ib);
> >               if (r) {
> >                       DRM_ERROR("Failed to get ib !\n");
> > @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct
> amdgpu_cs_parser *p,
> >       amdgpu_mn_lock(p->mn);
> >       amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> >               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> >
> >               if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> >                       r = -ERESTARTSYS;
> >                       goto error_abort;
> >               }
> >       }
> >
> > +     if (p->get_start_syncobj) {
> > +             struct drm_syncobj *syncobj;
> > +
> > +             r = drm_syncobj_create(&syncobj, 0,
> > +                                    &job->base.s_fence->scheduled);
> > +             if (r)
> > +                     goto error_abort;
> > +
> > +             r = drm_syncobj_get_handle(p->filp, syncobj,
> > +                                        &cs->out.start_syncobj);
> > +             if (r)
> > +                     goto error_abort;
> > +             drm_syncobj_put(syncobj);
> > +     }
> > +
> >       job->owner = p->filp;
> >       p->fence = dma_fence_get(&job->base.s_fence->finished);
> >
> >       amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
> >       amdgpu_cs_post_dependencies(p);
> >
> >       if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
> >           !p->ctx->preamble_presented) {
> >               job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
> >               p->ctx->preamble_presented = true;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index c806f984bcc5..a230a30722d4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -64,23 +64,24 @@
> >    * - 3.18.0 - Export gpu always on cu bitmap
> >    * - 3.19.0 - Add support for UVD MJPEG decode
> >    * - 3.20.0 - Add support for local BOs
> >    * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
> >    * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
> >    * - 3.23.0 - Add query for VRAM lost counter
> >    * - 3.24.0 - Add high priority compute support for gfx9
> >    * - 3.25.0 - Add support for sensor query info (stable pstate
> sclk/mclk).
> >    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> >    * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
> > + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
> >    */
> >   #define KMS_DRIVER_MAJOR    3
> > -#define KMS_DRIVER_MINOR     27
> > +#define KMS_DRIVER_MINOR     28
> >   #define KMS_DRIVER_PATCHLEVEL       0
> >
> >   int amdgpu_vram_limit = 0;
> >   int amdgpu_vis_vram_limit = 0;
> >   int amdgpu_gart_size = -1; /* auto */
> >   int amdgpu_gtt_size = -1; /* auto */
> >   int amdgpu_moverate = -1; /* auto */
> >   int amdgpu_benchmarking = 0;
> >   int amdgpu_testing = 0;
> >   int amdgpu_audio = -1;
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h
> > index 662d379ea624..d0e0c99cea32 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {
> >       __u32           ctx_id;
> >       /**  Handle of resource list associated with CS */
> >       __u32           bo_list_handle;
> >       __u32           num_chunks;
> >       __u32           _pad;
> >       /** this points to __u64 * which point to cs chunks */
> >       __u64           chunks;
> >   };
> >
> >   struct drm_amdgpu_cs_out {
> > -     __u64 handle;
> > +     __u64 handle; /* sequence number */
> > +     __u32 start_syncobj; /* signalled when IB execution begins */
> > +     __u32 _pad;
> >   };
> >
> >   union drm_amdgpu_cs {
> >       struct drm_amdgpu_cs_in in;
> >       struct drm_amdgpu_cs_out out;
> >   };
> >
> >   /* Specify flags to be used for IB */
> >
> >   /* This IB should be submitted to CE */
> > @@ -566,20 +568,29 @@ union drm_amdgpu_cs {
> >
> >   /* The IB fence should do the L2 writeback but not invalidate any
> shader
> >    * caches (L2/vL1/sL1/I$). */
> >   #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
> >
> >   /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before
> PACKET3_INDIRECT_BUFFER.
> >    * This will reset wave ID counters for the IB.
> >    */
> >   #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
> >
> > +/* The CS ioctl will return a syncobj representing when all IBs begin
> > + * execution. If set, this applies to all IBs. The returned syncobj can
> be
> > + * used as an IB dependency for other IBs.
> > + *
> > + * This is used for GPU deadlock prevention when userspace uses mid-IB
> fences
> > + * to wait for mid-IB work on other rings.
> > + */
> > +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
> > +
> >   struct drm_amdgpu_cs_chunk_ib {
> >       __u32 _pad;
> >       /** AMDGPU_IB_FLAG_* */
> >       __u32 flags;
> >       /** Virtual address to begin IB execution */
> >       __u64 va_start;
> >       /** Size of submission */
> >       __u32 ib_bytes;
> >       /** HW IP to submit to */
> >       __u32 ip_type;
>
>
Am 29.01.19 um 14:32 schrieb Marek Olšák:


On Tue, Jan 29, 2019, 3:01 AM Christian König <ckoenig.leichtzumerken@gmail.com<mailto:ckoenig.leichtzumerken@gmail.com> wrote:
Am 28.01.19 um 22:52 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com<mailto:marek.olsak@amd.com>>

>

> Normal syncobjs signal when an IB finishes. Start syncobjs signal when

> an IB starts.


That approach has quite a number of problems (for example you can't
allocate memory at this point).

Better add a flag that we should only sync on scheduling for a
dependency/syncobj instead.

I don't understand. Can you give me an example of the interface and how the implementation would look?

For example we add a new chunk type AMDGPU_CHUNK_ID_SCHEDULED which is handled the same way as AMDGPU_CHUNK_ID_DEPENDENCIES.

Then in amdgpu_cs_process_fence_dep() we check if its a AMDGPU_CHUNK_ID_SCHEDULED and if yes extract the scheduled fence from the fence we got from amdgpu_ctx_get_fence().

Christian.


Thanks,
Marek


Christian.

>

> Signed-off-by: Marek Olšák <marek.olsak@amd.com<mailto:marek.olsak@amd.com>>

> ---

>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +

>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 18 ++++++++++++++++++

>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-

>   include/uapi/drm/amdgpu_drm.h           | 13 ++++++++++++-

>   4 files changed, 33 insertions(+), 2 deletions(-)

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

> index d67f8b1dfe80..8e2f7e558bc9 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

> @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {

>       struct dma_fence                *fence;

>       uint64_t                        bytes_moved_threshold;

>       uint64_t                        bytes_moved_vis_threshold;

>       uint64_t                        bytes_moved;

>       uint64_t                        bytes_moved_vis;

>       struct amdgpu_bo_list_entry     *evictable;

>

>       /* user fence */

>       struct amdgpu_bo_list_entry     uf_entry;

>

> +     bool                            get_start_syncobj;

>       unsigned num_post_dep_syncobjs;

>       struct drm_syncobj **post_dep_syncobjs;

>   };

>

>   static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,

>                                     uint32_t ib_idx, int idx)

>   {

>       return p->job->ibs[ib_idx].ptr[idx];

>   }

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

> index 1c49b8266d69..917f3818c61c 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

> @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,

>               r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,

>                                         chunk_ib->ip_instance, chunk_ib->ring,

>                                         &entity);

>               if (r)

>                       return r;

>

>               if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)

>                       parser->job->preamble_status |=

>                               AMDGPU_PREAMBLE_IB_PRESENT;

>

> +             if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)

> +                     parser->get_start_syncobj = true;

> +

>               if (parser->entity && parser->entity != entity)

>                       return -EINVAL;

>

>               parser->entity = entity;

>

>               ring = to_amdgpu_ring(entity->rq->sched);

>               r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?

>                                  chunk_ib->ib_bytes : 0, ib);

>               if (r) {

>                       DRM_ERROR("Failed to get ib !\n");

> @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,

>       amdgpu_mn_lock(p->mn);

>       amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {

>               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo<http://tv.bo>);

>

>               if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {

>                       r = -ERESTARTSYS;

>                       goto error_abort;

>               }

>       }

>

> +     if (p->get_start_syncobj) {

> +             struct drm_syncobj *syncobj;

> +

> +             r = drm_syncobj_create(&syncobj, 0,

> +                                    &job->base.s_fence->scheduled);

> +             if (r)

> +                     goto error_abort;

> +

> +             r = drm_syncobj_get_handle(p->filp, syncobj,

> +                                        &cs->out.start_syncobj);

> +             if (r)

> +                     goto error_abort;

> +             drm_syncobj_put(syncobj);

> +     }

> +

>       job->owner = p->filp;

>       p->fence = dma_fence_get(&job->base.s_fence->finished);

>

>       amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);

>       amdgpu_cs_post_dependencies(p);

>

>       if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&

>           !p->ctx->preamble_presented) {

>               job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;

>               p->ctx->preamble_presented = true;

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

> index c806f984bcc5..a230a30722d4 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

> @@ -64,23 +64,24 @@

>    * - 3.18.0 - Export gpu always on cu bitmap

>    * - 3.19.0 - Add support for UVD MJPEG decode

>    * - 3.20.0 - Add support for local BOs

>    * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl

>    * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl

>    * - 3.23.0 - Add query for VRAM lost counter

>    * - 3.24.0 - Add high priority compute support for gfx9

>    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).

>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.

>    * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.

> + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ

>    */

>   #define KMS_DRIVER_MAJOR    3

> -#define KMS_DRIVER_MINOR     27

> +#define KMS_DRIVER_MINOR     28

>   #define KMS_DRIVER_PATCHLEVEL       0

>

>   int amdgpu_vram_limit = 0;

>   int amdgpu_vis_vram_limit = 0;

>   int amdgpu_gart_size = -1; /* auto */

>   int amdgpu_gtt_size = -1; /* auto */

>   int amdgpu_moverate = -1; /* auto */

>   int amdgpu_benchmarking = 0;

>   int amdgpu_testing = 0;

>   int amdgpu_audio = -1;

> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h

> index 662d379ea624..d0e0c99cea32 100644

> --- a/include/uapi/drm/amdgpu_drm.h

> +++ b/include/uapi/drm/amdgpu_drm.h

> @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {

>       __u32           ctx_id;

>       /**  Handle of resource list associated with CS */

>       __u32           bo_list_handle;

>       __u32           num_chunks;

>       __u32           _pad;

>       /** this points to __u64 * which point to cs chunks */

>       __u64           chunks;

>   };

>

>   struct drm_amdgpu_cs_out {

> -     __u64 handle;

> +     __u64 handle; /* sequence number */

> +     __u32 start_syncobj; /* signalled when IB execution begins */

> +     __u32 _pad;

>   };

>

>   union drm_amdgpu_cs {

>       struct drm_amdgpu_cs_in in;

>       struct drm_amdgpu_cs_out out;

>   };

>

>   /* Specify flags to be used for IB */

>

>   /* This IB should be submitted to CE */

> @@ -566,20 +568,29 @@ union drm_amdgpu_cs {

>

>   /* The IB fence should do the L2 writeback but not invalidate any shader

>    * caches (L2/vL1/sL1/I$). */

>   #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)

>

>   /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.

>    * This will reset wave ID counters for the IB.

>    */

>   #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)

>

> +/* The CS ioctl will return a syncobj representing when all IBs begin

> + * execution. If set, this applies to all IBs. The returned syncobj can be

> + * used as an IB dependency for other IBs.

> + *

> + * This is used for GPU deadlock prevention when userspace uses mid-IB fences

> + * to wait for mid-IB work on other rings.

> + */

> +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)

> +

>   struct drm_amdgpu_cs_chunk_ib {

>       __u32 _pad;

>       /** AMDGPU_IB_FLAG_* */

>       __u32 flags;

>       /** Virtual address to begin IB execution */

>       __u64 va_start;

>       /** Size of submission */

>       __u32 ib_bytes;

>       /** HW IP to submit to */

>       __u32 ip_type;
On Tue, Jan 29, 2019 at 3:01 AM Christian König <
ckoenig.leichtzumerken@gmail.com> wrote:

> Am 28.01.19 um 22:52 schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak@amd.com>
> >
> > Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> > an IB starts.
>
> That approach has quite a number of problems (for example you can't
> allocate memory at this point).
>

Even if I drop this patch, can you describe all the problems with it?
Andrey and I would like to understand this.

Thanks,
Marek
Am 29.01.19 um 22:54 schrieb Marek Olšák:
> On Tue, Jan 29, 2019 at 3:01 AM Christian König 
> <ckoenig.leichtzumerken@gmail.com 
> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>
>     Am 28.01.19 um 22:52 schrieb Marek Olšák:
>     > From: Marek Olšák <marek.olsak@amd.com <mailto:marek.olsak@amd.com>>
>     >
>     > Normal syncobjs signal when an IB finishes. Start syncobjs
>     signal when
>     > an IB starts.
>
>     That approach has quite a number of problems (for example you can't
>     allocate memory at this point).
>
>
> Even if I drop this patch, can you describe all the problems with it? 
> Andrey and I would like to understand this.

> amdgpu_mn_lock(p->mn);
> ...
> r = drm_syncobj_create(&syncobj, 0,...

You can't allocate memory while holding the MN lock, so calling 
drm_syncobj_create() here is forbidden.

> r = drm_syncobj_get_handle(p->filp, syncobj,...

This can fail which would result in a syncobj which is never signaled 
nor freed.

Regards,
Christian.

>
> Thanks,
> Marek
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx