[libdrm] libdrm_amdgpu: add kernel semaphore support

Submitted by Dave Airlie on July 6, 2017, 1:17 a.m.

Details

Message ID 20170706011749.4217-1-airlied@gmail.com
State New
Headers show
Series "libdrm_amdgpu: add kernel semaphore support" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Dave Airlie July 6, 2017, 1:17 a.m.
From: Dave Airlie <airlied@redhat.com>

This adds kernel semaphore support to the command submission
interface in what should be a backwards compatible manner,
it adds a new command submission API.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 amdgpu/amdgpu.h    |  29 ++++++++++++-
 amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 138 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 1901fa8..649b66e 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -369,6 +369,16 @@  struct amdgpu_cs_request {
 	struct amdgpu_cs_fence_info fence_info;
 };
 
+struct amdgpu_cs_request_syncobj {
+	/*
+	 *
+	 */
+	uint32_t number_in_syncobj;
+	uint32_t number_out_syncobj;
+	uint32_t *in_syncobj;
+	uint32_t *out_syncobj;
+};
+
 /**
  * Structure which provide information about GPU VM MC Address space
  * alignments requirements
@@ -886,6 +896,12 @@  int amdgpu_cs_submit(amdgpu_context_handle context,
 		     struct amdgpu_cs_request *ibs_request,
 		     uint32_t number_of_requests);
 
+int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
+			     uint64_t flags,
+			     struct amdgpu_cs_request *ibs_request,
+			     struct amdgpu_cs_request_syncobj *ibs_syncobj,
+			     uint32_t number_of_requests);
+
 /**
  *  Query status of Command Buffer Submission
  *
@@ -1328,8 +1344,19 @@  int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
 */
 const char *amdgpu_get_marketing_name(amdgpu_device_handle dev);
 
+
+int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
+			     uint32_t *syncobj);
+int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
+			     uint32_t syncobj,
+			     int *shared_fd);
+int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
+			     int shared_fd,
+			     uint32_t *syncobj);
+int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
+			      uint32_t syncobj);
+
 #ifdef __cplusplus
 }
 #endif
-
 #endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index 868eb7b..339c5f9 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -168,7 +168,8 @@  int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
  * \sa amdgpu_cs_submit()
 */
 static int amdgpu_cs_submit_one(amdgpu_context_handle context,
-				struct amdgpu_cs_request *ibs_request)
+				struct amdgpu_cs_request *ibs_request,
+				struct amdgpu_cs_request_syncobj *syncobj_request)
 {
 	union drm_amdgpu_cs cs;
 	uint64_t *chunk_array;
@@ -176,10 +177,13 @@  static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 	struct drm_amdgpu_cs_chunk_data *chunk_data;
 	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
 	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
+	struct drm_amdgpu_cs_chunk_sem *in_syncobj_dependencies = NULL;
+	struct drm_amdgpu_cs_chunk_sem *out_syncobj_dependencies = NULL;
 	struct list_head *sem_list;
 	amdgpu_semaphore_handle sem, tmp;
-	uint32_t i, size, sem_count = 0;
+	uint32_t i, j, size, sem_count = 0;
 	bool user_fence;
+	uint32_t sem_size = 0;
 	int r = 0;
 
 	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
@@ -194,7 +198,11 @@  static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 	}
 	user_fence = (ibs_request->fence_info.handle != NULL);
 
-	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
+	if (syncobj_request) {
+		sem_size += syncobj_request->number_in_syncobj ? 1 : 0;
+		sem_size += syncobj_request->number_out_syncobj ? 1 : 0;
+	}
+	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1 + sem_size;
 
 	chunk_array = alloca(sizeof(uint64_t) * size);
 	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
@@ -306,6 +314,45 @@  static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
 	}
 
+	if (syncobj_request) {
+		if (syncobj_request->number_in_syncobj) {
+			in_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_in_syncobj);
+			if (!in_syncobj_dependencies) {
+				r = -ENOMEM;
+				goto error_unlock;
+			}
+			for (j = 0; j < syncobj_request->number_in_syncobj; j++) {
+				struct drm_amdgpu_cs_chunk_sem *dep = &in_syncobj_dependencies[j];
+				dep->handle = syncobj_request->in_syncobj[j];
+			}
+			i = cs.in.num_chunks++;
+
+			/* dependencies chunk */
+			chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+			chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
+			chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_in_syncobj;
+			chunks[i].chunk_data = (uint64_t)(uintptr_t)in_syncobj_dependencies;
+		}
+		if (syncobj_request->number_out_syncobj) {
+			out_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_out_syncobj);
+			if (!out_syncobj_dependencies) {
+				r = -ENOMEM;
+				goto error_unlock;
+			}
+			for (j = 0; j < syncobj_request->number_out_syncobj; j++) {
+				struct drm_amdgpu_cs_chunk_sem *dep = &out_syncobj_dependencies[j];
+				dep->handle = syncobj_request->out_syncobj[j];
+			}
+			i = cs.in.num_chunks++;
+
+			/* dependencies chunk */
+			chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+			chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
+			chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_out_syncobj;
+			chunks[i].chunk_data = (uint64_t)(uintptr_t)out_syncobj_dependencies;
+		}
+	}
+
 	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
 				&cs, sizeof(cs));
 	if (r)
@@ -317,31 +364,48 @@  error_unlock:
 	pthread_mutex_unlock(&context->sequence_mutex);
 	free(dependencies);
 	free(sem_dependencies);
+	free(in_syncobj_dependencies);
+	free(out_syncobj_dependencies);
 	return r;
 }
 
-int amdgpu_cs_submit(amdgpu_context_handle context,
-		     uint64_t flags,
-		     struct amdgpu_cs_request *ibs_request,
-		     uint32_t number_of_requests)
+int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
+			     uint64_t flags,
+			     struct amdgpu_cs_request *ibs_request,
+			     struct amdgpu_cs_request_syncobj *ibs_syncobj,
+			     uint32_t number_of_requests)
 {
 	uint32_t i;
 	int r;
+	bool has_syncobj = ibs_syncobj ? true : false;
 
 	if (!context || !ibs_request)
 		return -EINVAL;
 
 	r = 0;
 	for (i = 0; i < number_of_requests; i++) {
-		r = amdgpu_cs_submit_one(context, ibs_request);
+		r = amdgpu_cs_submit_one(context, ibs_request, has_syncobj ? ibs_syncobj : NULL);
 		if (r)
 			break;
 		ibs_request++;
+		if (has_syncobj)
+			ibs_syncobj++;
 	}
 
 	return r;
 }
 
+int amdgpu_cs_submit(amdgpu_context_handle context,
+		     uint64_t flags,
+		     struct amdgpu_cs_request *ibs_request,
+		     uint32_t number_of_requests)
+{
+	return amdgpu_cs_submit_syncobj(context, flags,
+					ibs_request, NULL,
+					number_of_requests);
+}
+
+
 /**
  * Calculate absolute timeout.
  *
@@ -596,3 +660,41 @@  int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
 {
 	return amdgpu_cs_unreference_sem(sem);
 }
+
+int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
+			     uint32_t *handle)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjCreate(dev->fd, 0, handle);
+}
+
+int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
+			      uint32_t handle)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjDestroy(dev->fd, handle);
+}
+
+int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
+			     uint32_t handle,
+			     int *shared_fd)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
+}
+
+int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
+			     int shared_fd,
+			     uint32_t *handle)
+{
+	if (NULL == dev)
+		return -EINVAL;
+
+	return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
+}

Comments

Christian,

you are probably the best person to ack this, I'd like to get the radv
code landed
and allow the GL code to get going.

Dave.

> This adds kernel semaphore support to the command submission
> interface in what should be a backwards compatible manner,
> it adds a new command submission API.
>
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
>  amdgpu/amdgpu.h    |  29 ++++++++++++-
>  amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 138 insertions(+), 9 deletions(-)
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 1901fa8..649b66e 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -369,6 +369,16 @@ struct amdgpu_cs_request {
>         struct amdgpu_cs_fence_info fence_info;
>  };
>
> +struct amdgpu_cs_request_syncobj {
> +       /*
> +        *
> +        */
> +       uint32_t number_in_syncobj;
> +       uint32_t number_out_syncobj;
> +       uint32_t *in_syncobj;
> +       uint32_t *out_syncobj;
> +};
> +
>  /**
>   * Structure which provide information about GPU VM MC Address space
>   * alignments requirements
> @@ -886,6 +896,12 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
>                      struct amdgpu_cs_request *ibs_request,
>                      uint32_t number_of_requests);
>
> +int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
> +                            uint64_t flags,
> +                            struct amdgpu_cs_request *ibs_request,
> +                            struct amdgpu_cs_request_syncobj *ibs_syncobj,
> +                            uint32_t number_of_requests);
> +
>  /**
>   *  Query status of Command Buffer Submission
>   *
> @@ -1328,8 +1344,19 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
>  */
>  const char *amdgpu_get_marketing_name(amdgpu_device_handle dev);
>
> +
> +int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
> +                            uint32_t *syncobj);
> +int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
> +                            uint32_t syncobj,
> +                            int *shared_fd);
> +int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
> +                            int shared_fd,
> +                            uint32_t *syncobj);
> +int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
> +                             uint32_t syncobj);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> -
>  #endif /* #ifdef _AMDGPU_H_ */
> diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
> index 868eb7b..339c5f9 100644
> --- a/amdgpu/amdgpu_cs.c
> +++ b/amdgpu/amdgpu_cs.c
> @@ -168,7 +168,8 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
>   * \sa amdgpu_cs_submit()
>  */
>  static int amdgpu_cs_submit_one(amdgpu_context_handle context,
> -                               struct amdgpu_cs_request *ibs_request)
> +                               struct amdgpu_cs_request *ibs_request,
> +                               struct amdgpu_cs_request_syncobj *syncobj_request)
>  {
>         union drm_amdgpu_cs cs;
>         uint64_t *chunk_array;
> @@ -176,10 +177,13 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>         struct drm_amdgpu_cs_chunk_data *chunk_data;
>         struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
>         struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
> +       struct drm_amdgpu_cs_chunk_sem *in_syncobj_dependencies = NULL;
> +       struct drm_amdgpu_cs_chunk_sem *out_syncobj_dependencies = NULL;
>         struct list_head *sem_list;
>         amdgpu_semaphore_handle sem, tmp;
> -       uint32_t i, size, sem_count = 0;
> +       uint32_t i, j, size, sem_count = 0;
>         bool user_fence;
> +       uint32_t sem_size = 0;
>         int r = 0;
>
>         if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
> @@ -194,7 +198,11 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>         }
>         user_fence = (ibs_request->fence_info.handle != NULL);
>
> -       size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
> +       if (syncobj_request) {
> +               sem_size += syncobj_request->number_in_syncobj ? 1 : 0;
> +               sem_size += syncobj_request->number_out_syncobj ? 1 : 0;
> +       }
> +       size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1 + sem_size;
>
>         chunk_array = alloca(sizeof(uint64_t) * size);
>         chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
> @@ -306,6 +314,45 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>                 chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
>         }
>
> +       if (syncobj_request) {
> +               if (syncobj_request->number_in_syncobj) {
> +                       in_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_in_syncobj);
> +                       if (!in_syncobj_dependencies) {
> +                               r = -ENOMEM;
> +                               goto error_unlock;
> +                       }
> +                       for (j = 0; j < syncobj_request->number_in_syncobj; j++) {
> +                               struct drm_amdgpu_cs_chunk_sem *dep = &in_syncobj_dependencies[j];
> +                               dep->handle = syncobj_request->in_syncobj[j];
> +                       }
> +                       i = cs.in.num_chunks++;
> +
> +                       /* dependencies chunk */
> +                       chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
> +                       chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
> +                       chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_in_syncobj;
> +                       chunks[i].chunk_data = (uint64_t)(uintptr_t)in_syncobj_dependencies;
> +               }
> +               if (syncobj_request->number_out_syncobj) {
> +                       out_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_out_syncobj);
> +                       if (!out_syncobj_dependencies) {
> +                               r = -ENOMEM;
> +                               goto error_unlock;
> +                       }
> +                       for (j = 0; j < syncobj_request->number_out_syncobj; j++) {
> +                               struct drm_amdgpu_cs_chunk_sem *dep = &out_syncobj_dependencies[j];
> +                               dep->handle = syncobj_request->out_syncobj[j];
> +                       }
> +                       i = cs.in.num_chunks++;
> +
> +                       /* dependencies chunk */
> +                       chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
> +                       chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
> +                       chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_out_syncobj;
> +                       chunks[i].chunk_data = (uint64_t)(uintptr_t)out_syncobj_dependencies;
> +               }
> +       }
> +
>         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
>                                 &cs, sizeof(cs));
>         if (r)
> @@ -317,31 +364,48 @@ error_unlock:
>         pthread_mutex_unlock(&context->sequence_mutex);
>         free(dependencies);
>         free(sem_dependencies);
> +       free(in_syncobj_dependencies);
> +       free(out_syncobj_dependencies);
>         return r;
>  }
>
> -int amdgpu_cs_submit(amdgpu_context_handle context,
> -                    uint64_t flags,
> -                    struct amdgpu_cs_request *ibs_request,
> -                    uint32_t number_of_requests)
> +int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
> +                            uint64_t flags,
> +                            struct amdgpu_cs_request *ibs_request,
> +                            struct amdgpu_cs_request_syncobj *ibs_syncobj,
> +                            uint32_t number_of_requests)
>  {
>         uint32_t i;
>         int r;
> +       bool has_syncobj = ibs_syncobj ? true : false;
>
>         if (!context || !ibs_request)
>                 return -EINVAL;
>
>         r = 0;
>         for (i = 0; i < number_of_requests; i++) {
> -               r = amdgpu_cs_submit_one(context, ibs_request);
> +               r = amdgpu_cs_submit_one(context, ibs_request, has_syncobj ? ibs_syncobj : NULL);
>                 if (r)
>                         break;
>                 ibs_request++;
> +               if (has_syncobj)
> +                       ibs_syncobj++;
>         }
>
>         return r;
>  }
>
> +int amdgpu_cs_submit(amdgpu_context_handle context,
> +                    uint64_t flags,
> +                    struct amdgpu_cs_request *ibs_request,
> +                    uint32_t number_of_requests)
> +{
> +       return amdgpu_cs_submit_syncobj(context, flags,
> +                                       ibs_request, NULL,
> +                                       number_of_requests);
> +}
> +
> +
>  /**
>   * Calculate absolute timeout.
>   *
> @@ -596,3 +660,41 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
>  {
>         return amdgpu_cs_unreference_sem(sem);
>  }
> +
> +int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
> +                            uint32_t *handle)
> +{
> +       if (NULL == dev)
> +               return -EINVAL;
> +
> +       return drmSyncobjCreate(dev->fd, 0, handle);
> +}
> +
> +int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
> +                             uint32_t handle)
> +{
> +       if (NULL == dev)
> +               return -EINVAL;
> +
> +       return drmSyncobjDestroy(dev->fd, handle);
> +}
> +
> +int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
> +                            uint32_t handle,
> +                            int *shared_fd)
> +{
> +       if (NULL == dev)
> +               return -EINVAL;
> +
> +       return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
> +}
> +
> +int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
> +                            int shared_fd,
> +                            uint32_t *handle)
> +{
> +       if (NULL == dev)
> +               return -EINVAL;
> +
> +       return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
> +}
> --
> 2.9.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Hi Dave,

on first glance that looks rather good to me, but there is one thing I 
don't really like and I strongly think Marek will absolutely agree on 
that: When we add a new CS function then let's get rid of all this 
abstraction!

The new function should get an amdgpu_device_handle and a list of chunks 
to submit, nothing else.

We then provide helper functions to generate the chunks out of the 
existing amdgpu_context_handle and amdgpu_bo_list_handle.

That should be perfectly sufficient and extensible for future additions 
as well.

Regards,
Christian.

Am 07.07.2017 um 00:19 schrieb Dave Airlie:
> Christian,
>
> you are probably the best person to ack this, I'd like to get the radv
> code landed
> and allow the GL code to get going.
>
> Dave.
>
>> This adds kernel semaphore support to the command submission
>> interface in what should be a backwards compatible manner,
>> it adds a new command submission API.
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>> ---
>>   amdgpu/amdgpu.h    |  29 ++++++++++++-
>>   amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
>>   2 files changed, 138 insertions(+), 9 deletions(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 1901fa8..649b66e 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -369,6 +369,16 @@ struct amdgpu_cs_request {
>>          struct amdgpu_cs_fence_info fence_info;
>>   };
>>
>> +struct amdgpu_cs_request_syncobj {
>> +       /*
>> +        *
>> +        */
>> +       uint32_t number_in_syncobj;
>> +       uint32_t number_out_syncobj;
>> +       uint32_t *in_syncobj;
>> +       uint32_t *out_syncobj;
>> +};
>> +
>>   /**
>>    * Structure which provide information about GPU VM MC Address space
>>    * alignments requirements
>> @@ -886,6 +896,12 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
>>                       struct amdgpu_cs_request *ibs_request,
>>                       uint32_t number_of_requests);
>>
>> +int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
>> +                            uint64_t flags,
>> +                            struct amdgpu_cs_request *ibs_request,
>> +                            struct amdgpu_cs_request_syncobj *ibs_syncobj,
>> +                            uint32_t number_of_requests);
>> +
>>   /**
>>    *  Query status of Command Buffer Submission
>>    *
>> @@ -1328,8 +1344,19 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
>>   */
>>   const char *amdgpu_get_marketing_name(amdgpu_device_handle dev);
>>
>> +
>> +int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
>> +                            uint32_t *syncobj);
>> +int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
>> +                            uint32_t syncobj,
>> +                            int *shared_fd);
>> +int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
>> +                            int shared_fd,
>> +                            uint32_t *syncobj);
>> +int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
>> +                             uint32_t syncobj);
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> -
>>   #endif /* #ifdef _AMDGPU_H_ */
>> diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
>> index 868eb7b..339c5f9 100644
>> --- a/amdgpu/amdgpu_cs.c
>> +++ b/amdgpu/amdgpu_cs.c
>> @@ -168,7 +168,8 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
>>    * \sa amdgpu_cs_submit()
>>   */
>>   static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>> -                               struct amdgpu_cs_request *ibs_request)
>> +                               struct amdgpu_cs_request *ibs_request,
>> +                               struct amdgpu_cs_request_syncobj *syncobj_request)
>>   {
>>          union drm_amdgpu_cs cs;
>>          uint64_t *chunk_array;
>> @@ -176,10 +177,13 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>>          struct drm_amdgpu_cs_chunk_data *chunk_data;
>>          struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
>>          struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
>> +       struct drm_amdgpu_cs_chunk_sem *in_syncobj_dependencies = NULL;
>> +       struct drm_amdgpu_cs_chunk_sem *out_syncobj_dependencies = NULL;
>>          struct list_head *sem_list;
>>          amdgpu_semaphore_handle sem, tmp;
>> -       uint32_t i, size, sem_count = 0;
>> +       uint32_t i, j, size, sem_count = 0;
>>          bool user_fence;
>> +       uint32_t sem_size = 0;
>>          int r = 0;
>>
>>          if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
>> @@ -194,7 +198,11 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>>          }
>>          user_fence = (ibs_request->fence_info.handle != NULL);
>>
>> -       size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
>> +       if (syncobj_request) {
>> +               sem_size += syncobj_request->number_in_syncobj ? 1 : 0;
>> +               sem_size += syncobj_request->number_out_syncobj ? 1 : 0;
>> +       }
>> +       size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1 + sem_size;
>>
>>          chunk_array = alloca(sizeof(uint64_t) * size);
>>          chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
>> @@ -306,6 +314,45 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
>>                  chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
>>          }
>>
>> +       if (syncobj_request) {
>> +               if (syncobj_request->number_in_syncobj) {
>> +                       in_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_in_syncobj);
>> +                       if (!in_syncobj_dependencies) {
>> +                               r = -ENOMEM;
>> +                               goto error_unlock;
>> +                       }
>> +                       for (j = 0; j < syncobj_request->number_in_syncobj; j++) {
>> +                               struct drm_amdgpu_cs_chunk_sem *dep = &in_syncobj_dependencies[j];
>> +                               dep->handle = syncobj_request->in_syncobj[j];
>> +                       }
>> +                       i = cs.in.num_chunks++;
>> +
>> +                       /* dependencies chunk */
>> +                       chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
>> +                       chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
>> +                       chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_in_syncobj;
>> +                       chunks[i].chunk_data = (uint64_t)(uintptr_t)in_syncobj_dependencies;
>> +               }
>> +               if (syncobj_request->number_out_syncobj) {
>> +                       out_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_out_syncobj);
>> +                       if (!out_syncobj_dependencies) {
>> +                               r = -ENOMEM;
>> +                               goto error_unlock;
>> +                       }
>> +                       for (j = 0; j < syncobj_request->number_out_syncobj; j++) {
>> +                               struct drm_amdgpu_cs_chunk_sem *dep = &out_syncobj_dependencies[j];
>> +                               dep->handle = syncobj_request->out_syncobj[j];
>> +                       }
>> +                       i = cs.in.num_chunks++;
>> +
>> +                       /* dependencies chunk */
>> +                       chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
>> +                       chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
>> +                       chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_out_syncobj;
>> +                       chunks[i].chunk_data = (uint64_t)(uintptr_t)out_syncobj_dependencies;
>> +               }
>> +       }
>> +
>>          r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
>>                                  &cs, sizeof(cs));
>>          if (r)
>> @@ -317,31 +364,48 @@ error_unlock:
>>          pthread_mutex_unlock(&context->sequence_mutex);
>>          free(dependencies);
>>          free(sem_dependencies);
>> +       free(in_syncobj_dependencies);
>> +       free(out_syncobj_dependencies);
>>          return r;
>>   }
>>
>> -int amdgpu_cs_submit(amdgpu_context_handle context,
>> -                    uint64_t flags,
>> -                    struct amdgpu_cs_request *ibs_request,
>> -                    uint32_t number_of_requests)
>> +int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
>> +                            uint64_t flags,
>> +                            struct amdgpu_cs_request *ibs_request,
>> +                            struct amdgpu_cs_request_syncobj *ibs_syncobj,
>> +                            uint32_t number_of_requests)
>>   {
>>          uint32_t i;
>>          int r;
>> +       bool has_syncobj = ibs_syncobj ? true : false;
>>
>>          if (!context || !ibs_request)
>>                  return -EINVAL;
>>
>>          r = 0;
>>          for (i = 0; i < number_of_requests; i++) {
>> -               r = amdgpu_cs_submit_one(context, ibs_request);
>> +               r = amdgpu_cs_submit_one(context, ibs_request, has_syncobj ? ibs_syncobj : NULL);
>>                  if (r)
>>                          break;
>>                  ibs_request++;
>> +               if (has_syncobj)
>> +                       ibs_syncobj++;
>>          }
>>
>>          return r;
>>   }
>>
>> +int amdgpu_cs_submit(amdgpu_context_handle context,
>> +                    uint64_t flags,
>> +                    struct amdgpu_cs_request *ibs_request,
>> +                    uint32_t number_of_requests)
>> +{
>> +       return amdgpu_cs_submit_syncobj(context, flags,
>> +                                       ibs_request, NULL,
>> +                                       number_of_requests);
>> +}
>> +
>> +
>>   /**
>>    * Calculate absolute timeout.
>>    *
>> @@ -596,3 +660,41 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
>>   {
>>          return amdgpu_cs_unreference_sem(sem);
>>   }
>> +
>> +int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
>> +                            uint32_t *handle)
>> +{
>> +       if (NULL == dev)
>> +               return -EINVAL;
>> +
>> +       return drmSyncobjCreate(dev->fd, 0, handle);
>> +}
>> +
>> +int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
>> +                             uint32_t handle)
>> +{
>> +       if (NULL == dev)
>> +               return -EINVAL;
>> +
>> +       return drmSyncobjDestroy(dev->fd, handle);
>> +}
>> +
>> +int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
>> +                            uint32_t handle,
>> +                            int *shared_fd)
>> +{
>> +       if (NULL == dev)
>> +               return -EINVAL;
>> +
>> +       return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
>> +}
>> +
>> +int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
>> +                            int shared_fd,
>> +                            uint32_t *handle)
>> +{
>> +       if (NULL == dev)
>> +               return -EINVAL;
>> +
>> +       return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
>> +}
>> --
>> 2.9.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On 7 July 2017 at 19:07, Christian König <deathsimple@vodafone.de> wrote:
> Hi Dave,
>
> on first glance that looks rather good to me, but there is one thing I
> don't really like and I strongly think Marek will absolutely agree on that:
> When we add a new CS function then let's get rid of all this abstraction!
>
> The new function should get an amdgpu_device_handle and a list of chunks to
> submit, nothing else.
>
> We then provide helper functions to generate the chunks out of the
> existing amdgpu_context_handle and amdgpu_bo_list_handle.
>
> That should be perfectly sufficient and extensible for future additions as
> well.

Sounds tempting, but it's a bit messier than it looks once I started
digging into it.

The main things I ran up against is the context sequence mutex protecting the
kernel submissions per context which would be tricky to figure out why that is
required (should we be submitting from different contexts on different threads?)

I'd prefer to land this then refactor a new interface, I do wonder if
maybe Marek
would prefer just doing this all in Mesa and avoiding these APIs a bit more :-)

Once I get the syncobjs in I might look at internally refactoring the
code a bit more,
then a new API.

Dave.
Am 11.07.2017 um 08:49 schrieb Dave Airlie:
> On 7 July 2017 at 19:07, Christian König <deathsimple@vodafone.de> wrote:
>> Hi Dave,
>>
>> on first glance that looks rather good to me, but there is one thing I
>> don't really like and I strongly think Marek will absolutely agree on that:
>> When we add a new CS function then let's get rid of all this abstraction!
>>
>> The new function should get an amdgpu_device_handle and a list of chunks to
>> submit, nothing else.
>>
>> We then provide helper functions to generate the chunks out of the
>> existing amdgpu_context_handle and amdgpu_bo_list_handle.
>>
>> That should be perfectly sufficient and extensible for future additions as
>> well.
> Sounds tempting, but it a bit messier than it looks once I started
> digging into it.
>
> The main things I ran up against is the context sequence mutex protecting the
> kernel submissions per context which would be tricky to figure out why that is
> required (should we be submitting from different contexts on different threads?)

The sequence lock is just to keep last_seq up to date and last_seq just 
exists because of amdgpu_cs_signal_semaphore.

We want to get rid of that, so you can drop support for this altogether 
in the new IOCTL.

> I'd prefer to land this then refactor a new interface, I do wonder if
> maybe Marek
> would prefer just doing this all in Mesa and avoiding these APIs a bit more :-)
>
> Once I get the syncobjs in I might look at internally refactoring the
> code a bit more,
> then a new API.

Actually I wanted to propose just to remove the old semaphore API, it 
was never used by Mesa or any other open source user.

So we could get rid of it without breaking anybody. Marek what do you think?

Regards,
Christian.

>
> Dave.
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On 11 July 2017 at 18:36, Christian König <deathsimple@vodafone.de> wrote:
> Am 11.07.2017 um 08:49 schrieb Dave Airlie:
>>
>> On 7 July 2017 at 19:07, Christian König <deathsimple@vodafone.de> wrote:
>>>
>>> Hi Dave,
>>>
>>> on first glance that looks rather good to me, but there is one things I
>>> don't really like and I strongly think Marek will absolutely agree on
>>> that:
>>> When we add a new CS function then let's get ride of all this
>>> abstraction!
>>>
>>> The new function should get an amdgpu_device_handle and a list of chunks
>>> to
>>> submit, nothing else.
>>>
>>> When then provide helper functions to generate the chunks out of the
>>> existing amdgpu_context_handle and amdgpu_bo_list_handle.
>>>
>>> That should be perfectly sufficient and extensible for future additions
>>> as
>>> well.
>>
>> Sounds tempting, but it a bit messier than it looks once I started
>> digging into it.
>>
>> The main things I ran up against is the context sequence mutex protecting
>> the
>> kernel submissions per context which would be tricky to figure out why
>> that is
>> required (should we be submitting from different contexts on different
>> threads?)
>
>
> The sequence lock is just to keep last_seq up to date and last_seq just
> exists because of amdgpu_cs_signal_semaphore.
>
> We want to get ride of that, so you can drop support for this altogether in
> the new IOCTL.
>
>> I'd prefer to land this then refactor a new interface, I do wonder if
>> maybe Marek
>> would prefer just doing this all in Mesa and avoiding these APIs a bit
>> more :-)
>>
>> Once I get the syncobjs in I might look at internally refactoring the
>> code a bit more,
>> then a new API.
>
>
> Actually I wanted to propose just to remove the old semaphore API, it was
> never used by Mesa or any other open source user.

radv uses it right now until we have syncobjs.

So it should hang around.

Dave.
Am 11.07.2017 um 11:20 schrieb Dave Airlie:
> On 11 July 2017 at 18:36, Christian König <deathsimple@vodafone.de> wrote:
>> Am 11.07.2017 um 08:49 schrieb Dave Airlie:
>>> On 7 July 2017 at 19:07, Christian König <deathsimple@vodafone.de> wrote:
>>>> Hi Dave,
>>>>
>>>> on first glance that looks rather good to me, but there is one things I
>>>> don't really like and I strongly think Marek will absolutely agree on
>>>> that:
>>>> When we add a new CS function then let's get ride of all this
>>>> abstraction!
>>>>
>>>> The new function should get an amdgpu_device_handle and a list of chunks
>>>> to
>>>> submit, nothing else.
>>>>
>>>> When then provide helper functions to generate the chunks out of the
>>>> existing amdgpu_context_handle and amdgpu_bo_list_handle.
>>>>
>>>> That should be perfectly sufficient and extensible for future additions
>>>> as
>>>> well.
>>> Sounds tempting, but it a bit messier than it looks once I started
>>> digging into it.
>>>
>>> The main things I ran up against is the context sequence mutex protecting
>>> the
>>> kernel submissions per context which would be tricky to figure out why
>>> that is
>>> required (should we be submitting from different contexts on different
>>> threads?)
>>
>> The sequence lock is just to keep last_seq up to date and last_seq just
>> exists because of amdgpu_cs_signal_semaphore.
>>
>> We want to get ride of that, so you can drop support for this altogether in
>> the new IOCTL.
>>
>>> I'd prefer to land this then refactor a new interface, I do wonder if
>>> maybe Marek
>>> would prefer just doing this all in Mesa and avoiding these APIs a bit
>>> more :-)
>>>
>>> Once I get the syncobjs in I might look at internally refactoring the
>>> code a bit more,
>>> then a new API.
>>
>> Actually I wanted to propose just to remove the old semaphore API, it was
>> never used by Mesa or any other open source user.
> radv uses it right now until we have syncobjs.

Ah, crap. Ok in this case we can never remove it.

Anyway, the new CS IOCTL shouldn't need any support for this any more.

Just basic submission of chunks and filling in chunks from libdrm 
objects should be enough as far as I can see.

If Marek doesn't have time or doesn't want to take care of it I can send 
a patch if you want.

Christian.

>
> So it should hang around.
>
> Dave.
On Tue, Jul 11, 2017 at 11:20 AM, Dave Airlie <airlied@gmail.com> wrote:
> On 11 July 2017 at 18:36, Christian König <deathsimple@vodafone.de> wrote:
>> Am 11.07.2017 um 08:49 schrieb Dave Airlie:
>>>
>>> On 7 July 2017 at 19:07, Christian König <deathsimple@vodafone.de> wrote:
>>>>
>>>> Hi Dave,
>>>>
>>>> on first glance that looks rather good to me, but there is one things I
>>>> don't really like and I strongly think Marek will absolutely agree on
>>>> that:
>>>> When we add a new CS function then let's get ride of all this
>>>> abstraction!
>>>>
>>>> The new function should get an amdgpu_device_handle and a list of chunks
>>>> to
>>>> submit, nothing else.
>>>>
>>>> When then provide helper functions to generate the chunks out of the
>>>> existing amdgpu_context_handle and amdgpu_bo_list_handle.
>>>>
>>>> That should be perfectly sufficient and extensible for future additions
>>>> as
>>>> well.
>>>
>>> Sounds tempting, but it a bit messier than it looks once I started
>>> digging into it.
>>>
>>> The main things I ran up against is the context sequence mutex protecting
>>> the
>>> kernel submissions per context which would be tricky to figure out why
>>> that is
>>> required (should we be submitting from different contexts on different
>>> threads?)
>>
>>
>> The sequence lock is just to keep last_seq up to date and last_seq just
>> exists because of amdgpu_cs_signal_semaphore.
>>
>> We want to get ride of that, so you can drop support for this altogether in
>> the new IOCTL.
>>
>>> I'd prefer to land this then refactor a new interface, I do wonder if
>>> maybe Marek
>>> would prefer just doing this all in Mesa and avoiding these APIs a bit
>>> more :-)
>>>
>>> Once I get the syncobjs in I might look at internally refactoring the
>>> code a bit more,
>>> then a new API.
>>
>>
>> Actually I wanted to propose just to remove the old semaphore API, it was
>> never used by Mesa or any other open source user.
>
> radv uses it right now until we have syncobjs.
>
> So it should hang around.

Dave, this may sound outrageous, but think about it: What would happen
if we removed the old semaphore API? Users would have to get a new
RADV after getting new libdrm_amdgpu. Is that really that big of a
deal? It happens with LLVM all the time and we've managed to cope with
that just fine.

Marek
On 12 July 2017 at 03:47, Marek Olšák <maraeo@gmail.com> wrote:
> On Tue, Jul 11, 2017 at 11:20 AM, Dave Airlie <airlied@gmail.com> wrote:
>> On 11 July 2017 at 18:36, Christian König <deathsimple@vodafone.de> wrote:
>>> Am 11.07.2017 um 08:49 schrieb Dave Airlie:
>>>>
>>>> On 7 July 2017 at 19:07, Christian König <deathsimple@vodafone.de> wrote:
>>>>>
>>>>> Hi Dave,
>>>>>
>>>>> on first glance that looks rather good to me, but there is one things I
>>>>> don't really like and I strongly think Marek will absolutely agree on
>>>>> that:
>>>>> When we add a new CS function then let's get ride of all this
>>>>> abstraction!
>>>>>
>>>>> The new function should get an amdgpu_device_handle and a list of chunks
>>>>> to
>>>>> submit, nothing else.
>>>>>
>>>>> When then provide helper functions to generate the chunks out of the
>>>>> existing amdgpu_context_handle and amdgpu_bo_list_handle.
>>>>>
>>>>> That should be perfectly sufficient and extensible for future additions
>>>>> as
>>>>> well.
>>>>
>>>> Sounds tempting, but it a bit messier than it looks once I started
>>>> digging into it.
>>>>
>>>> The main things I ran up against is the context sequence mutex protecting
>>>> the
>>>> kernel submissions per context which would be tricky to figure out why
>>>> that is
>>>> required (should we be submitting from different contexts on different
>>>> threads?)
>>>
>>>
>>> The sequence lock is just to keep last_seq up to date and last_seq just
>>> exists because of amdgpu_cs_signal_semaphore.
>>>
>>> We want to get ride of that, so you can drop support for this altogether in
>>> the new IOCTL.
>>>
>>>> I'd prefer to land this then refactor a new interface, I do wonder if
>>>> maybe Marek
>>>> would prefer just doing this all in Mesa and avoiding these APIs a bit
>>>> more :-)
>>>>
>>>> Once I get the syncobjs in I might look at internally refactoring the
>>>> code a bit more,
>>>> then a new API.
>>>
>>>
>>> Actually I wanted to propose just to remove the old semaphore API, it was
>>> never used by Mesa or any other open source user.
>>
>> radv uses it right now until we have syncobjs.
>>
>> So it should hang around.
>
> Dave, this may sound outrageous, but think about it: What would happen
> if we removed the old semaphore API? Users would have to get a new
> RADV after getting new libdrm_amdgpu. Is that really that big of a
> deal? It happens with LLVM all the time and we've managed to cope with
> that just fine.

Cope is the word, I don't think adopting unstable library APIs is an
option after you've provided stable library APIs.

LLVM is an outlier and totally the opposite of where we'd want to head.

I've considered not bothering using kernel syncobjs for non-shared
semaphores for a while, so we don't require a new kernel just to
provide a feature we already provide.

Dave.
On 11 July 2017 at 19:32, Christian König <deathsimple@vodafone.de> wrote:
> Am 11.07.2017 um 11:20 schrieb Dave Airlie:
>>
>> On 11 July 2017 at 18:36, Christian König <deathsimple@vodafone.de> wrote:
>>>
>>> Am 11.07.2017 um 08:49 schrieb Dave Airlie:
>>>>
>>>> On 7 July 2017 at 19:07, Christian König <deathsimple@vodafone.de>
>>>> wrote:
>>>>>
>>>>> Hi Dave,
>>>>>
>>>>> on first glance that looks rather good to me, but there is one things I
>>>>> don't really like and I strongly think Marek will absolutely agree on
>>>>> that:
>>>>> When we add a new CS function then let's get ride of all this
>>>>> abstraction!
>>>>>
>>>>> The new function should get an amdgpu_device_handle and a list of
>>>>> chunks
>>>>> to
>>>>> submit, nothing else.
>>>>>
>>>>> When then provide helper functions to generate the chunks out of the
>>>>> existing amdgpu_context_handle and amdgpu_bo_list_handle.
>>>>>
>>>>> That should be perfectly sufficient and extensible for future additions
>>>>> as
>>>>> well.
>>>>
>>>> Sounds tempting, but it a bit messier than it looks once I started
>>>> digging into it.
>>>>
>>>> The main things I ran up against is the context sequence mutex
>>>> protecting
>>>> the
>>>> kernel submissions per context which would be tricky to figure out why
>>>> that is
>>>> required (should we be submitting from different contexts on different
>>>> threads?)
>>>
>>>
>>> The sequence lock is just to keep last_seq up to date and last_seq just
>>> exists because of amdgpu_cs_signal_semaphore.
>>>
>>> We want to get ride of that, so you can drop support for this altogether
>>> in
>>> the new IOCTL.
>>>
>>>> I'd prefer to land this then refactor a new interface, I do wonder if
>>>> maybe Marek
>>>> would prefer just doing this all in Mesa and avoiding these APIs a bit
>>>> more :-)
>>>>
>>>> Once I get the syncobjs in I might look at internally refactoring the
>>>> code a bit more,
>>>> then a new API.
>>>
>>>
>>> Actually I wanted to propose just to remove the old semaphore API, it was
>>> never used by Mesa or any other open source user.
>>
>> radv uses it right now until we have syncobjs.
>
>
> Ah, crap. Ok in this case we can never remove it.
>
> Anyway, the new CS IOCTL shouldn't need any support for this any more.
>
> Just basic submission of chunks and filling in chunks from libdrm objects
> should be enough as far as I can see.
>
> If Marek doesn't have time or doesn't want to take care of it I can send a
> patch if you want.

I can take a look at it, I just won't have time until next week most likely.

Dave.
>
> I can take a look at it, I just won't have time until next week most likely.

I've taken a look, and it's seemingly more complicated than I'm
expecting I'd want to land in Mesa before 17.2 ships, I'd really
prefer to just push the new libdrm_amdgpu api from this patch. If I
have to port all the current radv code to the new API, I'll most
definitely get something wrong.

Adding the new API so far looks like
https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw

https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4
being the API, and whether it should take a uint32_t context id or
context handle left as an open question in the last patch in the
series.

However to hook this into radv or radeonsi will take a bit of
rewriting of a lot of code that is probably a bit more fragile than
I'd like for this sort of surgery at this point.

I'd actually suspect if we do want to proceed with this type of
interface, we might be better doing it all in common mesa code, and
maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
written here is mostly already doing.

Dave.
Am 17.07.2017 um 05:36 schrieb Dave Airlie:
>> I can take a look at it, I just won't have time until next week most likely.
> I've taken a look, and it's seemingly more complicated than I'm
> expecting I'd want to land in Mesa before 17.2 ships, I'd really
> prefer to just push the new libdrm_amdgpu api from this patch. If I
> have to port all the current radv code to the new API, I'll most
> definitely get something wrong.
>
> Adding the new API so far looks like
> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw
>
> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4
> being the API, and whether it should take a uint32_t context id or
> context handle left as an open question in the last patch in the
> series.

I would stick with the context handle, as far as I can see there isn't 
any value in using the uint32_t for this.

We just want to be able to send arbitrary chunks down into the kernel 
without libdrm_amdgpu involvement and/or the associated overhead of the 
extra loop and the semaphore handling.

So your "amdgpu/cs: add new raw cs submission interface just taking 
chunks" patch looks fine to me as far as I can tell.

As far as I can see the "amdgpu: refactor semaphore handling" patch is 
actually incorrect. We must hold the mutex while sending the CS down to 
the kernel, or otherwise "context->last_seq" won't be accurate.

> However to hook this into radv or radeonsi will take a bit of
> rewriting of a lot of code that is probably a bit more fragile than
> I'd like for this sort of surgery at this point.

Again, I can move over the existing Mesa stuff if you like.

> I'd actually suspect if we do want to proceed with this type of
> interface, we might be better doing it all in common mesa code, and
> maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
> written here is mostly already doing.

I want to stick with the other interfaces for now. No need to make it 
more complicated than it already is.

The CS stuff is the most performance-critical thing we have 
right now.

Christian.
On Sun, Jul 16, 2017 at 11:36 PM, Dave Airlie <airlied@gmail.com> wrote:
>>
>> I can take a look at it, I just won't have time until next week most likely.
>
> I've taken a look, and it's seemingly more complicated than I'm
> expecting I'd want to land in Mesa before 17.2 ships, I'd really
> prefer to just push the new libdrm_amdgpu api from this patch. If I
> have to port all the current radv code to the new API, I'll most
> definitely get something wrong.
>
> Adding the new API so far looks like
> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw
>
> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4
> being the API, and whether it should take a uint32_t context id or
> context handle left as an open question in the last patch in the
> series.
>
> However to hook this into radv or radeonsi will take a bit of
> rewriting of a lot of code that is probably a bit more fragile than
> I'd like for this sort of surgery at this point.
>
> I'd actually suspect if we do want to proceed with this type of
> interface, we might be better doing it all in common mesa code, and
> maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
> written here is mostly already doing.

Well, we plan to stop using the BO list ioctl. The interface has
bo_list_handle in it. Will we just set it to 0 when we add the chunk for
the inlined buffer list, i.e. what radeon has?

Marek
Am 17.07.2017 um 19:22 schrieb Marek Olšák:
> On Sun, Jul 16, 2017 at 11:36 PM, Dave Airlie <airlied@gmail.com> wrote:
>>> I can take a look at it, I just won't have time until next week most likely.
>> I've taken a look, and it's seemingly more complicated than I'm
>> expecting I'd want to land in Mesa before 17.2 ships, I'd really
>> prefer to just push the new libdrm_amdgpu api from this patch. If I
>> have to port all the current radv code to the new API, I'll most
>> definitely get something wrong.
>>
>> Adding the new API so far looks like
>> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw
>>
>> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4
>> being the API, and whether it should take a uint32_t context id or
>> context handle left as an open question in the last patch in the
>> series.
>>
>> However to hook this into radv or radeonsi will take a bit of
>> rewriting of a lot of code that is probably a bit more fragile than
>> I'd like for this sort of surgery at this point.
>>
>> I'd actually suspect if we do want to proceed with this type of
>> interface, we might be better doing it all in common mesa code, and
>> maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
>> written here is mostly already doing.
> Well, we plan to stop using the BO list ioctl. The interface has
> bo_list_handle in it. Will we just set it to 0 when add the chunk for
> the inlined buffer list i.e. what radeon has?

Yeah, exactly that was my thinking as well.

Christian.

> Marek
On 2017年07月18日 01:35, Christian König wrote:
> Am 17.07.2017 um 19:22 schrieb Marek Olšák:
>> On Sun, Jul 16, 2017 at 11:36 PM, Dave Airlie <airlied@gmail.com> wrote:
>>>> I can take a look at it, I just won't have time until next week 
>>>> most likely.
>>> I've taken a look, and it's seemingly more complicated than I'm
>>> expecting I'd want to land in Mesa before 17.2 ships, I'd really
>>> prefer to just push the new libdrm_amdgpu api from this patch. If I
>>> have to port all the current radv code to the new API, I'll most
>>> definitely get something wrong.
>>>
>>> Adding the new API so far looks like
>>> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw 
>>>
>>>
>>> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4 
>>>
>>> being the API, and whether it should take a uint32_t context id or
>>> context handle left as an open question in the last patch in the
>>> series.
>>>
>>> However to hook this into radv or radeonsi will take a bit of
>>> rewriting of a lot of code that is probably a bit more fragile than
>>> I'd like for this sort of surgery at this point.
>>>
>>> I'd actually suspect if we do want to proceed with this type of
>>> interface, we might be better doing it all in common mesa code, and
>>> maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
>>> written here is mostly already doing.
>> Well, we plan to stop using the BO list ioctl. The interface has
>> bo_list_handle in it. Will we just set it to 0 when add the chunk for
>> the inlined buffer list i.e. what radeon has?
>
> Yeah, exactly that was my thinking as well.
Just one thought: could we remove the BO list and not use it at all? Instead, 
we could expose an API like amdgpu_bo_make_resident with proper privilege for user 
mode. That way, we would obviously shorten the CS ioctl.

David Zhou
>
> Christian.
>
>> Marek
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On 18 July 2017 at 03:02, Christian König <deathsimple@vodafone.de> wrote:
> Am 17.07.2017 um 05:36 schrieb Dave Airlie:
>>>
>>> I can take a look at it, I just won't have time until next week most
>>> likely.
>>
>> I've taken a look, and it's seemingly more complicated than I'm
>> expecting I'd want to land in Mesa before 17.2 ships, I'd really
>> prefer to just push the new libdrm_amdgpu api from this patch. If I
>> have to port all the current radv code to the new API, I'll most
>> definitely get something wrong.
>>
>> Adding the new API so far looks like
>> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw
>>
>>
>> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4
>> being the API, and whether it should take a uint32_t context id or
>> context handle left as an open question in the last patch in the
>> series.
>
>
> I would stick with the context handle, as far as I can see there isn't any
> value in using the uint32_t for this.
>
> We just want to be able to send arbitrary chunks down into the kernel
> without libdrm_amdgpu involvement and/or the associated overhead of the
> extra loop and the semaphore handling.
>
> So your "amdgpu/cs: add new raw cs submission interface just taking chunks"
> patch looks fine to me as far as I can tell.
>
> As far as I can see the "amdgpu: refactor semaphore handling" patch is
> actually incorrect. We must hole the mutex while sending the CS down to the
> kernel, or otherwise "context->last_seq" won't be accurate.
>
>> However to hook this into radv or radeonsi will take a bit of
>> rewriting of a lot of code that is probably a bit more fragile than
>> I'd like for this sort of surgery at this point.
>
>
> Again, I can move over the existing Mesa stuff if you like.
>
>> I'd actually suspect if we do want to proceed with this type of
>> interface, we might be better doing it all in common mesa code, and
>> maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
>> written here is mostly already doing.
>
>
> I want to stick with the other interfaces for now. No need to make it more
> complicated than it already is.
>
> Only the CS stuff is the most performance critical and thing we have right
> now.

As I suspected this plan is full of traps.

So with the raw cs api I posted (using amdgpu_bo_list_handle instead), I ran
into two places the abstraction cuts me.

  CC       winsys/amdgpu/radv_amdgpu_cs.lo
winsys/amdgpu/radv_amdgpu_cs.c: In function ‘radv_amdgpu_cs_submit’:
winsys/amdgpu/radv_amdgpu_cs.c:1173:63: error: dereferencing pointer
to incomplete type ‘struct amdgpu_bo’
   chunk_data[i].fence_data.handle = request->fence_info.handle->handle;
                                                               ^~
winsys/amdgpu/radv_amdgpu_cs.c:1193:31: error: dereferencing pointer
to incomplete type ‘struct amdgpu_context’
    dep->ctx_id = info->context->id;

In order to do user fence chunk I need the actual bo handle not the
amdgpu wrapped one, we don't have an accessor method for that.

In order to do the dependencies chunks, I need a context id.

Now I suppose I can add chunk creation helpers to libdrm, but it does
seems like it breaks the future proof interface if we can't access the
details of a bunch of objects we want to pass through to the kernel
API.

Dave.
Am 18.07.2017 um 04:29 schrieb zhoucm1:
>
>
> On 2017年07月18日 01:35, Christian König wrote:
>> Am 17.07.2017 um 19:22 schrieb Marek Olšák:
>>> On Sun, Jul 16, 2017 at 11:36 PM, Dave Airlie <airlied@gmail.com> 
>>> wrote:
>>>>> I can take a look at it, I just won't have time until next week 
>>>>> most likely.
>>>> I've taken a look, and it's seemingly more complicated than I'm
>>>> expecting I'd want to land in Mesa before 17.2 ships, I'd really
>>>> prefer to just push the new libdrm_amdgpu api from this patch. If I
>>>> have to port all the current radv code to the new API, I'll most
>>>> definitely get something wrong.
>>>>
>>>> Adding the new API so far looks like
>>>> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw 
>>>>
>>>>
>>>> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4 
>>>>
>>>> being the API, and whether it should take a uint32_t context id or
>>>> context handle left as an open question in the last patch in the
>>>> series.
>>>>
>>>> However to hook this into radv or radeonsi will take a bit of
>>>> rewriting of a lot of code that is probably a bit more fragile than
>>>> I'd like for this sort of surgery at this point.
>>>>
>>>> I'd actually suspect if we do want to proceed with this type of
>>>> interface, we might be better doing it all in common mesa code, and
>>>> maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
>>>> written here is mostly already doing.
>>> Well, we plan to stop using the BO list ioctl. The interface has
>>> bo_list_handle in it. Will we just set it to 0 when add the chunk for
>>> the inlined buffer list i.e. what radeon has?
>>
>> Yeah, exactly that was my thinking as well.
> Just one thought, Could we remove and not use bo list at all? Instead, 
> we expose api like amdgpu_bo_make_resident with proper privilege to 
> user mode? That way, we will obviously short CS ioctl.

Not really, but I'm working on per process resources now. E.g. you 
specify a flag that a resource is always bound to the process and always 
used instead of specifying it every time.

The tricky part is that you then can't export that resource to other 
processes, so it only works for buffers/textures which aren't exchanged 
with anybody.

Regards,
Christian.

>
> David Zhou
>>
>> Christian.
>>
>>> Marek
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Hi Dave,

If you just add "get" functions for what you need from amdgpu objects,
that should be fine.

Marek

On Mon, Jul 17, 2017 at 11:00 PM, Dave Airlie <airlied@gmail.com> wrote:
> On 18 July 2017 at 03:02, Christian König <deathsimple@vodafone.de> wrote:
>> Am 17.07.2017 um 05:36 schrieb Dave Airlie:
>>>>
>>>> I can take a look at it, I just won't have time until next week most
>>>> likely.
>>>
>>> I've taken a look, and it's seemingly more complicated than I'm
>>> expecting I'd want to land in Mesa before 17.2 ships, I'd really
>>> prefer to just push the new libdrm_amdgpu api from this patch. If I
>>> have to port all the current radv code to the new API, I'll most
>>> definitely get something wrong.
>>>
>>> Adding the new API so far looks like
>>> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw
>>>
>>>
>>> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4
>>> being the API, and whether it should take a uint32_t context id or
>>> context handle left as an open question in the last patch in the
>>> series.
>>
>>
>> I would stick with the context handle, as far as I can see there isn't any
>> value in using the uint32_t for this.
>>
>> We just want to be able to send arbitrary chunks down into the kernel
>> without libdrm_amdgpu involvement and/or the associated overhead of the
>> extra loop and the semaphore handling.
>>
>> So your "amdgpu/cs: add new raw cs submission interface just taking chunks"
>> patch looks fine to me as far as I can tell.
>>
>> As far as I can see the "amdgpu: refactor semaphore handling" patch is
>> actually incorrect. We must hole the mutex while sending the CS down to the
>> kernel, or otherwise "context->last_seq" won't be accurate.
>>
>>> However to hook this into radv or radeonsi will take a bit of
>>> rewriting of a lot of code that is probably a bit more fragile than
>>> I'd like for this sort of surgery at this point.
>>
>>
>> Again, I can move over the existing Mesa stuff if you like.
>>
>>> I'd actually suspect if we do want to proceed with this type of
>>> interface, we might be better doing it all in common mesa code, and
>>> maybe bypassing libdrm_amdgpu altogether, which I suppose the API I've
>>> written here is mostly already doing.
>>
>>
>> I want to stick with the other interfaces for now. No need to make it more
>> complicated than it already is.
>>
>> Only the CS stuff is the most performance critical and thing we have right
>> now.
>
> As I suspected this plan is full of traps.
>
> So with the raw cs api I posted (using amdgpu_bo_list_handle instead), I ran
> into two places the abstraction cuts me.
>
>   CC       winsys/amdgpu/radv_amdgpu_cs.lo
> winsys/amdgpu/radv_amdgpu_cs.c: In function ‘radv_amdgpu_cs_submit’:
> winsys/amdgpu/radv_amdgpu_cs.c:1173:63: error: dereferencing pointer
> to incomplete type ‘struct amdgpu_bo’
>    chunk_data[i].fence_data.handle = request->fence_info.handle->handle;
>                                                                ^~
> winsys/amdgpu/radv_amdgpu_cs.c:1193:31: error: dereferencing pointer
> to incomplete type ‘struct amdgpu_context’
>     dep->ctx_id = info->context->id;
>
> In order to do user fence chunk I need the actual bo handle not the
> amdgpu wrapped one, we don't have an accessor method for that.
>
> In order to do the dependencies chunks, I need a context id.
>
> Now I suppose I can add chunk creation helpers to libdrm, but it does
> seems like it breaks the future proof interface if we can't access the
> details of a bunch of objects we want to pass through to the kernel
> API.
>
> Dave.
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
On 2017年07月18日 21:57, Christian König wrote:
> Am 18.07.2017 um 04:29 schrieb zhoucm1:
>>
>>
>> On 2017年07月18日 01:35, Christian König wrote:
>>> Am 17.07.2017 um 19:22 schrieb Marek Olšák:
>>>> On Sun, Jul 16, 2017 at 11:36 PM, Dave Airlie <airlied@gmail.com> 
>>>> wrote:
>>>>>> I can take a look at it, I just won't have time until next week 
>>>>>> most likely.
>>>>> I've taken a look, and it's seemingly more complicated than I'm
>>>>> expecting I'd want to land in Mesa before 17.2 ships, I'd really
>>>>> prefer to just push the new libdrm_amdgpu api from this patch. If I
>>>>> have to port all the current radv code to the new API, I'll most
>>>>> definitely get something wrong.
>>>>>
>>>>> Adding the new API so far looks like
>>>>> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw 
>>>>>
>>>>>
>>>>> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4 
>>>>>
>>>>> being the API, and whether it should take a uint32_t context id or
>>>>> context handle left as an open question in the last patch in the
>>>>> series.
>>>>>
>>>>> However to hook this into radv or radeonsi will take a bit of
>>>>> rewriting of a lot of code that is probably a bit more fragile than
>>>>> I'd like for this sort of surgery at this point.
>>>>>
>>>>> I'd actually suspect if we do want to proceed with this type of
>>>>> interface, we might be better doing it all in common mesa code, and
>>>>> maybe bypassing libdrm_amdgpu altogether, which I suppose the API 
>>>>> I've
>>>>> written here is mostly already doing.
>>>> Well, we plan to stop using the BO list ioctl. The interface has
>>>> bo_list_handle in it. Will we just set it to 0 when adding the chunk for
>>>> the inlined buffer list, i.e., what radeon has?
>>>
>>> Yeah, exactly that was my thinking as well.
>> Just one thought: could we remove the BO list and not use it at all? 
>> Instead, we could expose an API like amdgpu_bo_make_resident with proper 
>> privilege to user mode? That way, we would obviously shorten the CS ioctl.
>
> Not really, but I'm working on per process resources now. E.g. you 
> specify a flag that a resource is always bound to the process and 
> always used instead of specifying it every time.
>
> The tricky part is that you then can't export that resource to other 
> processes, so it only works for buffers/textures which aren't 
> exchanged with anybody.
Yeah, making a BO resident obviously doesn't work for imported/foreign 
BOs, but imported BOs are always pinned when exported to others, aren't 
they?
Combined with your per-process resources, we could give it a try, I think.

Regards,
David Zhou
>
> Regards,
> Christian.
>
>>
>> David Zhou
>>>
>>> Christian.
>>>
>>>> Marek
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
On 18/07/17 09:55 PM, zhoucm1 wrote:
> On 2017年07月18日 21:57, Christian König wrote:
>> Am 18.07.2017 um 04:29 schrieb zhoucm1:
>>> On 2017年07月18日 01:35, Christian König wrote:
>>>> Am 17.07.2017 um 19:22 schrieb Marek Olšák:
>>>>> On Sun, Jul 16, 2017 at 11:36 PM, Dave Airlie <airlied@gmail.com> 
>>>>> wrote:
>>>>>>> I can take a look at it, I just won't have time until next week 
>>>>>>> most likely.
>>>>>> I've taken a look, and it's seemingly more complicated than I'm
>>>>>> expecting I'd want to land in Mesa before 17.2 ships, I'd really
>>>>>> prefer to just push the new libdrm_amdgpu api from this patch. If I
>>>>>> have to port all the current radv code to the new API, I'll most
>>>>>> definitely get something wrong.
>>>>>>
>>>>>> Adding the new API so far looks like
>>>>>> https://cgit.freedesktop.org/~airlied/drm/log/?h=drm-amdgpu-cs-submit-raw 
>>>>>>
>>>>>>
>>>>>> https://cgit.freedesktop.org/~airlied/drm/commit/?h=drm-amdgpu-cs-submit-raw&id=e7f85d0ca617fa41e72624780c9035df132e23c4 
>>>>>>
>>>>>> being the API, and whether it should take a uint32_t context id or
>>>>>> context handle left as an open question in the last patch in the
>>>>>> series.
>>>>>>
>>>>>> However to hook this into radv or radeonsi will take a bit of
>>>>>> rewriting of a lot of code that is probably a bit more fragile than
>>>>>> I'd like for this sort of surgery at this point.
>>>>>>
>>>>>> I'd actually suspect if we do want to proceed with this type of
>>>>>> interface, we might be better doing it all in common mesa code, and
>>>>>> maybe bypassing libdrm_amdgpu altogether, which I suppose the API 
>>>>>> I've
>>>>>> written here is mostly already doing.
>>>>> Well, we plan to stop using the BO list ioctl. The interface has
>>>>> bo_list_handle in it. Will we just set it to 0 when adding the chunk for
>>>>> the inlined buffer list, i.e., what radeon has?
>>>>
>>>> Yeah, exactly that was my thinking as well.
>>> Just one thought: could we remove the BO list and not use it at all? 
>>> Instead, we could expose an API like amdgpu_bo_make_resident with proper 
>>> privilege to user mode? That way, we would obviously shorten the CS ioctl.
>>
>> Not really, but I'm working on per process resources now. E.g. you 
>> specify a flag that a resource is always bound to the process and 
>> always used instead of specifying it every time.
>>
>> The tricky part is that you then can't export that resource to other 
>> processes, so it only works for buffers/textures which aren't 
>> exchanged with anybody.
> Yeah, making a BO resident obviously doesn't work for imported/foreign 
> BOs, but imported BOs are always pinned when exported to others, aren't 
> they?

Only while being shared between different GPUs, not between different 
processes on the same GPU (e.g. DRI3). We'll still need to use some kind 
of BO list for that case.
On Thu, Jul 6, 2017 at 3:17 AM, Dave Airlie <airlied@gmail.com> wrote:
> From: Dave Airlie <airlied@redhat.com>
>
> This adds kernel semaphore support to the command submission
> interface in what should be a backwards compatible manner,
> it adds a new command submission API.
>
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
>  amdgpu/amdgpu.h    |  29 ++++++++++++-
>  amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 138 insertions(+), 9 deletions(-)
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 1901fa8..649b66e 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -369,6 +369,16 @@ struct amdgpu_cs_request {
>         struct amdgpu_cs_fence_info fence_info;
>  };
>
> +struct amdgpu_cs_request_syncobj {
> +       /*
> +        *
> +        */

Did you mean to fill in the comment here?

Also, is this interface relevant with the raw CS API?

Marek

> +       uint32_t number_in_syncobj;
> +       uint32_t number_out_syncobj;
> +       uint32_t *in_syncobj;
> +       uint32_t *out_syncobj;
On Fri, Sep 1, 2017 at 5:36 PM, Marek Olšák <maraeo@gmail.com> wrote:
> On Thu, Jul 6, 2017 at 3:17 AM, Dave Airlie <airlied@gmail.com> wrote:
>> From: Dave Airlie <airlied@redhat.com>
>>
>> This adds kernel semaphore support to the command submission
>> interface in what should be a backwards compatible manner,
>> it adds a new command submission API.
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>> ---
>>  amdgpu/amdgpu.h    |  29 ++++++++++++-
>>  amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
>>  2 files changed, 138 insertions(+), 9 deletions(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 1901fa8..649b66e 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -369,6 +369,16 @@ struct amdgpu_cs_request {
>>         struct amdgpu_cs_fence_info fence_info;
>>  };
>>
>> +struct amdgpu_cs_request_syncobj {
>> +       /*
>> +        *
>> +        */
>
> Did you mean to fill in the comment here?
>
> Also, is this interface relevant with the raw CS API?

Additionally, where can I find the kernel patch for this?

Thanks,
Marek
On 2 September 2017 at 01:36, Marek Olšák <maraeo@gmail.com> wrote:
> On Thu, Jul 6, 2017 at 3:17 AM, Dave Airlie <airlied@gmail.com> wrote:
>> From: Dave Airlie <airlied@redhat.com>
>>
>> This adds kernel semaphore support to the command submission
>> interface in what should be a backwards compatible manner,
>> it adds a new command submission API.
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>> ---
>>  amdgpu/amdgpu.h    |  29 ++++++++++++-
>>  amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
>>  2 files changed, 138 insertions(+), 9 deletions(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 1901fa8..649b66e 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -369,6 +369,16 @@ struct amdgpu_cs_request {
>>         struct amdgpu_cs_fence_info fence_info;
>>  };
>>
>> +struct amdgpu_cs_request_syncobj {
>> +       /*
>> +        *
>> +        */
>
> Did you mean to fill in the comment here?
>
> Also, is this interface relevant with the raw CS API?

This patch isn't required anymore. The raw CS interface replaced it.

Dave.
On 5 September 2017 at 05:23, Marek Olšák <maraeo@gmail.com> wrote:
> On Fri, Sep 1, 2017 at 5:36 PM, Marek Olšák <maraeo@gmail.com> wrote:
>> On Thu, Jul 6, 2017 at 3:17 AM, Dave Airlie <airlied@gmail.com> wrote:
>>> From: Dave Airlie <airlied@redhat.com>
>>>
>>> This adds kernel semaphore support to the command submission
>>> interface in what should be a backwards compatible manner,
>>> it adds a new command submission API.
>>>
>>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>>> ---
>>>  amdgpu/amdgpu.h    |  29 ++++++++++++-
>>>  amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
>>>  2 files changed, 138 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>>> index 1901fa8..649b66e 100644
>>> --- a/amdgpu/amdgpu.h
>>> +++ b/amdgpu/amdgpu.h
>>> @@ -369,6 +369,16 @@ struct amdgpu_cs_request {
>>>         struct amdgpu_cs_fence_info fence_info;
>>>  };
>>>
>>> +struct amdgpu_cs_request_syncobj {
>>> +       /*
>>> +        *
>>> +        */
>>
>> Did you mean to fill in the comment here?
>>
>> Also, is this interface relevant with the raw CS API?
>
> Additionally, where can I find the kernel patch for this?

In the kernel. :-)

https://lists.freedesktop.org/archives/dri-devel/2017-June/143206.html

was the one that was committed,

660e855813f78b7fe63ff1ebc4f2ca07d94add0b is the git sha1.

Dave.